diff --git a/README.md b/README.md
index 6535f108a..ad8368ab7 100644
--- a/README.md
+++ b/README.md
@@ -336,7 +336,7 @@ async def main():
     # Create RAGAnything configuration
     config = RAGAnythingConfig(
         working_dir="./rag_storage",
-        parser="mineru",  # Parser selection: mineru or docling
+        parser="mineru",  # Parser selection: mineru, docling, or paddleocr
         parse_method="auto",  # Parse method: auto, ocr, or txt
         enable_image_processing=True,
         enable_table_processing=True,
@@ -1047,7 +1047,7 @@ Create a `.env` file (refer to `.env.example`):
 OPENAI_API_KEY=your_openai_api_key
 OPENAI_BASE_URL=your_base_url  # Optional
 OUTPUT_DIR=./output             # Default output directory for parsed documents
-PARSER=mineru                   # Parser selection: mineru or docling
+PARSER=mineru                   # Parser selection: mineru, docling, or paddleocr
 PARSE_METHOD=auto              # Parse method: auto, ocr, or txt
 ```
 
@@ -1070,6 +1070,21 @@ RAGAnything now supports multiple parsers, each with specific advantages:
 - Better document structure preservation
 - Native support for multiple Office formats
 
+#### PaddleOCR Parser
+- OCR-focused parser for images and PDFs
+- Produces text-only blocks (`type: "text"` with `page_idx`) compatible with existing `content_list` processing
+- Also handles Office/TXT/MD files by converting them to PDF first, then running OCR on the rendered pages
+
+Install PaddleOCR parser extras:
+
+```bash
+pip install -e ".[paddleocr]"
+# or
+uv sync --extra paddleocr
+```
+
+> **Note**: PaddleOCR also requires `paddlepaddle` (the CPU/GPU package varies by platform). Install it by following the official guide: https://www.paddlepaddle.org.cn/install/quick
+
 ### MinerU Configuration
 
 ```bash
@@ -1091,7 +1106,7 @@ await rag.process_document_complete(
     file_path="document.pdf",
     output_dir="./output/",
     parse_method="auto",          # or "ocr", "txt"
-    parser="mineru"               # Optional: "mineru" or "docling"
+    parser="mineru"               # Optional: "mineru", "docling", or "paddleocr"
 )
 
 # Advanced parsing configuration with special parameters
@@ -1099,7 +1114,7 @@ await rag.process_document_complete(
     file_path="document.pdf",
     output_dir="./output/",
     parse_method="auto",          # Parsing method: "auto", "ocr", "txt"
-    parser="mineru",              # Parser selection: "mineru" or "docling"
+    parser="mineru",              # Parser selection: "mineru", "docling", or "paddleocr"
 
     # MinerU special parameters - all supported kwargs:
     lang="ch",                   # Document language for OCR optimization (e.g., "ch", "en", "ja")
@@ -1119,7 +1134,7 @@ await rag.process_document_complete(
 )
 ```
 
-> **Note**: MinerU 2.0 no longer uses the `magic-pdf.json` configuration file. All settings are now passed as command-line parameters or function arguments. RAG-Anything now supports multiple document parsers - you can choose between MinerU and Docling based on your needs.
+> **Note**: MinerU 2.0 no longer uses the `magic-pdf.json` configuration file. All settings are now passed as command-line parameters or function arguments. RAG-Anything supports multiple document parsers, including MinerU, Docling, and PaddleOCR.
 
 ### Processing Requirements
 
@@ -1128,6 +1143,7 @@ Different content types require specific optional dependencies:
 - **Office Documents** (.doc, .docx, .ppt, .pptx, .xls, .xlsx): Install [LibreOffice](https://www.libreoffice.org/download/download/)
 - **Extended Image Formats** (.bmp, .tiff, .gif, .webp): Install with `pip install raganything[image]`
 - **Text Files** (.txt, .md): Install with `pip install raganything[text]`
+- **PaddleOCR Parser** (`parser="paddleocr"`): Install with `pip install raganything[paddleocr]`, then install `paddlepaddle` for your platform
 
 > **📋 Quick Install**: Use `pip install raganything[all]` to enable all format support (Python dependencies only - LibreOffice still needs separate installation)
 
diff --git a/docs/batch_processing.md b/docs/batch_processing.md
index fe51ab146..4005e689d 100644
--- a/docs/batch_processing.md
+++ b/docs/batch_processing.md
@@ -24,6 +24,9 @@ pip install raganything[all]
 
 # Required for batch processing
 pip install tqdm
+
+# Optional: only needed for the PaddleOCR parser (also requires paddlepaddle)
+pip install raganything[paddleocr]
 ```
 
 ## Usage
@@ -35,7 +38,7 @@ from raganything.batch_parser import BatchParser
 
 # Create batch parser
 batch_parser = BatchParser(
-    parser_type="mineru",  # or "docling"
+    parser_type="mineru",  # or "docling" / "paddleocr"
     max_workers=4,
     show_progress=True,
     timeout_per_file=300,
@@ -123,6 +126,7 @@ python -m raganything.batch_parser examples/sample_docs/ --output ./output --wor
 
 # With specific parser
 python -m raganything.batch_parser examples/sample_docs/ --parser mineru --method auto
+python -m raganything.batch_parser examples/sample_docs/ --parser paddleocr --method ocr
 
 # Without progress bar
 python -m raganything.batch_parser examples/sample_docs/ --output ./output --no-progress
@@ -148,7 +152,7 @@ PARSER_OUTPUT_DIR=./parsed_output
 
 ### BatchParser Parameters
 
-- **parser_type**: `"mineru"` or `"docling"` (default: `"mineru"`)
+- **parser_type**: `"mineru"`, `"docling"`, or `"paddleocr"` (default: `"mineru"`)
 - **max_workers**: Number of parallel workers (default: `4`)
 - **show_progress**: Show progress bar (default: `True`)
 - **timeout_per_file**: Timeout per file in seconds (default: `300`)
diff --git a/env.example b/env.example
index 7e857166b..e6967e8c7 100644
--- a/env.example
+++ b/env.example
@@ -42,7 +42,7 @@ OLLAMA_EMULATING_MODEL_TAG=latest
 ### Parser Configuration
 # PARSE_METHOD=auto
 # OUTPUT_DIR=./output
-# PARSER=mineru
+# PARSER=mineru  # Options: mineru, docling, paddleocr
 # DISPLAY_CONTENT_STATS=true
 
 ### Multimodal Processing Configuration
diff --git a/examples/batch_dry_run_example.py b/examples/batch_dry_run_example.py
index 707ab5668..35d229f6d 100644
--- a/examples/batch_dry_run_example.py
+++ b/examples/batch_dry_run_example.py
@@ -7,6 +7,7 @@
   - pip install:
       python examples/batch_dry_run_example.py examples/sample_docs --parser mineru
       python examples/batch_dry_run_example.py examples/sample_docs/projects examples/sample_docs/web --parser docling
+      python examples/batch_dry_run_example.py examples/sample_docs --parser paddleocr
   - uv install:
       uv run python examples/batch_dry_run_example.py examples/sample_docs --parser mineru --recursive
       uv run python examples/batch_dry_run_example.py examples/sample_docs --parser mineru --no-recursive
@@ -22,7 +23,7 @@ def main() -> int:
     parser.add_argument("paths", nargs="+", help="File paths or directories to scan")
     parser.add_argument(
         "--parser",
-        choices=["mineru", "docling"],
+        choices=["mineru", "docling", "paddleocr"],
         default="mineru",
         help="Parser to use for file-type support",
     )
diff --git a/examples/raganything_example.py b/examples/raganything_example.py
index 5c22eeadc..c5e8e9e89 100644
--- a/examples/raganything_example.py
+++ b/examples/raganything_example.py
@@ -1,9 +1,9 @@
 #!/usr/bin/env python
 """
-Example script demonstrating the integration of MinerU parser with RAGAnything
+Example script demonstrating parser integration with RAGAnything
 
 This example shows how to:
-1. Process documents with RAGAnything using MinerU parser
+1. Process documents with RAGAnything using configurable parsers
 2. Perform pure text queries using aquery() method
 3. Perform multimodal queries with specific multimodal content using aquery_with_multimodal() method
 4. Handle different types of multimodal content (tables, equations) in queries
@@ -108,7 +108,7 @@ async def process_with_rag(
         # Create RAGAnything configuration
         config = RAGAnythingConfig(
             working_dir=working_dir or "./rag_storage",
-            parser=parser,  # Parser selection: mineru or docling
+            parser=parser,  # Parser selection: mineru, docling, or paddleocr
             parse_method="auto",  # Parse method: auto, ocr, or txt
             enable_image_processing=True,
             enable_table_processing=True,
@@ -289,7 +289,8 @@ def main():
     parser.add_argument(
         "--parser",
         default=os.getenv("PARSER", "mineru"),
-        help="Optional base URL for API",
+        choices=["mineru", "docling", "paddleocr"],
+        help="Parser selection",
     )
 
     args = parser.parse_args()
diff --git a/pyproject.toml b/pyproject.toml
index b8847d8d2..e612fa4e4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,6 +31,10 @@ dependencies = [
 image = ["Pillow>=10.0.0"]
 text = ["reportlab>=4.0.0"]
 office = []  # Requires LibreOffice (external program)
+paddleocr = [
+    "paddleocr>=2.7.0",
+    "pypdfium2>=4.25.0",
+]
 markdown = [
     "markdown>=3.4.0",
     "weasyprint>=60.0",
@@ -39,9 +43,11 @@ markdown = [
 all = [
     "Pillow>=10.0.0",
     "reportlab>=4.0.0",
+    "paddleocr>=2.7.0",
+    "pypdfium2>=4.25.0",
     "markdown>=3.4.0",
     "weasyprint>=60.0",
-    "pygments>=2.10.0"
+    "pygments>=2.10.0",
 ]
 
 [project.urls]
@@ -73,3 +79,8 @@ version = {attr = "raganything.__version__"}
 
 [tool.ruff]
 target-version = "py310"
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = ["test*.py"]
+norecursedirs = ["examples"]
diff --git a/raganything/batch_parser.py b/raganything/batch_parser.py
index 7112a5831..2111ec501 100644
--- a/raganything/batch_parser.py
+++ b/raganything/batch_parser.py
@@ -15,7 +15,7 @@
 
 from tqdm import tqdm
 
-from .parser import MineruParser, DoclingParser
+from .parser import SUPPORTED_PARSERS, get_parser
 
 
 @dataclass
@@ -70,7 +70,7 @@ def __init__(
         Initialize batch parser
 
         Args:
-            parser_type: Type of parser to use ("mineru" or "docling")
+            parser_type: Type of parser to use ("mineru", "docling", or "paddleocr")
             max_workers: Maximum number of parallel workers
             show_progress: Whether to show progress bars
             timeout_per_file: Timeout in seconds for each file
@@ -83,12 +83,10 @@ def __init__(
         self.logger = logging.getLogger(__name__)
 
         # Initialize parser
-        if parser_type == "mineru":
-            self.parser = MineruParser()
-        elif parser_type == "docling":
-            self.parser = DoclingParser()
-        else:
-            raise ValueError(f"Unsupported parser type: {parser_type}")
+        try:
+            self.parser = get_parser(parser_type)
+        except ValueError as exc:
+            raise ValueError(f"Unsupported parser type: {parser_type}") from exc
 
         # Check parser installation (optional)
         if not skip_installation_check:
@@ -384,7 +382,7 @@ def main():
     parser.add_argument("--output", "-o", required=True, help="Output directory")
     parser.add_argument(
         "--parser",
-        choices=["mineru", "docling"],
+        choices=list(SUPPORTED_PARSERS),
         default="mineru",
         help="Parser to use",
     )
diff --git a/raganything/config.py b/raganything/config.py
index bfce0ac27..ec6f88c06 100644
--- a/raganything/config.py
+++ b/raganything/config.py
@@ -27,7 +27,7 @@ class RAGAnythingConfig:
     """Default output directory for parsed content."""
 
     parser: str = field(default=get_env_value("PARSER", "mineru", str))
-    """Parser selection: 'mineru' or 'docling'."""
+    """Parser selection: 'mineru', 'docling', or 'paddleocr'."""
 
     display_content_stats: bool = field(
         default=get_env_value("DISPLAY_CONTENT_STATS", True, bool)
diff --git a/raganything/parser.py b/raganything/parser.py
index a07443e24..7595e0bd3 100644
--- a/raganything/parser.py
+++ b/raganything/parser.py
@@ -26,6 +26,7 @@
     Union,
     Tuple,
     Any,
+    Iterator,
     TypeVar,
 )
 
@@ -1740,12 +1741,354 @@ def check_installation(self) -> bool:
             return False
 
 
+class PaddleOCRParser(Parser):
+    """PaddleOCR document parser with optional PDF page rendering support."""
+
+    def __init__(self, default_lang: str = "en") -> None:
+        super().__init__()
+        self.default_lang = default_lang
+        self._ocr_instances: Dict[str, Any] = {}
+
+    def _require_paddleocr(self):
+        try:
+            from paddleocr import PaddleOCR
+        except ImportError as exc:
+            raise ImportError(
+                "PaddleOCR parser requires optional dependency `paddleocr`. "
+                "Install with `pip install -e '.[paddleocr]'` or "
+                "`uv sync --extra paddleocr`. "
+                "PaddleOCR also needs `paddlepaddle`; install it from "
+                "https://www.paddlepaddle.org.cn/install/quick."
+            ) from exc
+        return PaddleOCR
+
+    def _get_ocr(self, lang: Optional[str] = None):
+        PaddleOCR = self._require_paddleocr()
+        language = (lang or self.default_lang).strip() or self.default_lang
+        cached = self._ocr_instances.get(language)
+        if cached is not None:
+            return cached
+
+        init_candidates = [
+            {"lang": language, "show_log": False},
+            {"lang": language},
+            {},
+        ]
+        last_exception = None
+        for candidate_kwargs in init_candidates:
+            try:
+                ocr = PaddleOCR(**candidate_kwargs)
+                self._ocr_instances[language] = ocr
+                return ocr
+            except Exception as exc:  # pragma: no cover - defensive fallback
+                last_exception = exc
+                continue
+
+        raise RuntimeError(
+            f"Unable to initialize PaddleOCR for language '{language}': {last_exception}"
+        )
+
+    def _extract_text_lines(self, result: Any) -> List[str]:
+        lines: List[str] = []
+
+        def append_text(text: str) -> None:
+            clean_text = text.strip()
+            if clean_text:
+                lines.append(clean_text)
+
+        if isinstance(result, str):
+            append_text(result)
+            return lines
+
+        def visit(node: Any) -> None:
+            if node is None:
+                return
+
+            if hasattr(node, "to_dict"):
+                try:
+                    visit(node.to_dict())
+                    return
+                except Exception:
+                    pass
+
+            if isinstance(node, dict):
+                rec_texts = node.get("rec_texts")
+                if isinstance(rec_texts, list):
+                    for item in rec_texts:
+                        if isinstance(item, str):
+                            append_text(item)
+                        else:
+                            visit(item)
+
+                text_value = node.get("text")
+                if isinstance(text_value, str):
+                    append_text(text_value)
+
+                texts_value = node.get("texts")
+                if isinstance(texts_value, list):
+                    for item in texts_value:
+                        if isinstance(item, str):
+                            append_text(item)
+                        else:
+                            visit(item)
+
+                # Skip the keys already handled above so their text is not collected
+                # twice. Repeated text elsewhere is kept on purpose (no content dedup).
+                for key, value in node.items():
+                    if key in {"rec_texts", "text", "texts"}:
+                        continue
+                    visit(value)
+                return
+
+            if isinstance(node, (list, tuple)):
+                if node and all(isinstance(item, str) for item in node):
+                    for item in node:
+                        append_text(item)
+                    return
+
+                if (
+                    len(node) >= 2
+                    and isinstance(node[1], (list, tuple))
+                    and len(node[1]) >= 1
+                    and isinstance(node[1][0], str)
+                ):
+                    append_text(node[1][0])
+                    return
+
+                if (
+                    len(node) >= 1
+                    and isinstance(node[0], str)
+                    and (len(node) == 1 or isinstance(node[1], (int, float)))
+                ):
+                    append_text(node[0])
+                    return
+
+                for item in node:
+                    visit(item)
+                return
+
+            if isinstance(node, str):
+                append_text(node)
+                return
+
+        visit(result)
+        return lines
+
+    def _ocr_input(
+        self, input_data: Any, lang: Optional[str] = None, cls_enabled: bool = True
+    ) -> List[str]:
+        ocr = self._get_ocr(lang=lang)
+
+        if hasattr(ocr, "ocr"):
+            try:
+                result = ocr.ocr(input_data, cls=cls_enabled)
+            except TypeError:
+                result = ocr.ocr(input_data)
+            return self._extract_text_lines(result)
+
+        if hasattr(ocr, "predict"):
+            result = ocr.predict(input_data)
+            return self._extract_text_lines(result)
+
+        raise RuntimeError(
+            "Unsupported PaddleOCR API: expected `ocr` or `predict` method."
+        )
+
+    def _extract_pdf_page_inputs(self, pdf_path: Path) -> Iterator[Tuple[int, Any]]:
+        try:
+            import pypdfium2 as pdfium
+        except ImportError as exc:
+            raise ImportError(
+                "PDF parsing with parser='paddleocr' requires `pypdfium2`. "
+                "Install with `pip install -e '.[paddleocr]'` or "
+                "`uv sync --extra paddleocr`."
+            ) from exc
+
+        pdf = pdfium.PdfDocument(str(pdf_path))
+        try:
+            total_pages = len(pdf)
+            for page_idx in range(total_pages):
+                page = pdf[page_idx]
+                try:
+                    rendered = page.render(scale=2.0)
+                    if hasattr(rendered, "to_pil"):
+                        yield (page_idx, rendered.to_pil())
+                    elif hasattr(rendered, "to_numpy"):
+                        yield (page_idx, rendered.to_numpy())
+                    else:
+                        raise RuntimeError(
+                            "Unsupported rendered page format from pypdfium2."
+                        )
+                finally:
+                    if hasattr(page, "close"):
+                        page.close()
+        finally:
+            if hasattr(pdf, "close"):
+                pdf.close()
+
+    def _ocr_rendered_page(
+        self, rendered_page: Any, lang: Optional[str] = None, cls_enabled: bool = True
+    ) -> List[str]:
+        if hasattr(rendered_page, "save"):
+            temp_image_path: Optional[Path] = None
+            try:
+                with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp:
+                    temp_image_path = Path(temp.name)
+                rendered_page.save(temp_image_path)
+                return self._ocr_input(
+                    str(temp_image_path), lang=lang, cls_enabled=cls_enabled
+                )
+            finally:
+                if temp_image_path is not None and temp_image_path.exists():
+                    try:
+                        temp_image_path.unlink()
+                    except Exception:
+                        pass
+
+        return self._ocr_input(rendered_page, lang=lang, cls_enabled=cls_enabled)
+
+    def parse_pdf(
+        self,
+        pdf_path: Union[str, Path],
+        output_dir: Optional[str] = None,
+        method: str = "auto",
+        lang: Optional[str] = None,
+        **kwargs,
+    ) -> List[Dict[str, Any]]:
+        del output_dir, method
+        pdf_path = Path(pdf_path)
+        if not pdf_path.exists():
+            raise FileNotFoundError(f"PDF file does not exist: {pdf_path}")
+
+        cls_enabled = kwargs.get("cls", True)
+        content_list: List[Dict[str, Any]] = []
+        page_inputs = self._extract_pdf_page_inputs(pdf_path)
+        try:
+            for page_idx, rendered_page in page_inputs:
+                page_lines = self._ocr_rendered_page(
+                    rendered_page, lang=lang, cls_enabled=cls_enabled
+                )
+                for text in page_lines:
+                    content_list.append(
+                        {"type": "text", "text": text, "page_idx": int(page_idx)}
+                    )
+        finally:
+            # Ensure we promptly release PDF handles even if OCR fails mid-stream.
+            close = getattr(page_inputs, "close", None)
+            if callable(close):
+                close()
+        return content_list
+
+    def parse_image(
+        self,
+        image_path: Union[str, Path],
+        output_dir: Optional[str] = None,
+        lang: Optional[str] = None,
+        **kwargs,
+    ) -> List[Dict[str, Any]]:
+        del output_dir
+        image_path = Path(image_path)
+        if not image_path.exists():
+            raise FileNotFoundError(f"Image file does not exist: {image_path}")
+
+        ext = image_path.suffix.lower()
+        if ext not in self.IMAGE_FORMATS:
+            raise ValueError(
+                f"Unsupported image format: {ext}. Supported formats: {', '.join(sorted(self.IMAGE_FORMATS))}"
+            )
+
+        cls_enabled = kwargs.get("cls", True)
+        page_idx = int(kwargs.get("page_idx", 0))
+        text_lines = self._ocr_input(
+            str(image_path), lang=lang, cls_enabled=cls_enabled
+        )
+        return [
+            {"type": "text", "text": text, "page_idx": page_idx} for text in text_lines
+        ]
+
+    def parse_office_doc(
+        self,
+        doc_path: Union[str, Path],
+        output_dir: Optional[str] = None,
+        lang: Optional[str] = None,
+        **kwargs,
+    ) -> List[Dict[str, Any]]:
+        pdf_path = self.convert_office_to_pdf(doc_path, output_dir)
+        return self.parse_pdf(
+            pdf_path=pdf_path, output_dir=output_dir, lang=lang, **kwargs
+        )
+
+    def parse_text_file(
+        self,
+        text_path: Union[str, Path],
+        output_dir: Optional[str] = None,
+        lang: Optional[str] = None,
+        **kwargs,
+    ) -> List[Dict[str, Any]]:
+        pdf_path = self.convert_text_to_pdf(text_path, output_dir)
+        return self.parse_pdf(
+            pdf_path=pdf_path, output_dir=output_dir, lang=lang, **kwargs
+        )
+
+    def parse_document(
+        self,
+        file_path: Union[str, Path],
+        method: str = "auto",
+        output_dir: Optional[str] = None,
+        lang: Optional[str] = None,
+        **kwargs,
+    ) -> List[Dict[str, Any]]:
+        del method
+        file_path = Path(file_path)
+        if not file_path.exists():
+            raise FileNotFoundError(f"File does not exist: {file_path}")
+
+        ext = file_path.suffix.lower()
+        if ext == ".pdf":
+            return self.parse_pdf(file_path, output_dir, lang=lang, **kwargs)
+        if ext in self.IMAGE_FORMATS:
+            return self.parse_image(file_path, output_dir, lang=lang, **kwargs)
+        if ext in self.OFFICE_FORMATS:
+            return self.parse_office_doc(file_path, output_dir, lang=lang, **kwargs)
+        if ext in self.TEXT_FORMATS:
+            return self.parse_text_file(file_path, output_dir, lang=lang, **kwargs)
+
+        raise ValueError(
+            f"Unsupported file format: {ext}. "
+            "PaddleOCR parser supports PDF, image, office, and text formats."
+        )
+
+    def check_installation(self) -> bool:
+        try:
+            self._require_paddleocr()
+            return True
+        except ImportError:
+            return False
+
+
+SUPPORTED_PARSERS = ("mineru", "docling", "paddleocr")
+
+
+def get_parser(parser_type: str) -> Parser:
+    parser_name = (parser_type or "mineru").strip().lower()
+    if parser_name == "mineru":
+        return MineruParser()
+    if parser_name == "docling":
+        return DoclingParser()
+    if parser_name == "paddleocr":
+        return PaddleOCRParser()
+    raise ValueError(
+        f"Unsupported parser type: {parser_type}. "
+        f"Supported parsers: {', '.join(SUPPORTED_PARSERS)}"
+    )
+
+
 def main():
     """
     Main function to run the document parser from command line
     """
     parser = argparse.ArgumentParser(
-        description="Parse documents using MinerU 2.0 or Docling"
+        description="Parse documents using MinerU 2.0, Docling, or PaddleOCR"
     )
     parser.add_argument("file_path", help="Path to the document to parse")
     parser.add_argument("--output", "-o", help="Output directory path")
@@ -1805,7 +2148,7 @@ def main():
     )
     parser.add_argument(
         "--parser",
-        choices=["mineru", "docling"],
+        choices=list(SUPPORTED_PARSERS),
         default="mineru",
         help="Parser selection",
     )
@@ -1818,7 +2161,7 @@ def main():
 
     # Check installation if requested
     if args.check:
-        doc_parser = DoclingParser() if args.parser == "docling" else MineruParser()
+        doc_parser = get_parser(args.parser)
         if doc_parser.check_installation():
             print(f"✅ {args.parser.title()} is properly installed")
             return 0
@@ -1828,7 +2171,7 @@ def main():
 
     try:
         # Parse the document
-        doc_parser = DoclingParser() if args.parser == "docling" else MineruParser()
+        doc_parser = get_parser(args.parser)
         content_list = doc_parser.parse_document(
             file_path=args.file_path,
             method=args.method,
diff --git a/raganything/processor.py b/raganything/processor.py
index 7c6026286..048af1c73 100644
--- a/raganything/processor.py
+++ b/raganything/processor.py
@@ -12,7 +12,7 @@
 from pathlib import Path
 
 from raganything.base import DocStatus
-from raganything.parser import MineruParser, DoclingParser, MineruExecutionError
+from raganything.parser import MineruParser, MineruExecutionError, get_parser
 from raganything.utils import (
     separate_content,
     insert_text_content,
@@ -332,9 +332,10 @@ async def parse_document(
         ext = file_path.suffix.lower()
 
         try:
-            doc_parser = (
-                DoclingParser() if self.config.parser == "docling" else MineruParser()
-            )
+            doc_parser = getattr(self, "doc_parser", None)
+            if doc_parser is None:
+                doc_parser = get_parser(self.config.parser)
+                self.doc_parser = doc_parser
 
             # Log parser and method information
             self.logger.info(
@@ -361,21 +362,23 @@ async def parse_document(
                 ".webp",
             ]:
                 self.logger.info("Detected image file, using parser for images...")
-                # Use the selected parser's image parsing capability
-                if hasattr(doc_parser, "parse_image"):
+                try:
                     content_list = await asyncio.to_thread(
                         doc_parser.parse_image,
                         image_path=file_path,
                         output_dir=output_dir,
                         **kwargs,
                     )
-                else:
+                except NotImplementedError:
                     # Fallback to MinerU for image parsing if current parser doesn't support it
                     self.logger.warning(
                         f"{self.config.parser} parser doesn't support image parsing, falling back to MinerU"
                     )
-                    content_list = MineruParser().parse_image(
-                        image_path=file_path, output_dir=output_dir, **kwargs
+                    content_list = await asyncio.to_thread(
+                        MineruParser().parse_image,
+                        image_path=file_path,
+                        output_dir=output_dir,
+                        **kwargs,
                     )
             elif ext in [
                 ".doc",
@@ -573,7 +576,7 @@ async def _process_multimodal_content_individual(
             try:
                 content_type = item.get("type", "unknown")
                 self.logger.info(
-                    f"Processing item {i+1}/{len(multimodal_items)}: {content_type} content"
+                    f"Processing item {i + 1}/{len(multimodal_items)}: {content_type} content"
                 )
 
                 # Select appropriate processor
diff --git a/raganything/raganything.py b/raganything/raganything.py
index 321369d6b..3af8da6ce 100644
--- a/raganything/raganything.py
+++ b/raganything/raganything.py
@@ -33,7 +33,7 @@
 from raganything.processor import ProcessorMixin
 from raganything.batch import BatchMixin
 from raganything.utils import get_processor_supports
-from raganything.parser import MineruParser, DoclingParser
+from raganything.parser import MineruParser, SUPPORTED_PARSERS, get_parser
 
 # Import specialized processors
 from raganything.modalprocessors import (
@@ -109,9 +109,7 @@ def __post_init__(self):
         self.logger = logger
 
         # Set up document parser
-        self.doc_parser = (
-            DoclingParser() if self.config.parser == "docling" else MineruParser()
-        )
+        self.doc_parser = get_parser(self.config.parser)
 
         # Register close method for cleanup
         atexit.register(self.close)
@@ -554,6 +552,10 @@ def get_processor_info(self) -> Dict[str, Any]:
         """Get processor information"""
         base_info = {
             "mineru_installed": MineruParser.check_installation(MineruParser()),
+            "parser_installation": {
+                parser_name: get_parser(parser_name).check_installation()
+                for parser_name in SUPPORTED_PARSERS
+            },
             "config": self.get_config_info(),
             "models": {
                 "llm_model": "External function"
diff --git a/requirements.txt b/requirements.txt
index 9cd2d0e83..cbb59d951 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,6 +8,7 @@ tqdm
 # Note: Optional dependencies are now defined in setup.py extras_require:
 # - [image]: Pillow>=10.0.0 (for BMP, TIFF, GIF, WebP format conversion)
 # - [text]: reportlab>=4.0.0 (for TXT, MD to PDF conversion)
+# - [paddleocr]: paddleocr + pypdfium2 (for parser='paddleocr')
 # - [office]: requires LibreOffice (external program, not Python package)
 # - [all]: includes all optional dependencies
 #
diff --git a/setup.py b/setup.py
index 3c2277aa1..2675229c9 100644
--- a/setup.py
+++ b/setup.py
@@ -63,7 +63,16 @@ def read_requirements():
     "image": ["Pillow>=10.0.0"],  # For image format conversion (BMP, TIFF, GIF, WebP)
     "text": ["reportlab>=4.0.0"],  # For text file to PDF conversion (TXT, MD)
     "office": [],  # Office document processing requires LibreOffice (external program)
-    "all": ["Pillow>=10.0.0", "reportlab>=4.0.0"],  # All optional features
+    "paddleocr": ["paddleocr>=2.7.0", "pypdfium2>=4.25.0"],  # PaddleOCR parser
+    "all": [
+        "Pillow>=10.0.0",
+        "reportlab>=4.0.0",
+        "paddleocr>=2.7.0",
+        "pypdfium2>=4.25.0",
+        "markdown>=3.4.0",
+        "weasyprint>=60.0",
+        "pygments>=2.10.0",
+    ],  # All optional features
     "markdown": [
         "markdown>=3.4.0",
         "weasyprint>=60.0",
diff --git a/tests/testpaddleocr_parser.py b/tests/testpaddleocr_parser.py
new file mode 100644
index 000000000..cc9b04090
--- /dev/null
+++ b/tests/testpaddleocr_parser.py
@@ -0,0 +1,164 @@
+import importlib
+import sys
+
+import pytest
+
+import raganything.parser as parser_module
+from raganything.parser import PaddleOCRParser, SUPPORTED_PARSERS, get_parser
+
+
+def test_supported_parsers_include_paddleocr():
+    assert "paddleocr" in SUPPORTED_PARSERS
+
+
+def test_get_parser_returns_paddleocr_parser():
+    parser = get_parser("paddleocr")
+    assert isinstance(parser, PaddleOCRParser)
+
+
+def test_get_parser_rejects_unknown_parser():
+    with pytest.raises(ValueError, match="Unsupported parser type"):
+        get_parser("unknown-parser")
+
+
+def test_parser_module_import_does_not_import_paddleocr():
+    sys.modules.pop("paddleocr", None)
+    importlib.reload(parser_module)
+    assert "paddleocr" not in sys.modules
+
+
+def test_check_installation_false_when_dependency_missing(monkeypatch):
+    parser = PaddleOCRParser()
+
+    def missing_dependency():
+        raise ImportError("missing paddleocr")
+
+    monkeypatch.setattr(parser, "_require_paddleocr", missing_dependency)
+    assert parser.check_installation() is False
+
+
+def test_check_installation_true_when_pdf_renderer_missing(monkeypatch):
+    parser = PaddleOCRParser()
+
+    monkeypatch.setattr(parser, "_require_paddleocr", lambda: object())
+
+    import builtins
+
+    real_import = builtins.__import__
+
+    def fake_import(name, globals=None, locals=None, fromlist=(), level=0):
+        if name == "pypdfium2":
+            raise ImportError("missing pypdfium2")
+        return real_import(name, globals, locals, fromlist, level)
+
+    monkeypatch.setattr(builtins, "__import__", fake_import)
+
+    assert parser.check_installation() is True
+
+
+def test_parse_pdf_raises_import_error_when_pdf_renderer_missing(monkeypatch, tmp_path):
+    parser = PaddleOCRParser()
+    fake_pdf = tmp_path / "sample.pdf"
+    fake_pdf.write_bytes(b"%PDF-1.4\n")
+
+    monkeypatch.setattr(parser, "_require_paddleocr", lambda: object())
+
+    import builtins
+
+    real_import = builtins.__import__
+
+    def fake_import(name, globals=None, locals=None, fromlist=(), level=0):
+        if name == "pypdfium2":
+            raise ImportError("missing pypdfium2")
+        return real_import(name, globals, locals, fromlist, level)
+
+    monkeypatch.setattr(builtins, "__import__", fake_import)
+
+    with pytest.raises(ImportError, match="pypdfium2"):
+        parser.parse_pdf(fake_pdf)
+
+
+def test_parse_image_raises_import_error_with_install_hint(monkeypatch, tmp_path):
+    parser = PaddleOCRParser()
+    fake_image = tmp_path / "sample.png"
+    fake_image.write_bytes(b"not-a-real-image")
+
+    def missing_dependency():
+        raise ImportError("missing paddleocr")
+
+    monkeypatch.setattr(parser, "_require_paddleocr", missing_dependency)
+
+    with pytest.raises(ImportError, match="paddleocr"):
+        parser.parse_image(fake_image)
+
+
+def test_parse_image_returns_content_list_schema(monkeypatch, tmp_path):
+    parser = PaddleOCRParser()
+    fake_image = tmp_path / "sample.png"
+    fake_image.write_bytes(b"image-bytes")
+
+    class FakeOCR:
+        def ocr(self, input_data, cls=True):
+            return [
+                [
+                    [[[0, 0], [1, 0], [1, 1], [0, 1]], ("First line", 0.99)],
+                    [[[0, 2], [1, 2], [1, 3], [0, 3]], ("Second line", 0.95)],
+                ]
+            ]
+
+    monkeypatch.setattr(parser, "_get_ocr", lambda lang=None: FakeOCR())
+
+    content_list = parser.parse_image(fake_image, page_idx=7)
+
+    assert content_list == [
+        {"type": "text", "text": "First line", "page_idx": 7},
+        {"type": "text", "text": "Second line", "page_idx": 7},
+    ]
+
+
+def test_parse_image_preserves_repeated_ocr_lines(monkeypatch, tmp_path):
+    parser = PaddleOCRParser()
+    fake_image = tmp_path / "sample.png"
+    fake_image.write_bytes(b"image-bytes")
+
+    class FakeOCR:
+        def ocr(self, input_data, cls=True):
+            return [
+                [
+                    [[[0, 0], [1, 0], [1, 1], [0, 1]], ("Same", 0.99)],
+                    [[[0, 2], [1, 2], [1, 3], [0, 3]], ("Same", 0.95)],
+                ]
+            ]
+
+    monkeypatch.setattr(parser, "_get_ocr", lambda lang=None: FakeOCR())
+
+    content_list = parser.parse_image(fake_image, page_idx=1)
+
+    assert content_list == [
+        {"type": "text", "text": "Same", "page_idx": 1},
+        {"type": "text", "text": "Same", "page_idx": 1},
+    ]
+
+
+def test_parse_pdf_assigns_page_index(monkeypatch, tmp_path):
+    parser = PaddleOCRParser()
+    fake_pdf = tmp_path / "sample.pdf"
+    fake_pdf.write_bytes(b"%PDF-1.4\n")
+
+    monkeypatch.setattr(
+        parser,
+        "_extract_pdf_page_inputs",
+        lambda pdf_path: [(0, "page0"), (1, "page1")],
+    )
+    monkeypatch.setattr(
+        parser,
+        "_ocr_rendered_page",
+        lambda rendered_page, lang=None, cls_enabled=True: [f"{rendered_page}-text"],
+    )
+
+    content_list = parser.parse_pdf(fake_pdf)
+
+    assert content_list == [
+        {"type": "text", "text": "page0-text", "page_idx": 0},
+        {"type": "text", "text": "page1-text", "page_idx": 1},
+    ]
diff --git a/tests/test_parser_wiring.py b/tests/test_parser_wiring.py
new file mode 100644
index 000000000..13bb7ae9c
--- /dev/null
+++ b/tests/test_parser_wiring.py
@@ -0,0 +1,132 @@
+import pytest
+
+from raganything.batch_parser import BatchParser
+
+
+def test_batch_parser_uses_paddleocr_parser():
+    batch_parser = BatchParser(
+        parser_type="paddleocr",
+        show_progress=False,
+        skip_installation_check=True,
+    )
+    assert batch_parser.parser.__class__.__name__ == "PaddleOCRParser"
+
+
+def test_raganything_initializes_selected_parser(monkeypatch, tmp_path):
+    pytest.importorskip("lightrag")
+
+    import raganything.raganything as rag_module
+    from raganything.config import RAGAnythingConfig
+
+    class StubParser:
+        def check_installation(self):
+            return True
+
+    captured = {}
+
+    def fake_get_parser(parser_name):
+        captured["parser_name"] = parser_name
+        return StubParser()
+
+    monkeypatch.setattr(rag_module, "get_parser", fake_get_parser)
+    monkeypatch.setattr(rag_module.atexit, "register", lambda *args, **kwargs: None)
+
+    config = RAGAnythingConfig(
+        working_dir=str(tmp_path / "rag_workdir"),
+        parser="paddleocr",
+    )
+    rag = rag_module.RAGAnything(config=config)
+
+    assert captured["parser_name"] == "paddleocr"
+    assert isinstance(rag.doc_parser, StubParser)
+
+
+@pytest.mark.asyncio
+async def test_processor_parse_document_uses_selected_parser(monkeypatch, tmp_path):
+    import raganything.processor as processor_module
+
+    class FakeLogger:
+        def info(self, *args, **kwargs):
+            pass
+
+        def warning(self, *args, **kwargs):
+            pass
+
+        def error(self, *args, **kwargs):
+            pass
+
+        def debug(self, *args, **kwargs):
+            pass
+
+    class FakeParser:
+        def parse_pdf(self, **kwargs):
+            return [{"type": "text", "text": "parsed by fake parser", "page_idx": 0}]
+
+        def parse_image(self, **kwargs):
+            return [{"type": "text", "text": "image parsed", "page_idx": 0}]
+
+        def parse_office_doc(self, **kwargs):
+            return [{"type": "text", "text": "office parsed", "page_idx": 0}]
+
+        def parse_document(self, **kwargs):
+            return [{"type": "text", "text": "generic parsed", "page_idx": 0}]
+
+    selected = {"calls": 0}
+
+    def fake_get_parser(parser_name):
+        selected["parser_name"] = parser_name
+        selected["calls"] += 1
+        return FakeParser()
+
+    monkeypatch.setattr(processor_module, "get_parser", fake_get_parser)
+
+    class DummyProcessor(processor_module.ProcessorMixin):
+        pass
+
+    dummy = DummyProcessor()
+    dummy.config = type(
+        "Config",
+        (),
+        {
+            "parser": "paddleocr",
+            "parser_output_dir": str(tmp_path / "output"),
+            "parse_method": "auto",
+            "display_content_stats": False,
+            "use_full_path": False,
+        },
+    )()
+    dummy.logger = FakeLogger()
+    dummy.parse_cache = None
+
+    async def fake_store_cached_result(*args, **kwargs):
+        return None
+
+    monkeypatch.setattr(
+        DummyProcessor,
+        "_store_cached_result",
+        fake_store_cached_result,
+        raising=False,
+    )
+    monkeypatch.setattr(
+        DummyProcessor,
+        "_generate_content_based_doc_id",
+        lambda self, content_list: "doc-fixed",
+        raising=False,
+    )
+
+    fake_pdf = tmp_path / "sample.pdf"
+    fake_pdf.write_bytes(b"%PDF-1.4\n")
+
+    content_list, doc_id = await dummy.parse_document(str(fake_pdf))
+    content_list_2, doc_id_2 = await dummy.parse_document(str(fake_pdf))
+
+    assert selected["parser_name"] == "paddleocr"
+    assert selected["calls"] == 1
+    assert doc_id == "doc-fixed"
+    assert doc_id_2 == "doc-fixed"
+    assert content_list == [
+        {"type": "text", "text": "parsed by fake parser", "page_idx": 0}
+    ]
+    assert content_list_2 == [
+        {"type": "text", "text": "parsed by fake parser", "page_idx": 0}
+    ]