HKUDS · LarFii · Feb 24, 2026 · Feb 16, 2026 · Feb 17, 2026 · Feb 18, 2026
diff --git a/README.md b/README.md
@@ -336,7 +336,7 @@ async def main():
     # Create RAGAnything configuration
     config = RAGAnythingConfig(
         working_dir="./rag_storage",
-        parser="mineru",  # Parser selection: mineru or docling
+        parser="mineru",  # Parser selection: mineru, docling, or paddleocr
         parse_method="auto",  # Parse method: auto, ocr, or txt
         enable_image_processing=True,
         enable_table_processing=True,
@@ -1047,7 +1047,7 @@ Create a `.env` file (refer to `.env.example`):
 OPENAI_API_KEY=your_openai_api_key
 OPENAI_BASE_URL=your_base_url  # Optional
 OUTPUT_DIR=./output             # Default output directory for parsed documents
-PARSER=mineru                   # Parser selection: mineru or docling
+PARSER=mineru                   # Parser selection: mineru, docling, or paddleocr
 PARSE_METHOD=auto              # Parse method: auto, ocr, or txt
 ```
 
@@ -1070,6 +1070,21 @@ RAGAnything now supports multiple parsers, each with specific advantages:
 - Better document structure preservation
 - Native support for multiple Office formats
 
+#### PaddleOCR Parser
+- OCR-focused parser for images and PDFs
+- Produces text blocks compatible with existing `content_list` processing
+- Supports optional Office/TXT/MD parsing by converting to PDF first
+
+Install PaddleOCR parser extras:
+
+```bash
+pip install -e ".[paddleocr]"
+# or
+uv sync --extra paddleocr
+```
+
+> **Note**: PaddleOCR also requires `paddlepaddle`, whose CPU/GPU package varies by platform. Install it by following the official guide: https://www.paddlepaddle.org.cn/install/quick
+
 ### MinerU Configuration
 
 ```bash
@@ -1091,15 +1106,15 @@ await rag.process_document_complete(
     file_path="document.pdf",
     output_dir="./output/",
     parse_method="auto",          # or "ocr", "txt"
-    parser="mineru"               # Optional: "mineru" or "docling"
+    parser="mineru"               # Optional: "mineru", "docling", or "paddleocr"
 )
 
 # Advanced parsing configuration with special parameters
 await rag.process_document_complete(
     file_path="document.pdf",
     output_dir="./output/",
     parse_method="auto",          # Parsing method: "auto", "ocr", "txt"
-    parser="mineru",              # Parser selection: "mineru" or "docling"
+    parser="mineru",              # Parser selection: "mineru", "docling", or "paddleocr"
 
     # MinerU special parameters - all supported kwargs:
     lang="ch",                   # Document language for OCR optimization (e.g., "ch", "en", "ja")
@@ -1119,7 +1134,7 @@ await rag.process_document_complete(
 )
 ```
 
-> **Note**: MinerU 2.0 no longer uses the `magic-pdf.json` configuration file. All settings are now passed as command-line parameters or function arguments. RAG-Anything now supports multiple document parsers - you can choose between MinerU and Docling based on your needs.
+> **Note**: MinerU 2.0 no longer uses the `magic-pdf.json` configuration file. All settings are now passed as command-line parameters or function arguments. RAG-Anything supports multiple document parsers, including MinerU, Docling, and PaddleOCR.
 
 ### Processing Requirements
 
@@ -1128,6 +1143,7 @@ Different content types require specific optional dependencies:
 - **Office Documents** (.doc, .docx, .ppt, .pptx, .xls, .xlsx): Install [LibreOffice](https://www.libreoffice.org/download/download/)
 - **Extended Image Formats** (.bmp, .tiff, .gif, .webp): Install with `pip install raganything[image]`
 - **Text Files** (.txt, .md): Install with `pip install raganything[text]`
+- **PaddleOCR Parser** (`parser="paddleocr"`): Install with `pip install raganything[paddleocr]`, then install `paddlepaddle` for your platform
 
 > **📋 Quick Install**: Use `pip install raganything[all]` to enable all format support (Python dependencies only - LibreOffice still needs separate installation)
 

diff --git a/docs/batch_processing.md b/docs/batch_processing.md
@@ -24,6 +24,9 @@ pip install raganything[all]
 
 # Required for batch processing
 pip install tqdm
+
+# Optional: needed only when using the PaddleOCR parser (parser_type="paddleocr")
+pip install raganything[paddleocr]
 ```
 
 ## Usage
@@ -35,7 +38,7 @@ from raganything.batch_parser import BatchParser
 
 # Create batch parser
 batch_parser = BatchParser(
-    parser_type="mineru",  # or "docling"
+    parser_type="mineru",  # alternatives: "docling", "paddleocr"
     max_workers=4,
     show_progress=True,
     timeout_per_file=300,
@@ -123,6 +126,7 @@ python -m raganything.batch_parser examples/sample_docs/ --output ./output --wor
 
 # With specific parser
 python -m raganything.batch_parser examples/sample_docs/ --parser mineru --method auto
+python -m raganything.batch_parser examples/sample_docs/ --parser paddleocr --method ocr
 
 # Without progress bar
 python -m raganything.batch_parser examples/sample_docs/ --output ./output --no-progress
@@ -148,7 +152,7 @@ PARSER_OUTPUT_DIR=./parsed_output
 
 ### BatchParser Parameters
 
-- **parser_type**: `"mineru"` or `"docling"` (default: `"mineru"`)
+- **parser_type**: `"mineru"`, `"docling"`, or `"paddleocr"` (default: `"mineru"`)
 - **max_workers**: Number of parallel workers (default: `4`)
 - **show_progress**: Show progress bar (default: `True`)
 - **timeout_per_file**: Timeout per file in seconds (default: `300`)

diff --git a/env.example b/env.example
@@ -42,7 +42,7 @@ OLLAMA_EMULATING_MODEL_TAG=latest
 ### Parser Configuration
 # PARSE_METHOD=auto
 # OUTPUT_DIR=./output
-# PARSER=mineru
+# PARSER=mineru  # Options: mineru, docling, paddleocr
 # DISPLAY_CONTENT_STATS=true
 
 ### Multimodal Processing Configuration

diff --git a/examples/batch_dry_run_example.py b/examples/batch_dry_run_example.py
@@ -7,6 +7,7 @@
   - pip install:
       python examples/batch_dry_run_example.py examples/sample_docs --parser mineru
       python examples/batch_dry_run_example.py examples/sample_docs/projects examples/sample_docs/web --parser docling
+      python examples/batch_dry_run_example.py examples/sample_docs --parser paddleocr
   - uv install:
       uv run python examples/batch_dry_run_example.py examples/sample_docs --parser mineru --recursive
       uv run python examples/batch_dry_run_example.py examples/sample_docs --parser mineru --no-recursive
@@ -22,7 +23,7 @@ def main() -> int:
     parser.add_argument("paths", nargs="+", help="File paths or directories to scan")
     parser.add_argument(
         "--parser",
-        choices=["mineru", "docling"],
+        choices=["mineru", "docling", "paddleocr"],
         default="mineru",
         help="Parser to use for file-type support",
     )

diff --git a/examples/raganything_example.py b/examples/raganything_example.py
@@ -1,9 +1,9 @@
 #!/usr/bin/env python
 """
-Example script demonstrating the integration of MinerU parser with RAGAnything
+Example script demonstrating parser integration with RAGAnything
 
 This example shows how to:
-1. Process documents with RAGAnything using MinerU parser
+1. Process documents with RAGAnything using configurable parsers
 2. Perform pure text queries using aquery() method
 3. Perform multimodal queries with specific multimodal content using aquery_with_multimodal() method
 4. Handle different types of multimodal content (tables, equations) in queries
@@ -108,7 +108,7 @@ async def process_with_rag(
         # Create RAGAnything configuration
         config = RAGAnythingConfig(
             working_dir=working_dir or "./rag_storage",
-            parser=parser,  # Parser selection: mineru or docling
+            parser=parser,  # Parser selection: mineru, docling, or paddleocr
             parse_method="auto",  # Parse method: auto, ocr, or txt
             enable_image_processing=True,
             enable_table_processing=True,
@@ -289,7 +289,8 @@ def main():
     parser.add_argument(
         "--parser",
         default=os.getenv("PARSER", "mineru"),
-        help="Optional base URL for API",
+        choices=["mineru", "docling", "paddleocr"],
+        help="Parser selection",
     )
 
     args = parser.parse_args()

diff --git a/pyproject.toml b/pyproject.toml
@@ -31,6 +31,10 @@ dependencies = [
 image = ["Pillow>=10.0.0"]
 text = ["reportlab>=4.0.0"]
 office = []  # Requires LibreOffice (external program)
+paddleocr = [
+    "paddleocr>=2.7.0",
+    "pypdfium2>=4.25.0",
+]
 markdown = [
     "markdown>=3.4.0",
     "weasyprint>=60.0",
@@ -39,9 +43,11 @@ markdown = [
 all = [
     "Pillow>=10.0.0",
     "reportlab>=4.0.0",
+    "paddleocr>=2.7.0",
+    "pypdfium2>=4.25.0",
     "markdown>=3.4.0",
     "weasyprint>=60.0",
-    "pygments>=2.10.0"
+    "pygments>=2.10.0",
 ]
 
 [project.urls]

diff --git a/raganything/batch_parser.py b/raganything/batch_parser.py
@@ -15,7 +15,7 @@
 
 from tqdm import tqdm
 
-from .parser import MineruParser, DoclingParser
+from .parser import SUPPORTED_PARSERS, get_parser
 
 
 @dataclass
@@ -70,7 +70,7 @@ def __init__(
         Initialize batch parser
 
         Args:
-            parser_type: Type of parser to use ("mineru" or "docling")
+            parser_type: Type of parser to use ("mineru", "docling", or "paddleocr")
             max_workers: Maximum number of parallel workers
             show_progress: Whether to show progress bars
             timeout_per_file: Timeout in seconds for each file
@@ -83,12 +83,10 @@ def __init__(
         self.logger = logging.getLogger(__name__)
 
         # Initialize parser
-        if parser_type == "mineru":
-            self.parser = MineruParser()
-        elif parser_type == "docling":
-            self.parser = DoclingParser()
-        else:
-            raise ValueError(f"Unsupported parser type: {parser_type}")
+        try:
+            self.parser = get_parser(parser_type)
+        except ValueError as exc:
+            raise ValueError(f"Unsupported parser type: {parser_type}") from exc
 
         # Check parser installation (optional)
         if not skip_installation_check:
@@ -384,7 +382,7 @@ def main():
     parser.add_argument("--output", "-o", required=True, help="Output directory")
     parser.add_argument(
         "--parser",
-        choices=["mineru", "docling"],
+        choices=list(SUPPORTED_PARSERS),
         default="mineru",
         help="Parser to use",
     )

diff --git a/raganything/config.py b/raganything/config.py
@@ -27,7 +27,7 @@ class RAGAnythingConfig:
     """Default output directory for parsed content."""
 
     parser: str = field(default=get_env_value("PARSER", "mineru", str))
-    """Parser selection: 'mineru' or 'docling'."""
+    """Parser selection: 'mineru', 'docling', or 'paddleocr'."""
 
     display_content_stats: bool = field(
         default=get_env_value("DISPLAY_CONTENT_STATS", True, bool)