From d2caaf60b3b2a82bc04d8d464f8d0c5213f790e6 Mon Sep 17 00:00:00 2001 From: HomunMage Date: Sat, 16 Nov 2024 21:45:34 +0800 Subject: [PATCH] pdf to image --- Productivity/Converter/PDF2Text/OCR/README.md | 120 ++++++++++++ .../Converter/PDF2Text/OCR/easy_ocr.py | 38 ++++ .../Converter/PDF2Text/PDF2Image/README.md | 77 ++++++++ .../{ => PDF2Text/PDF2Image}/pdf2img.py | 0 Productivity/Converter/PDF2Text/README.md | 25 +++ Productivity/Converter/PDF2Text/browse.html | 182 ++++++++++++++++++ Productivity/Converter/Video2Gif.md | 67 +++++++ _includes | 2 +- _layouts | 2 +- 9 files changed, 511 insertions(+), 2 deletions(-) create mode 100644 Productivity/Converter/PDF2Text/OCR/README.md create mode 100644 Productivity/Converter/PDF2Text/OCR/easy_ocr.py create mode 100644 Productivity/Converter/PDF2Text/PDF2Image/README.md rename Productivity/Converter/{ => PDF2Text/PDF2Image}/pdf2img.py (100%) create mode 100644 Productivity/Converter/PDF2Text/README.md create mode 100644 Productivity/Converter/PDF2Text/browse.html create mode 100644 Productivity/Converter/Video2Gif.md diff --git a/Productivity/Converter/PDF2Text/OCR/README.md b/Productivity/Converter/PDF2Text/OCR/README.md new file mode 100644 index 0000000..1424f9a --- /dev/null +++ b/Productivity/Converter/PDF2Text/OCR/README.md @@ -0,0 +1,120 @@ +# OCR + + +## easy ocr + +check lang codes at https://www.jaided.ai/easyocr/ + +
+ Loading content... +
+ + + +## other OCR with GPU + +If you're looking for open-source AI-based OCR solutions that can leverage your NVIDIA GPU and process Traditional Chinese (zh-TW), here are some excellent options: + +--- + +### 1. **Tesseract OCR with GPU Support** + - **Description**: Tesseract is a well-established open-source OCR engine that supports multiple languages, including Traditional Chinese. It does not natively support GPU acceleration, but you can pair it with GPU-accelerated pre-processing tools like OpenCV or other AI models to boost performance. + - **Key Features**: + - High customization and language support (including Traditional Chinese). + - Works well for clean, printed text. + - **Limitations**: + - Relatively slow compared to modern AI-based OCR solutions. + - **Setup**: + - Install `tesseract-ocr` and the Traditional Chinese language data package (`chi_tra`). + - Can be used with Python via the `pytesseract` library. + - **GPU Option**: + - Pre-process images using GPU-accelerated libraries like OpenCV with CUDA. + +--- + +### 2. **EasyOCR** + - **Description**: EasyOCR is a modern, AI-powered OCR library written in PyTorch. It supports GPU acceleration out of the box and handles Traditional Chinese well. + - **Key Features**: + - Multilingual support, including Traditional Chinese (EasyOCR language code `ch_tra`). + - Lightweight and easy to set up. + - Can leverage NVIDIA GPUs for faster processing. + - **Setup**: + 1. Install via pip: `pip install easyocr`. + 2. Run the code: + ```python + import easyocr + reader = easyocr.Reader(['ch_tra'], gpu=True) + result = reader.readtext('path_to_image') + ``` + - **Limitations**: + - Struggles with very complex or heavily distorted handwriting. + +--- + +### 3. **PaddleOCR** + - **Description**: PaddleOCR is a powerful OCR tool developed by Baidu. It supports GPU acceleration using NVIDIA GPUs and provides excellent accuracy, especially for Chinese text. + - **Key Features**: + - Optimized for Chinese languages. 
+ - High accuracy for both printed and handwritten text. + - Built-in tools for image pre-processing and text detection. + - **Setup**: + 1. Install the PaddleOCR package: + ```bash + pip install paddleocr + pip install paddlepaddle-gpu # Ensure GPU support + ``` + 2. Use the library: + ```python + from paddleocr import PaddleOCR + ocr = PaddleOCR(use_gpu=True, lang='chinese_cht') + result = ocr.ocr('path_to_image', cls=True) + ``` + - **Limitations**: + - Requires installing PaddlePaddle, which can have specific system requirements. + +--- + +### 4. **OCR with OpenCV and Deep Learning Models** + - **Description**: OpenCV allows integration with custom deep learning OCR models like CRNN (Convolutional Recurrent Neural Network) or SAR (Show, Attend and Read, an attention-based recognizer). These models can be trained or fine-tuned on Traditional Chinese datasets. + - **Key Features**: + - Customizable for your specific needs. + - Full GPU acceleration using NVIDIA CUDA. + - **Setup**: + - Use OpenCV with CUDA for pre-processing (e.g., noise removal, binarization). + - Combine with a deep learning framework (e.g., PyTorch or TensorFlow) for OCR. + +--- + +### 5. **TrOCR by Microsoft** + - **Description**: TrOCR is a transformer-based OCR model provided by Microsoft. The officially released checkpoints are trained on English text, so recognizing Chinese requires a fine-tuned model; inference works efficiently with GPU acceleration. + - **Key Features**: + - State-of-the-art accuracy. + - Uses transformers for improved contextual understanding. + - **Setup**: + 1. Install the `transformers` library: + ```bash + pip install transformers + ``` + 2. 
Use the model: + ```python + from transformers import TrOCRProcessor, VisionEncoderDecoderModel + from PIL import Image + import torch + + processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten") + model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten").cuda() + + image = Image.open('path_to_image').convert("RGB") + pixel_values = processor(images=image, return_tensors="pt").pixel_values.cuda() + generated_ids = model.generate(pixel_values) + text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + print(text) + ``` + - **Limitations**: + - Requires fine-tuning for best performance on Traditional Chinese. + +--- + + + + \ No newline at end of file diff --git a/Productivity/Converter/PDF2Text/OCR/easy_ocr.py b/Productivity/Converter/PDF2Text/OCR/easy_ocr.py new file mode 100644 index 0000000..068be84 --- /dev/null +++ b/Productivity/Converter/PDF2Text/OCR/easy_ocr.py @@ -0,0 +1,38 @@ +import easyocr +import torch + +# Function to check if GPU is available +def check_gpu(): + if torch.cuda.is_available(): + print("GPU is available and will be used.") + else: + print("GPU is not available. 
Using CPU.") + +# Check GPU availability +check_gpu() + +# Initialize EasyOCR reader for Traditional Chinese (zh-tw) +reader = easyocr.Reader(['ch_tra', 'en'], gpu=True) # Set gpu=True to ensure it uses GPU + +# Loop through image files from 001 to 274 +for i in range(1, 275): # Loop from 1 to 274 + # Format the image file name + image_file = f'output-{i:03}.png' # This formats numbers with leading zeros (e.g., 001, 002, ..., 274) + + try: + # Perform OCR on the image + result = reader.readtext(image_file) + + # Create corresponding .txt file name + output_file = image_file.replace('.png', '.txt') # Replace .png with .txt + + # Save the recognized text to a .txt file + with open(output_file, 'w', encoding='utf-8') as f: + for detection in result: + text = detection[1] # The recognized text + f.write(text + '\n') # Write text to file, each on a new line + + print(f'Text from {image_file} saved to {output_file}') + + except Exception as e: + print(f"Error processing {image_file}: {e}") \ No newline at end of file diff --git a/Productivity/Converter/PDF2Text/PDF2Image/README.md b/Productivity/Converter/PDF2Text/PDF2Image/README.md new file mode 100644 index 0000000..6e12143 --- /dev/null +++ b/Productivity/Converter/PDF2Text/PDF2Image/README.md @@ -0,0 +1,77 @@ +# PDF2Image + +## PDF2Image by python + +
+ Loading content... +
+ + +## PDF2Image by CLI + +To convert each page of a PDF into separate image files using a CLI (Command Line Interface) tool, you can use **`pdftoppm`**, part of the `poppler-utils` package, or **`ImageMagick`**. Here are solutions using both: + +--- + +### **Option 1: Using `pdftoppm`** +1. **Install `poppler-utils`** (if not installed): + - On Debian/Ubuntu: + ```bash + sudo apt update + sudo apt install poppler-utils + ``` + - On macOS (via Homebrew): + ```bash + brew install poppler + ``` + +2. **Convert PDF to Images**: + ```bash + pdftoppm -png input.pdf output + ``` + - `-png`: Sets the output format to PNG (use `-jpeg` for JPEG). + - `input.pdf`: The input PDF file. + - `output`: The prefix for output image files. Page numbers are zero-padded to the number of digits in the PDF's total page count (e.g., `output-1.png`, `output-2.png` for a PDF with fewer than 10 pages; `output-001.png`, `output-002.png` for a PDF with 100 to 999 pages). + +--- + +### **Option 2: Using ImageMagick** +1. **Install ImageMagick**: + - On Debian/Ubuntu: + ```bash + sudo apt update + sudo apt install imagemagick + ``` + - On macOS (via Homebrew): + ```bash + brew install imagemagick + ``` + +2. **Convert PDF to Images**: + ```bash + convert -density 300 input.pdf page-%03d.png + ``` + - `-density 300`: Sets resolution to 300 DPI (higher values produce better quality images). + - `input.pdf`: The input PDF file. + - `page-%03d.png`: Output filenames with a three-digit page number; numbering starts at 0 (e.g., `page-000.png`, `page-001.png`). + +--- + +### **Advanced Options** +- To extract specific pages with `pdftoppm`, use the `-f` (first page) and `-l` (last page) flags: + ```bash + pdftoppm -png -f 2 -l 5 input.pdf output + ``` + This converts pages 2 to 5 only. + +- To customize image size or quality in `ImageMagick`: + ```bash + convert -density 300 -quality 90 input.pdf page-%03d.png + ``` + - `-quality 90`: Sets the JPEG compression quality; for PNG output (which is always lossless) the value only selects the zlib compression level and filter type. + +Both tools are efficient and widely available on Linux, macOS, and Windows (via WSL or binaries). 
+ + + + \ No newline at end of file diff --git a/Productivity/Converter/pdf2img.py b/Productivity/Converter/PDF2Text/PDF2Image/pdf2img.py similarity index 100% rename from Productivity/Converter/pdf2img.py rename to Productivity/Converter/PDF2Text/PDF2Image/pdf2img.py diff --git a/Productivity/Converter/PDF2Text/README.md b/Productivity/Converter/PDF2Text/README.md new file mode 100644 index 0000000..d154c11 --- /dev/null +++ b/Productivity/Converter/PDF2Text/README.md @@ -0,0 +1,25 @@ +# PDF2Text + + +## PDF to Images + +see [PDF2Image](./PDF2Image/) + + +## Images to Text + +see [OCR](./OCR/) + + +## View in browser + +After converting the images to text, you can use this file to view each image on the left side and its recognized text on the right side. + + +
+ Loading content... +
+ + + + \ No newline at end of file diff --git a/Productivity/Converter/PDF2Text/browse.html b/Productivity/Converter/PDF2Text/browse.html new file mode 100644 index 0000000..528ac63 --- /dev/null +++ b/Productivity/Converter/PDF2Text/browse.html @@ -0,0 +1,182 @@ + + + + + + Image and Text Preview + + + + +
+
+ Image Preview +
+ +
+
+ Currently viewing: output-001.webp +
+ +
+
+ +
+ + + + +
+
+
+ + + + + diff --git a/Productivity/Converter/Video2Gif.md b/Productivity/Converter/Video2Gif.md new file mode 100644 index 0000000..5762086 --- /dev/null +++ b/Productivity/Converter/Video2Gif.md @@ -0,0 +1,67 @@ +# Video2Gif + +Creating high-quality GIFs from videos can be a useful skill, whether for social media, presentations, or personal projects. In this guide, we'll walk you through the steps to convert a video to a GIF using FFmpeg, covering both GPU-accelerated and non-GPU methods. + +## Prerequisites + +Before you start, make sure you have FFmpeg installed on your system. You can download it from [FFmpeg's official website](https://ffmpeg.org/download.html). + +## Step-by-Step Guide + +### Step 1: Convert Video to SRGB Color Space + +First, we need to convert the input video to an SRGB color space. This step is crucial to maintain color accuracy in the final GIF. You can do this using NVIDIA's NVENC for hardware acceleration or using CPU processing. + +#### **1.1. GPU-Accelerated Version (Using NVENC)** + +If you have an NVIDIA GPU, you can use the following command for faster processing: + +```bash +ffmpeg -hwaccel cuda -i input.mkv -vf "colorspace=bt709" -c:v h264_nvenc temp_srgb.mp4 +``` + +#### **1.2. Non-Hardware Accelerated Version** + +If you don’t have a GPU or prefer to use CPU processing, run this command: + +```bash +ffmpeg -i input.mkv -vf "colorspace=bt709" -c:v libx264 temp_srgb.mp4 +``` + +### Step 2: Generate the Color Palette + +Next, we need to create a color palette from the video. The palette will help improve the quality of the GIF by optimizing the color mapping. + +#### **2.1. Command for Generating the Palette** + +Run the following command to generate the palette: + +```bash +ffmpeg -i temp_srgb.mp4 -vf "fps=10,scale=1280:-1:flags=lanczos,palettegen" -frames:v 1 palette.png +``` + +### Step 3: Convert the Video to GIF + +Finally, we’ll use the generated palette to convert the video into a GIF. + +#### **3.1. 
GPU-Accelerated Version (Using CUDA Decoding)** + +Use the following command to create the GIF while applying the color palette. Only video decoding is hardware-accelerated here; the GIF itself is always encoded on the CPU: + +```bash +ffmpeg -hwaccel cuda -i temp_srgb.mp4 -i palette.png -lavfi "fps=10,scale=1280:-1:flags=lanczos [x]; [x][1:v] paletteuse" output.gif +``` + +#### **3.2. Non-Hardware Accelerated Version** + +For the CPU version, run this command: + +```bash +ffmpeg -i temp_srgb.mp4 -i palette.png -lavfi "fps=10,scale=1280:-1:flags=lanczos [x]; [x][1:v] paletteuse" output.gif +``` + +## Conclusion + +By following these steps, you can convert a video to a high-quality GIF using FFmpeg, with options for both GPU-accelerated and CPU-based processing. This approach ensures that your GIFs retain their original color integrity while allowing for customization based on your hardware capabilities. + +Feel free to reach out in the comments if you have any questions or run into issues while following this guide. Happy GIF-making! diff --git a/_includes b/_includes index a865b6b..cf97434 160000 --- a/_includes +++ b/_includes @@ -1 +1 @@ -Subproject commit a865b6b5999ddf608ecf42eee767745649bbe2ba +Subproject commit cf974347888f350a935d18c2ee2c275801a9db89 diff --git a/_layouts b/_layouts index 525e85b..ee04c5a 160000 --- a/_layouts +++ b/_layouts @@ -1 +1 @@ -Subproject commit 525e85b26bb5d068499a943647def67f91a619ac +Subproject commit ee04c5a27ac58aa417471f81edc5ee650e45f2d2