diff --git a/app.properties b/app.properties
index d1f4231..15503f4 100644
--- a/app.properties
+++ b/app.properties
@@ -1,4 +1,4 @@
-#Thu Nov 23 02:43:23 HST 2023
+#Sun Apr 28 04:53:24 HST 2024
recName=ch_PP-OCRv3_rec_infer
model=model
keysName=ppocr_keys_v1.txt
diff --git a/pom.xml b/pom.xml
index ce322a4..f9f8010 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
com.luooqi
tools-ocr
- 2.2.9
+ 2.3.0
UTF-8
@@ -57,41 +57,17 @@
${logback.version}
-
+
- ai.djl
- api
- ${djl.version}
-
-
- ai.djl
- basicdataset
- ${djl.version}
-
-
- ai.djl
- model-zoo
- ${djl.version}
-
-
-
-
- ai.djl.pytorch
- pytorch-engine
- ${djl.version}
- runtime
-
-
-
- ai.djl.onnxruntime
- onnxruntime-engine
- ${djl.version}
+ io.github.mymonstercat
+ rapidocr
+ 0.0.7
- ai.djl.opencv
- opencv
- ${djl.version}
+ io.github.mymonstercat
+ rapidocr-onnx-platform
+ 0.0.7
diff --git a/src/main/java/com/benjaminwan/ocrlibrary/OcrEngine.java b/src/main/java/com/benjaminwan/ocrlibrary/OcrEngine.java
deleted file mode 100644
index 7f9d5e5..0000000
--- a/src/main/java/com/benjaminwan/ocrlibrary/OcrEngine.java
+++ /dev/null
@@ -1,120 +0,0 @@
-package com.benjaminwan.ocrlibrary;
-
-import cn.hutool.core.io.FileUtil;
-import cn.hutool.log.StaticLog;
-
-import java.io.File;
-import java.nio.charset.Charset;
-
-public final class OcrEngine {
- /**
- * 图像外接白框,用于提升识别率,文字框没有正确框住所有文字时,增加此值。
- */
- private int padding;
- /**
- * 文字框置信度门限,文字框没有正确框住所有文字时,减小此值
- */
- private float boxScoreThresh;
-
- private float boxThresh;
- /**
- * 单个文字框大小倍率,越大时单个文字框越大
- */
- private float unClipRatio;
- /**
- * 启用(1)/禁用(0) 文字方向检测,只有图片倒置的情况下(旋转90~270度的图片),才需要启用文字方向检测
- */
- private boolean doAngle;
- /**
- * 启用(1)/禁用(0) 角度投票(整张图片以最大可能文字方向来识别),当禁用文字方向检测时,此项也不起作用
- */
- private boolean mostAngle;
-
- public native boolean setNumThread(int numThread);
-
- public native void initLogger(boolean isConsole, boolean isPartImg, boolean isResultImg);
-
- public native void enableResultText(String imagePath);
-
- public native boolean initModels(String modelsDir, String detName, String clsName, String recName, String keysName);
-
- /**
- * GPU0一般为默认GPU,参数选项:使用CPU(-1)/使用GPU0(0)/使用GPU1(1)/...
- */
- public native void setGpuIndex(int gpuIndex);
-
- public native String getVersion();
-
- public native OcrResult detect(String input, int padding, int maxSideLen, float boxScoreThresh, float boxThresh, float unClipRatio, boolean doAngle, boolean mostAngle);
-
- public OcrEngine() {
- try {
- StaticLog.info("java.library.path=" + System.getProperty("java.library.path"));
- System.loadLibrary("RapidOcrNcnn");
- } catch (Exception e) {
- e.printStackTrace();
- }
- this.padding = 15;
- this.boxScoreThresh = 0.25f;
- this.boxThresh = 0.3f;
- this.unClipRatio = 1.6f;
- this.doAngle = true;
- this.mostAngle = true;
- }
-
- public int getPadding() {
- return this.padding;
- }
-
- public void setPadding(int i) {
- this.padding = i;
- }
-
- public float getBoxScoreThresh() {
- return this.boxScoreThresh;
- }
-
- public void setBoxScoreThresh(float f) {
- this.boxScoreThresh = f;
- }
-
- public float getBoxThresh() {
- return this.boxThresh;
- }
-
- public void setBoxThresh(float f) {
- this.boxThresh = f;
- }
-
- public float getUnClipRatio() {
- return this.unClipRatio;
- }
-
- public void setUnClipRatio(float f) {
- this.unClipRatio = f;
- }
-
- public boolean getDoAngle() {
- return this.doAngle;
- }
-
- public void setDoAngle(boolean z) {
- this.doAngle = z;
- }
-
- public boolean getMostAngle() {
- return this.mostAngle;
- }
-
- public void setMostAngle(boolean z) {
- this.mostAngle = z;
- }
-
- public OcrResult detect(String input) {
- return detect(input, 0);
- }
-
- public OcrResult detect(String input, int maxSideLen) {
- return detect(input, this.padding, maxSideLen, this.boxScoreThresh, this.boxThresh, this.unClipRatio, this.doAngle, this.mostAngle);
- }
-}
\ No newline at end of file
diff --git a/src/main/java/com/benjaminwan/ocrlibrary/OcrFailed.java b/src/main/java/com/benjaminwan/ocrlibrary/OcrFailed.java
deleted file mode 100644
index 71e7a4c..0000000
--- a/src/main/java/com/benjaminwan/ocrlibrary/OcrFailed.java
+++ /dev/null
@@ -1,9 +0,0 @@
-package com.benjaminwan.ocrlibrary;
-
-public final class OcrFailed extends OcrOutput {
- public static final OcrFailed INSTANCE = new OcrFailed();
-
- private OcrFailed() {
- super();
- }
-}
diff --git a/src/main/java/com/benjaminwan/ocrlibrary/OcrOutput.java b/src/main/java/com/benjaminwan/ocrlibrary/OcrOutput.java
deleted file mode 100644
index 5215bba..0000000
--- a/src/main/java/com/benjaminwan/ocrlibrary/OcrOutput.java
+++ /dev/null
@@ -1,5 +0,0 @@
-package com.benjaminwan.ocrlibrary;
-
-public abstract class OcrOutput {
-
-}
diff --git a/src/main/java/com/benjaminwan/ocrlibrary/OcrResult.java b/src/main/java/com/benjaminwan/ocrlibrary/OcrResult.java
deleted file mode 100644
index 566929c..0000000
--- a/src/main/java/com/benjaminwan/ocrlibrary/OcrResult.java
+++ /dev/null
@@ -1,54 +0,0 @@
-package com.benjaminwan.ocrlibrary;
-
-import java.util.ArrayList;
-
-public final class OcrResult extends OcrOutput {
- private final double dbNetTime;
-
- private final ArrayList textBlocks;
- private double detectTime;
-
- private String strRes;
-
- public OcrResult copy(double dbNetTime, ArrayList textBlocks, double detectTime, String strRes) {
- return new OcrResult(dbNetTime, textBlocks, detectTime, strRes);
- }
-
- public String toString() {
- return "OcrResult(dbNetTime=" + this.dbNetTime + ", textBlocks=" + this.textBlocks + ", detectTime=" + this.detectTime + ", strRes=" + this.strRes + ')';
- }
-
- public double getDbNetTime() {
- return this.dbNetTime;
- }
-
-
- public ArrayList getTextBlocks() {
- return this.textBlocks;
- }
-
- public double getDetectTime() {
- return this.detectTime;
- }
-
- public void setDetectTime(double d) {
- this.detectTime = d;
- }
-
-
- public String getStrRes() {
- return this.strRes;
- }
-
- public void setStrRes(String str) {
- this.strRes = str;
- }
-
- public OcrResult(double dbNetTime, ArrayList textBlocks, double detectTime, String strRes) {
- super();
- this.dbNetTime = dbNetTime;
- this.textBlocks = textBlocks;
- this.detectTime = detectTime;
- this.strRes = strRes;
- }
-}
diff --git a/src/main/java/com/benjaminwan/ocrlibrary/OcrStop.java b/src/main/java/com/benjaminwan/ocrlibrary/OcrStop.java
deleted file mode 100644
index a7b6645..0000000
--- a/src/main/java/com/benjaminwan/ocrlibrary/OcrStop.java
+++ /dev/null
@@ -1,9 +0,0 @@
-package com.benjaminwan.ocrlibrary;
-
-public final class OcrStop extends OcrOutput {
- public static final OcrStop INSTANCE = new OcrStop();
-
- private OcrStop() {
- super();
- }
-}
diff --git a/src/main/java/com/benjaminwan/ocrlibrary/Point.java b/src/main/java/com/benjaminwan/ocrlibrary/Point.java
deleted file mode 100644
index 73e7d5f..0000000
--- a/src/main/java/com/benjaminwan/ocrlibrary/Point.java
+++ /dev/null
@@ -1,51 +0,0 @@
-package com.benjaminwan.ocrlibrary;
-
-public final class Point {
- private int x;
- private int y;
-
- public Point copy(int x, int y) {
- return new Point(x, y);
- }
-
- public String toString() {
- return "Point(x=" + this.x + ", y=" + this.y + ')';
- }
-
- public int hashCode() {
- int result = Integer.hashCode(this.x);
- return (result * 31) + Integer.hashCode(this.y);
- }
-
- public boolean equals(Object other) {
- if (this == other) {
- return true;
- }
- if (!(other instanceof Point)) {
- return false;
- }
- Point point = (Point) other;
- return this.x == point.x && this.y == point.y;
- }
-
- public Point(int x, int y) {
- this.x = x;
- this.y = y;
- }
-
- public int getX() {
- return this.x;
- }
-
- public void setX(int i) {
- this.x = i;
- }
-
- public int getY() {
- return this.y;
- }
-
- public void setY(int i) {
- this.y = i;
- }
-}
\ No newline at end of file
diff --git a/src/main/java/com/benjaminwan/ocrlibrary/TextBlock.java b/src/main/java/com/benjaminwan/ocrlibrary/TextBlock.java
deleted file mode 100644
index 6ac816b..0000000
--- a/src/main/java/com/benjaminwan/ocrlibrary/TextBlock.java
+++ /dev/null
@@ -1,79 +0,0 @@
-package com.benjaminwan.ocrlibrary;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-
-public final class TextBlock {
-
- private final ArrayList boxPoint;
- private float boxScore;
- private final int angleIndex;
- private final float angleScore;
- private final double angleTime;
-
- private final String text;
-
- private final float[] charScores;
- private final double crnnTime;
- private final double blockTime;
-
-
- public String toString() {
- return "TextBlock(boxPoint=" + this.boxPoint + ", boxScore=" + this.boxScore + ", angleIndex=" + this.angleIndex + ", angleScore=" + this.angleScore + ", angleTime=" + this.angleTime + ", text=" + this.text + ", charScores=" + Arrays.toString(this.charScores) + ", crnnTime=" + this.crnnTime + ", blockTime=" + this.blockTime + ')';
- }
-
- public TextBlock( ArrayList boxPoint, float boxScore, int angleIndex, float angleScore, double angleTime, String text, float[] charScores, double crnnTime, double blockTime) {
- this.boxPoint = boxPoint;
- this.boxScore = boxScore;
- this.angleIndex = angleIndex;
- this.angleScore = angleScore;
- this.angleTime = angleTime;
- this.text = text;
- this.charScores = charScores;
- this.crnnTime = crnnTime;
- this.blockTime = blockTime;
- }
-
-
- public ArrayList getBoxPoint() {
- return this.boxPoint;
- }
-
- public float getBoxScore() {
- return this.boxScore;
- }
-
- public void setBoxScore(float f) {
- this.boxScore = f;
- }
-
- public int getAngleIndex() {
- return this.angleIndex;
- }
-
- public float getAngleScore() {
- return this.angleScore;
- }
-
- public double getAngleTime() {
- return this.angleTime;
- }
-
-
- public String getText() {
- return this.text;
- }
-
-
- public float[] getCharScores() {
- return this.charScores;
- }
-
- public double getCrnnTime() {
- return this.crnnTime;
- }
-
- public double getBlockTime() {
- return this.blockTime;
- }
-}
diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/OcrV4DetExample.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/OcrV4DetExample.java
deleted file mode 100644
index 9dd6986..0000000
--- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/OcrV4DetExample.java
+++ /dev/null
@@ -1,51 +0,0 @@
-package com.litongjava.djl.paddle.ocr.v4;
-
-import ai.djl.ModelException;
-import ai.djl.inference.Predictor;
-import ai.djl.modality.cv.Image;
-import ai.djl.ndarray.NDList;
-import ai.djl.ndarray.NDManager;
-import ai.djl.opencv.OpenCVImageFactory;
-import ai.djl.repository.zoo.ModelZoo;
-import ai.djl.repository.zoo.ZooModel;
-import ai.djl.translate.TranslateException;
-
-import com.litongjava.djl.paddle.ocr.v4.common.ImageUtils;
-import com.litongjava.djl.paddle.ocr.v4.detection.OcrV4Detection;
-import org.opencv.core.Mat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-
-public final class OcrV4DetExample {
-
- private static final Logger logger = LoggerFactory.getLogger(OcrV4DetExample.class);
-
- private OcrV4DetExample() {
- }
-
- public static void main(String[] args) throws IOException, ModelException, TranslateException {
- Path imageFile = Paths.get("src/test/resources/2.jpg");
- Image image = OpenCVImageFactory.getInstance().fromFile(imageFile);
-
- OcrV4Detection detection = new OcrV4Detection();
- try (ZooModel detectionModel = ModelZoo.loadModel(detection.chDetCriteria());
- Predictor detector = detectionModel.newPredictor();
- NDManager manager = NDManager.newBaseManager();) {
-
- NDList dt_boxes = detector.predict(image);
- // 交给 NDManager自动管理内存
- // attach to manager for automatic memory management
- dt_boxes.attach(manager);
-
- for (int i = 0; i < dt_boxes.size(); i++) {
- ImageUtils.drawRect((Mat) image.getWrappedImage(), dt_boxes.get(i));
- }
- ImageUtils.saveImage(image, "detect_rect.png", "build/output");
- ((Mat) image.getWrappedImage()).release();
- }
- }
-}
\ No newline at end of file
diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/OcrV4RecExample.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/OcrV4RecExample.java
deleted file mode 100644
index 3945727..0000000
--- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/OcrV4RecExample.java
+++ /dev/null
@@ -1,131 +0,0 @@
-package com.litongjava.djl.paddle.ocr.v4;
-
-import ai.djl.ModelException;
-import ai.djl.inference.Predictor;
-import ai.djl.modality.cv.Image;
-import ai.djl.ndarray.NDList;
-import ai.djl.ndarray.NDManager;
-import ai.djl.opencv.OpenCVImageFactory;
-import ai.djl.repository.zoo.ModelZoo;
-import ai.djl.repository.zoo.ZooModel;
-import ai.djl.translate.TranslateException;
-import com.litongjava.djl.paddle.ocr.v4.common.ImageUtils;
-import com.litongjava.djl.paddle.ocr.v4.common.RotatedBox;
-import com.litongjava.djl.paddle.ocr.v4.common.RotatedBoxCompX;
-import com.litongjava.djl.paddle.ocr.v4.detection.OcrV4Detection;
-import com.litongjava.djl.paddle.ocr.v4.opencv.OpenCVUtils;
-import com.litongjava.djl.paddle.ocr.v4.recognition.OcrV4Recognition;
-import org.opencv.core.Mat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.awt.image.BufferedImage;
-import java.io.IOException;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * OCR V4模型 文字识别. 支持文本有旋转角度
- * OCR V4 model for text recognition. Supports text with rotation angles.
- */
-public final class OcrV4RecExample {
-
- private static final Logger logger = LoggerFactory.getLogger(OcrV4RecExample.class);
-
- private OcrV4RecExample() {
- }
-
- public static void main(String[] args) throws IOException, ModelException, TranslateException {
- Path imageFile = Paths.get("src/test/resources/2.jpg");
- Image image = OpenCVImageFactory.getInstance().fromFile(imageFile);
-
- OcrV4Detection detection = new OcrV4Detection();
- OcrV4Recognition recognition = new OcrV4Recognition();
- try (ZooModel detectionModel = ModelZoo.loadModel(detection.chDetCriteria());
- Predictor detector = detectionModel.newPredictor();
- ZooModel recognitionModel = ModelZoo.loadModel(recognition.chRecCriteria());
- Predictor recognizer = recognitionModel.newPredictor();
- NDManager manager = NDManager.newBaseManager()) {
-
- long timeInferStart = System.currentTimeMillis();
- List detections = recognition.predict(manager, image, detector, recognizer);
-
-// for (int i = 0; i < 1000; i++) {
-// detections = recognition.predict(image, detector, recognizer);
-// for (RotatedBox result : detections) {
-// System.out.println(result.getText());
-// }
-// System.out.println("index : " + i);
-// }
-
- long timeInferEnd = System.currentTimeMillis();
- System.out.println("time: " + (timeInferEnd - timeInferStart));
-
- // 对检测结果根据坐标位置,根据从上到下,从做到右,重新排序,下面算法对图片倾斜旋转角度较小的情形适用
- // 如果图片旋转角度较大,则需要自行改进算法,需要根据斜率校正计算位置。
- // Reorder the detection results based on the coordinate positions, from top to bottom, from left to right. The algorithm below is suitable for situations where the image is slightly tilted or rotated.
- // If the image rotation angle is large, the algorithm needs to be improved, and the position needs to be calculated based on the slope correction.
- List initList = new ArrayList<>();
- for (RotatedBox result : detections) {
- // put low Y value at the head of the queue.
- initList.add(result);
- }
- Collections.sort(initList);
-
- List> lines = new ArrayList<>();
- List line = new ArrayList<>();
- RotatedBoxCompX firstBox = new RotatedBoxCompX(initList.get(0).getBox(), initList.get(0).getText());
- line.add(firstBox);
- lines.add((ArrayList) line);
- for (int i = 1; i < initList.size(); i++) {
- RotatedBoxCompX tmpBox = new RotatedBoxCompX(initList.get(i).getBox(), initList.get(i).getText());
- float y1 = firstBox.getBox().toFloatArray()[1];
- float y2 = tmpBox.getBox().toFloatArray()[1];
- float dis = Math.abs(y2 - y1);
- if (dis < 20) { // 认为是同 1 行 - Considered to be in the same line
- line.add(tmpBox);
- } else { // 换行 - Line break
- firstBox = tmpBox;
- Collections.sort(line);
- line = new ArrayList<>();
- line.add(firstBox);
- lines.add((ArrayList) line);
- }
- }
-
-
- String fullText = "";
- for (int i = 0; i < lines.size(); i++) {
- for (int j = 0; j < lines.get(i).size(); j++) {
- String text = lines.get(i).get(j).getText();
- if (text.trim().equals(""))
- continue;
- fullText += text + " ";
- }
- fullText += '\n';
- }
-
- System.out.println(fullText);
-
-
- // 转 BufferedImage 解决 Imgproc.putText 中文乱码问题
- Mat wrappedImage = (Mat) image.getWrappedImage();
- BufferedImage bufferedImage = OpenCVUtils.mat2Image(wrappedImage);
- for (RotatedBox result : detections) {
- ImageUtils.drawImageRectWithText(bufferedImage, result.getBox(), result.getText());
- }
-
- Mat image2Mat = OpenCVUtils.image2Mat(bufferedImage);
- image = OpenCVImageFactory.getInstance().fromImage(image2Mat);
- ImageUtils.saveImage(image, "ocr_result.png", "build/output");
-
- wrappedImage.release();
- image2Mat.release();
-
- logger.info("{}", detections);
- }
- }
-}
\ No newline at end of file
diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/common/ImageUtils.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/common/ImageUtils.java
deleted file mode 100644
index 6c67331..0000000
--- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/common/ImageUtils.java
+++ /dev/null
@@ -1,241 +0,0 @@
-package com.litongjava.djl.paddle.ocr.v4.common;
-
-import ai.djl.modality.cv.Image;
-import ai.djl.modality.cv.ImageFactory;
-import ai.djl.modality.cv.output.DetectedObjects;
-import ai.djl.ndarray.NDArray;
-import org.opencv.core.Mat;
-import org.opencv.core.Point;
-import org.opencv.core.Scalar;
-import org.opencv.imgproc.Imgproc;
-
-import java.awt.*;
-import java.awt.image.BufferedImage;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * 图像工具类
- */
-public class ImageUtils {
-
- /**
- * 保存BufferedImage图片
- *
- * @param img
- * @param name
- * @param path
- */
- public static void saveImage(BufferedImage img, String name, String path) {
- Image djlImg = ImageFactory.getInstance().fromImage(img); // 支持多种图片格式,自动适配
- Path outputDir = Paths.get(path);
- Path imagePath = outputDir.resolve(name);
- // OpenJDK 不能保存 jpg 图片的 alpha channel
- try {
- djlImg.save(Files.newOutputStream(imagePath), "png");
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- /**
- * 保存DJL图片
- *
- * @param img
- * @param name
- * @param path
- */
- public static void saveImage(Image img, String name, String path) {
- Path outputDir = Paths.get(path);
- Path imagePath = outputDir.resolve(name);
- // OpenJDK 不能保存 jpg 图片的 alpha channel
- try {
- img.save(Files.newOutputStream(imagePath), "png");
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- /**
- * 保存图片,含检测框
- *
- * @param img
- * @param detection
- * @param name
- * @param path
- * @throws IOException
- */
- public static void saveBoundingBoxImage(
- Image img, DetectedObjects detection, String name, String path) throws IOException {
- // Make image copy with alpha channel because original image was jpg
- img.drawBoundingBoxes(detection);
- Path outputDir = Paths.get(path);
- Files.createDirectories(outputDir);
- Path imagePath = outputDir.resolve(name);
- // OpenJDK can't save jpg with alpha channel
- img.save(Files.newOutputStream(imagePath), "png");
- }
-
- /**
- * 画矩形
- *
- * @param mat
- * @param box
- */
- public static void drawRect(Mat mat, NDArray box) {
-
- float[] points = box.toFloatArray();
- List list = new ArrayList<>();
-
- for (int i = 0; i < 4; i++) {
- Point point = new Point((int) points[2 * i], (int) points[2 * i + 1]);
- list.add(point);
- }
-
- Imgproc.line(mat, list.get(0), list.get(1), new Scalar(0, 255, 0), 1);
- Imgproc.line(mat, list.get(1), list.get(2), new Scalar(0, 255, 0), 1);
- Imgproc.line(mat, list.get(2), list.get(3), new Scalar(0, 255, 0), 1);
- Imgproc.line(mat, list.get(3), list.get(0), new Scalar(0, 255, 0), 1);
- }
-
- /**
- * 画矩形
- *
- * @param mat
- * @param box
- * @param text
- */
- public static void drawRectWithText(Mat mat, NDArray box, String text) {
-
- float[] points = box.toFloatArray();
- List list = new ArrayList<>();
-
- for (int i = 0; i < 4; i++) {
- Point point = new Point((int) points[2 * i], (int) points[2 * i + 1]);
- list.add(point);
- }
-
- Imgproc.line(mat, list.get(0), list.get(1), new Scalar(0, 255, 0), 1);
- Imgproc.line(mat, list.get(1), list.get(2), new Scalar(0, 255, 0), 1);
- Imgproc.line(mat, list.get(2), list.get(3), new Scalar(0, 255, 0), 1);
- Imgproc.line(mat, list.get(3), list.get(0), new Scalar(0, 255, 0), 1);
- // 中文乱码
- Imgproc.putText(mat, text, list.get(0), Imgproc.FONT_HERSHEY_SCRIPT_SIMPLEX, 1.0, new Scalar(0, 255, 0), 1);
- }
-
- /**
- * 画检测框(有倾斜角)
- *
- * @param image
- * @param box
- */
- public static void drawImageRect(BufferedImage image, NDArray box) {
- float[] points = box.toFloatArray();
- int[] xPoints = new int[5];
- int[] yPoints = new int[5];
-
- for (int i = 0; i < 4; i++) {
- xPoints[i] = (int) points[2 * i];
- yPoints[i] = (int) points[2 * i + 1];
- }
- xPoints[4] = xPoints[0];
- yPoints[4] = yPoints[0];
-
- // 将绘制图像转换为Graphics2D
- Graphics2D g = (Graphics2D) image.getGraphics();
- try {
- g.setColor(new Color(0, 255, 0));
- // 声明画笔属性 :粗 细(单位像素)末端无修饰 折线处呈尖角
- BasicStroke bStroke = new BasicStroke(4, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER);
- g.setStroke(bStroke);
- g.drawPolyline(xPoints, yPoints, 5); // xPoints, yPoints, nPoints
- } finally {
- g.dispose();
- }
- }
-
- /**
- * 画检测框(有倾斜角)和文本
- *
- * @param image
- * @param box
- * @param text
- */
- public static void drawImageRectWithText(BufferedImage image, NDArray box, String text) {
- float[] points = box.toFloatArray();
- int[] xPoints = new int[5];
- int[] yPoints = new int[5];
-
- for (int i = 0; i < 4; i++) {
- xPoints[i] = (int) points[2 * i];
- yPoints[i] = (int) points[2 * i + 1];
- }
- xPoints[4] = xPoints[0];
- yPoints[4] = yPoints[0];
-
- // 将绘制图像转换为Graphics2D
- Graphics2D g = (Graphics2D) image.getGraphics();
- try {
- int fontSize = 32;
- Font font = new Font("楷体", Font.PLAIN, fontSize);
- g.setFont(font);
- g.setColor(new Color(0, 0, 255));
- // 声明画笔属性 :粗 细(单位像素)末端无修饰 折线处呈尖角
- BasicStroke bStroke = new BasicStroke(2, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER);
- g.setStroke(bStroke);
- g.drawPolyline(xPoints, yPoints, 5); // xPoints, yPoints, nPoints
- g.drawString(text, xPoints[0], yPoints[0]);
- } finally {
- g.dispose();
- }
- }
-
- /**
- * 画检测框
- *
- * @param image
- * @param x
- * @param y
- * @param width
- * @param height
- */
- public static void drawImageRect(BufferedImage image, int x, int y, int width, int height) {
- // 将绘制图像转换为Graphics2D
- Graphics2D g = (Graphics2D) image.getGraphics();
- try {
- g.setColor(new Color(0, 255, 0));
- // 声明画笔属性 :粗 细(单位像素)末端无修饰 折线处呈尖角
- BasicStroke bStroke = new BasicStroke(2, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER);
- g.setStroke(bStroke);
- g.drawRect(x, y, width, height);
- } finally {
- g.dispose();
- }
- }
-
- /**
- * 显示文字
- *
- * @param image
- * @param text
- * @param x
- * @param y
- */
- public static void drawImageText(BufferedImage image, String text, int x, int y) {
- Graphics graphics = image.getGraphics();
- int fontSize = 32;
- Font font = new Font("楷体", Font.PLAIN, fontSize);
- try {
- graphics.setFont(font);
- graphics.setColor(new Color(0, 0, 255));
- int strWidth = graphics.getFontMetrics().stringWidth(text);
- graphics.drawString(text, x, y);
- } finally {
- graphics.dispose();
- }
- }
-}
\ No newline at end of file
diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/common/RotatedBox.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/common/RotatedBox.java
deleted file mode 100644
index 1858258..0000000
--- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/common/RotatedBox.java
+++ /dev/null
@@ -1,47 +0,0 @@
-package com.litongjava.djl.paddle.ocr.v4.common;
-
-import ai.djl.ndarray.NDArray;
-
-/**
- * 旋转检测框
- */
-public class RotatedBox implements Comparable {
- private NDArray box;
- private String text;
-
- public RotatedBox(NDArray box, String text) {
- this.box = box;
- this.text = text;
- }
-
- /**
- * 将左上角 Y 坐标升序排序
- *
- * @param o
- * @return
- */
- @Override
- public int compareTo(RotatedBox o) {
- NDArray lowBox = this.getBox();
- NDArray highBox = o.getBox();
- float lowY = lowBox.toFloatArray()[1];
- float highY = highBox.toFloatArray()[1];
- return (lowY < highY) ? -1 : 1;
- }
-
- public NDArray getBox() {
- return box;
- }
-
- public void setBox(NDArray box) {
- this.box = box;
- }
-
- public String getText() {
- return text;
- }
-
- public void setText(String text) {
- this.text = text;
- }
-}
\ No newline at end of file
diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/common/RotatedBoxCompX.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/common/RotatedBoxCompX.java
deleted file mode 100644
index 3010457..0000000
--- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/common/RotatedBoxCompX.java
+++ /dev/null
@@ -1,46 +0,0 @@
-package com.litongjava.djl.paddle.ocr.v4.common;
-
-import ai.djl.ndarray.NDArray;
-
-/**
- */
-public class RotatedBoxCompX implements Comparable {
- private NDArray box;
- private String text;
-
- public RotatedBoxCompX(NDArray box, String text) {
- this.box = box;
- this.text = text;
- }
-
- /**
- * 将左上角 X 坐标升序排序
- *
- * @param o
- * @return
- */
- @Override
- public int compareTo(RotatedBoxCompX o) {
- NDArray leftBox = this.getBox();
- NDArray rightBox = o.getBox();
- float leftX = leftBox.toFloatArray()[0];
- float rightX = rightBox.toFloatArray()[0];
- return (leftX < rightX) ? -1 : 1;
- }
-
- public NDArray getBox() {
- return box;
- }
-
- public void setBox(NDArray box) {
- this.box = box;
- }
-
- public String getText() {
- return text;
- }
-
- public void setText(String text) {
- this.text = text;
- }
-}
\ No newline at end of file
diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/detection/OCRDetectionTranslator.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/detection/OCRDetectionTranslator.java
deleted file mode 100644
index ee59fdb..0000000
--- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/detection/OCRDetectionTranslator.java
+++ /dev/null
@@ -1,525 +0,0 @@
-package com.litongjava.djl.paddle.ocr.v4.detection;
-
-import ai.djl.modality.cv.Image;
-import ai.djl.modality.cv.util.NDImageUtils;
-import ai.djl.ndarray.NDArray;
-import ai.djl.ndarray.NDArrays;
-import ai.djl.ndarray.NDList;
-import ai.djl.ndarray.NDManager;
-import ai.djl.ndarray.index.NDIndex;
-import ai.djl.ndarray.types.DataType;
-import ai.djl.ndarray.types.Shape;
-import ai.djl.translate.Batchifier;
-import ai.djl.translate.Translator;
-import ai.djl.translate.TranslatorContext;
-import com.litongjava.djl.paddle.ocr.v4.opencv.NDArrayUtils;
-import org.opencv.core.*;
-import org.opencv.imgproc.Imgproc;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-/**
- * 文字检测前后处理
- */
-public class OCRDetectionTranslator implements Translator {
- // det_algorithm == "DB"
- private final float thresh = 0.3f;
- private final boolean use_dilation = false;
- private final String score_mode = "fast";
- private final String box_type = "quad";
-
- private final int limit_side_len;
- private final int max_candidates;
- private final int min_size;
- private final float box_thresh;
- private final float unclip_ratio;
- private float ratio_h;
- private float ratio_w;
- private int img_height;
- private int img_width;
-
- public OCRDetectionTranslator(Map arguments) {
- limit_side_len =
- arguments.containsKey("limit_side_len")
- ? Integer.parseInt(arguments.get("limit_side_len").toString())
- : 960;
- max_candidates =
- arguments.containsKey("max_candidates")
- ? Integer.parseInt(arguments.get("max_candidates").toString())
- : 1000;
- min_size =
- arguments.containsKey("min_size")
- ? Integer.parseInt(arguments.get("min_size").toString())
- : 3;
- box_thresh =
- arguments.containsKey("box_thresh")
- ? Float.parseFloat(arguments.get("box_thresh").toString())
- : 0.6f; // 0.5f
- unclip_ratio =
- arguments.containsKey("unclip_ratio")
- ? Float.parseFloat(arguments.get("unclip_ratio").toString())
- : 1.6f;
- }
-
- @Override
- public NDList processOutput(TranslatorContext ctx, NDList list) {
- NDManager manager = ctx.getNDManager();
- NDArray pred = list.get(0);
- pred = pred.squeeze();
- NDArray segmentation = pred.gt(thresh); // thresh=0.3 .mul(255f)
-
- segmentation = segmentation.toType(DataType.UINT8, true);
- Shape shape = segmentation.getShape();
- int rows = (int) shape.get(0);
- int cols = (int) shape.get(1);
-
- Mat newMask = new Mat();
- if (this.use_dilation) {
- Mat mask = new Mat();
- //convert from NDArray to Mat
- Mat srcMat = NDArrayUtils.uint8NDArrayToMat(segmentation);
- // size 越小,腐蚀的单位越小,图片越接近原图
- // Mat dilation_kernel = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(2, 2));
- Mat dilation_kernel = NDArrayUtils.uint8ArrayToMat(new byte[][]{{1, 1}, {1, 1}});
- /**
- * 膨胀说明: 图像的一部分区域与指定的核进行卷积, 求核的最`大`值并赋值给指定区域。 膨胀可以理解为图像中`高亮区域`的'领域扩大'。
- * 意思是高亮部分会侵蚀不是高亮的部分,使高亮部分越来越多。
- */
- Imgproc.dilate(srcMat, mask, dilation_kernel);
- //destination Matrix
- Scalar scalar = new Scalar(255);
- Core.multiply(mask, scalar, newMask);
- // release Mat
- mask.release();
- srcMat.release();
- dilation_kernel.release();
- } else {
- Mat srcMat = NDArrayUtils.uint8NDArrayToMat(segmentation);
- //destination Matrix
- Scalar scalar = new Scalar(255);
- Core.multiply(srcMat, scalar, newMask);
- // release Mat
- srcMat.release();
- }
-
- NDList dt_boxes = null;
- NDArray boxes = boxes_from_bitmap(manager, pred, newMask);
- if (boxes != null) {
- //boxes[:, :, 0] = boxes[:, :, 0] / ratio_w
- NDArray boxes1 = boxes.get(":, :, 0").div(ratio_w);
- boxes.set(new NDIndex(":, :, 0"), boxes1);
- //boxes[:, :, 1] = boxes[:, :, 1] / ratio_h
- NDArray boxes2 = boxes.get(":, :, 1").div(ratio_h);
- boxes.set(new NDIndex(":, :, 1"), boxes2);
-
- dt_boxes = this.filter_tag_det_res(boxes);
-
- dt_boxes.detach();
- }
-
- // release Mat
- newMask.release();
-
- return dt_boxes;
- }
-
-
- private NDList filter_tag_det_res(NDArray dt_boxes) {
- NDList boxesList = new NDList();
-
- int num = (int) dt_boxes.getShape().get(0);
- for (int i = 0; i < num; i++) {
- NDArray box = dt_boxes.get(i);
- box = order_points_clockwise(box);
- box = clip_det_res(box);
- float[] box0 = box.get(0).toFloatArray();
- float[] box1 = box.get(1).toFloatArray();
- float[] box3 = box.get(3).toFloatArray();
- int rect_width = (int) Math.sqrt(Math.pow(box1[0] - box0[0], 2) + Math.pow(box1[1] - box0[1], 2));
- int rect_height = (int) Math.sqrt(Math.pow(box3[0] - box0[0], 2) + Math.pow(box3[1] - box0[1], 2));
- if (rect_width <= 3 || rect_height <= 3)
- continue;
- boxesList.add(box);
- }
-
- return boxesList;
- }
-
- private NDArray clip_det_res(NDArray points) {
- for (int i = 0; i < points.getShape().get(0); i++) {
- int value = Math.max((int) points.get(i, 0).toFloatArray()[0], 0);
- value = Math.min(value, img_width - 1);
- points.set(new NDIndex(i + ",0"), value);
- value = Math.max((int) points.get(i, 1).toFloatArray()[0], 0);
- value = Math.min(value, img_height - 1);
- points.set(new NDIndex(i + ",1"), value);
- }
-
- return points;
- }
-
- /**
- * sort the points based on their x-coordinates
- * 顺时针
- *
- * @param pts
- * @return
- */
-
- private NDArray order_points_clockwise(NDArray pts) {
- NDList list = new NDList();
- long[] indexes = pts.get(":, 0").argSort().toLongArray();
-
- // grab the left-most and right-most points from the sorted
- // x-roodinate points
- Shape s1 = pts.getShape();
- NDArray leftMost1 = pts.get(indexes[0] + ",:");
- NDArray leftMost2 = pts.get(indexes[1] + ",:");
- NDArray leftMost = leftMost1.concat(leftMost2).reshape(2, 2);
- NDArray rightMost1 = pts.get(indexes[2] + ",:");
- NDArray rightMost2 = pts.get(indexes[3] + ",:");
- NDArray rightMost = rightMost1.concat(rightMost2).reshape(2, 2);
-
- // now, sort the left-most coordinates according to their
- // y-coordinates so we can grab the top-left and bottom-left
- // points, respectively
- indexes = leftMost.get(":, 1").argSort().toLongArray();
- NDArray lt = leftMost.get(indexes[0] + ",:");
- NDArray lb = leftMost.get(indexes[1] + ",:");
- indexes = rightMost.get(":, 1").argSort().toLongArray();
- NDArray rt = rightMost.get(indexes[0] + ",:");
- NDArray rb = rightMost.get(indexes[1] + ",:");
-
- list.add(lt);
- list.add(rt);
- list.add(rb);
- list.add(lb);
-
- NDArray rect = NDArrays.concat(list).reshape(4, 2);
- return rect;
- }
-
- /**
- * Get boxes from the binarized image predicted by DB
- *
- * @param manager
- * @param pred the binarized image predicted by DB.
- * @param bitmap new 'pred' after threshold filtering.
- */
- private NDArray boxes_from_bitmap(NDManager manager, NDArray pred, Mat bitmap) {
- int dest_height = (int) pred.getShape().get(0);
- int dest_width = (int) pred.getShape().get(1);
- int height = bitmap.rows();
- int width = bitmap.cols();
-
- List contours = new ArrayList<>();
- Mat hierarchy = new Mat();
- // 寻找轮廓
- Imgproc.findContours(
- bitmap,
- contours,
- hierarchy,
- Imgproc.RETR_LIST,
- Imgproc.CHAIN_APPROX_SIMPLE);
-
- int num_contours = Math.min(contours.size(), max_candidates);
- NDList boxList = new NDList();
- float[] scores = new float[num_contours];
-
- for (int index = 0; index < num_contours; index++) {
- MatOfPoint contour = contours.get(index);
- MatOfPoint2f newContour = new MatOfPoint2f(contour.toArray());
- float[][] pointsArr = new float[4][2];
- int sside = get_mini_boxes(newContour, pointsArr);
- if (sside < this.min_size)
- continue;
- NDArray points = manager.create(pointsArr);
- float score = box_score_fast(manager, pred, points);
- if (score < this.box_thresh)
- continue;
-
- NDArray box = unclip(manager, points); // TODO get_mini_boxes(box)
-
- // box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
- NDArray boxes1 = box.get(":,0").div(width).mul(dest_width).round().clip(0, dest_width);
- box.set(new NDIndex(":, 0"), boxes1);
- // box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0, dest_height)
- NDArray boxes2 = box.get(":,1").div(height).mul(dest_height).round().clip(0, dest_height);
- box.set(new NDIndex(":, 1"), boxes2);
-
- boxList.add(box);
- scores[index] = score;
-
- // release memory
- contour.release();
- newContour.release();
- }
-
- // release
- hierarchy.release();
-
- NDArray boxes = null;
- if (boxList.size() > 0) {
- boxes = NDArrays.stack(boxList);
- return boxes;
- }
-
- return boxes;
-
-
- }
-
- /**
- * Shrink or expand the boxaccording to 'unclip_ratio'
- *
- * @param points The predicted box.
- * @return uncliped box
- */
- private NDArray unclip(NDManager manager, NDArray points) {
- points = order_points_clockwise(points);
- float[] pointsArr = points.toFloatArray();
- float[] lt = java.util.Arrays.copyOfRange(pointsArr, 0, 2);
- float[] lb = java.util.Arrays.copyOfRange(pointsArr, 6, 8);
-
- float[] rt = java.util.Arrays.copyOfRange(pointsArr, 2, 4);
- float[] rb = java.util.Arrays.copyOfRange(pointsArr, 4, 6);
-
- float width = distance(lt, rt);
- float height = distance(lt, lb);
-
- if (width > height) {
- float k = (lt[1] - rt[1]) / (lt[0] - rt[0]); // y = k * x + b
-
- float delta_dis = height;
- float delta_x = (float) Math.sqrt((delta_dis * delta_dis) / (k * k + 1));
- float delta_y = Math.abs(k * delta_x);
-
- if (k > 0) {
- pointsArr[0] = lt[0] - delta_x + delta_y;
- pointsArr[1] = lt[1] - delta_y - delta_x;
- pointsArr[2] = rt[0] + delta_x + delta_y;
- pointsArr[3] = rt[1] + delta_y - delta_x;
-
- pointsArr[4] = rb[0] + delta_x - delta_y;
- pointsArr[5] = rb[1] + delta_y + delta_x;
- pointsArr[6] = lb[0] - delta_x - delta_y;
- pointsArr[7] = lb[1] - delta_y + delta_x;
- } else {
- pointsArr[0] = lt[0] - delta_x - delta_y;
- pointsArr[1] = lt[1] + delta_y - delta_x;
- pointsArr[2] = rt[0] + delta_x - delta_y;
- pointsArr[3] = rt[1] - delta_y - delta_x;
-
- pointsArr[4] = rb[0] + delta_x + delta_y;
- pointsArr[5] = rb[1] - delta_y + delta_x;
- pointsArr[6] = lb[0] - delta_x + delta_y;
- pointsArr[7] = lb[1] + delta_y + delta_x;
- }
- } else {
- float k = (lt[1] - rt[1]) / (lt[0] - rt[0]); // y = k * x + b
-
- float delta_dis = width;
- float delta_y = (float) Math.sqrt((delta_dis * delta_dis) / (k * k + 1));
- float delta_x = Math.abs(k * delta_y);
-
- if (k > 0) {
- pointsArr[0] = lt[0] + delta_x - delta_y;
- pointsArr[1] = lt[1] - delta_y - delta_x;
- pointsArr[2] = rt[0] + delta_x + delta_y;
- pointsArr[3] = rt[1] - delta_y + delta_x;
-
- pointsArr[4] = rb[0] - delta_x + delta_y;
- pointsArr[5] = rb[1] + delta_y + delta_x;
- pointsArr[6] = lb[0] - delta_x - delta_y;
- pointsArr[7] = lb[1] + delta_y - delta_x;
- } else {
- pointsArr[0] = lt[0] - delta_x - delta_y;
- pointsArr[1] = lt[1] - delta_y + delta_x;
- pointsArr[2] = rt[0] - delta_x + delta_y;
- pointsArr[3] = rt[1] - delta_y - delta_x;
-
- pointsArr[4] = rb[0] + delta_x + delta_y;
- pointsArr[5] = rb[1] + delta_y - delta_x;
- pointsArr[6] = lb[0] + delta_x - delta_y;
- pointsArr[7] = lb[1] + delta_y + delta_x;
- }
- }
- points = manager.create(pointsArr).reshape(4, 2);
-
- return points;
- }
-
- private float distance(float[] point1, float[] point2) {
- float disX = point1[0] - point2[0];
- float disY = point1[1] - point2[1];
- float dis = (float) Math.sqrt(disX * disX + disY * disY);
- return dis;
- }
-
- /**
- * Get boxes from the contour or box.
- *
- * @param contour The predicted contour.
- * @param pointsArr The predicted box.
- * @return smaller side of box
- */
- private int get_mini_boxes(MatOfPoint2f contour, float[][] pointsArr) {
- // https://blog.csdn.net/qq_37385726/article/details/82313558
- // bounding_box[1] - rect 返回矩形的长和宽
- RotatedRect rect = Imgproc.minAreaRect(contour);
- Mat points = new Mat();
- Imgproc.boxPoints(rect, points);
-
- float[][] fourPoints = new float[4][2];
- for (int row = 0; row < 4; row++) {
- fourPoints[row][0] = (float) points.get(row, 0)[0];
- fourPoints[row][1] = (float) points.get(row, 1)[0];
- }
-
- float[] tmpPoint = new float[2];
- for (int i = 0; i < 4; i++) {
- for (int j = i + 1; j < 4; j++) {
- if (fourPoints[j][0] < fourPoints[i][0]) {
- tmpPoint[0] = fourPoints[i][0];
- tmpPoint[1] = fourPoints[i][1];
- fourPoints[i][0] = fourPoints[j][0];
- fourPoints[i][1] = fourPoints[j][1];
- fourPoints[j][0] = tmpPoint[0];
- fourPoints[j][1] = tmpPoint[1];
- }
- }
- }
-
- int index_1 = 0;
- int index_2 = 1;
- int index_3 = 2;
- int index_4 = 3;
-
- if (fourPoints[1][1] > fourPoints[0][1]) {
- index_1 = 0;
- index_4 = 1;
- } else {
- index_1 = 1;
- index_4 = 0;
- }
-
- if (fourPoints[3][1] > fourPoints[2][1]) {
- index_2 = 2;
- index_3 = 3;
- } else {
- index_2 = 3;
- index_3 = 2;
- }
-
- pointsArr[0] = fourPoints[index_1];
- pointsArr[1] = fourPoints[index_2];
- pointsArr[2] = fourPoints[index_3];
- pointsArr[3] = fourPoints[index_4];
-
- int height = rect.boundingRect().height;
- int width = rect.boundingRect().width;
- int sside = Math.min(height, width);
-
- // release
- points.release();
-
- return sside;
- }
-
- /**
- * Calculate the score of box.
- *
- * @param bitmap The binarized image predicted by DB.
- * @param points The predicted box
- * @return
- */
- private float box_score_fast(NDManager manager, NDArray bitmap, NDArray points) {
- NDArray box = points.get(":");
- long h = bitmap.getShape().get(0);
- long w = bitmap.getShape().get(1);
- // xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1)
- int xmin = box.get(":, 0").min().floor().clip(0, w - 1).toType(DataType.INT32, true).toIntArray()[0];
- int xmax = box.get(":, 0").max().ceil().clip(0, w - 1).toType(DataType.INT32, true).toIntArray()[0];
- int ymin = box.get(":, 1").min().floor().clip(0, h - 1).toType(DataType.INT32, true).toIntArray()[0];
- int ymax = box.get(":, 1").max().ceil().clip(0, h - 1).toType(DataType.INT32, true).toIntArray()[0];
-
- NDArray mask = manager.zeros(new Shape(ymax - ymin + 1, xmax - xmin + 1), DataType.UINT8);
-
- box.set(new NDIndex(":, 0"), box.get(":, 0").sub(xmin));
- box.set(new NDIndex(":, 1"), box.get(":, 1").sub(ymin));
-
- //mask - convert from NDArray to Mat
- Mat maskMat = NDArrayUtils.uint8NDArrayToMat(mask);
-
- //mask - convert from NDArray to Mat - 4 rows, 2 cols
- Mat boxMat = NDArrayUtils.floatNDArrayToMat(box, CvType.CV_32S);
-
-// boxMat.reshape(1, new int[]{1, 4, 2});
- List pts = new ArrayList<>();
- MatOfPoint matOfPoint = NDArrayUtils.matToMatOfPoint(boxMat); // new MatOfPoint(boxMat);
- pts.add(matOfPoint);
- Imgproc.fillPoly(maskMat, pts, new Scalar(1));
-
-
- NDArray subBitMap = bitmap.get(ymin + ":" + (ymax + 1) + "," + xmin + ":" + (xmax + 1));
- Mat bitMapMat = NDArrayUtils.floatNDArrayToMat(subBitMap);
-
- Scalar score = Core.mean(bitMapMat, maskMat);
- float scoreValue = (float) score.val[0];
- // release
- maskMat.release();
- boxMat.release();
- bitMapMat.release();
-
- return scoreValue;
- }
-
- @Override
- public NDList processInput(TranslatorContext ctx, Image input) {
- NDArray img = input.toNDArray(ctx.getNDManager());
- int h = input.getHeight();
- int w = input.getWidth();
- img_height = h;
- img_width = w;
-
- // limit the max side
- float ratio = 1.0f;
- if (Math.max(h, w) > limit_side_len) {
- if (h > w) {
- ratio = (float) limit_side_len / (float) h;
- } else {
- ratio = (float) limit_side_len / (float) w;
- }
- }
-
- int resize_h = (int) (h * ratio);
- int resize_w = (int) (w * ratio);
-
- resize_h = Math.round((float) resize_h / 32f) * 32;
- resize_w = Math.round((float) resize_w / 32f) * 32;
-
- ratio_h = resize_h / (float) h;
- ratio_w = resize_w / (float) w;
-
- img = NDImageUtils.resize(img, resize_w, resize_h);
-
- img = NDImageUtils.toTensor(img);
-
- img =
- NDImageUtils.normalize(
- img,
- new float[]{0.485f, 0.456f, 0.406f},
- new float[]{0.229f, 0.224f, 0.225f});
-
- img = img.expandDims(0);
-
- return new NDList(img);
- }
-
- @Override
- public Batchifier getBatchifier() {
- return null;
- }
-}
\ No newline at end of file
diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/detection/OcrV4Detection.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/detection/OcrV4Detection.java
deleted file mode 100644
index f2e2bc8..0000000
--- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/detection/OcrV4Detection.java
+++ /dev/null
@@ -1,36 +0,0 @@
-package com.litongjava.djl.paddle.ocr.v4.detection;
-
-import ai.djl.modality.cv.Image;
-import ai.djl.ndarray.NDList;
-import ai.djl.repository.zoo.Criteria;
-import ai.djl.training.util.ProgressBar;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.concurrent.ConcurrentHashMap;
-import java.nio.file.Paths;
-
-/**
- * 文字检测
- */
-public final class OcrV4Detection {
- /**
- * 中文文本检测
- *
- * @return
- */
- public Criteria chDetCriteria() {
- Criteria criteria =
- Criteria.builder()
- .optEngine("OnnxRuntime")
- // .optModelName("inference")
- .setTypes(Image.class, NDList.class)
- .optModelPath(Paths.get("models/ch_PP-OCRv4_det_infer/inference.onnx"))
- .optTranslator(new OCRDetectionTranslator(new ConcurrentHashMap()))
- .optProgress(new ProgressBar())
- .build();
-
- return criteria;
- }
-
-}
\ No newline at end of file
diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/opencv/NDArrayUtils.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/opencv/NDArrayUtils.java
deleted file mode 100644
index 1e3bc4c..0000000
--- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/opencv/NDArrayUtils.java
+++ /dev/null
@@ -1,236 +0,0 @@
-package com.litongjava.djl.paddle.ocr.v4.opencv;
-
-import ai.djl.ndarray.NDArray;
-import org.opencv.core.CvType;
-import org.opencv.core.Mat;
-import org.opencv.core.MatOfPoint;
-import org.opencv.core.Point;
-
-import java.util.ArrayList;
-import java.util.List;
-
-public class NDArrayUtils {
- /**
- * Mat To MatOfPoint
- *
- * @param mat
- * @return
- */
- public static MatOfPoint matToMatOfPoint(Mat mat) {
- int rows = mat.rows();
- MatOfPoint matOfPoint = new MatOfPoint();
-
- List list = new ArrayList<>();
- for (int i = 0; i < rows; i++) {
- Point point = new Point((float) mat.get(i, 0)[0], (float) mat.get(i, 1)[0]);
- list.add(point);
- }
- matOfPoint.fromList(list);
-
- return matOfPoint;
- }
-
- /**
- * float NDArray To float[][] Array
- *
- * @param ndArray
- * @return
- */
- public static float[][] floatNDArrayToArray(NDArray ndArray) {
- int rows = (int) (ndArray.getShape().get(0));
- int cols = (int) (ndArray.getShape().get(1));
- float[][] arr = new float[rows][cols];
-
- float[] arrs = ndArray.toFloatArray();
- for (int i = 0; i < rows; i++) {
- for (int j = 0; j < cols; j++) {
- arr[i][j] = arrs[i * cols + j];
- }
- }
- return arr;
- }
-
- /**
- * Mat To double[][] Array
- *
- * @param mat
- * @return
- */
- public static double[][] matToDoubleArray(Mat mat) {
- int rows = mat.rows();
- int cols = mat.cols();
-
- double[][] doubles = new double[rows][cols];
-
- for (int i = 0; i < rows; i++) {
- for (int j = 0; j < cols; j++) {
- doubles[i][j] = mat.get(i, j)[0];
- }
- }
-
- return doubles;
- }
-
- /**
- * Mat To float[][] Array
- *
- * @param mat
- * @return
- */
- public static float[][] matToFloatArray(Mat mat) {
- int rows = mat.rows();
- int cols = mat.cols();
-
- float[][] floats = new float[rows][cols];
-
- for (int i = 0; i < rows; i++) {
- for (int j = 0; j < cols; j++) {
- floats[i][j] = (float) mat.get(i, j)[0];
- }
- }
-
- return floats;
- }
-
- /**
- * Mat To byte[][] Array
- *
- * @param mat
- * @return
- */
- public static byte[][] matToUint8Array(Mat mat) {
- int rows = mat.rows();
- int cols = mat.cols();
-
- byte[][] bytes = new byte[rows][cols];
-
- for (int i = 0; i < rows; i++) {
- for (int j = 0; j < cols; j++) {
- bytes[i][j] = (byte) mat.get(i, j)[0];
- }
- }
-
- return bytes;
- }
-
- /**
- * float NDArray To float[][] Array
- *
- * @param ndArray
- * @param cvType
- * @return
- */
- public static Mat floatNDArrayToMat(NDArray ndArray, int cvType) {
- int rows = (int) (ndArray.getShape().get(0));
- int cols = (int) (ndArray.getShape().get(1));
- Mat mat = new Mat(rows, cols, cvType);
-
- float[] arrs = ndArray.toFloatArray();
- for (int i = 0; i < rows; i++) {
- for (int j = 0; j < cols; j++) {
- mat.put(i, j, arrs[i * cols + j]);
- }
- }
- return mat;
- }
-
- /**
- * float NDArray To Mat
- *
- * @param ndArray
- * @return
- */
- public static Mat floatNDArrayToMat(NDArray ndArray) {
- int rows = (int) (ndArray.getShape().get(0));
- int cols = (int) (ndArray.getShape().get(1));
- Mat mat = new Mat(rows, cols, CvType.CV_32F);
-
- float[] arrs = ndArray.toFloatArray();
- for (int i = 0; i < rows; i++) {
- for (int j = 0; j < cols; j++) {
- mat.put(i, j, arrs[i * cols + j]);
- }
- }
-
- return mat;
-
- }
-
- /**
- * uint8 NDArray To Mat
- *
- * @param ndArray
- * @return
- */
- public static Mat uint8NDArrayToMat(NDArray ndArray) {
- int rows = (int) (ndArray.getShape().get(0));
- int cols = (int) (ndArray.getShape().get(1));
- Mat mat = new Mat(rows, cols, CvType.CV_8U);
-
- byte[] arrs = ndArray.toByteArray();
-
- for (int i = 0; i < rows; i++) {
- for (int j = 0; j < cols; j++) {
- mat.put(i, j, arrs[i * cols + j]);
- }
- }
- return mat;
- }
-
- /**
- * float[][] Array To Mat
- *
- * @param arr
- * @return
- */
- public static Mat floatArrayToMat(float[][] arr) {
- int rows = arr.length;
- int cols = arr[0].length;
- Mat mat = new Mat(rows, cols, CvType.CV_32F);
-
- for (int i = 0; i < rows; i++) {
- for (int j = 0; j < cols; j++) {
- mat.put(i, j, arr[i][j]);
- }
- }
-
- return mat;
- }
-
- /**
- * byte[][] Array To Mat
- *
- * @param arr
- * @return
- */
- public static Mat uint8ArrayToMat(byte[][] arr) {
- int rows = arr.length;
- int cols = arr[0].length;
- Mat mat = new Mat(rows, cols, CvType.CV_8U);
-
- for (int i = 0; i < rows; i++) {
- for (int j = 0; j < cols; j++) {
- mat.put(i, j, arr[i][j]);
- }
- }
-
- return mat;
- }
-
- /**
- * List To Mat
- *
- * @param points
- * @return
- */
- public static Mat toMat(List points) {
- Mat mat = new Mat(points.size(), 2, CvType.CV_32F);
- for (int i = 0; i < points.size(); i++) {
- ai.djl.modality.cv.output.Point point = points.get(i);
- mat.put(i, 0, (float) point.getX());
- mat.put(i, 1, (float) point.getY());
- }
-
- return mat;
- }
-}
\ No newline at end of file
diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/opencv/OpenCVUtils.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/opencv/OpenCVUtils.java
deleted file mode 100644
index 81c0f44..0000000
--- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/opencv/OpenCVUtils.java
+++ /dev/null
@@ -1,60 +0,0 @@
-package com.litongjava.djl.paddle.ocr.v4.opencv;
-
-import org.opencv.core.CvType;
-import org.opencv.core.Mat;
-import org.opencv.imgproc.Imgproc;
-
-import java.awt.image.BufferedImage;
-import java.awt.image.DataBufferByte;
-
-public class OpenCVUtils {
-
- /**
- * 透视变换
- *
- * @param src
- * @param srcPoints
- * @param dstPoints
- * @return
- */
- public static Mat perspectiveTransform(Mat src, Mat srcPoints, Mat dstPoints) {
- Mat dst = src.clone();
- Mat warp_mat = Imgproc.getPerspectiveTransform(srcPoints, dstPoints);
- Imgproc.warpPerspective(src, dst, warp_mat, dst.size());
- warp_mat.release();
-
- return dst;
- }
-
- /**
- * Mat to BufferedImage
- *
- * @param mat
- * @return
- */
- public static BufferedImage mat2Image(Mat mat) {
- int width = mat.width();
- int height = mat.height();
- byte[] data = new byte[width * height * (int) mat.elemSize()];
- Imgproc.cvtColor(mat, mat, 4);
- mat.get(0, 0, data);
- BufferedImage ret = new BufferedImage(width, height, 5);
- ret.getRaster().setDataElements(0, 0, width, height, data);
- return ret;
- }
-
- /**
- * BufferedImage to Mat
- *
- * @param img
- * @return
- */
- public static Mat image2Mat(BufferedImage img) {
- int width = img.getWidth();
- int height = img.getHeight();
- byte[] data = ((DataBufferByte) img.getRaster().getDataBuffer()).getData();
- Mat mat = new Mat(height, width, CvType.CV_8UC3);
- mat.put(0, 0, data);
- return mat;
- }
-}
\ No newline at end of file
diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/recognition/OcrV4Recognition.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/recognition/OcrV4Recognition.java
deleted file mode 100644
index 9ce8df4..0000000
--- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/recognition/OcrV4Recognition.java
+++ /dev/null
@@ -1,154 +0,0 @@
-package com.litongjava.djl.paddle.ocr.v4.recognition;
-
-import ai.djl.inference.Predictor;
-import ai.djl.modality.cv.Image;
-import ai.djl.modality.cv.ImageFactory;
-import ai.djl.modality.cv.output.Point;
-import ai.djl.modality.cv.util.NDImageUtils;
-import ai.djl.ndarray.NDArray;
-import ai.djl.ndarray.NDList;
-import ai.djl.ndarray.NDManager;
-import ai.djl.opencv.OpenCVImageFactory;
-import ai.djl.repository.zoo.Criteria;
-import ai.djl.training.util.ProgressBar;
-import ai.djl.translate.TranslateException;
-import com.litongjava.djl.paddle.ocr.v4.common.RotatedBox;
-import com.litongjava.djl.paddle.ocr.v4.opencv.NDArrayUtils;
-import com.litongjava.djl.paddle.ocr.v4.opencv.OpenCVUtils;
-import org.opencv.core.Mat;
-
-import java.awt.image.BufferedImage;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.ConcurrentHashMap;
-
-/**
- * 文字识别
- */
-public final class OcrV4Recognition {
-
-
- /**
- * 中文简体
- *
- * @return
- */
- public Criteria chRecCriteria() {
- Path modelPath = Paths.get("models/ch_PP-OCRv4_rec_infer/inference.onnx");
- Criteria criteria =
- Criteria.builder()
- .optEngine("OnnxRuntime")
- //.optModelName("inference")
- .setTypes(Image.class, String.class)
- .optModelPath(modelPath)
- .optProgress(new ProgressBar())
- .optTranslator(new PpWordRecTranslator(new ConcurrentHashMap()))
- .build();
- return criteria;
- }
-
-
- /**
- * 图像推理
- *
- * @param manager
- * @param image
- * @param detector
- * @param recognizer
- * @return
- * @throws TranslateException
- */
- public List predict(NDManager manager,
- Image image, Predictor detector, Predictor recognizer)
- throws TranslateException {
- NDList boxes = detector.predict(image);
- if (boxes == null) {
- return null;
- }
- // 交给 NDManager自动管理内存
- // attach to manager for automatic memory management
- boxes.attach(manager);
-
- List result = new ArrayList<>();
-
- Mat mat = (Mat) image.getWrappedImage();
-
- for (int i = 0; i < boxes.size(); i++) {
- NDArray box = boxes.get(i);
-
- float[] pointsArr = box.toFloatArray();
- float[] lt = java.util.Arrays.copyOfRange(pointsArr, 0, 2);
- float[] rt = java.util.Arrays.copyOfRange(pointsArr, 2, 4);
- float[] rb = java.util.Arrays.copyOfRange(pointsArr, 4, 6);
- float[] lb = java.util.Arrays.copyOfRange(pointsArr, 6, 8);
- int img_crop_width = (int) Math.max(distance(lt, rt), distance(rb, lb));
- int img_crop_height = (int) Math.max(distance(lt, lb), distance(rt, rb));
- List srcPoints = new ArrayList<>();
- srcPoints.add(new Point(lt[0], lt[1]));
- srcPoints.add(new Point(rt[0], rt[1]));
- srcPoints.add(new Point(rb[0], rb[1]));
- srcPoints.add(new Point(lb[0], lb[1]));
- List dstPoints = new ArrayList<>();
- dstPoints.add(new Point(0, 0));
- dstPoints.add(new Point(img_crop_width, 0));
- dstPoints.add(new Point(img_crop_width, img_crop_height));
- dstPoints.add(new Point(0, img_crop_height));
-
- Mat srcPoint2f = NDArrayUtils.toMat(srcPoints);
- Mat dstPoint2f = NDArrayUtils.toMat(dstPoints);
-
- Mat cvMat = OpenCVUtils.perspectiveTransform(mat, srcPoint2f, dstPoint2f);
-
- Image subImg = OpenCVImageFactory.getInstance().fromImage(cvMat);
-// ImageUtils.saveImage(subImg, i + ".png", "build/output");
-
- subImg = subImg.getSubImage(0, 0, img_crop_width, img_crop_height);
- if (subImg.getHeight() * 1.0 / subImg.getWidth() > 1.5) {
- subImg = rotateImg(manager, subImg);
- }
-
- String name = recognizer.predict(subImg);
- RotatedBox rotatedBox = new RotatedBox(box, name);
- result.add(rotatedBox);
-
- cvMat.release();
- srcPoint2f.release();
- dstPoint2f.release();
-
- }
-
- return result;
- }
-
- private BufferedImage get_rotate_crop_image(Image image, NDArray box) {
- return null;
- }
-
- /**
- * 欧式距离计算
- *
- * @param point1
- * @param point2
- * @return
- */
- private float distance(float[] point1, float[] point2) {
- float disX = point1[0] - point2[0];
- float disY = point1[1] - point2[1];
- float dis = (float) Math.sqrt(disX * disX + disY * disY);
- return dis;
- }
-
- /**
- * 图片旋转
- *
- * @param manager
- * @param image
- * @return
- */
- private Image rotateImg(NDManager manager, Image image) {
- NDArray rotated = NDImageUtils.rotate90(image.toNDArray(manager), 1);
- return ImageFactory.getInstance().fromNDArray(rotated);
- }
-}
\ No newline at end of file
diff --git a/src/main/java/com/litongjava/djl/paddle/ocr/v4/recognition/PpWordRecTranslator.java b/src/main/java/com/litongjava/djl/paddle/ocr/v4/recognition/PpWordRecTranslator.java
deleted file mode 100644
index 473559c..0000000
--- a/src/main/java/com/litongjava/djl/paddle/ocr/v4/recognition/PpWordRecTranslator.java
+++ /dev/null
@@ -1,121 +0,0 @@
-package com.litongjava.djl.paddle.ocr.v4.recognition;
-
-import ai.djl.Model;
-import ai.djl.modality.cv.Image;
-import ai.djl.modality.cv.util.NDImageUtils;
-import ai.djl.ndarray.NDArray;
-import ai.djl.ndarray.NDList;
-import ai.djl.ndarray.index.NDIndex;
-import ai.djl.ndarray.types.DataType;
-import ai.djl.ndarray.types.Shape;
-import ai.djl.translate.Batchifier;
-import ai.djl.translate.Translator;
-import ai.djl.translate.TranslatorContext;
-import ai.djl.util.Utils;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-
-/**
- * 文字识别前后处理
- */
-public class PpWordRecTranslator implements Translator {
- private List table;
- private final boolean use_space_char;
-
- public PpWordRecTranslator(Map arguments) {
- use_space_char =
- arguments.containsKey("use_space_char")
- ? Boolean.parseBoolean(arguments.get("use_space_char").toString())
- : true;
- }
-
- @Override
- public void prepare(TranslatorContext ctx) throws IOException {
- Model model = ctx.getModel();
- try (InputStream is = model.getArtifact("dict.txt").openStream()) {
- table = Utils.readLines(is, true);
- table.add(0, "blank");
- if (use_space_char) {
- table.add(" ");
- table.add(" ");
- } else {
- table.add("");
- table.add("");
- }
-
- }
- }
-
- @Override
- public String processOutput(TranslatorContext ctx, NDList list) throws IOException {
- StringBuilder sb = new StringBuilder();
- NDArray tokens = list.singletonOrThrow();
-
- long[] indices = tokens.get(0).argMax(1).toLongArray();
- boolean[] selection = new boolean[indices.length];
- Arrays.fill(selection, true);
- for (int i = 1; i < indices.length; i++) {
- if (indices[i] == indices[i - 1]) {
- selection[i] = false;
- }
- }
-
- // 字符置信度
-// float[] probs = new float[indices.length];
-// for (int row = 0; row < indices.length; row++) {
-// NDArray value = tokens.get(0).get(new NDIndex(""+ row +":" + (row + 1) +"," + indices[row] +":" + ( indices[row] + 1)));
-// probs[row] = value.toFloatArray()[0];
-// }
-
- int lastIdx = 0;
- for (int i = 0; i < indices.length; i++) {
- if (selection[i] == true && indices[i] > 0 && !(i > 0 && indices[i] == lastIdx)) {
- sb.append(table.get((int) indices[i]));
- }
- }
- return sb.toString();
- }
-
- @Override
- public NDList processInput(TranslatorContext ctx, Image input) {
- NDArray img = input.toNDArray(ctx.getNDManager(), Image.Flag.COLOR);
- int imgC = 3;
- int imgH = 48;
- int imgW = 320;
-
- float max_wh_ratio = (float) imgW / (float) imgH;
-
- int h = input.getHeight();
- int w = input.getWidth();
- float wh_ratio = (float) w / (float) h;
-
- max_wh_ratio = Math.max(max_wh_ratio, wh_ratio);
- imgW = (int) (imgH * max_wh_ratio);
-
- int resized_w;
- if (Math.ceil(imgH * wh_ratio) > imgW) {
- resized_w = imgW;
- } else {
- resized_w = (int) (Math.ceil(imgH * wh_ratio));
- }
- NDArray resized_image = NDImageUtils.resize(img, resized_w, imgH);
- resized_image = resized_image.transpose(2, 0, 1).toType(DataType.FLOAT32, false);
- resized_image.divi(255f).subi(0.5f).divi(0.5f);
- NDArray padding_im = ctx.getNDManager().zeros(new Shape(imgC, imgH, imgW), DataType.FLOAT32);
- padding_im.set(new NDIndex(":,:,0:" + resized_w), resized_image);
-
- padding_im = padding_im.flip(0);
- padding_im = padding_im.expandDims(0);
- return new NDList(padding_im);
- }
-
- @Override
- public Batchifier getBatchifier() {
- return null;
- }
-
-}
\ No newline at end of file
diff --git a/src/main/java/com/luooqi/ocr/controller/ProcessController.java b/src/main/java/com/luooqi/ocr/controller/ProcessController.java
index f09f020..1080320 100644
--- a/src/main/java/com/luooqi/ocr/controller/ProcessController.java
+++ b/src/main/java/com/luooqi/ocr/controller/ProcessController.java
@@ -1,6 +1,7 @@
package com.luooqi.ocr.controller;
import com.luooqi.ocr.utils.CommUtils;
+
import javafx.geometry.Insets;
import javafx.geometry.Pos;
import javafx.scene.Scene;
diff --git a/src/main/java/com/luooqi/ocr/local/PaddlePaddleOCRV4.java b/src/main/java/com/luooqi/ocr/local/PaddlePaddleOCRV4.java
index 8f443fc..ff77b49 100644
--- a/src/main/java/com/luooqi/ocr/local/PaddlePaddleOCRV4.java
+++ b/src/main/java/com/luooqi/ocr/local/PaddlePaddleOCRV4.java
@@ -1,114 +1,34 @@
package com.luooqi.ocr.local;
-import ai.djl.MalformedModelException;
-import ai.djl.inference.Predictor;
-import ai.djl.modality.cv.Image;
-import ai.djl.ndarray.NDList;
-import ai.djl.ndarray.NDManager;
-import ai.djl.opencv.OpenCVImageFactory;
-import ai.djl.repository.zoo.ModelNotFoundException;
-import ai.djl.repository.zoo.ModelZoo;
-import ai.djl.repository.zoo.ZooModel;
-import com.litongjava.djl.paddle.ocr.v4.common.RotatedBox;
-import com.litongjava.djl.paddle.ocr.v4.common.RotatedBoxCompX;
-import com.litongjava.djl.paddle.ocr.v4.detection.OcrV4Detection;
-import com.litongjava.djl.paddle.ocr.v4.recognition.OcrV4Recognition;
-
import java.io.File;
-import java.io.IOException;
-import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
+
+import com.benjaminwan.ocrlibrary.OcrResult;
+
+import io.github.mymonstercat.Model;
+import io.github.mymonstercat.ocr.InferenceEngine;
/**
* Created by litonglinux@qq.com on 11/23/2023_2:09 AM
*/
public enum PaddlePaddleOCRV4 {
INSTANCE;
- private static OcrV4Detection detection;
- private static OcrV4Recognition recognition;
- private static Predictor detector;
- private static Predictor recognizer;
- private static NDManager manager;
-
- PaddlePaddleOCRV4() {
- }
-
-
- //noting not to do.but init
- public static void init() throws ModelNotFoundException, MalformedModelException, IOException {
- detection = new OcrV4Detection();
- recognition = new OcrV4Recognition();
- ZooModel detectionModel = null;
- ZooModel recognitionModel = null;
- detectionModel = ModelZoo.loadModel(detection.chDetCriteria());
- recognitionModel = ModelZoo.loadModel(recognition.chRecCriteria());
+ static InferenceEngine engine = null;
- detector = detectionModel.newPredictor();
-
- recognizer = recognitionModel.newPredictor();
- manager = NDManager.newBaseManager();
+ PaddlePaddleOCRV4() {
}
- public String ocr(File imageFile) throws Exception {
- Path path = imageFile.toPath();
- Image image = OpenCVImageFactory.getInstance().fromFile(path);
- return ocr(image);
+ // Nothing to do here but initialize the OCR engine.
+ public static void init() {
+ engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4_SERVER);
}
- public String ocr(Image image) throws Exception {
- List detections = recognition.predict(manager, image, detector, recognizer);
- if (detections == null) {
- return null;
- }
-
- List initList = new ArrayList<>();
- for (RotatedBox result : detections) {
- // put low Y value at the head of the queue.
- initList.add(result);
- }
- Collections.sort(initList);
-
- List> lines = new ArrayList<>();
- List line = new ArrayList<>();
- RotatedBoxCompX firstBox = new RotatedBoxCompX(initList.get(0).getBox(), initList.get(0).getText());
- line.add(firstBox);
- lines.add((ArrayList) line);
- for (int i = 1; i < initList.size(); i++) {
- RotatedBoxCompX tmpBox = new RotatedBoxCompX(initList.get(i).getBox(), initList.get(i).getText());
- float y1 = firstBox.getBox().toFloatArray()[1];
- float y2 = tmpBox.getBox().toFloatArray()[1];
- float dis = Math.abs(y2 - y1);
- if (dis < 20) { // 认为是同 1 行 - Considered to be in the same line
- line.add(tmpBox);
- } else { // 换行 - Line break
- firstBox = tmpBox;
- Collections.sort(line);
- line = new ArrayList<>();
- line.add(firstBox);
- lines.add((ArrayList) line);
- }
- }
-
-
- StringBuffer fullText = new StringBuffer();
- for (int i = 0; i < lines.size(); i++) {
- for (int j = 0; j < lines.get(i).size(); j++) {
- String text = lines.get(i).get(j).getText();
- if (text.trim().equals(""))
- continue;
- fullText.append(text + " ");
- }
- fullText.append('\n');
- }
- return fullText.toString();
+ public OcrResult ocr(File imageFile) {
+ return engine.runOcr(imageFile.getAbsolutePath());
}
public void close() {
- detector.close();
- recognizer.close();
+
}
}
diff --git a/src/main/java/com/luooqi/ocr/snap/ScreenCapture.java b/src/main/java/com/luooqi/ocr/snap/ScreenCapture.java
index 0afbe19..b2febe9 100644
--- a/src/main/java/com/luooqi/ocr/snap/ScreenCapture.java
+++ b/src/main/java/com/luooqi/ocr/snap/ScreenCapture.java
@@ -1,11 +1,17 @@
package com.luooqi.ocr.snap;
-import cn.hutool.core.swing.ScreenUtil;
-import cn.hutool.log.StaticLog;
+import java.awt.AWTException;
+import java.awt.Rectangle;
+import java.awt.Robot;
+import java.awt.image.BufferedImage;
+
import com.luooqi.ocr.model.CaptureInfo;
import com.luooqi.ocr.utils.CommUtils;
import com.luooqi.ocr.windows.MainForm;
+
+import cn.hutool.core.swing.ScreenUtil;
+import cn.hutool.log.StaticLog;
import javafx.animation.AnimationTimer;
import javafx.application.Platform;
import javafx.embed.swing.SwingFXUtils;
@@ -17,15 +23,18 @@
import javafx.scene.input.KeyCode;
import javafx.scene.input.KeyEvent;
import javafx.scene.input.MouseButton;
-import javafx.scene.layout.*;
+import javafx.scene.layout.Background;
+import javafx.scene.layout.BackgroundImage;
+import javafx.scene.layout.BackgroundPosition;
+import javafx.scene.layout.BackgroundRepeat;
+import javafx.scene.layout.BackgroundSize;
+import javafx.scene.layout.BorderPane;
+import javafx.scene.layout.Pane;
import javafx.scene.paint.Color;
import javafx.scene.text.Font;
import javafx.scene.text.FontWeight;
import javafx.stage.Stage;
-import java.awt.*;
-import java.awt.image.BufferedImage;
-
/**
* This is the Window which is used from the user to draw the rectangle representing an area on the screen to be captured.
*
@@ -353,23 +362,23 @@ private void repaintCanvas() {
// smart calculation of where the mouse has been dragged
data.rectWidth = (data.mouseXNow > data.mouseXPressed) ? data.mouseXNow - data.mouseXPressed // RIGHT
- : data.mouseXPressed - data.mouseXNow // LEFT
+ : data.mouseXPressed - data.mouseXNow // LEFT
;
data.rectHeight = (data.mouseYNow > data.mouseYPressed) ? data.mouseYNow - data.mouseYPressed // DOWN
- : data.mouseYPressed - data.mouseYNow // UP
+ : data.mouseYPressed - data.mouseYNow // UP
;
data.rectUpperLeftX = // -------->UPPER_LEFT_X
- (data.mouseXNow > data.mouseXPressed) ? data.mouseXPressed // RIGHT
- : data.mouseXNow// LEFT
+ (data.mouseXNow > data.mouseXPressed) ? data.mouseXPressed // RIGHT
+ : data.mouseXNow// LEFT
;
data.rectUpperLeftY = // -------->UPPER_LEFT_Y
- (data.mouseYNow > data.mouseYPressed) ? data.mouseYPressed // DOWN
- : data.mouseYNow // UP
+ (data.mouseYNow > data.mouseYPressed) ? data.mouseYPressed // DOWN
+ : data.mouseYNow // UP
;
gc.strokeRect(data.rectUpperLeftX - 1.00, data.rectUpperLeftY - 1.00, data.rectWidth + 2.00,
- data.rectHeight + 2.00);
+ data.rectHeight + 2.00);
gc.clearRect(data.rectUpperLeftX, data.rectUpperLeftY, data.rectWidth, data.rectHeight);
// draw the text
@@ -378,10 +387,10 @@ private void repaintCanvas() {
gc.setLineWidth(1);
gc.setFill(Color.FIREBRICK);
gc.fillRect(middle - 77, data.rectUpperLeftY < 50 ? data.rectUpperLeftY + 2 : data.rectUpperLeftY - 18.00, 100,
- 18);
+ 18);
gc.setFill(Color.WHITE);
gc.fillText(data.rectWidth + " * " + data.rectHeight, middle - 77 + 9,
- data.rectUpperLeftY < 50 ? data.rectUpperLeftY + 17.00 : data.rectUpperLeftY - 4.00);
+ data.rectUpperLeftY < 50 ? data.rectUpperLeftY + 17.00 : data.rectUpperLeftY - 4.00);
}
}
@@ -418,8 +427,8 @@ public void prepareForCapture() {
mainCanvas.setCursor(Cursor.CROSSHAIR);
initGraphContent();
rootPane.setBackground(new Background(new BackgroundImage(fxImage, BackgroundRepeat.NO_REPEAT,
- BackgroundRepeat.NO_REPEAT, BackgroundPosition.CENTER,
- new BackgroundSize(CaptureInfo.ScreenWidth, CaptureInfo.ScreenHeight, false, false, true, true))));
+ BackgroundRepeat.NO_REPEAT, BackgroundPosition.CENTER,
+ new BackgroundSize(CaptureInfo.ScreenWidth, CaptureInfo.ScreenHeight, false, false, true, true))));
repaintCanvas();
stage.setScene(scene);
stage.setFullScreenExitHint("");
@@ -439,8 +448,8 @@ private void prepareImage() {
try {
mainCanvas.setDisable(true);
image = new Robot().createScreenCapture(new Rectangle(data.rectUpperLeftX + CaptureInfo.ScreenMinX,
- data.rectUpperLeftY + (int) CommUtils.getCrtScreen(stage).getVisualBounds().getMinY(), data.rectWidth,
- data.rectHeight));
+ data.rectUpperLeftY + (int) CommUtils.getCrtScreen(stage).getVisualBounds().getMinY(), data.rectWidth,
+ data.rectHeight));
} catch (AWTException ex) {
StaticLog.error(ex);
return;
diff --git a/src/main/java/com/luooqi/ocr/utils/OcrUtils.java b/src/main/java/com/luooqi/ocr/utils/OcrUtils.java
index 85264de..c0152fe 100644
--- a/src/main/java/com/luooqi/ocr/utils/OcrUtils.java
+++ b/src/main/java/com/luooqi/ocr/utils/OcrUtils.java
@@ -1,7 +1,24 @@
package com.luooqi.ocr.utils;
-import ai.djl.modality.cv.Image;
-import ai.djl.opencv.OpenCVImageFactory;
+import java.awt.Point;
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import javax.imageio.ImageIO;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.rendering.PDFRenderer;
+
+import com.benjaminwan.ocrlibrary.OcrResult;
+import com.luooqi.ocr.local.PaddlePaddleOCRV4;
+import com.luooqi.ocr.model.TextBlock;
+
import cn.hutool.core.codec.Base64;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.UUID;
@@ -17,21 +34,6 @@
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import cn.hutool.log.StaticLog;
-import com.benjaminwan.ocrlibrary.OcrResult;
-import com.luooqi.ocr.local.PaddlePaddleOCRV4;
-import com.luooqi.ocr.model.TextBlock;
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.rendering.PDFRenderer;
-
-import javax.imageio.ImageIO;
-import java.awt.*;
-import java.awt.image.BufferedImage;
-import java.awt.image.DataBufferInt;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.InputStream;
-import java.util.List;
-import java.util.*;
/**
* tools-ocr
@@ -39,7 +41,6 @@
*/
public class OcrUtils {
-
public static String recImgLocal(byte[] imgData) {
String path = "tmp_" + Math.abs(Arrays.hashCode(imgData)) + ".png";
File file = FileUtil.writeBytes(imgData, path);
@@ -54,7 +55,7 @@ public static String recImgLocal(BufferedImage image) {
public static String recImgLocal(File file) {
if (file.exists()) {
try {
- return PaddlePaddleOCRV4.INSTANCE.ocr(file);
+ return extractLocalResult(PaddlePaddleOCRV4.INSTANCE.ocr(file));
} catch (Exception e) {
e.printStackTrace();
return e.getMessage();
@@ -63,7 +64,6 @@ public static String recImgLocal(File file) {
return "文件不存在";
}
-
public static String recPdfLocal(File pdfFile) {
if (pdfFile.exists()) {
try (PDDocument document = PDDocument.load(pdfFile)) {
@@ -90,19 +90,18 @@ public static String recPdfLocal(File pdfFile) {
return null;
}
-
public static String ocrImg(byte[] imgData) {
int i = Math.abs(UUID.randomUUID().hashCode()) % 4;
StaticLog.info("OCR Engine: " + i);
switch (i) {
- case 0:
- return bdGeneralOcr(imgData);
- case 1:
- return bdAccurateOcr(imgData);
- case 2:
- return sogouMobileOcr(imgData);
- default:
- return sogouWebOcr(imgData);
+ case 0:
+ return bdGeneralOcr(imgData);
+ case 1:
+ return bdAccurateOcr(imgData);
+ case 2:
+ return sogouMobileOcr(imgData);
+ default:
+ return sogouWebOcr(imgData);
}
}
@@ -115,7 +114,8 @@ private static String bdAccurateOcr(byte[] imgData) {
}
private static String bdBaseOcr(byte[] imgData, String type) {
- String[] urlArr = new String[]{"http://ai.baidu.com/tech/ocr/general", "http://ai.baidu.com/index/seccode?action=show"};
+ String[] urlArr = new String[] { "http://ai.baidu.com/tech/ocr/general",
+ "http://ai.baidu.com/index/seccode?action=show" };
StringBuilder cookie = new StringBuilder();
for (String url : urlArr) {
HttpResponse cookieResp = WebUtils.get(url);
@@ -129,7 +129,8 @@ private static String bdBaseOcr(byte[] imgData, String type) {
HashMap header = new HashMap<>();
header.put("Referer", "http://ai.baidu.com/tech/ocr/general");
header.put("Cookie", cookie.toString());
- String data = "type=" + URLUtil.encodeQuery(type) + "&detect_direction=false&image_url&image=" + URLUtil.encodeQuery("data:image/jpeg;base64," + Base64.encode(imgData)) + "&language_type=CHN_ENG";
+ String data = "type=" + URLUtil.encodeQuery(type) + "&detect_direction=false&image_url&image="
+ + URLUtil.encodeQuery("data:image/jpeg;base64," + Base64.encode(imgData)) + "&language_type=CHN_ENG";
HttpResponse response = WebUtils.postRaw("http://ai.baidu.com/aidemo", data, 0, header);
return extractBdResult(WebUtils.getSafeHtml(response));
}
@@ -137,9 +138,11 @@ private static String bdBaseOcr(byte[] imgData, String type) {
public static String sogouMobileOcr(byte[] imgData) {
String boundary = "------WebKitFormBoundary8orYTmcj8BHvQpVU";
String url = "http://ocr.shouji.sogou.com/v2/ocr/json";
- String header = boundary + "\r\nContent-Disposition: form-data; name=\"pic\"; filename=\"pic.jpg\"\r\nContent-Type: image/jpeg\r\n\r\n";
+ String header = boundary
+ + "\r\nContent-Disposition: form-data; name=\"pic\"; filename=\"pic.jpg\"\r\nContent-Type: image/jpeg\r\n\r\n";
String footer = "\r\n" + boundary + "--\r\n";
- byte[] postData = CommUtils.mergeByte(header.getBytes(CharsetUtil.CHARSET_ISO_8859_1), imgData, footer.getBytes(CharsetUtil.CHARSET_ISO_8859_1));
+ byte[] postData = CommUtils.mergeByte(header.getBytes(CharsetUtil.CHARSET_ISO_8859_1), imgData,
+ footer.getBytes(CharsetUtil.CHARSET_ISO_8859_1));
return extractSogouResult(CommUtils.postMultiData(url, postData, boundary.substring(2)));
}
@@ -148,7 +151,8 @@ public static String sogouWebOcr(byte[] imgData) {
String referer = "https://deepi.sogou.com/?from=picsearch&tdsourcetag=s_pctim_aiomsg";
String imageData = Base64.encode(imgData);
long t = System.currentTimeMillis();
- String sign = SecureUtil.md5("sogou_ocr_just_for_deepibasicOpenOcr" + t + imageData.substring(0, Math.min(1024, imageData.length())) + "4b66a37108dab018ace616c4ae07e644");
+ String sign = SecureUtil.md5("sogou_ocr_just_for_deepibasicOpenOcr" + t
+ + imageData.substring(0, Math.min(1024, imageData.length())) + "4b66a37108dab018ace616c4ae07e644");
Map data = new HashMap<>();
data.put("image", imageData);
data.put("lang", "zh-Chs");
@@ -178,7 +182,7 @@ private static String extractSogouResult(String html) {
JSONObject jObj = jsonArray.getJSONObject(i);
TextBlock textBlock = new TextBlock();
textBlock.setText(jObj.getStr("content").trim());
- //noinspection SuspiciousToArrayCall
+ // noinspection SuspiciousToArrayCall
String[] frames = jObj.getJSONArray("frame").toArray(new String[0]);
textBlock.setTopLeft(CommUtils.frameToPoint(frames[0]));
textBlock.setTopRight(CommUtils.frameToPoint(frames[1]));
@@ -205,7 +209,7 @@ private static String extractBdResult(String html) {
JSONObject jObj = jsonArray.getJSONObject(i);
TextBlock textBlock = new TextBlock();
textBlock.setText(jObj.getStr("words").trim());
- //noinspection SuspiciousToArrayCall
+ // noinspection SuspiciousToArrayCall
JSONObject location = jObj.getJSONObject("location");
int top = location.getInt("top");
int left = location.getInt("left");
@@ -220,7 +224,6 @@ private static String extractBdResult(String html) {
return CommUtils.combineTextBlocks(textBlocks, isEng);
}
-
private static String extractLocalResult(OcrResult ocrResult) {
if (ocrResult == null) {
return "";
@@ -240,5 +243,4 @@ private static String extractLocalResult(OcrResult ocrResult) {
return CommUtils.combineTextBlocks(textBlocks, isEng);
}
-
}
diff --git a/src/main/java/com/luooqi/ocr/windows/MainForm.java b/src/main/java/com/luooqi/ocr/windows/MainForm.java
index 7f883eb..8053cd5 100644
--- a/src/main/java/com/luooqi/ocr/windows/MainForm.java
+++ b/src/main/java/com/luooqi/ocr/windows/MainForm.java
@@ -1,11 +1,10 @@
package com.luooqi.ocr.windows;
-import ai.djl.MalformedModelException;
-import ai.djl.repository.zoo.ModelNotFoundException;
-import cn.hutool.core.io.FileTypeUtil;
-import cn.hutool.core.thread.ThreadUtil;
-import cn.hutool.core.util.StrUtil;
-import cn.hutool.log.StaticLog;
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
import com.luooqi.ocr.config.InitConfig;
import com.luooqi.ocr.controller.ProcessController;
import com.luooqi.ocr.local.PaddlePaddleOCRV4;
@@ -14,6 +13,11 @@
import com.luooqi.ocr.snap.ScreenCapture;
import com.luooqi.ocr.utils.CommUtils;
import com.luooqi.ocr.utils.OcrUtils;
+
+import cn.hutool.core.io.FileTypeUtil;
+import cn.hutool.core.thread.ThreadUtil;
+import cn.hutool.core.util.StrUtil;
+import cn.hutool.log.StaticLog;
import javafx.application.Platform;
import javafx.beans.property.SimpleStringProperty;
import javafx.geometry.Insets;
@@ -23,7 +27,13 @@
import javafx.scene.control.ToolBar;
import javafx.scene.input.Clipboard;
import javafx.scene.input.DataFormat;
-import javafx.scene.layout.*;
+import javafx.scene.layout.Border;
+import javafx.scene.layout.BorderPane;
+import javafx.scene.layout.BorderStroke;
+import javafx.scene.layout.BorderStrokeStyle;
+import javafx.scene.layout.BorderWidths;
+import javafx.scene.layout.CornerRadii;
+import javafx.scene.layout.HBox;
import javafx.scene.paint.Color;
import javafx.scene.text.Font;
import javafx.scene.text.FontPosture;
@@ -31,12 +41,6 @@
import javafx.stage.Stage;
import lombok.extern.slf4j.Slf4j;
-import java.awt.image.BufferedImage;
-import java.io.File;
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-
/**
* Created by litonglinux@qq.com on 12/9/2023_4:40 PM
*/
@@ -54,8 +58,8 @@ public int hashCode() {
private static ScreenCapture screenCapture;
private static ProcessController processController;
private static TextArea textArea;
- //private static boolean isSegment = true;
- //private static String ocrText = "";
+ // private static boolean isSegment = true;
+ // private static String ocrText = "";
public void init(Stage primaryStage) {
@@ -84,21 +88,15 @@ public void init(Stage primaryStage) {
root.setTop(topBar);
root.setCenter(textArea);
root.setBottom(footerBar);
- root.getStylesheets().addAll(
- getClass().getResource("/css/main.css").toExternalForm()
- );
+ root.getStylesheets().addAll(getClass().getResource("/css/main.css").toExternalForm());
CommUtils.initStage(primaryStage);
mainScene = new Scene(root, 670, 470);
stage.setScene(mainScene);
- //启动引擎,加载模型,如果模型加载错误下屏幕显示错误
+ // Start the engine and load the model; if loading fails, the error is only printed to standard error.
try {
PaddlePaddleOCRV4.init();
- } catch (ModelNotFoundException e) {
- textArea.setText("加载模型出现错误" + e.getMessage());
- } catch (MalformedModelException e) {
- textArea.setText("加载模型出现错误" + e.getMessage());
- } catch (IOException e) {
- textArea.setText("加载模型出现错误" + e.getMessage());
+ } catch (Exception e) {
+ e.printStackTrace();
}
}
@@ -106,7 +104,8 @@ private TextArea getCenter() {
TextArea textArea = new TextArea();
textArea.setId("ocrTextArea");
textArea.setWrapText(true);
- textArea.setBorder(new Border(new BorderStroke(Color.DARKGRAY, BorderStrokeStyle.SOLID, CornerRadii.EMPTY, BorderWidths.DEFAULT)));
+ textArea.setBorder(
+ new Border(new BorderStroke(Color.DARKGRAY, BorderStrokeStyle.SOLID, CornerRadii.EMPTY, BorderWidths.DEFAULT)));
textArea.setFont(Font.font("Arial", FontPosture.REGULAR, 14));
return textArea;
}
@@ -116,21 +115,21 @@ private ToolBar getFooterBar() {
footerBar.setId("statsToolbar");
Label statsLabel = new Label();
SimpleStringProperty statsProperty = new SimpleStringProperty("总字数:0");
- textArea.textProperty().addListener((observable, oldValue, newValue) -> statsProperty.set("总字数:" + newValue.replaceAll(CommUtils.SPECIAL_CHARS, "").length()));
+ textArea.textProperty().addListener((observable, oldValue, newValue) -> statsProperty
+ .set("总字数:" + newValue.replaceAll(CommUtils.SPECIAL_CHARS, "").length()));
statsLabel.textProperty().bind(statsProperty);
footerBar.getItems().add(statsLabel);
return footerBar;
}
private HBox getTopBar() {
- HBox topBar = new HBox(
- CommUtils.createButton("snapBtn", MainForm::screenShotOcr, "截图"),
- CommUtils.createButton("openImageBtn", this::openImageOcr, "打开"),
- CommUtils.createButton("copyBtn", this::copyText, "复制"),
- CommUtils.createButton("pasteBtn", this::pasteText, "粘贴"),
- CommUtils.createButton("clearBtn", this::clearText, "清空"),
- CommUtils.createButton("wrapBtn", this::wrapText, "换行")
- //CommUtils.SEPARATOR, resetBtn, segmentBtn
+ HBox topBar = new HBox(CommUtils.createButton("snapBtn", MainForm::screenShotOcr, "截图"),
+ CommUtils.createButton("openImageBtn", this::openImageOcr, "打开"),
+ CommUtils.createButton("copyBtn", this::copyText, "复制"),
+ CommUtils.createButton("pasteBtn", this::pasteText, "粘贴"),
+ CommUtils.createButton("clearBtn", this::clearText, "清空"),
+ CommUtils.createButton("wrapBtn", this::wrapText, "换行")
+ // CommUtils.SEPARATOR, resetBtn, segmentBtn
);
topBar.setId("topBar");
topBar.setMinHeight(40);
@@ -157,7 +156,6 @@ private void wrapText() {
textArea.setWrapText(!textArea.isWrapText());
}
-
private void clearText() {
textArea.setText("");
}
@@ -167,9 +165,8 @@ private void pasteText() {
if (StrUtil.isBlank(text)) {
return;
}
- textArea.setText(textArea.getText()
- + (StrUtil.isBlank(textArea.getText()) ? "" : "\n")
- + Clipboard.getSystemClipboard().getString());
+ textArea.setText(textArea.getText() + (StrUtil.isBlank(textArea.getText()) ? "" : "\n")
+ + Clipboard.getSystemClipboard().getString());
}
private void copyText() {
@@ -198,24 +195,22 @@ public static void screenShotOcr() {
private void openImageOcr() {
FileChooser fileChooser = new FileChooser();
fileChooser.setTitle("Please Select Image File");
- String[] extensions = {"*.png", "*.jpg", "*.pdf", "*.PDF"};
+ String[] extensions = { "*.png", "*.jpg", "*.pdf", "*.PDF" };
fileChooser.getExtensionFilters().addAll(new FileChooser.ExtensionFilter("Image Files", extensions));
File selectedFile = fileChooser.showOpenDialog(stage);
if (selectedFile == null || !selectedFile.isFile()) {
return;
}
- stageInfo = new StageInfo(stage.getX(), stage.getY(),
- stage.getWidth(), stage.getHeight(), stage.isFullScreen());
+ stageInfo = new StageInfo(stage.getX(), stage.getY(), stage.getWidth(), stage.getHeight(), stage.isFullScreen());
try {
- //BufferedImage image = ImageIO.read(selectedFile);
+ // BufferedImage image = ImageIO.read(selectedFile);
doOcr(selectedFile);
} catch (Exception e) {
StaticLog.error(e);
}
}
-
public static void cancelSnap() {
Platform.runLater(screenCapture::cancelSnap);
}
diff --git a/src/main/resources/images/01.png b/src/main/resources/images/01.png
new file mode 100644
index 0000000..51df578
Binary files /dev/null and b/src/main/resources/images/01.png differ
diff --git a/src/test/java/com/benjaminwan/ocrlibrary/OcrEngineTest.java b/src/test/java/com/benjaminwan/ocrlibrary/OcrEngineTest.java
deleted file mode 100644
index 2a3a681..0000000
--- a/src/test/java/com/benjaminwan/ocrlibrary/OcrEngineTest.java
+++ /dev/null
@@ -1,47 +0,0 @@
-package com.benjaminwan.ocrlibrary;
-
-import cn.hutool.log.StaticLog;
-import com.luooqi.ocr.utils.LibraryUtils;
-import org.junit.Test;
-
-import static org.junit.Assert.*;
-
-/**
- * Created by litonglinux@qq.com on 10/11/2023_3:01 AM
- */
-public class OcrEngineTest {
-
- @Test
- public void test1() {
- // https://github.com/RapidAI/RapidOcrNcnnLibTest/tree/main/resource/models
-
- String libPath = "D:\\lib\\ocr-lib\\win64\\bin";
- LibraryUtils.addLibary(libPath);
-
- OcrEngine ocrEngine = new OcrEngine();
- StaticLog.info("version=" + ocrEngine.getVersion());
- ocrEngine.setNumThread(8);
- //------- init Logger -------
- ocrEngine.initLogger(true, false, false);
- //ocrEngine.enableResultText("");
- ocrEngine.setGpuIndex(-1);
- String modelsDir = "D:\\model\\ppocr-v3-NCNN-models";
- String detName = "ch_PP-OCRv3_det_infer";
- String clsName = "ch_ppocr_mobile_v2.0_cls_infer";
- String recName = "ch_PP-OCRv3_rec_infer";
- String keysName = "ppocr_keys_v1.txt";
-
- boolean initModelsRet = ocrEngine.initModels(modelsDir, detName, clsName, recName, keysName);
- if (!initModelsRet) {
- StaticLog.error("Error in models initialization, please check the models/keys path!");
- return;
- }
- StaticLog.info("padding(%d) boxScoreThresh(%f) boxThresh(%f) unClipRatio(%f) doAngle(%b) mostAngle(%b)", ocrEngine.getPadding(), ocrEngine.getBoxScoreThresh(), ocrEngine.getBoxThresh(), ocrEngine.getUnClipRatio(), ocrEngine.getDoAngle(), ocrEngine.getMostAngle());
-
- String imagePath = "D:\\images\\Snipaste_2023-10-11_02-08-03.png";
- OcrResult ocrResult = ocrEngine.detect(imagePath);
- System.out.println(ocrResult.getStrRes());
-
- }
-
-}
\ No newline at end of file
diff --git a/src/test/java/com/litongjava/RapidOcrTest.java b/src/test/java/com/litongjava/RapidOcrTest.java
new file mode 100644
index 0000000..27090aa
--- /dev/null
+++ b/src/test/java/com/litongjava/RapidOcrTest.java
@@ -0,0 +1,14 @@
+package com.litongjava;
+
+import com.benjaminwan.ocrlibrary.OcrResult;
+
+import io.github.mymonstercat.Model;
+import io.github.mymonstercat.ocr.InferenceEngine;
+
+public class RapidOcrTest {
+ public static void main(String[] args) {
+ InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4_SERVER);
+ OcrResult ocrResult = engine.runOcr("images/01.png");
+ System.out.println(ocrResult.getStrRes().trim());
+ }
+}
diff --git a/src/test/java/com/luooqi/ocr/utils/OcrUtilsTest.java b/src/test/java/com/luooqi/ocr/utils/OcrUtilsTest.java
index 478b01a..d6ea182 100644
--- a/src/test/java/com/luooqi/ocr/utils/OcrUtilsTest.java
+++ b/src/test/java/com/luooqi/ocr/utils/OcrUtilsTest.java
@@ -1,20 +1,12 @@
package com.luooqi.ocr.utils;
-import cn.hutool.core.swing.ScreenUtil;
-import cn.hutool.json.JSONArray;
-import cn.hutool.json.JSONObject;
-import cn.hutool.json.JSONUtil;
-import com.luooqi.ocr.model.TextBlock;
-import javafx.scene.SnapshotParameters;
-import org.junit.Test;
-
-import java.awt.*;
+import java.awt.GraphicsConfiguration;
+import java.awt.GraphicsEnvironment;
+import java.awt.Point;
import java.awt.geom.AffineTransform;
import java.io.File;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
+
+import org.junit.Test;
public class OcrUtilsTest {
@@ -32,7 +24,8 @@ private Point frameToPoint(String text) {
@Test
public void sogouWebOcr() {
- GraphicsConfiguration asdf = GraphicsEnvironment.getLocalGraphicsEnvironment().getDefaultScreenDevice().getDefaultConfiguration();
+ GraphicsConfiguration asdf = GraphicsEnvironment.getLocalGraphicsEnvironment().getDefaultScreenDevice()
+ .getDefaultConfiguration();
AffineTransform asfd2 = asdf.getDefaultTransform();
double scaleX = asfd2.getScaleX();
double scaleY = asfd2.getScaleY();
@@ -46,7 +39,7 @@ public void recPdfLocal() {
}
@Test
- public void recImageLocal(){
+ public void recImageLocal() {
OcrUtils.recImgLocal(new File("temp_1010298_4.png"));
}
}