#!/bin/bash
#
# Entry point used by the Procfile `web` process.
#   1. Links liblapack.so.3 and libblas.so.3 from their own sub-directories into the
#      parent library directory under /app/.apt.
#   2. Starts the application jar.

APT_LIB_DIR=/app/.apt/usr/lib/x86_64-linux-gnu

# -f keeps the step idempotent: a plain `ln -s` fails with "File exists" when the
# script runs again in an environment where the links are already present.
ln -sf "$APT_LIB_DIR/lapack/liblapack.so.3" "$APT_LIB_DIR/"
ln -sf "$APT_LIB_DIR/blas/libblas.so.3" "$APT_LIB_DIR/"

# exec replaces the shell with the JVM so that signals sent to the process
# (e.g. SIGTERM on shutdown) reach Java directly instead of stopping at bash.
# $JVM_OPTIONS is intentionally left unquoted: it may contain several options.
exec java $JVM_OPTIONS -Djna.library.path="$JNA_LIBRARY_PATH" -jar "$APP_DIR/target/$APP_DIR.jar"
java.util.List; + +@Data +@Builder +@Entity +@Table(name = "blurry_file_analysis") +@AllArgsConstructor +@NoArgsConstructor +public class BlurryFileAnalysis implements Serializable { + + @Serial + private static final long serialVersionUID = 2405172041950251807L; + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + @OneToOne(targetEntity = File.class, fetch = FetchType.LAZY) + @JoinColumn(name = "file_id") + private File file; + + @Enumerated(EnumType.STRING) + private BlurryFileAnalysisStatus analysisStatus; + + @JdbcTypeCode(SqlTypes.JSON) + @Column(columnDefinition = "jsonb") + private List blurryResults; + +} diff --git a/dossierfacile-common-library/src/main/java/fr/dossierfacile/common/entity/DocumentRule.java b/dossierfacile-common-library/src/main/java/fr/dossierfacile/common/entity/DocumentRule.java index 6ee6a6b08..9a69b0b09 100644 --- a/dossierfacile-common-library/src/main/java/fr/dossierfacile/common/entity/DocumentRule.java +++ b/dossierfacile-common-library/src/main/java/fr/dossierfacile/common/entity/DocumentRule.java @@ -34,7 +34,9 @@ public enum DocumentRule { R_RENT_RECEIPT_NB_DOCUMENTS(Level.WARN, "Un seul document a été détecté"), R_FRANCE_IDENTITE_NAMES(Level.CRITICAL, "Les noms et prénoms ne correspondent pas"), - R_FRANCE_IDENTITE_STATUS(Level.CRITICAL, "Ce document n'a pas pu être validé par France Identité"); + R_FRANCE_IDENTITE_STATUS(Level.CRITICAL, "Ce document n'a pas pu être validé par France Identité"), + + R_BLURRY_FILE(Level.CRITICAL, "Votre document semble flou"); public enum Level { CRITICAL, WARN diff --git a/dossierfacile-common-library/src/main/java/fr/dossierfacile/common/entity/File.java b/dossierfacile-common-library/src/main/java/fr/dossierfacile/common/entity/File.java index 2cec92d7a..9db8cfba7 100644 --- a/dossierfacile-common-library/src/main/java/fr/dossierfacile/common/entity/File.java +++ b/dossierfacile-common-library/src/main/java/fr/dossierfacile/common/entity/File.java @@ -3,12 
/**
 * Identifies the blur-detection algorithm that produced a score.
 *
 * <p>The constant names are part of the stored analysis results, so they must not be
 * renamed or reordered.
 */
public enum BlurryAlgorithmType {

    /** Variance of the Laplacian response (the name keeps its original spelling). */
    LAPLACIEN,

    /** Mean of the normalized magnitude spectrum around the centre of the shifted FFT. */
    FFT,

    /** Sobel gradient based measure. */
    SOBEL,

    /** Variance of the difference between two Gaussian blurs. */
    DOG
}
8347582394758234758L; +} \ No newline at end of file diff --git a/dossierfacile-common-library/src/main/java/fr/dossierfacile/common/enums/BlurryFileAnalysisStatus.java b/dossierfacile-common-library/src/main/java/fr/dossierfacile/common/enums/BlurryFileAnalysisStatus.java new file mode 100644 index 000000000..e4e15a746 --- /dev/null +++ b/dossierfacile-common-library/src/main/java/fr/dossierfacile/common/enums/BlurryFileAnalysisStatus.java @@ -0,0 +1,6 @@ +package fr.dossierfacile.common.enums; + +public enum BlurryFileAnalysisStatus { + COMPLETED, + FAILED +} diff --git a/dossierfacile-common-library/src/main/java/fr/dossierfacile/common/repository/BlurryFileAnalysisRepository.java b/dossierfacile-common-library/src/main/java/fr/dossierfacile/common/repository/BlurryFileAnalysisRepository.java new file mode 100644 index 000000000..f706a3de8 --- /dev/null +++ b/dossierfacile-common-library/src/main/java/fr/dossierfacile/common/repository/BlurryFileAnalysisRepository.java @@ -0,0 +1,7 @@ +package fr.dossierfacile.common.repository; + +import fr.dossierfacile.common.entity.BlurryFileAnalysis; +import org.springframework.data.jpa.repository.JpaRepository; + +public interface BlurryFileAnalysisRepository extends JpaRepository { +} diff --git a/dossierfacile-common-library/src/main/resources/db/changelog/databaseChangeLog.xml b/dossierfacile-common-library/src/main/resources/db/changelog/databaseChangeLog.xml index aafaad452..f614f9cca 100644 --- a/dossierfacile-common-library/src/main/resources/db/changelog/databaseChangeLog.xml +++ b/dossierfacile-common-library/src/main/resources/db/changelog/databaseChangeLog.xml @@ -166,5 +166,6 @@ + diff --git a/dossierfacile-common-library/src/main/resources/db/migration/202504220000-create-blurry-file-analysis-table.xml b/dossierfacile-common-library/src/main/resources/db/migration/202504220000-create-blurry-file-analysis-table.xml new file mode 100644 index 000000000..d2fce1e98 --- /dev/null +++ 
b/dossierfacile-common-library/src/main/resources/db/migration/202504220000-create-blurry-file-analysis-table.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dossierfacile-common-test-library/src/main/java/fr/dossierfacile/fileAnalysis/FileAnalysisTestData.kt b/dossierfacile-common-test-library/src/main/java/fr/dossierfacile/fileAnalysis/FileAnalysisTestData.kt index 9ce265911..965655e54 100644 --- a/dossierfacile-common-test-library/src/main/java/fr/dossierfacile/fileAnalysis/FileAnalysisTestData.kt +++ b/dossierfacile-common-test-library/src/main/java/fr/dossierfacile/fileAnalysis/FileAnalysisTestData.kt @@ -35,16 +35,21 @@ data class FileAnalysisTestData( val invalidDocuments: List> ) -abstract class DocumentData(open val bucketPath: String, open val fileDescription: F) +abstract class DocumentData(open val bucketPath: String, open val fileDescription: F?) + data class ValidDocumentData( override val bucketPath: String, - override val fileDescription: F, - val expectedResult: T -) : DocumentData(bucketPath, fileDescription) + override val fileDescription: F?, + val expectedResult: T? +) : DocumentData(bucketPath, fileDescription) { + constructor(bucketPath: String) : this(bucketPath, null, null) +} data class InvalidDocumentData( override val bucketPath: String, - override val fileDescription: F, - val expectedError: T -) : DocumentData(bucketPath, fileDescription) \ No newline at end of file + override val fileDescription: F?, + val expectedError: T? 
+) : DocumentData(bucketPath, fileDescription) { + constructor(bucketPath: String) : this(bucketPath, null, null) +} \ No newline at end of file diff --git a/dossierfacile-process-file/pom.xml b/dossierfacile-process-file/pom.xml index f95da299c..616637666 100644 --- a/dossierfacile-process-file/pom.xml +++ b/dossierfacile-process-file/pom.xml @@ -112,6 +112,13 @@ commons-imaging 1.0-alpha3 + + + org.bytedeco + opencv + 4.10.0-1.5.11 + + dossierfacile-process-file diff --git a/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/AnalyzeFileService.java b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/AnalyzeFileService.java index ea09cb533..bae57295a 100644 --- a/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/AnalyzeFileService.java +++ b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/AnalyzeFileService.java @@ -4,6 +4,7 @@ import fr.dossierfacile.process.file.repository.FileRepository; import fr.dossierfacile.process.file.service.processors.BarCodeFileProcessor; import fr.dossierfacile.process.file.service.processors.FileParserProcessor; +import fr.dossierfacile.process.file.service.processors.blurry.BlurryProcessor; import lombok.AllArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Service; @@ -16,6 +17,7 @@ public class AnalyzeFileService { private final BarCodeFileProcessor barCodeFileProcessor; private final FileParserProcessor fileParserProcessor; + private final BlurryProcessor blurryProcessor; private final FileRepository fileRepository; public void processFile(Long fileId) { @@ -23,6 +25,7 @@ public void processFile(Long fileId) { if (optFile.isPresent()) { barCodeFileProcessor.process(optFile.get()); fileParserProcessor.process(optFile.get()); + blurryProcessor.process(optFile.get()); } } } diff --git 
a/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/documentrules/BlurryRulesValidationService.java b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/documentrules/BlurryRulesValidationService.java new file mode 100644 index 000000000..525b6507c --- /dev/null +++ b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/documentrules/BlurryRulesValidationService.java @@ -0,0 +1,102 @@ +package fr.dossierfacile.process.file.service.documentrules; + +import fr.dossierfacile.common.entity.*; +import fr.dossierfacile.common.entity.ocr.BlurryAlgorithmType; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; +import org.springframework.util.CollectionUtils; + +@Service +@RequiredArgsConstructor +@Slf4j +public class BlurryRulesValidationService implements RulesValidationService { + + @Value("${blurry.laplacian.threshold:400}") + private double laplacianThreshold; + @Value("${blurry.sobel.threshold:30}") + private double sobelThreshold; + @Value("${blurry.fft.threshold:170}") + private double fftThreshold; + @Value("${blurry.dog.threshold:20}") + private double dogThreshold; + + @Override + public boolean shouldBeApplied(Document document) { + return document.getFiles().stream().allMatch(file -> file.getBlurryFileAnalysis() != null) + && !CollectionUtils.isEmpty(document.getFiles()); + } + + @Override + public DocumentAnalysisReport process(Document document, DocumentAnalysisReport report) { + var isBlurryDocument = false; + for (var file : document.getFiles()) { + if (isFileBlurry(file.getBlurryFileAnalysis())) { + isBlurryDocument = true; + } + } + if (isBlurryDocument) { + report.getBrokenRules().add(DocumentBrokenRule.builder() + .rule(DocumentRule.R_BLURRY_FILE) + .message(DocumentRule.R_BLURRY_FILE.getDefaultMessage()) + .build()); + 
report.setAnalysisStatus(DocumentAnalysisStatus.DENIED); + } + if (report.getBrokenRules().isEmpty()) { + report.setAnalysisStatus(DocumentAnalysisStatus.CHECKED); + } else if (report.getBrokenRules().stream().anyMatch(r -> r.getRule().getLevel() == DocumentRule.Level.CRITICAL)) { + report.setAnalysisStatus(DocumentAnalysisStatus.DENIED); + } else { + report.setAnalysisStatus(DocumentAnalysisStatus.UNDEFINED); + } + + return report; + } + + private boolean isFileBlurry(BlurryFileAnalysis blurryFileAnalysis) { + var score = 0; + if (checkLaplacianBlurryFile(blurryFileAnalysis)) { + score++; + } + if (checkSobelBlurryFile(blurryFileAnalysis)) { + score++; + } + if (checkFFTBlurryFile(blurryFileAnalysis)) { + score++; + } + if (checkDogBlurryFile(blurryFileAnalysis)) { + score++; + } + return score >= 3; + } + + private boolean checkLaplacianBlurryFile(BlurryFileAnalysis blurryFileAnalysis) { + var laplacianResult = blurryFileAnalysis.getBlurryResults().stream().filter(item -> item.algorithm() == BlurryAlgorithmType.LAPLACIEN).findFirst(); + return laplacianResult + .filter(blurryResult -> blurryResult.score() < laplacianThreshold) + .isPresent(); + } + + private boolean checkSobelBlurryFile(BlurryFileAnalysis blurryFileAnalysis) { + var sobelResult = blurryFileAnalysis.getBlurryResults().stream().filter(item -> item.algorithm() == BlurryAlgorithmType.SOBEL).findFirst(); + return sobelResult + .filter(blurryResult -> blurryResult.score() < sobelThreshold) + .isPresent(); + } + + private boolean checkFFTBlurryFile(BlurryFileAnalysis blurryFileAnalysis) { + var fftResult = blurryFileAnalysis.getBlurryResults().stream().filter(item -> item.algorithm() == BlurryAlgorithmType.FFT).findFirst(); + return fftResult + .filter(blurryResult -> blurryResult.score() > fftThreshold) + .isPresent(); + } + + private boolean checkDogBlurryFile(BlurryFileAnalysis blurryFileAnalysis) { + var fftResult = blurryFileAnalysis.getBlurryResults().stream().filter(item -> item.algorithm() == 
BlurryAlgorithmType.DOG).findFirst(); + return fftResult + .filter(blurryResult -> blurryResult.score() < dogThreshold) + .isPresent(); + } + +} \ No newline at end of file diff --git a/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/parsers/AbstractImagesParser.java b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/parsers/AbstractImagesParser.java index eecd2a50b..b5d75b7a0 100644 --- a/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/parsers/AbstractImagesParser.java +++ b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/parsers/AbstractImagesParser.java @@ -3,6 +3,7 @@ import fr.dossierfacile.common.entity.ocr.ParsedFile; import fr.dossierfacile.common.utils.FileUtility; import fr.dossierfacile.process.file.service.parsers.tools.PageExtractorModel; +import fr.dossierfacile.process.file.util.ImageUtils; import fr.dossierfacile.process.file.util.MemoryUtils; import lombok.extern.slf4j.Slf4j; import net.sourceforge.tess4j.ITessAPI; @@ -37,25 +38,10 @@ void init() { } } - private BufferedImage[] getImages(File file) throws IOException { - if ("pdf".equalsIgnoreCase(FilenameUtils.getExtension(file.getName()))) { - BufferedImage[] images = FileUtility.convertPdfToImage(file); - if (images == null || images.length < 1) { - throw new IllegalStateException("pdf file cannot be convert to images"); - } - return images; - } - BufferedImage image = ImageIO.read(file); - if (image == null) { - throw new IllegalStateException("image cannot be extracted from file " + file.getName()); - } - return new BufferedImage[]{image}; - } - @Override public T parse(File file) { try { - return parse(getImages(file)); + return parse(ImageUtils.getImagesFromFile(file)); } catch (IOException e) { log.error("Unable to read Image"); return null; diff --git a/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/processors/blurry/BlurryProcessor.java 
b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/processors/blurry/BlurryProcessor.java new file mode 100644 index 000000000..cb0f03be7 --- /dev/null +++ b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/processors/blurry/BlurryProcessor.java @@ -0,0 +1,198 @@ +package fr.dossierfacile.process.file.service.processors.blurry; + +import fr.dossierfacile.common.entity.BlurryFileAnalysis; +import fr.dossierfacile.common.entity.File; +import fr.dossierfacile.common.entity.ocr.BlurryResult; +import fr.dossierfacile.common.enums.BlurryFileAnalysisStatus; +import fr.dossierfacile.common.repository.BlurryFileAnalysisRepository; +import fr.dossierfacile.process.file.repository.FileRepository; +import fr.dossierfacile.process.file.service.StorageFileLoaderService; +import fr.dossierfacile.process.file.service.processors.Processor; +import fr.dossierfacile.process.file.service.processors.blurry.algorithm.BlurryAlgorithm; +import fr.dossierfacile.process.file.util.ImageUtils; +import lombok.extern.slf4j.Slf4j; +import org.opencv.core.CvType; +import org.opencv.core.Mat; +import org.opencv.core.Size; +import org.opencv.imgproc.Imgproc; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; + +import javax.annotation.PostConstruct; +import java.awt.image.BufferedImage; +import java.awt.image.DataBufferByte; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +@Service +@Slf4j +// This processor use OpenCV to analyze if a file is blurry +public class BlurryProcessor implements Processor { + + @Value("${blurry.laplacian.threshold:400}") + private int blurryLaplacianThreshold; + + @Value("${blurry.sobel.threshold:30}") + private int blurrySobelThreshold; + + @Value("${blurry.fft.threshold:170}") + private int blurryFftThreshold; + + @Value("${blurry.dog.threshold:20}") + private int blurryDogThreshold; + + @Value("${opencv.lib.path}") + 
private String opencvLibPath; + + public BlurryProcessor( + List blurryAlgorithms, + StorageFileLoaderService storageFileLoaderService, + BlurryFileAnalysisRepository blurryFileAnalysisRepository, + FileRepository fileRepository + ) { + this.blurryAlgorithms = blurryAlgorithms; + this.storageFileLoaderService = storageFileLoaderService; + this.blurryFileAnalysisRepository = blurryFileAnalysisRepository; + this.fileRepository = fileRepository; + } + + private static final int OPTIMIZED_FILE_DPI = 300; + private final List blurryAlgorithms; + private final StorageFileLoaderService storageFileLoaderService; + private final BlurryFileAnalysisRepository blurryFileAnalysisRepository; + private final FileRepository fileRepository; + + @PostConstruct + public void initBlurryProcessor() { + try { + log.info("Loading OpenCV library from path: {}", opencvLibPath); + System.load(opencvLibPath); + } catch (UnsatisfiedLinkError e) { + log.error("Error loading OpenCV library: {}", e.getMessage()); + } + } + + @Override + public File process(File dfFile) { + long start = System.currentTimeMillis(); + log.info("Starting blurry analysis of file"); + java.io.File file = storageFileLoaderService.getTemporaryFilePath(dfFile.getStorageFile()); + if (file == null) { + log.error("File reading Error"); + return dfFile; + } + + var blurryFileAnalysisBuilder = BlurryFileAnalysis.builder() + .file(dfFile); + + try { + var images = ImageUtils.getImagesFromFile(file); + // We get a blurryResult for each algorithm and each image of a file (multiple images for pdf) + List> listOfBlurryResults = Arrays.stream(images) + .map(image -> { + var img = getOpenCvOptimizedFile(image); + return blurryAlgorithms.stream().map(algo -> algo.getBlurryResult(img)).toList(); + }).toList(); + + blurryFileAnalysisBuilder.blurryResults(getWorstBlurryResult(listOfBlurryResults)); + blurryFileAnalysisBuilder.analysisStatus(BlurryFileAnalysisStatus.COMPLETED); + var analysisResult = blurryFileAnalysisBuilder.build(); + 
blurryFileAnalysisRepository.save(analysisResult); + + dfFile.setBlurryFileAnalysis(analysisResult); + } catch (IOException e) { + log.error("Unable to get Images"); + var analysisResult = blurryFileAnalysisBuilder + .analysisStatus(BlurryFileAnalysisStatus.FAILED) + .build(); + blurryFileAnalysisRepository.save(analysisResult); + dfFile.setBlurryFileAnalysis(analysisResult); + } + + long end = System.currentTimeMillis(); + log.info("Finished blurry analysis of file in {} ms with status : {}", + end - start, + dfFile.getBlurryFileAnalysis() != null ? dfFile.getBlurryFileAnalysis().getAnalysisStatus() : null + ); + + fileRepository.save(dfFile); + + return dfFile; + } + + private List getWorstBlurryResult(List> blurryResults) { + List worstBlurryResult = null; + double worstScore = Double.MIN_VALUE; + + for (var imageBlurryResult : blurryResults) { + double score = calculateBlurryScore(imageBlurryResult); + + if (score > worstScore) { + worstScore = score; + worstBlurryResult = imageBlurryResult; + } + } + + return worstBlurryResult; + } + + private double calculateBlurryScore(List imageBlurryResult) { + return imageBlurryResult.stream() + .mapToDouble(br -> switch (br.algorithm()) { + case LAPLACIEN -> br.score() / blurryLaplacianThreshold; + case SOBEL -> br.score() / blurrySobelThreshold; + case FFT -> blurryFftThreshold / br.score(); + case DOG -> br.score() / blurryDogThreshold; + }) + .sum(); + } + + private Mat getOpenCvOptimizedFile(BufferedImage image) { + Mat img = convert(image); + + final int DPI = OPTIMIZED_FILE_DPI; + final int A4_WIDTH_PX = (int) (8.27 * DPI); + final int A4_HEIGHT_PX = (int) (11.69 * DPI); + + int width = img.width(); + int height = img.height(); + + // Calculate scale factor + double widthScale = (double) A4_WIDTH_PX / width; + double heightScale = (double) A4_HEIGHT_PX / height; + double scale = Math.min(widthScale, heightScale); + + int newWidth = (int) (width * scale); + int newHeight = (int) (height * scale); + + Mat resizedImg 
= new Mat(); + Imgproc.resize(img, resizedImg, new Size(newWidth, newHeight)); + + // Convert to grayscale + Mat gray = new Mat(); + Imgproc.cvtColor(resizedImg, gray, Imgproc.COLOR_BGR2GRAY); + + return gray; + } + + private Mat convert(BufferedImage image) { + int width = image.getWidth(); + int height = image.getHeight(); + Mat mat; + + if (image.getType() == BufferedImage.TYPE_3BYTE_BGR) { + byte[] pixels = ((DataBufferByte) image.getRaster().getDataBuffer()).getData(); + mat = new Mat(height, width, CvType.CV_8UC3); + mat.put(0, 0, pixels); + } else { + BufferedImage convertedImg = new BufferedImage(width, height, BufferedImage.TYPE_3BYTE_BGR); + convertedImg.getGraphics().drawImage(image, 0, 0, null); + byte[] pixels = ((DataBufferByte) convertedImg.getRaster().getDataBuffer()).getData(); + mat = new Mat(height, width, CvType.CV_8UC3); + mat.put(0, 0, pixels); + } + + return mat; + } +} diff --git a/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/processors/blurry/algorithm/BlurryAlgorithm.java b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/processors/blurry/algorithm/BlurryAlgorithm.java new file mode 100644 index 000000000..b5d69a6ab --- /dev/null +++ b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/processors/blurry/algorithm/BlurryAlgorithm.java @@ -0,0 +1,8 @@ +package fr.dossierfacile.process.file.service.processors.blurry.algorithm; + +import fr.dossierfacile.common.entity.ocr.BlurryResult; +import org.opencv.core.Mat; + +public interface BlurryAlgorithm { + BlurryResult getBlurryResult(Mat img); +} diff --git a/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/processors/blurry/algorithm/DifferenceOfGaussiansBlurryAlgorithm.java b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/processors/blurry/algorithm/DifferenceOfGaussiansBlurryAlgorithm.java new file mode 100644 index 000000000..4e702501a --- 
/dev/null +++ b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/processors/blurry/algorithm/DifferenceOfGaussiansBlurryAlgorithm.java @@ -0,0 +1,59 @@ +package fr.dossierfacile.process.file.service.processors.blurry.algorithm; + +import fr.dossierfacile.common.entity.ocr.BlurryAlgorithmType; +import fr.dossierfacile.common.entity.ocr.BlurryResult; +import lombok.AllArgsConstructor; +import org.opencv.core.Core; +import org.opencv.core.Mat; +import org.opencv.core.MatOfDouble; +import org.opencv.core.Size; +import org.opencv.imgproc.Imgproc; +import org.springframework.stereotype.Service; + +/* + * DifferenceOfGaussiansBlurryAlgorithm evaluates image sharpness based on local contrast and texture strength. + * + * The algorithm applies the following steps: + * 1. It applies two Gaussian blurs with different sigma values (σ1 < σ2) to the image. + * 2. It computes the absolute difference between the two blurred images, emphasizing mid-frequency details. + * 3. It calculates the variance (energy) of the resulting Difference of Gaussians (DoG) image. + * + * - A high energy value indicates strong texture and details (sharp image). + * - A low energy value suggests a loss of local contrast, characteristic of blurry images. + * + * Important notes: + * - The Gaussian blur kernel size is controlled by GAUSSIAN_SIZE. + * - σ1 and σ2 are set to detect mid-range spatial frequencies (default: σ1=1.0, σ2=2.0). + * - The result is encapsulated in a BlurryResult with the BlurryAlgorithmType.DOG. + * + * Usage context: + * This method complements edge-based (Sobel, Laplacian) and frequency-based (FFT) approaches, + * enhancing the robustness of the multi-algorithm blur detection system. 
+ */ +@Service +@AllArgsConstructor +public class DifferenceOfGaussiansBlurryAlgorithm implements BlurryAlgorithm { + + private static final int GAUSSIAN_SIZE = 5; + private static final double SIGMA1 = 1.0; + private static final double SIGMA2 = 2.0; + + @Override + public BlurryResult getBlurryResult(Mat img) { + Mat blur1 = new Mat(); + Mat blur2 = new Mat(); + Imgproc.GaussianBlur(img, blur1, new Size(GAUSSIAN_SIZE, GAUSSIAN_SIZE), SIGMA1); + Imgproc.GaussianBlur(img, blur2, new Size(GAUSSIAN_SIZE, GAUSSIAN_SIZE), SIGMA2); + + Mat dog = new Mat(); + Core.absdiff(blur1, blur2, dog); + + MatOfDouble mean = new MatOfDouble(); + MatOfDouble stddev = new MatOfDouble(); + Core.meanStdDev(dog, mean, stddev); + + double energy = Math.pow(stddev.get(0, 0)[0], 2); + + return new BlurryResult(BlurryAlgorithmType.DOG, energy); + } +} diff --git a/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/processors/blurry/algorithm/FFTBlurryAlgorithm.java b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/processors/blurry/algorithm/FFTBlurryAlgorithm.java new file mode 100644 index 000000000..2f4fe4304 --- /dev/null +++ b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/processors/blurry/algorithm/FFTBlurryAlgorithm.java @@ -0,0 +1,117 @@ +package fr.dossierfacile.process.file.service.processors.blurry.algorithm; + +import fr.dossierfacile.common.entity.ocr.BlurryAlgorithmType; +import fr.dossierfacile.common.entity.ocr.BlurryResult; +import lombok.AllArgsConstructor; +import org.opencv.core.*; +import org.opencv.imgproc.Imgproc; +import org.springframework.stereotype.Service; + +import java.util.ArrayList; +import java.util.List; + +/* + * FFTBlurryAlgorithm assesses the sharpness of an image using frequency domain analysis. + * + * The algorithm performs the following steps: + * 1. It applies a Fourier Transform (FFT) to the image to compute its frequency spectrum. + * 2. 
It shifts the low-frequency components to the center of the spectrum (fftShift). + * 3. It analyzes a circular region around the center to calculate the mean energy. + * + * - A low mean energy in the center suggests a rich presence of high frequencies (sharp image). + * - A high mean energy in the center indicates that most of the energy is concentrated in low frequencies (potentially blurry image). + * + * Important notes: + * - The FFT is computed with optimal padding for efficient processing. + * - The magnitude spectrum is log-scaled and normalized for better numerical stability. + * - The radius of the center analysis area is defined by DEFAULT_RADIUS (typically set to 30 pixels). + * + * The final mean value is returned in a BlurryResult with the BlurryAlgorithmType.FFT. + * + * Usage context: + * This algorithm complements spatial-based methods (e.g., Sobel, Laplacian) within a voting system + * to provide a robust multi-perspective assessment of document sharpness. + */ +@Service +@AllArgsConstructor +public class FFTBlurryAlgorithm implements BlurryAlgorithm { + + private static final int DEFAULT_RADIUS = 30; + + @Override + public BlurryResult getBlurryResult(Mat img) { + Mat padded = optimalPadding(img); + // FFT + Mat complexImage = new Mat(); + padded.convertTo(padded, CvType.CV_32F); + Core.dft(padded, complexImage, Core.DFT_COMPLEX_OUTPUT); + + // Magnitude spectrum + Mat magnitude = magnitudeSpectrum(complexImage); + + // Décalage (fftshift) + Mat shiftedMagnitude = fftShift(magnitude); + + // Analyse autour du centre + double fftMean = analyzeCenterRegion(shiftedMagnitude); + + return new BlurryResult(BlurryAlgorithmType.FFT, fftMean); + } + + private Mat optimalPadding(Mat image) { + int rows = Core.getOptimalDFTSize(image.rows()); + int cols = Core.getOptimalDFTSize(image.cols()); + Mat padded = new Mat(); + Core.copyMakeBorder(image, padded, 0, rows - image.rows(), 0, cols - image.cols(), Core.BORDER_CONSTANT, Scalar.all(0)); + return padded; + 
} + + private Mat magnitudeSpectrum(Mat complexImage) { + List planes = new ArrayList<>(); + Core.split(complexImage, planes); + Mat magnitude = new Mat(); + Core.magnitude(planes.get(0), planes.get(1), magnitude); + + Core.add(Mat.ones(magnitude.size(), CvType.CV_32F), magnitude, magnitude); + Core.log(magnitude, magnitude); + + Core.normalize(magnitude, magnitude, 0, 255, Core.NORM_MINMAX); + magnitude.convertTo(magnitude, CvType.CV_8U); + + return magnitude; + } + + private Mat fftShift(Mat input) { + Mat output = input.clone(); + int cx = output.cols() / 2; + int cy = output.rows() / 2; + + Mat q0 = new Mat(output, new Rect(0, 0, cx, cy)); + Mat q1 = new Mat(output, new Rect(cx, 0, cx, cy)); + Mat q2 = new Mat(output, new Rect(0, cy, cx, cy)); + Mat q3 = new Mat(output, new Rect(cx, cy, cx, cy)); + + Mat tmp = new Mat(); + q0.copyTo(tmp); + q3.copyTo(q0); + tmp.copyTo(q3); + + q1.copyTo(tmp); + q2.copyTo(q1); + tmp.copyTo(q2); + + return output; + } + + private double analyzeCenterRegion(Mat magnitude) { + double x = (double) magnitude.cols() / 2; + double y = (double) magnitude.rows() / 2; + Point center = new Point(x, y); + Mat mask = Mat.zeros(magnitude.size(), CvType.CV_8U); + Imgproc.circle(mask, center, DEFAULT_RADIUS, new Scalar(255), -1); + + Scalar meanVal = Core.mean(magnitude, mask); + + return meanVal.val[0]; + } +} diff --git a/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/processors/blurry/algorithm/LaplacianBlurryAlgorithm.java b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/processors/blurry/algorithm/LaplacianBlurryAlgorithm.java new file mode 100644 index 000000000..10b1a6e7b --- /dev/null +++ b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/processors/blurry/algorithm/LaplacianBlurryAlgorithm.java @@ -0,0 +1,49 @@ +package fr.dossierfacile.process.file.service.processors.blurry.algorithm; + +import 
fr.dossierfacile.common.entity.ocr.BlurryAlgorithmType; +import fr.dossierfacile.common.entity.ocr.BlurryResult; +import lombok.AllArgsConstructor; +import org.opencv.core.Core; +import org.opencv.core.CvType; +import org.opencv.core.Mat; +import org.opencv.core.MatOfDouble; +import org.opencv.imgproc.Imgproc; +import org.springframework.stereotype.Service; + +/* + * LaplacianBlurryAlgorithm is a simple and efficient method to assess the sharpness of an image. + * + * The algorithm applies a Laplacian filter to highlight areas of rapid intensity change, + * such as edges and fine details. It then calculates the variance of the Laplacian response. + * + * - A high variance indicates that many edges and details are present (sharp image). + * - A low variance suggests few edges and smooth areas (potentially blurry image). + * + * This variance score can then be compared against a predefined threshold to classify + * whether the document is considered blurry or not. + * + * Note: + * - The Laplacian is computed using 64-bit floating point precision (CvType.CV_64F). + * - The result is wrapped into a BlurryResult object with the type BlurryAlgorithmType.LAPLACIEN. + * + * Usage context: + * This algorithm is part of a multi-algorithm voting system designed to improve document + * blur detection reliability in dossier uploads. 
+ */ +@Service +@AllArgsConstructor +public class LaplacianBlurryAlgorithm implements BlurryAlgorithm { + + @Override + public BlurryResult getBlurryResult(Mat img) { /* score = variance of the Laplacian response, obtained by squaring the standard deviation reported by Core.meanStdDev */ + Mat laplacian = new Mat(); + Imgproc.Laplacian(img, laplacian, CvType.CV_64F); + + MatOfDouble mean = new MatOfDouble(); /* filled by meanStdDev but not read afterwards */ + MatOfDouble stdDev = new MatOfDouble(); + Core.meanStdDev(laplacian, mean, stdDev); + + double variance = Math.pow(stdDev.get(0, 0)[0], 2); /* only the first stdDev entry is used */ + return new BlurryResult(BlurryAlgorithmType.LAPLACIEN, variance); + } +} diff --git a/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/processors/blurry/algorithm/SobelBlurryAlgorithm.java b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/processors/blurry/algorithm/SobelBlurryAlgorithm.java new file mode 100644 index 000000000..fe3e5382a --- /dev/null +++ b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/service/processors/blurry/algorithm/SobelBlurryAlgorithm.java @@ -0,0 +1,54 @@ +package fr.dossierfacile.process.file.service.processors.blurry.algorithm; + +import fr.dossierfacile.common.entity.ocr.BlurryAlgorithmType; +import fr.dossierfacile.common.entity.ocr.BlurryResult; +import lombok.AllArgsConstructor; +import org.opencv.core.Core; +import org.opencv.core.CvType; +import org.opencv.core.Mat; +import org.opencv.core.Scalar; +import org.opencv.imgproc.Imgproc; +import org.springframework.stereotype.Service; + +/* + * SobelBlurryAlgorithm estimates the sharpness of an image based on the intensity of edges detected. + * + * The algorithm applies the Sobel operator separately in the X and Y directions + * to compute gradients, which highlight transitions in pixel intensity (edges). + * It then calculates the magnitude of the gradients and computes the mean magnitude across the entire image. + * + * - A high mean magnitude indicates strong and numerous edges (sharp image). + * - A low mean magnitude suggests weak or few edges (potentially blurry image). 
+ * + * This mean score can be compared against a predefined threshold to determine if + * the document should be classified as blurry. + * + * Note: + * - Gradients are computed using 64-bit floating point precision (CvType.CV_64F). + * - The final result is encapsulated into a BlurryResult with type BlurryAlgorithmType.SOBEL. + * + * Usage context: + * Sobel analysis is combined with other algorithms (Laplacian, FFT, DoG) within a voting system + * to enhance the overall reliability of document blur detection. + */ +@Service +@AllArgsConstructor +public class SobelBlurryAlgorithm implements BlurryAlgorithm { + + @Override + public BlurryResult getBlurryResult(Mat img) { /* score = mean gradient magnitude over the whole image */ + Mat sobelX = new Mat(); + Mat sobelY = new Mat(); + + Imgproc.Sobel(img, sobelX, CvType.CV_64F, 1, 0, 3); /* dx=1, dy=0, kernel size 3: derivative along x */ + Imgproc.Sobel(img, sobelY, CvType.CV_64F, 0, 1, 3); /* dx=0, dy=1, kernel size 3: derivative along y */ + + Mat magnitude = new Mat(); + Core.magnitude(sobelX, sobelY, magnitude); + + Scalar mean = Core.mean(magnitude); + double meanVal = mean.val[0]; /* only the first channel of the mean is used */ + + return new BlurryResult(BlurryAlgorithmType.SOBEL, meanVal); + } +} diff --git a/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/util/ImageUtils.java b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/util/ImageUtils.java index b6c682d33..e69813371 100644 --- a/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/util/ImageUtils.java +++ b/dossierfacile-process-file/src/main/java/fr/dossierfacile/process/file/util/ImageUtils.java @@ -1,11 +1,14 @@ package fr.dossierfacile.process.file.util; +import fr.dossierfacile.common.utils.FileUtility; import lombok.experimental.UtilityClass; +import org.apache.commons.io.FilenameUtils; import org.springframework.util.DigestUtils; import javax.imageio.ImageIO; import java.awt.image.BufferedImage; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; @UtilityClass @@ -18,4 +21,19 @@ public static String md5(BufferedImage image) throws IOException { return 
DigestUtils.md5DigestAsHex(bytes); } } + + public static BufferedImage[] getImagesFromFile(File file) throws IOException { /* returns the images held by a file: a file whose extension is "pdf" (case-insensitive) goes through FileUtility.convertPdfToImage, anything else is read with ImageIO.read and wrapped in a one-element array; throws IllegalStateException when no image could be obtained */ + if ("pdf".equalsIgnoreCase(FilenameUtils.getExtension(file.getName()))) { + BufferedImage[] images = FileUtility.convertPdfToImage(file); + if (images == null || images.length < 1) { /* a null or empty conversion result is treated as a failure */ + throw new IllegalStateException("pdf file cannot be convert to images"); + } + return images; + } + BufferedImage image = ImageIO.read(file); + if (image == null) { /* ImageIO.read returns null when no registered reader can decode the file */ + throw new IllegalStateException("image cannot be extracted from file " + file.getName()); + } + return new BufferedImage[]{image}; + } } \ No newline at end of file diff --git a/dossierfacile-process-file/src/main/resources/application.properties b/dossierfacile-process-file/src/main/resources/application.properties index c4db0d73f..c31420e6c 100644 --- a/dossierfacile-process-file/src/main/resources/application.properties +++ b/dossierfacile-process-file/src/main/resources/application.properties @@ -49,3 +49,8 @@ ants.tsl.uri=https://ants.gouv.fr/files/25362bbf-a54e-4ed9-b98a-71e2382b54e0/tsl france.identite.api.url=https://dossierfacile-france-identite-numerique-api.osc-secnum-fr1.scalingo.io/api/validation/v1/check-doc-valid?all-attributes=true dossierfacile.logging.job.aggregator=true + +blurry.laplacian.threshold=400 +blurry.sobel.threshold=30 +blurry.fft.threshold=170 +blurry.dog.threshold=20 \ No newline at end of file diff --git a/dossierfacile-process-file/src/test/java/fr/dossierfacile/process/file/fileAnalysis/BlurryDocumentTest.java b/dossierfacile-process-file/src/test/java/fr/dossierfacile/process/file/fileAnalysis/BlurryDocumentTest.java new file mode 100644 index 000000000..1adfd26cb --- /dev/null +++ b/dossierfacile-process-file/src/test/java/fr/dossierfacile/process/file/fileAnalysis/BlurryDocumentTest.java @@ -0,0 +1,191 @@ +package fr.dossierfacile.process.file.fileAnalysis; + +import ch.qos.logback.classic.Level; +import ch.qos.logback.classic.Logger; +import 
ch.qos.logback.classic.LoggerContext; +import ch.qos.logback.classic.encoder.PatternLayoutEncoder; +import ch.qos.logback.classic.spi.ILoggingEvent; +import ch.qos.logback.core.FileAppender; +import fr.dossierfacile.common.entity.Document; +import fr.dossierfacile.common.entity.DocumentAnalysisReport; +import fr.dossierfacile.common.entity.ocr.BlurryAlgorithmType; +import fr.dossierfacile.common.enums.BlurryFileAnalysisStatus; +import fr.dossierfacile.common.repository.BlurryFileAnalysisRepository; +import fr.dossierfacile.fileAnalysis.*; +import fr.dossierfacile.process.file.fileAnalysis.config.OVHConfiguration; +import fr.dossierfacile.process.file.fileAnalysis.config.OpenCVConfiguration; +import fr.dossierfacile.process.file.repository.FileRepository; +import fr.dossierfacile.process.file.service.StorageFileLoaderService; +import fr.dossierfacile.process.file.service.documentrules.BlurryRulesValidationService; +import fr.dossierfacile.process.file.service.processors.blurry.BlurryProcessor; +import fr.dossierfacile.process.file.service.processors.blurry.algorithm.DifferenceOfGaussiansBlurryAlgorithm; +import fr.dossierfacile.process.file.service.processors.blurry.algorithm.FFTBlurryAlgorithm; +import fr.dossierfacile.process.file.service.processors.blurry.algorithm.LaplacianBlurryAlgorithm; +import fr.dossierfacile.process.file.service.processors.blurry.algorithm.SobelBlurryAlgorithm; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.test.context.ContextConfiguration; +import 
org.springframework.test.context.TestPropertySource; +import org.springframework.test.context.junit.jupiter.SpringExtension; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutionException; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.when; + +@ExtendWith(SpringExtension.class) +@ContextConfiguration(classes = { + OVHConfiguration.class, + OpenCVConfiguration.class, + BlurryProcessor.class, + FFTBlurryAlgorithm.class, + LaplacianBlurryAlgorithm.class, + SobelBlurryAlgorithm.class, + StorageFileLoaderService.class, + BlurryFileAnalysisRepository.class, + BlurryRulesValidationService.class, + DifferenceOfGaussiansBlurryAlgorithm.class, + FileRepository.class, + DatasetLoader.class +}) +@TestPropertySource(locations = {"/document_analysis.properties", "classpath:application-dev.properties"}) +@EnabledIfEnvironmentVariable(named = "ENABLE_TESTS_FILE_ANALYSIS", matches = "true") +public class BlurryDocumentTest { /* dataset-driven check of BlurryProcessor + BlurryRulesValidationService; enabled only when ENABLE_TESTS_FILE_ANALYSIS is "true" */ + + @Autowired + private TestOvhFileStorageServiceImpl ovhFileStorageService; + + @Autowired + private BlurryProcessor blurryProcessor; + + @Autowired + private BlurryRulesValidationService blurryRulesValidationService; + + @MockBean + private BlurryFileAnalysisRepository blurryFileAnalysisRepository; + + @MockBean + private StorageFileLoaderService storageFileLoaderService; + + @MockBean + private FileRepository fileRepository; + + private static final Logger logger = (Logger) LoggerFactory.getLogger(BlurryDocumentTest.class); + + @BeforeAll + static void init() { /* attaches a logback file appender to this class's logger so its output goes to testResult/blurryTest.log */ + LoggerContext lc = (LoggerContext) LoggerFactory.getILoggerFactory(); + + PatternLayoutEncoder ple = new PatternLayoutEncoder(); + + ple.setPattern("%msg%n"); + ple.setContext(lc); + ple.start(); + + var fileAppender = new FileAppender(); /* NOTE(review): generic type arguments look stripped throughout this text (e.g. here and on the List> return types below) - confirm against the original patch */ + fileAppender.setFile("testResult/blurryTest.log"); + 
fileAppender.setEncoder(ple); + fileAppender.setContext(lc); + fileAppender.start(); + logger.addAppender(fileAppender); + logger.setLevel(Level.DEBUG); + logger.setAdditive(false); /* set to true if root should log too */ + } + + private static FileAnalysisTestData testData = null; + + @BeforeAll + static void initData(@Autowired DatasetLoader datasetLoader) throws IOException, ExecutionException, InterruptedException { /* loads the dataset description once for all parameterized tests */ + testData = datasetLoader.loadDataset("blurryFiles/dataset-documents.json", Void.class, Void.class, Void.class); + } + + private File getFile(String bucketPath) throws ExecutionException, InterruptedException { /* blocks until the asynchronous download of bucketPath completes */ + return ovhFileStorageService.downloadAsync(bucketPath).get(); + } + + private Document getDocumentWithBlurryAnalysis(DocumentData documentData) throws ExecutionException, InterruptedException { /* builds a one-file Document, runs BlurryProcessor on that file with the storage loader stubbed to return the downloaded dataset file, logs the four scores and asserts the analysis is COMPLETED */ + var document = Document.builder() + .id(1L) + .build(); + + var dtFile = fr.dossierfacile.common.entity.File.builder() + .id(1L) + .document(document) + .build(); + + document.setFiles(List.of(dtFile)); + + when(storageFileLoaderService.getTemporaryFilePath(any())).thenReturn(getFile(documentData.getBucketPath())); + var result = blurryProcessor.process(dtFile); + assert result.getBlurryFileAnalysis() != null; /* plain Java assert: only evaluated when the JVM runs with assertions enabled (-ea) */ + var sobelScore = result.getBlurryFileAnalysis().getBlurryResults().stream().filter(blurryResult -> blurryResult.algorithm() == BlurryAlgorithmType.SOBEL).findFirst().orElseThrow().score(); + var laplacian = result.getBlurryFileAnalysis().getBlurryResults().stream().filter(blurryResult -> blurryResult.algorithm() == BlurryAlgorithmType.LAPLACIEN).findFirst().orElseThrow().score(); + var fftScore = result.getBlurryFileAnalysis().getBlurryResults().stream().filter(blurryResult -> blurryResult.algorithm() == BlurryAlgorithmType.FFT).findFirst().orElseThrow().score(); + var dogScore = result.getBlurryFileAnalysis().getBlurryResults().stream().filter(blurryResult -> blurryResult.algorithm() == 
BlurryAlgorithmType.DOG).findFirst().orElseThrow().score(); + logger.info("DOC: {} | Sobel: {} | Laplacian: {} | FFT: {} | DOG: {}", documentData.getBucketPath(), sobelScore, laplacian, fftScore, dogScore); + assertThat(result).isNotNull(); /* NOTE(review): result has already been dereferenced above, so this check cannot fail here */ + assertThat(result.getBlurryFileAnalysis()).isNotNull(); + assertThat(result.getBlurryFileAnalysis().getAnalysisStatus()).isEqualTo(BlurryFileAnalysisStatus.COMPLETED); + return document; + } + + @Nested + class BlurryDocumentsTest { /* every dataset entry listed as invalid must break exactly one rule */ + + static List> blurryFiles() { + return testData.getInvalidDocuments(); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("blurryFiles") + void whenBlurryFiles(InvalidDocumentData documentData) throws ExecutionException, InterruptedException { + var document = getDocumentWithBlurryAnalysis(documentData); + assertThat(document.getFiles().getFirst().getBlurryFileAnalysis()).isNotNull(); + + var documentAnalysisReport = DocumentAnalysisReport.builder() + .id(1L) + .document(document) + .brokenRules(new ArrayList<>()) + .build(); + + blurryRulesValidationService.process(document, documentAnalysisReport); + assertThat(documentAnalysisReport.getBrokenRules()).hasSize(1); + } + } + + @Nested + class ValidDocumentTest { /* every dataset entry listed as valid must break no rule */ + static List> notBlurryFiles() { + return testData.getValidDocuments(); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("notBlurryFiles") + void whenNotBlurryFiles(ValidDocumentData documentData) throws ExecutionException, InterruptedException { + var document = getDocumentWithBlurryAnalysis(documentData); + assertThat(document.getFiles().getFirst().getBlurryFileAnalysis()).isNotNull(); + + var documentAnalysisReport = DocumentAnalysisReport.builder() + .id(1L) + .document(document) + .brokenRules(new ArrayList<>()) + .build(); + + blurryRulesValidationService.process(document, documentAnalysisReport); + assertThat(documentAnalysisReport.getBrokenRules()).isEmpty(); + } + } +} + diff --git 
a/dossierfacile-process-file/src/test/java/fr/dossierfacile/process/file/fileAnalysis/BlurryProcessorTests.java b/dossierfacile-process-file/src/test/java/fr/dossierfacile/process/file/fileAnalysis/BlurryProcessorTests.java new file mode 100644 index 000000000..380d7b2f4 --- /dev/null +++ b/dossierfacile-process-file/src/test/java/fr/dossierfacile/process/file/fileAnalysis/BlurryProcessorTests.java @@ -0,0 +1,160 @@ +package fr.dossierfacile.process.file.fileAnalysis; + +import fr.dossierfacile.common.enums.BlurryFileAnalysisStatus; +import fr.dossierfacile.common.repository.BlurryFileAnalysisRepository; +import fr.dossierfacile.fileAnalysis.FileAnalysisTestData; +import fr.dossierfacile.fileAnalysis.InvalidDocumentData; +import fr.dossierfacile.fileAnalysis.TestOvhFileStorageServiceImpl; +import fr.dossierfacile.fileAnalysis.ValidDocumentData; +import fr.dossierfacile.process.file.fileAnalysis.config.OVHConfiguration; +import fr.dossierfacile.process.file.fileAnalysis.config.OpenCVConfiguration; +import fr.dossierfacile.process.file.repository.FileRepository; +import fr.dossierfacile.process.file.service.StorageFileLoaderService; +import fr.dossierfacile.process.file.service.processors.blurry.BlurryProcessor; +import fr.dossierfacile.process.file.service.processors.blurry.algorithm.DifferenceOfGaussiansBlurryAlgorithm; +import fr.dossierfacile.process.file.service.processors.blurry.algorithm.FFTBlurryAlgorithm; +import fr.dossierfacile.process.file.service.processors.blurry.algorithm.LaplacianBlurryAlgorithm; +import fr.dossierfacile.process.file.service.processors.blurry.algorithm.SobelBlurryAlgorithm; +import fr.dossierfacile.process.file.util.ImageUtils; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import 
org.junit.jupiter.params.provider.MethodSource; +import org.mockito.MockedStatic; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.TestPropertySource; +import org.springframework.test.context.junit.jupiter.SpringExtension; + +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.concurrent.ExecutionException; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mockStatic; +import static org.mockito.Mockito.when; + +@ExtendWith(SpringExtension.class) +@ContextConfiguration(classes = { + OVHConfiguration.class, + OpenCVConfiguration.class, + BlurryProcessor.class, + FFTBlurryAlgorithm.class, + LaplacianBlurryAlgorithm.class, + SobelBlurryAlgorithm.class, + DifferenceOfGaussiansBlurryAlgorithm.class, + StorageFileLoaderService.class, + BlurryFileAnalysisRepository.class, + FileRepository.class, + DatasetLoader.class +}) +@TestPropertySource(locations = {"/document_analysis.properties", "classpath:application-dev.properties"}) +@EnabledIfEnvironmentVariable(named = "ENABLE_TESTS_FILE_ANALYSIS", matches = "true") +public class BlurryProcessorTests { /* checks the status reported by BlurryProcessor on dataset files; enabled only when ENABLE_TESTS_FILE_ANALYSIS is "true" */ + + @Autowired + private TestOvhFileStorageServiceImpl ovhFileStorageService; + + @Autowired + private BlurryProcessor blurryProcessor; + + @MockBean + private BlurryFileAnalysisRepository blurryFileAnalysisRepository; + + @MockBean + private StorageFileLoaderService storageFileLoaderService; + + @MockBean + private FileRepository fileRepository; + + private static FileAnalysisTestData testData = null; + + @BeforeAll + static void initData(@Autowired DatasetLoader datasetLoader) throws IOException, ExecutionException, InterruptedException { /* loads the dataset description once for all tests */ + testData = datasetLoader.loadDataset("blurryFiles/dataset-processor.json", 
Void.class, Void.class, Void.class); + } + + private File getFile(String bucketPath) throws ExecutionException, InterruptedException { /* blocks until the asynchronous download of bucketPath completes */ + return ovhFileStorageService.downloadAsync(bucketPath).get(); + } + + @Nested + class BlurryFiles { /* only asserts that processing completes; the scores are printed, not compared to thresholds */ + static List> blurryFiles() { /* NOTE(review): generic type arguments look stripped in this text (List>, MockedStatic) - confirm against the original patch */ + return testData.getInvalidDocuments(); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("blurryFiles") + void testBlurryFiles(InvalidDocumentData blurryFile) { + var dtFile = fr.dossierfacile.common.entity.File.builder() + .id(1L) + .build(); + + try { + when(storageFileLoaderService.getTemporaryFilePath(any())).thenReturn(getFile(blurryFile.getBucketPath())); + var result = blurryProcessor.process(dtFile); + assertThat(result).isNotNull(); + assertThat(result.getBlurryFileAnalysis()).isNotNull(); + assertThat(result.getBlurryFileAnalysis().getAnalysisStatus()).isEqualTo(BlurryFileAnalysisStatus.COMPLETED); + System.out.println(result.getBlurryFileAnalysis().getBlurryResults()); + } catch (ExecutionException | InterruptedException e) { /* NOTE(review): the interrupt status is not restored before wrapping */ + throw new RuntimeException(e); + } + } + } + + @Nested + class NotBlurryFiles { /* same checks as BlurryFiles, run on the entries listed as valid */ + static List> notBlurryFiles() { + return testData.getValidDocuments(); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("notBlurryFiles") + void testNotBlurryFiles(ValidDocumentData blurryFile) { + var dtFile = fr.dossierfacile.common.entity.File.builder() + .id(1L) + .build(); + + try { + when(storageFileLoaderService.getTemporaryFilePath(any())).thenReturn(getFile(blurryFile.getBucketPath())); + var result = blurryProcessor.process(dtFile); + assertThat(result).isNotNull(); + assertThat(result.getBlurryFileAnalysis()).isNotNull(); + assertThat(result.getBlurryFileAnalysis().getAnalysisStatus()).isEqualTo(BlurryFileAnalysisStatus.COMPLETED); + System.out.println(result.getBlurryFileAnalysis().getBlurryResults()); + } catch (ExecutionException | InterruptedException e) { /* NOTE(review): the interrupt status is not restored before wrapping */ + throw new RuntimeException(e); + } + } + } + + @Nested + class exceptionTest { /* failure path: image extraction throws and the processor is expected to report FAILED */ + + @Test + void 
testExceptionWhileGettingImages() { + var dtFile = fr.dossierfacile.common.entity.File.builder() + .id(1L) + .build(); + var testFile = testData.getValidDocuments().getFirst(); + try (MockedStatic imageUtilsMockedStatic = mockStatic(ImageUtils.class)) { /* the static mock is released when this try block exits */ + imageUtilsMockedStatic.when(() -> ImageUtils.getImagesFromFile(any())).thenThrow(new IOException()); + when(storageFileLoaderService.getTemporaryFilePath(any())).thenReturn(getFile(testFile.getBucketPath())); + var result = blurryProcessor.process(dtFile); + assertThat(result).isNotNull(); + assertThat(result.getBlurryFileAnalysis()).isNotNull(); + assertThat(result.getBlurryFileAnalysis().getAnalysisStatus()).isEqualTo(BlurryFileAnalysisStatus.FAILED); + } catch (ExecutionException | InterruptedException e) { /* NOTE(review): the interrupt status is not restored before wrapping */ + throw new RuntimeException(e); + } + } + } +} diff --git a/dossierfacile-process-file/src/test/java/fr/dossierfacile/process/file/fileAnalysis/GuaranteeProviderDocumentTest.java b/dossierfacile-process-file/src/test/java/fr/dossierfacile/process/file/fileAnalysis/GuaranteeProviderDocumentTest.java index 27cb1181c..1f3fc0973 100644 --- a/dossierfacile-process-file/src/test/java/fr/dossierfacile/process/file/fileAnalysis/GuaranteeProviderDocumentTest.java +++ b/dossierfacile-process-file/src/test/java/fr/dossierfacile/process/file/fileAnalysis/GuaranteeProviderDocumentTest.java @@ -4,8 +4,12 @@ import fr.dossierfacile.common.entity.ocr.GuaranteeProviderFile; import fr.dossierfacile.common.enums.*; import fr.dossierfacile.fileAnalysis.*; +import fr.dossierfacile.process.file.fileAnalysis.config.OVHConfiguration; import fr.dossierfacile.process.file.service.documentrules.GuaranteeProviderRulesValidationService; import fr.dossierfacile.process.file.service.parsers.GuaranteeVisaleParser; +import fr.dossierfacile.process.file.service.processors.blurry.algorithm.FFTBlurryAlgorithm; +import fr.dossierfacile.process.file.service.processors.blurry.algorithm.LaplacianBlurryAlgorithm; +import 
fr.dossierfacile.process.file.service.processors.blurry.algorithm.SobelBlurryAlgorithm; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; @@ -30,7 +34,7 @@ import static org.assertj.core.api.Assertions.assertThat; @ExtendWith(SpringExtension.class) -@ContextConfiguration(classes = {OvhConfiguration.class, DatasetLoader.class}) +@ContextConfiguration(classes = {OVHConfiguration.class, DatasetLoader.class, LaplacianBlurryAlgorithm.class, SobelBlurryAlgorithm.class, FFTBlurryAlgorithm.class}) @TestPropertySource(locations = "/document_analysis.properties") @EnabledIfEnvironmentVariable(named = "ENABLE_TESTS_FILE_ANALYSIS", matches = "true") // See README.md for tutorial to start those test. diff --git a/dossierfacile-process-file/src/test/java/fr/dossierfacile/process/file/fileAnalysis/config/OVHConfiguration.java b/dossierfacile-process-file/src/test/java/fr/dossierfacile/process/file/fileAnalysis/config/OVHConfiguration.java new file mode 100644 index 000000000..49269a01b --- /dev/null +++ b/dossierfacile-process-file/src/test/java/fr/dossierfacile/process/file/fileAnalysis/config/OVHConfiguration.java @@ -0,0 +1,37 @@ +package fr.dossierfacile.process.file.fileAnalysis.config; + +import fr.dossierfacile.fileAnalysis.S3Configuration; +import fr.dossierfacile.fileAnalysis.TestOvhFileStorageServiceImpl; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class OVHConfiguration { /* test Spring configuration: builds the storage client used by the file-analysis tests from the s3.* properties */ + @Value("${s3.endpoint}") + private String s3Endpoint; + @Value("${s3.account.id}") + private String s3AccountId; + @Value("${s3.access.key}") + private String s3AccessKey; + @Value("${s3.secret.key}") + private String s3SecretKey; + @Value("${s3.region}") + private String s3Region; + @Value("${s3.bucket.name}") + private String s3BucketName; + + @Bean 
+ TestOvhFileStorageServiceImpl getOvhFileStorageService() { /* passes the six injected values to S3Configuration in the order endpoint, account id, access key, secret key, region, bucket name */ + return new TestOvhFileStorageServiceImpl( + new S3Configuration( + s3Endpoint, + s3AccountId, + s3AccessKey, + s3SecretKey, + s3Region, + s3BucketName + ) + ); + } +} diff --git a/dossierfacile-process-file/src/test/java/fr/dossierfacile/process/file/fileAnalysis/config/OpenCVConfiguration.java b/dossierfacile-process-file/src/test/java/fr/dossierfacile/process/file/fileAnalysis/config/OpenCVConfiguration.java new file mode 100644 index 000000000..3449e781a --- /dev/null +++ b/dossierfacile-process-file/src/test/java/fr/dossierfacile/process/file/fileAnalysis/config/OpenCVConfiguration.java @@ -0,0 +1,44 @@ +package fr.dossierfacile.process.file.fileAnalysis.config; + +import fr.dossierfacile.process.file.service.processors.blurry.algorithm.*; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import java.util.List; + +@Configuration +public class OpenCVConfiguration { /* test Spring configuration: exposes the opencv.lib.path holder and the list of the four blur algorithms */ + @Value("${opencv.lib.path}") + private String opencvLibPath; + + @Autowired + private LaplacianBlurryAlgorithm laplacianBlurryAlgorithm; + @Autowired + private SobelBlurryAlgorithm sobelBlurryAlgorithm; + @Autowired + private FFTBlurryAlgorithm fftBlurryAlgorithm; + @Autowired + private DifferenceOfGaussiansBlurryAlgorithm differenceOfGaussiansBlurryAlgorithm; + + @Bean + TestOpenCvConfig getOpenCvConfig() { /* NOTE(review): this only wraps the path; nothing visible here loads the native library - confirm where that happens */ + return new TestOpenCvConfig( + opencvLibPath + ); + } + + @Bean + List getBlurryAlgorithms() { /* immutable list, in the order Laplacian, Sobel, FFT, DoG; NOTE(review): the element type argument looks stripped in this text - confirm against the original patch */ + return List.of( + laplacianBlurryAlgorithm, + sobelBlurryAlgorithm, + fftBlurryAlgorithm, + differenceOfGaussiansBlurryAlgorithm + ); + } +} + +record TestOpenCvConfig(String libPath) { /* holder for the opencv.lib.path property value */ +}