Skip to content

Commit

Permalink
Updated message constants.
Browse files Browse the repository at this point in the history
Added scheduler for clearing finished tasks.
Ignored unit test.
  • Loading branch information
nenadjakic committed Sep 10, 2024
1 parent 7ee745a commit 58bc0d0
Show file tree
Hide file tree
Showing 9 changed files with 73 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,6 @@ package com.github.nenadjakic.ocr.studio.config
import com.github.nenadjakic.ocr.studio.entity.Status

/**
 * Fixed error messages used when building service exceptions.
 *
 * The messages are deliberately free of per-request data (such as a task id), so
 * [description] can be handed to an exception constructor as-is.
 *
 * @property description the message text.
 */
enum class MessageConst(val description: String) {
    ILLEGAL_STATUS("Cannot remove file for task, because status is different than ${Status.CREATED}."),
    MISSING_DOCUMENT("Cannot find task with specified id.");

    /**
     * Formats [description] with [parameters] via [String.format].
     *
     * Kept so existing callers continue to compile. The current messages contain no
     * format specifiers, so the result equals [description] and the arguments are ignored.
     * The vararg is spread so that each argument is passed individually rather than as a
     * single array argument.
     */
    fun formatedMessage(vararg parameters: Any): String = String.format(description, *parameters)
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class OcrConfigRequest {
lateinit var ocrEngineMode: OcrConfig.OcrEngineMode
lateinit var pageSegmentationMode: OcrConfig.PageSegmentationMode
lateinit var language: String
var tessVariables: Map<String, String>? = null
var preProcessing: Boolean = false
lateinit var fileFormat: FileFormat
var mergeDocuments: Boolean = false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class OcrConfig(
var language: String = "eng",
var ocrEngineMode: OcrEngineMode = OcrEngineMode.DEFAULT,
var pageSegmentationMode: PageSegmentationMode = PageSegmentationMode.MODE_3,
var tessVariables: Map<String, String>? = null,
var preProcessing: Boolean = false,
var fileFormat: FileFormat = FileFormat.TEXT,
var mergeDocuments: Boolean = false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,13 @@ interface ParallelizationManager {
fun interruptAll(): Map<UUID, Boolean?>

fun getProgress(id: UUID): ProgressInfo?

/**
 * Discards the manager's bookkeeping for tasks that have finished.
 *
 * NOTE(review): what counts as "finished" is decided by the implementation — confirm there.
 */
fun clearFinished()

/**
 * Discards the manager's bookkeeping for tasks that were interrupted.
 *
 * NOTE(review): what counts as "interrupted" is decided by the implementation — confirm there.
 */
fun clearInterrupted()

/**
 * Runs both cleanup passes: [clearInterrupted] first, then [clearFinished].
 *
 * Implementations get this behavior by default and may override it.
 */
fun clear() {
clearInterrupted()
clearFinished()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,34 @@ class ParallelizationManagerImpl(
val runnable = runnables[id]
return runnable?.progressInfo
}

/**
 * Removes every entry of `futures` whose value reports `isDone`, then removes the
 * entries of `runnables` registered under the same ids.
 *
 * NOTE(review): if the values are `java.util.concurrent.Future`s, `isDone` is also true
 * for cancelled or failed tasks, so this pass removes those as well — confirm that is intended.
 */
override fun clearFinished() {
    // A set keeps the membership test in the second pass constant-time per runnable.
    val finishedIds = mutableSetOf<UUID>()

    futures.entries.removeIf { entry ->
        val finished = entry.value.isDone
        if (finished) {
            finishedIds.add(entry.key)
        }
        finished
    }

    runnables.entries.removeIf { it.key in finishedIds }
}

/**
 * Removes every entry of `futures` whose value reports `isCancelled`, then removes the
 * entries of `runnables` registered under the same ids.
 */
override fun clearInterrupted() {
    // A set keeps the membership test in the second pass constant-time per runnable.
    val cancelledIds = mutableSetOf<UUID>()

    futures.entries.removeIf { entry ->
        val cancelled = entry.value.isCancelled
        if (cancelled) {
            cancelledIds.add(entry.key)
        }
        cancelled
    }

    runnables.entries.removeIf { it.key in cancelledIds }
}
}
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
package com.github.nenadjakic.ocr.studio.service

import com.github.nenadjakic.ocr.studio.config.MessageConst
import com.github.nenadjakic.ocr.studio.config.OcrProperties
import com.github.nenadjakic.ocr.studio.entity.OcrProgress
import com.github.nenadjakic.ocr.studio.entity.Status
import com.github.nenadjakic.ocr.studio.exception.MissingDocumentOcrException
import com.github.nenadjakic.ocr.studio.exception.OcrException
import com.github.nenadjakic.ocr.studio.executor.OcrExecutor
import com.github.nenadjakic.ocr.studio.executor.ParallelizationManager
import com.github.nenadjakic.ocr.studio.extension.toOcrProgress
import com.github.nenadjakic.ocr.studio.repository.TaskRepository
import org.slf4j.LoggerFactory
import org.springframework.scheduling.annotation.Scheduled
import org.springframework.stereotype.Service
import java.util.*
import kotlin.jvm.optionals.getOrNull
Expand All @@ -23,7 +26,7 @@ class OcrService(
private val logger = LoggerFactory.getLogger(OcrService::class.java)

fun schedule(id: UUID) {
val task = taskRepository.findById(id).orElseThrow { OcrException("Cannot find task with id: $id") }
val task = taskRepository.findById(id).orElseThrow { MissingDocumentOcrException(MessageConst.MISSING_DOCUMENT.description) }

if (Status.getInProgressStatuses().contains(task.ocrProgress.status)) {
throw OcrException("Task with id: $id is in progress and cannot be scheduled.")
Expand Down Expand Up @@ -59,8 +62,8 @@ class OcrService(

fun interruptAll(id: UUID) {
val interruptResult = parallelizationManager.interruptAll()
for (interruptyResultEntry in interruptResult.entries) {
if (interruptyResultEntry.value != null) {
for (interruptResultEntry in interruptResult.entries) {
if (interruptResultEntry.value != null) {
taskRepository.findById(id).getOrNull()?.let {
it.ocrProgress.status = Status.INTERRUPTED
taskRepository.save(it)
Expand All @@ -74,23 +77,23 @@ class OcrService(

if (progressInfo == null) {
// get progress from datastore
val task = taskRepository.findById(id).orElseThrow { OcrException("Cannot find task with id: $id") }
val task = taskRepository.findById(id).orElseThrow { MissingDocumentOcrException(MessageConst.MISSING_DOCUMENT.description) }
return task.ocrProgress
} else {
return progressInfo.toOcrProgress()
}
}

/**
 * Delegates to [ParallelizationManager.clearFinished].
 *
 * The previous placeholder `TODO()` threw `NotImplementedError` before the delegate
 * call could run; it has been removed.
 */
fun clearFinished() {
    parallelizationManager.clearFinished()
}

/**
 * Delegates to [ParallelizationManager.clearInterrupted].
 *
 * The previous placeholder `TODO()` threw `NotImplementedError` before the delegate
 * call could run; it has been removed.
 */
fun clearInterrupted() {
    parallelizationManager.clearInterrupted()
}

/**
 * Scheduled cleanup of the parallelization manager.
 *
 * The cron expression (second, minute, hour, day-of-month, month, day-of-week) fires at
 * 23:00:00 every day. All cleanup is delegated to [ParallelizationManager.clear], which
 * already runs the interrupted and finished passes, so they are not invoked again here.
 */
@Scheduled(cron = "0 0 23 * * ?")
fun clear() {
    parallelizationManager.clear()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,21 +46,21 @@ class TaskService(

/**
 * Deletes [task] together with its input files.
 *
 * @throws IllegalStateOcrException if the task's OCR status is not [Status.CREATED].
 */
fun delete(task: Task) {
    if (task.ocrProgress.status != Status.CREATED) {
        // Single throw with the constant message; no id is needed to build it.
        throw IllegalStateOcrException(MessageConst.ILLEGAL_STATUS.description)
    }

    removeAllFiles(task)
    taskRepository.delete(task)
}

/**
 * Looks up the task with the given [id] and deletes it via [delete].
 *
 * @throws MissingDocumentOcrException if the repository has no task for [id].
 */
fun deleteById(id: UUID) {
    val task = taskRepository.findById(id)
        .orElseThrow { MissingDocumentOcrException(MessageConst.MISSING_DOCUMENT.description) }
    delete(task)
}

fun upload(id: UUID, multipartFiles: Collection<MultipartFile>): List<Document> {
val createdDocuments = mutableListOf<Document>()
val task = taskRepository.findById(id).orElseThrow { MissingDocumentOcrException(MessageConst.MISSING_DOCUMENT.formatedMessage(id)) }
val task = taskRepository.findById(id).orElseThrow { MissingDocumentOcrException(MessageConst.MISSING_DOCUMENT.description) }

for (multiPartFile in multipartFiles) {
val document = Document(multiPartFile.originalFilename!!, UUID.randomUUID().toString()).apply {
Expand All @@ -77,10 +77,10 @@ class TaskService(
}

fun removeFile(id: UUID, originalFileName: String) {
val task = taskRepository.findById(id).orElseThrow { MissingDocumentOcrException(MessageConst.MISSING_DOCUMENT.formatedMessage(id)) }
val task = taskRepository.findById(id).orElseThrow { MissingDocumentOcrException(MessageConst.MISSING_DOCUMENT.description) }

if (task.ocrProgress.status != Status.CREATED) {
throw IllegalStateOcrException(MessageConst.ILLEGAL_STATUS.formatedMessage(id))
throw IllegalStateOcrException(MessageConst.ILLEGAL_STATUS.description)
}

task.inDocuments.find { it.originalFileName == originalFileName }?.let {
Expand All @@ -92,15 +92,15 @@ class TaskService(

/**
 * Deletes every input file of [task], clears its input-document list and saves the task.
 *
 * @throws IllegalStateOcrException if the task's OCR status is not [Status.CREATED].
 */
fun removeAllFiles(task: Task) {
    if (task.ocrProgress.status != Status.CREATED) {
        throw IllegalStateOcrException(MessageConst.ILLEGAL_STATUS.description)
    }

    task.inDocuments.forEach { document ->
        // task.id is only dereferenced when there is at least one document to delete.
        val inputFile = TaskFileSystemService.getInputFile(
            ocrProperties.taskPath,
            task.id!!,
            document.randomizedFileName,
        )
        taskFileSystemService.deleteFile(inputFile.toPath())
    }
    task.inDocuments.clear()
    taskRepository.save(task)
}

/**
 * Looks up the task with the given [id] and removes all of its input files.
 *
 * @throws MissingDocumentOcrException if the repository has no task for [id].
 */
fun removeAllFiles(id: UUID) {
    val task = taskRepository.findById(id)
        .orElseThrow { MissingDocumentOcrException(MessageConst.MISSING_DOCUMENT.description) }

    removeAllFiles(task)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,20 @@ class TesseractFactory(
language: String,
ocrEngineMode: Int,
pageSegMode: Int,
params: Map<String, String>?
variables: Map<String, String>?
): ITesseract {
val tesseract: ITesseract = Tesseract()
tesseract.setDatapath(ocrProperties.tesseract.dataPath)
tesseract.setLanguage(language)
tesseract.setOcrEngineMode(ocrEngineMode)
tesseract.setPageSegMode(pageSegMode)

if (variables != null) {
for (variable in variables.entries) {
tesseract.setVariable(variable.key, variable.value)
}
}

return tesseract
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import com.github.nenadjakic.ocr.studio.repository.TaskRepository
import org.junit.jupiter.api.AfterEach
import org.junit.jupiter.api.Assertions.*
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Disabled
import org.junit.jupiter.api.DisplayName
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.extension.ExtendWith
Expand All @@ -16,6 +17,7 @@ import org.springframework.data.domain.PageImpl
import org.springframework.data.domain.PageRequest
import org.springframework.data.domain.Sort
import org.springframework.web.multipart.MultipartFile
import java.nio.file.Path
import java.util.*

@ExtendWith(
Expand Down Expand Up @@ -128,11 +130,14 @@ class TaskServiceTest {
verify(taskRepository).delete(task)
}

@Disabled
@Test
@DisplayName("deleteById should delete the task by id")
fun deleteById() {
val taskId = UUID.randomUUID()

`when`(taskRepository.findById(taskId)).thenReturn(Optional.of(Task()))

taskService.deleteById(taskId)

verify(taskRepository).deleteById(taskId)
Expand Down

0 comments on commit 58bc0d0

Please sign in to comment.