diff --git a/README.rst b/README.rst index 6a82944..754c5db 100644 --- a/README.rst +++ b/README.rst @@ -213,7 +213,8 @@ Fine-tuning Tesseract/Ghostscript/others You can specify Tesseract and Ghostscript executable locations manually, as well as the number of concurrent processes allowed during preprocessing and -tesseract. Use the following in your configuration file: +tesseract. The process count defaults to the number of CPUs available. Use +the following in your configuration file: :: diff --git a/pypdfocr/pypdfocr_gs.py b/pypdfocr/pypdfocr_gs.py index 5599082..22f9bc2 100644 --- a/pypdfocr/pypdfocr_gs.py +++ b/pypdfocr/pypdfocr_gs.py @@ -39,7 +39,7 @@ def __init__(self, config): 'GS_OUTDATED': 'Your Ghostscript version is probably out of date. Please upgrade to the latest version', 'GS_MISSING_BINARY': 'Could not find Ghostscript in the usual place; please specify it using your config file', } - self.threads = config.get('threads',4) + self.threads = config.get('threads', os.cpu_count()) if "binary" in config: # Override location of binary binary = config['binary'] diff --git a/pypdfocr/pypdfocr_preprocess.py b/pypdfocr/pypdfocr_preprocess.py index e942cc3..9d56ec7 100644 --- a/pypdfocr/pypdfocr_preprocess.py +++ b/pypdfocr/pypdfocr_preprocess.py @@ -44,7 +44,7 @@ def __init__(self, config): self.msgs = { 'CV_FAILED': 'convert execution failed', } - self.threads = config.get('threads', 4) + self.threads = config.get('threads', os.cpu_count()) def _warn(self, msg): # pragma: no cover print("WARNING: %s" % msg) diff --git a/pypdfocr/pypdfocr_tesseract.py b/pypdfocr/pypdfocr_tesseract.py index 8f246ee..ffd264a 100644 --- a/pypdfocr/pypdfocr_tesseract.py +++ b/pypdfocr/pypdfocr_tesseract.py @@ -46,7 +46,7 @@ def __init__(self, config): """ self.lang = 'eng' self.required = "3.02.02" - self.threads = config.get('threads',4) + self.threads = config.get('threads', os.cpu_count()) if "binary" in config: # Override location of binary binary = config['binary']