From bf806f1b18d096d486e19a1a6e97f1cc17ca1e85 Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Wed, 25 Dec 2024 15:17:54 +0100 Subject: [PATCH 1/6] Update dependency versions --- excalibur/configuration.py | 12 +++++++--- excalibur/executors/celery_executor.py | 6 +++-- excalibur/executors/sequential_executor.py | 6 +++-- excalibur/tasks.py | 10 ++++++++- excalibur/utils/task.py | 26 +++++++++++----------- excalibur/www/static/js/job.js | 2 +- excalibur/www/templates/workspace.html | 3 +-- excalibur/www/views.py | 2 +- setup.py | 6 ++--- 9 files changed, 45 insertions(+), 28 deletions(-) diff --git a/excalibur/configuration.py b/excalibur/configuration.py index 1a6251d..a4f23b2 100644 --- a/excalibur/configuration.py +++ b/excalibur/configuration.py @@ -1,7 +1,12 @@ import os import six -from backports.configparser import ConfigParser + +# Prefer the standard-library configparser; fall back to the backports package if it is unavailable. +try: + from configparser import ConfigParser +except ImportError: + from backports.configparser import ConfigParser def _read_default_config_file(file_name): @@ -69,8 +74,9 @@ def get(self, section, key, **kwargs): else: raise ValueError( - "section/key [{section}/{key}] not found in" - " config".format(**locals()) + "section/key [{section}/{key}] not found in" " config".format( + **locals() + ) ) def read(self, filename): diff --git a/excalibur/executors/celery_executor.py b/excalibur/executors/celery_executor.py index d111ee2..040512c 100755 --- a/excalibur/executors/celery_executor.py +++ b/excalibur/executors/celery_executor.py @@ -1,6 +1,6 @@ +import sys import traceback import subprocess -import sys from celery import Celery @@ -23,7 +23,9 @@ @app.task def execute_command(command): try: - subprocess.check_call(command, stderr=subprocess.STDOUT, close_fds=(sys.platform != 'win32')) + subprocess.check_call( + command, stderr=subprocess.STDOUT, close_fds=(sys.platform != "win32") + ) except Exception as e: traceback.print_exc(e) diff --git a/excalibur/executors/sequential_executor.py 
b/excalibur/executors/sequential_executor.py index 4e758c2..3308323 100644 --- a/excalibur/executors/sequential_executor.py +++ b/excalibur/executors/sequential_executor.py @@ -1,14 +1,16 @@ +import sys import traceback import subprocess from concurrent.futures import ProcessPoolExecutor -import sys from .base_executor import BaseExecutor def execute_command(command): try: - subprocess.check_call(command, stderr=subprocess.STDOUT, close_fds=(sys.platform != 'win32')) + subprocess.check_call( + command, stderr=subprocess.STDOUT, close_fds=(sys.platform != "win32") + ) except FileNotFoundError: # TODO: PyInstaller does not package console_scripts # https://github.com/pyinstaller/pyinstaller/issues/305 diff --git a/excalibur/tasks.py b/excalibur/tasks.py index 4bf92be..1846f70 100644 --- a/excalibur/tasks.py +++ b/excalibur/tasks.py @@ -4,6 +4,7 @@ import logging import datetime as dt +import pandas as pd from camelot.core import TableList from camelot.parsers import Stream, Lattice from camelot.ext.ghostscript import Ghostscript @@ -123,7 +124,14 @@ def extract(job_id): mkdirs(f_datapath) ext = f if f != "excel" else "xlsx" f_datapath = os.path.join(f_datapath, f"{froot}.{ext}") - tables.export(f_datapath, f=f, compress=True) + + if f == "excel": + with pd.ExcelWriter(f_datapath) as writer: + for i, table in enumerate(tables): + sheet_name = f"Table_{i + 1}" + table.df.to_excel(writer, sheet_name=sheet_name, index=False) + else: + tables.export(f_datapath, f=f, compress=True) # for render jsonpath = os.path.join(datapath, "json") diff --git a/excalibur/utils/task.py b/excalibur/utils/task.py index fda7a3c..30c00b1 100644 --- a/excalibur/utils/task.py +++ b/excalibur/utils/task.py @@ -1,7 +1,7 @@ import os import cv2 -from PyPDF2 import PdfFileReader, PdfFileWriter +from PyPDF2 import PdfReader, PdfWriter from camelot.utils import get_rotation, get_page_layout, get_text_objects @@ -26,21 +26,21 @@ def get_pages(filename, pages, password=""): """ page_numbers = [] 
inputstream = open(filename, "rb") - infile = PdfFileReader(inputstream, strict=False) - N = infile.getNumPages() + infile = PdfReader(inputstream, strict=False) + N = len(infile.pages) if pages == "1": page_numbers.append({"start": 1, "end": 1}) else: if infile.isEncrypted: infile.decrypt(password) if pages == "all": - page_numbers.append({"start": 1, "end": infile.getNumPages()}) + page_numbers.append({"start": 1, "end": len(infile.pages)}) else: for r in pages.split(","): if "-" in r: a, b = r.split("-") if b == "end": - b = infile.getNumPages() + b = len(infile.pages) page_numbers.append({"start": int(a), "end": int(b)}) else: page_numbers.append({"start": int(r), "end": int(r)}) @@ -52,10 +52,10 @@ def get_pages(filename, pages, password=""): def save_page(filepath, page_number): - infile = PdfFileReader(open(filepath, "rb"), strict=False) - page = infile.getPage(page_number - 1) - outfile = PdfFileWriter() - outfile.addPage(page) + infile = PdfReader(open(filepath, "rb"), strict=False) + page = infile.pages[page_number - 1] + outfile = PdfWriter() + outfile.add_page(page) outpath = os.path.join(os.path.dirname(filepath), f"page-{page_number}.pdf") with open(outpath, "wb") as f: outfile.write(f) @@ -69,16 +69,16 @@ def save_page(filepath, page_number): if rotation != "": outpath_new = "".join([froot.replace("page", "p"), "_rotated", fext]) os.rename(outpath, outpath_new) - infile = PdfFileReader(open(outpath_new, "rb"), strict=False) + infile = PdfReader(open(outpath_new, "rb"), strict=False) if infile.isEncrypted: infile.decrypt("") - outfile = PdfFileWriter() - p = infile.getPage(0) + outfile = PdfWriter() + p = infile.pages[0] if rotation == "anticlockwise": p.rotateClockwise(90) elif rotation == "clockwise": p.rotateCounterClockwise(90) - outfile.addPage(p) + outfile.add_page(p) with open(outpath, "wb") as f: outfile.write(f) diff --git a/excalibur/www/static/js/job.js b/excalibur/www/static/js/job.js index 2ce29d2..5340d3d 100644 --- 
a/excalibur/www/static/js/job.js +++ b/excalibur/www/static/js/job.js @@ -6,4 +6,4 @@ $(document).ready(function () { $('#download-form').append($(input)); $('#download-form').submit(); }); -}); \ No newline at end of file +}); diff --git a/excalibur/www/templates/workspace.html b/excalibur/www/templates/workspace.html index 91eb6a9..400f2d2 100644 --- a/excalibur/www/templates/workspace.html +++ b/excalibur/www/templates/workspace.html @@ -9,7 +9,7 @@ {% endblock %} {% block workspace %} - {% if imagepaths is not none %} + {% if imagepaths is not none or imagepaths|length == 0 %}
@@ -171,7 +171,6 @@

Processing

{% block javascript %} -