Skip to content

Commit

Permalink
Merge pull request #190 from camelot-dev/vinayak/2025-01-03-fix-deprecated-camelot-methods
Browse files Browse the repository at this point in the history

Fix deprecated camelot methods
  • Loading branch information
vinayak-mehta authored Jan 3, 2025
2 parents f47c132 + d1995ff commit 2019488
Show file tree
Hide file tree
Showing 35 changed files with 1,487 additions and 7,931 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
# This workflow will install Python dependencies, run tests
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Run Unittests
name: Tests

on:
push:
Expand All @@ -26,7 +23,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install ".[all]"
python -m pip install ".[dev]"
- name: Test with pytest
run: |
python -m pytest
5 changes: 1 addition & 4 deletions docs/user/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,10 @@ This part of the documentation covers the steps to install Excalibur.
Using pip
---------

After installing `ghostscript`_, which is one of the requirements for Camelot (See `install instructions`_), you can simply use pip to install Excalibur::
You can simply use pip to install Excalibur::

$ pip install excalibur-py

.. _ghostscript: https://www.ghostscript.com/
.. _install instructions: https://camelot-py.readthedocs.io/en/master/user/install-deps.html

From the source code
--------------------

Expand Down
8 changes: 4 additions & 4 deletions excalibur/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

import click

from . import settings, __version__
from . import __version__, settings
from . import configuration as conf
from .tasks import split, extract
from .www.app import create_app
from .utils.database import reset_database, initialize_database
from .operators.python_operator import PythonOperator
from .tasks import extract, split
from .utils.database import initialize_database, reset_database
from .www.app import create_app


def abort_if_false(ctx, param, value):
Expand Down
10 changes: 5 additions & 5 deletions excalibur/models.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import json
import datetime as dt
import json
from typing import Any # noqa

from sqlalchemy import (
Text,
Column,
String,
Boolean,
Integer,
Column,
DateTime,
ForeignKey,
Integer,
String,
Text,
)
from sqlalchemy.ext.declarative import declarative_base

Expand Down
23 changes: 4 additions & 19 deletions excalibur/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import camelot
import pandas as pd
from camelot.backends.ghostscript_backend import GhostscriptBackend
from camelot.backends.pdfium_backend import PdfiumBackend
from camelot.core import TableList
from camelot.parsers import Lattice, Stream

Expand Down Expand Up @@ -43,23 +43,8 @@ def split(file_id):
imagepath = os.path.join(conf.PDFS_FOLDER, file_id, imagename)

# convert single-page PDF to PNG
try:
backend = GhostscriptBackend()
backend.convert(filepath, imagepath, 300)
except OSError:
gs_command = [
"gs",
"-q",
"-sDEVICE=png16m",
f"-o{imagepath}",
"-r300",
filepath,
]
try:
subprocess.run(gs_command, check=True, capture_output=True)
except subprocess.CalledProcessError as e:
logging.error(f"Ghostscript conversion failed: {e.stderr.decode()}")
raise
backend = PdfiumBackend()
backend.convert(filepath, imagepath, 300)

filenames[page] = filename
filepaths[page] = filepath
Expand Down Expand Up @@ -123,7 +108,7 @@ def extract(job_id):
if flavor.lower() == "lattice":
kwargs.pop("columns", None)

t = camelot.read_pdf(filepaths[p], **kwargs, backend="poppler")
t = camelot.read_pdf(filepaths[p], **kwargs)
for _t in t:
_t.page = int(p)
tables.extend(t)
Expand Down
12 changes: 8 additions & 4 deletions excalibur/utils/task.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import os

import cv2
from camelot.utils import (
get_image_char_and_text_objects,
get_page_layout,
get_rotation,
)
from PyPDF2 import PdfReader, PdfWriter
from camelot.utils import get_rotation, get_page_layout, get_text_objects


def get_pages(filename, pages, password=""):
Expand Down Expand Up @@ -62,9 +66,9 @@ def save_page(filepath, page_number):
froot, fext = os.path.splitext(outpath)
layout, __ = get_page_layout(outpath)
# fix rotated PDF
chars = get_text_objects(layout, ltype="char")
horizontal_text = get_text_objects(layout, ltype="horizontal_text")
vertical_text = get_text_objects(layout, ltype="vertical_text")
images, chars, horizontal_text, vertical_text = get_image_char_and_text_objects(
layout
)
rotation = get_rotation(chars, horizontal_text, vertical_text)
if rotation != "":
outpath_new = "".join([froot.replace("page", "p"), "_rotated", fext])
Expand Down
8 changes: 4 additions & 4 deletions excalibur/www/static/js/workspace.js
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ const onSavedRuleClick = function (e) {

if (ruleOptions['flavor'].toLowerCase() == 'lattice') {
document.getElementById('process-background').value = ruleOptions['process_background'];
document.getElementById('line-size-scaling').value = ruleOptions['line_size_scaling'];
document.getElementById('line-scale').value = ruleOptions['line_scale'];
document.getElementById('split-text-l').value = ruleOptions['split_text'];
document.getElementById('flag-size-l').value = ruleOptions['flag_size'];
} else if (ruleOptions['flavor'].toLowerCase() == 'stream') {
Expand Down Expand Up @@ -176,14 +176,14 @@ const getRuleOptions = function () {
switch(flavor.toString().toLowerCase()) {
case 'lattice': {
ruleOptions['process_background'] = $("#process-background").val() ? true : false;
ruleOptions['line_size_scaling'] = $('#line-size-scaling').val() ? Number($('#line-size-scaling').val()) : 15;
ruleOptions['line_scale'] = $('#line-scale').val() ? Number($('#line-scale').val()) : 15;
ruleOptions['split_text'] = $("#split-text-l").val() ? true : false;
ruleOptions['flag_size'] = $("#flag-size-l").val() ? true : false;
break;
}
case 'stream': {
ruleOptions['row_close_tol'] = $('#row-close-tol').val() ? Number($('#line-size-scaling').val()) : 2;
ruleOptions['col_close_tol'] = $('#col-close-tol').val() ? Number($('#line-size-scaling').val()) : 0;
ruleOptions['row_close_tol'] = $('#row-close-tol').val() ? Number($('#line-scale').val()) : 2;
ruleOptions['col_close_tol'] = $('#col-close-tol').val() ? Number($('#line-scale').val()) : 0;
ruleOptions['split_text'] = $("#split-text-s").val() ? true : false;
ruleOptions['flag_size'] = $("#flag-size-s").val() ? true : false;
break;
Expand Down
1 change: 0 additions & 1 deletion public/.gitignore

This file was deleted.

11 changes: 0 additions & 11 deletions public/.travis.yml

This file was deleted.

1 change: 0 additions & 1 deletion public/CNAME

This file was deleted.

27 changes: 0 additions & 27 deletions public/LICENSE

This file was deleted.

74 changes: 0 additions & 74 deletions public/README.md

This file was deleted.

Loading

0 comments on commit 2019488

Please sign in to comment.