Skip to content
This repository was archived by the owner on Apr 11, 2025. It is now read-only.

Commit 37c9a38

Browse files
committed
Fixup handler
1 parent 5fee62d commit 37c9a38

File tree

1 file changed

15 additions and 10 deletions

1 file changed

15 additions and 10 deletions

Diff for: camelot/handlers.py

+15 -10
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from .utils import get_text_objects
1919
from .utils import is_url
2020

21+
import warnings
2122

2223
class PDFHandler:
2324
"""Handles all operations like temp directory creation, splitting
@@ -36,7 +37,7 @@ class PDFHandler:
3637
3738
"""
3839

39-
def __init__(self, filepath: Union[StrByteType, Path], pages="1", password=None, multi=[]):
40+
def __init__(self, filepath: Union[StrByteType, Path], pages="1", password=None, multi={}):
4041
if is_url(filepath):
4142
filepath = download_url(filepath)
4243
self.filepath: Union[StrByteType, Path] = filepath
@@ -188,35 +189,39 @@ def parse(
188189
if parallel and len(self.pages) > 1 and cpu_count > 1:
189190
with mp.get_context("spawn").Pool(processes=cpu_count) as pool:
190191
jobs = []
191-
for p in self.pages:
192-
192+
for i, p in enumerate(self.pages, 1):
193+
p_no = str(i) # [start] # [-5]
193194
page_kwargs = kwargs
194195
page_parser = parser
195-
196-
if p in self.multi:
196+
# assert p == 0
197+
# print("test")
198+
# warnings.warn(UserWarning("{}".format(p)))
199+
if p_no in self.multi:
200+
print(p + " is found in " + self.multi)
197201
page_kwargs.update(self.multi[p_no])
198202
page_parser = Lattice(**page_kwargs) if flavor == 'lattice' else Stream(**page_kwargs)
199203

200204
j = pool.apply_async(
201-
self._parse_page,(p, tempdir, parser, suppress_stdout, layout_kwargs)
205+
self._parse_page,(p, tempdir, page_parser, suppress_stdout, layout_kwargs)
202206
)
203207
jobs.append(j)
204208

205209
for j in jobs:
206210
t = j.get()
207211
tables.extend(t)
208212
else:
209-
for p in self.pages:
210-
# p_no = p
213+
for i, p in enumerate(self.pages, 1):
214+
p_no = str(i) # [start] # [-5]
211215

212216
page_kwargs = kwargs
213217
page_parser = parser
214218

215-
if p in self.multi:
219+
if p_no in self.multi:
220+
print(i,p) # debug
216221
page_kwargs.update(self.multi[p_no])
217222
page_parser = Lattice(**page_kwargs) if flavor == 'lattice' else Stream(**page_kwargs)
218223

219-
t = self._parse_page(p, tempdir, parser, suppress_stdout, layout_kwargs)
224+
t = self._parse_page(p, tempdir, page_parser, suppress_stdout, layout_kwargs)
220225
tables.extend(t)
221226

222227
return TableList(sorted(tables))

0 commit comments

Comments
 (0)