Skip to content

Commit b4b4fea

Browse files
authored
Add efficientdet models (Layout-Parser#67)
* add effdet check * Add effdet models and catalogs * clean-up * register effdet models * Add generalized image type support for layout models * Add effdet tests * Update reqs
1 parent 9b73ff1 commit b4b4fea

12 files changed

+402
-24
lines changed

dev-requirements.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
pytest
2+
torch
23
numpy
34
opencv-python
45
pandas
@@ -11,4 +12,5 @@ google-cloud-vision==1
1112
pytesseract
1213
pycocotools
1314
git+https://github.com/facebookresearch/detectron2.git@v0.4#egg=detectron2
14-
paddlepaddle
15+
paddlepaddle
16+
effdet

setup.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
"pandas",
2525
"pillow",
2626
"pyyaml>=5.1",
27-
"torch",
2827
"torchvision",
2928
"iopath",
3029
],
@@ -33,6 +32,10 @@
3332
'google-cloud-vision==1',
3433
'pytesseract'
3534
],
35+
"effdet": [
36+
"torch",
37+
"effdet"
38+
]
3639
},
3740
include_package_data=True
3841
)

src/layoutparser/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
_LazyModule,
77
is_detectron2_available,
88
is_paddle_available,
9+
is_effdet_available,
910
is_pytesseract_available,
1011
is_gcv_available,
1112
)
@@ -45,6 +46,9 @@
4546
if is_paddle_available():
4647
_import_structure["models.paddledetection"] = ["PaddleDetectionLayoutModel"]
4748

49+
if is_effdet_available():
50+
_import_structure["models.effdet"] = ["EfficientDetLayoutModel"]
51+
4852
if is_pytesseract_available():
4953
_import_structure["ocr.tesseract_agent"] = [
5054
"TesseractAgent",

src/layoutparser/file_utils.py

+22-5
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,18 @@
3939
# The name of the paddlepaddle library:
4040
# Install name: pip install paddlepaddle
4141
# Import name: import paddle
42-
_paddle_version = importlib_metadata.version("paddlepaddle")
42+
_paddle_version = importlib_metadata.version("paddlepaddle")
4343
logger.debug(f"Paddle version {_paddle_version} available.")
4444
except importlib_metadata.PackageNotFoundError:
4545
_paddle_available = False
4646

47+
# The name of the effdet library:
#   Install name: pip install effdet
#   Import name: import effdet
# First check that the module can be located, then confirm that the
# distribution metadata is present as well.
_effdet_available = importlib.util.find_spec("effdet") is not None
try:
    _effdet_version = importlib_metadata.version("effdet")
    logger.debug(f"Effdet version {_effdet_version} available.")
except importlib_metadata.PackageNotFoundError:
    # Mirror the paddle check: missing metadata means the backend is not
    # usable, so clear the availability flag (not the version variable).
    _effdet_available = False
53+
4754
###########################################
4855
############## OCR Tool Deps ##############
4956
###########################################
@@ -78,12 +85,16 @@ def is_torch_cuda_available():
7885
return False
7986

8087

88+
def is_detectron2_available():
89+
return _detectron2_available
90+
91+
8192
def is_paddle_available():
8293
return _paddle_available
8394

8495

85-
def is_detectron2_available():
86-
return _detectron2_available
96+
def is_effdet_available():
97+
return _effdet_available
8798

8899

89100
def is_pytesseract_available():
@@ -111,6 +122,11 @@ def is_gcv_available():
111122
installation page: https://github.com/PaddlePaddle/Paddle and follow the ones that match your environment.
112123
"""
113124

125+
EFFDET_IMPORT_ERROR = """
126+
{0} requires the effdet library but it was not found in your environment. You can install it with pip:
127+
`pip install effdet`
128+
"""
129+
114130
PYTESSERACT_IMPORT_ERROR = """
115131
{0} requires the PyTesseract library but it was not found in your environment. You can install it with pip:
116132
`pip install pytesseract`
@@ -126,6 +142,7 @@ def is_gcv_available():
126142
("torch", (is_torch_available, PYTORCH_IMPORT_ERROR)),
127143
("detectron2", (is_detectron2_available, DETECTRON2_IMPORT_ERROR)),
128144
("paddle", (is_paddle_available, PADDLE_IMPORT_ERROR)),
145+
# Like its siblings, the entry pairs the availability check with the
# import-error template shown when the backend is missing.
("effdet", (is_effdet_available, EFFDET_IMPORT_ERROR)),
129146
("pytesseract", (is_pytesseract_available, PYTESSERACT_IMPORT_ERROR)),
130147
("google-cloud-vision", (is_gcv_available, GCV_IMPORT_ERROR)),
131148
]
@@ -172,7 +189,7 @@ def __init__(
172189
self._import_structure = import_structure
173190

174191
# Following [PEP 366](https://www.python.org/dev/peps/pep-0366/)
175-
# The __package__ variable should be set
192+
# The __package__ variable should be set
176193
# https://docs.python.org/3/reference/import.html#__package__
177194
self.__package__ = self.__name__
178195

@@ -198,4 +215,4 @@ def _get_module(self, module_name: str):
198215
return importlib.import_module("." + module_name, self.__name__)
199216

200217
def __reduce__(self):
201-
return (self.__class__, (self._name, self.__file__, self._import_structure))
218+
return (self.__class__, (self._name, self.__file__, self._import_structure))

src/layoutparser/models/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
from .detectron2.layoutmodel import Detectron2LayoutModel
2-
from .paddledetection.layoutmodel import PaddleDetectionLayoutModel
2+
from .paddledetection.layoutmodel import PaddleDetectionLayoutModel
3+
from .effdet.layoutmodel import EfficientDetLayoutModel

src/layoutparser/models/base_layoutmodel.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,23 @@
1+
from typing import Union
12
from abc import ABC, abstractmethod
23

34
from ..file_utils import requires_backends
45

56

67
class BaseLayoutModel(ABC):
7-
88
@property
99
@abstractmethod
1010
def DETECTOR_NAME(self):
1111
pass
12-
12+
13+
@abstractmethod
14+
def detect(self, image):
15+
pass
16+
1317
@abstractmethod
14-
def detect(self):
18+
def image_loader(self, image: Union["ndarray", "Image"]):
19+
"""It will process the input images appropriately to the target format.
20+
"""
1521
pass
1622

1723
# Add lazy loading mechanisms for layout models, refer to

src/layoutparser/models/detectron2/layoutmodel.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from typing import Union
12
from PIL import Image
23
import numpy as np
34

@@ -41,7 +42,7 @@ class Detectron2LayoutModel(BaseLayoutModel):
4142
4243
Examples::
4344
>>> import layoutparser as lp
44-
>>> model = lp.models.Detectron2LayoutModel('lp://HJDataset/faster_rcnn_R_50_FPN_3x/config')
45+
>>> model = lp.Detectron2LayoutModel('lp://HJDataset/faster_rcnn_R_50_FPN_3x/config')
4546
>>> model.detect(image)
4647
4748
"""
@@ -108,7 +109,7 @@ def _reconstruct_path_with_detector_name(self, path: str) -> str:
108109
model_name_segments = model_name.split("/")
109110
if (
110111
len(model_name_segments) == 3
111-
and "detectron2" not in model_name_segments
112+
and self.DETECTOR_NAME not in model_name_segments
112113
):
113114
return "lp://" + self.DETECTOR_NAME + "/" + path[len("lp://") :]
114115
return path
@@ -148,12 +149,16 @@ def detect(self, image):
148149
:obj:`~layoutparser.Layout`: The detected layout of the input image
149150
"""
150151

152+
image = self.image_loader(image)
153+
outputs = self.model(image)
154+
layout = self.gather_output(outputs)
155+
return layout
156+
157+
def image_loader(self, image: Union["np.ndarray", "Image.Image"]):
151158
# Convert PIL Image Input
152159
if isinstance(image, Image.Image):
153160
if image.mode != "RGB":
154161
image = image.convert("RGB")
155162
image = np.array(image)
156163

157-
outputs = self.model(image)
158-
layout = self.gather_output(outputs)
159-
return layout
164+
return image
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from . import catalog as _UNUSED
2+
from .layoutmodel import EfficientDetLayoutModel
+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
from iopath.common.file_io import PathHandler
2+
3+
from ..base_catalog import PathManager
4+
5+
MODEL_CATALOG = {
6+
"PubLayNet": {
7+
"tf_efficientdet_d0": "https://www.dropbox.com/s/ukbw5s673633hsw/publaynet-tf_efficientdet_d0.pth.tar?dl=1",
8+
"tf_efficientdet_d1": "https://www.dropbox.com/s/gxy11xkkiwnpgog/publaynet-tf_efficientdet_d1.pth.tar?dl=1"
9+
},
10+
"MFD": {
11+
"tf_efficientdet_d0": "https://www.dropbox.com/s/dkr22iux7thlhel/mfd-tf_efficientdet_d0.pth.tar?dl=1",
12+
"tf_efficientdet_d1": "https://www.dropbox.com/s/icmbiaqr5s9bz1x/mfd-tf_efficientdet_d1.pth.tar?dl=1"
13+
}
14+
}
15+
16+
# In effdet training scripts, it requires the label_map starting
17+
# from 1 instead of 0
18+
LABEL_MAP_CATALOG = {
19+
"PubLayNet": {
20+
1: "Text",
21+
2: "Title",
22+
3: "List",
23+
4: "Table",
24+
5: "Figure"
25+
}
26+
}
27+
28+
class LayoutParserEfficientDetModelHandler(PathHandler):
    """
    Path handler for EfficientDet entries in the LayoutParser model zoo.

    Paths have the form ``lp://efficientdet/<dataset>/<model>/<data_type>``
    and are mapped through ``MODEL_CATALOG`` to a download URL.
    """

    PREFIX = "lp://efficientdet/"

    def _get_supported_prefixes(self):
        """Return the list of path prefixes served by this handler."""
        return [self.PREFIX]

    def _get_local_path(self, path, **kwargs):
        """Resolve a model-zoo path and delegate to ``PathManager.get_local_path``.

        The part after the prefix is split on ``/``: the first segment is the
        dataset name, the last one is the data type, and whatever lies in
        between forms the model name. Only the ``weight`` data type is
        recognized; any other value raises ``ValueError``.
        """
        relative = path[len(self.PREFIX) :]
        dataset_name, *name_parts, data_type = relative.split("/")

        # Reject unsupported data types before touching the catalog.
        if data_type != "weight":
            raise ValueError(f"Unknown data_type {data_type}")

        model_key = "/".join(name_parts)
        model_url = MODEL_CATALOG[dataset_name][model_key]
        return PathManager.get_local_path(model_url, **kwargs)

    def _open(self, path, mode="r", **kwargs):
        """Open the resolved resource through ``PathManager`` in ``mode``."""
        local_path = self._get_local_path(path)
        return PathManager.open(local_path, mode, **kwargs)
51+
52+
53+
PathManager.register_handler(LayoutParserEfficientDetModelHandler())

0 commit comments

Comments
 (0)