Named Entity Recognition (NER) and Relation Extraction (RE) library using Regular Expressions
pip install extr
## sample sentence used as the input `text` by the examples below
text = 'Ted is a Pitcher.'
Find named entities in text.
## `re` supplies the IGNORECASE flag passed to each RegEx below
import re

from extr import RegEx, RegExLabel
from extr.entities import EntityExtractor

## one RegExLabel per entity label, each holding the patterns for that label
entity_extractor = EntityExtractor([
    RegExLabel('PERSON', [
        RegEx([r'ted'], re.IGNORECASE)
    ]),
    RegExLabel('POSITION', [
        RegEx([r'pitcher'], re.IGNORECASE)
    ]),
])

## `text` is the sample sentence defined earlier
entities = entity_extractor.get_entities(text)

## entities == [
##     <Entity label="POSITION" text="Pitcher" span=(9, 16)>,
##     <Entity label="PERSON" text="Ted" span=(0, 3)>
## ]
Or build the extractor with a knowledge base that lists known terms for each label:
## `re` supplies the IGNORECASE flag passed to RegEx below
import re

from extr import RegEx, RegExLabel
from extr.entities import create_entity_extractor

entity_extractor = create_entity_extractor(
    [
        ## POSITION is still matched by regular expression
        RegExLabel('POSITION', [
            RegEx([r'pitcher'], re.IGNORECASE)
        ]),
    ],
    ## knowledge base: entity label -> list of known terms for that label
    kb={
        'PERSON': ['Ted']
    }
)

## `text` is the sample sentence defined earlier
entities = entity_extractor.get_entities(text)

## entities == [
##     <Entity label="POSITION" text="Pitcher" span=(9, 16)>,
##     <Entity label="PERSON" text="Ted" span=(0, 3)>
## ]
Annotate text to display in HTML.
from extr.entities.viewers import HtmlViewer

## CSS passed to the viewer; the selectors appear to follow `.lb-<LABEL>`,
## one rule per entity label used in the examples (PERSON, POSITION)
label_styles = """
.lb-PERSON {
background-color: orange;
}
.lb-POSITION {
background-color: yellow;
}
"""

## `text` and `entities` come from the entity extraction example above
viewer = HtmlViewer()
viewer.append(text, entities)
html = viewer.create_view(custom_styles=label_styles)
Annotate and Extract Relationships between Entities
from extr.entities import EntityAnnotator
from extr.relations import (
    RegExRelationLabelBuilder,
    RelationExtractor,
)

## describe the `is_a` relationship that links a PERSON to a POSITION
relationship = (
    RegExRelationLabelBuilder('is_a')
    .add_e1_to_e2(
        'PERSON',  ## e1
        [
            ## pattern for the text that connects e1 to e2
            r'\s+is\s+a\s+',
        ],
        'POSITION',  ## e2
    )
    .build()
)
relations_to_extract = [relationship]

## `entities` is the list produced by the entity extraction example above
annotated_text = EntityAnnotator().annotate(text, entities)
relation_extractor = RelationExtractor(relations_to_extract)
relations = relation_extractor.extract(annotated_text, entities)

## relations == [
##     <Relation e1="Ted" r="is_a" e2="Pitcher">
## ]