# simple_kg_builder_from_text.py
# Forked from neo4j/neo4j-graphrag-python.
"""This example illustrates how to get started easily with the SimpleKGPipeline
and ingest text into a Neo4j Knowledge Graph.
This example assumes a Neo4j db is up and running. Update the credentials below
if needed.
NB: when building a KG from text, no 'Document' node is created in the Knowledge Graph.
"""
import asyncio
import logging
import neo4j
from neo4j_graphrag.embeddings import OpenAIEmbeddings
from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline
from neo4j_graphrag.experimental.pipeline.pipeline import PipelineResult
from neo4j_graphrag.experimental.pipeline.types import (
EntityInputType,
RelationInputType,
)
from neo4j_graphrag.llm import LLMInterface
from neo4j_graphrag.llm.openai_llm import OpenAILLM
# Logging: configure the root logger with defaults and set the
# "neo4j_graphrag" logger to DEBUG. Use logging.INFO instead for less output.
logging.basicConfig()
logging.getLogger("neo4j_graphrag").setLevel(logging.DEBUG)

# Neo4j connection settings: server URI, credentials passed as `auth` to the
# driver, and the database name handed to the pipeline.
URI = "neo4j://localhost:7687"
AUTH = ("neo4j", "password")
DATABASE = "neo4j"

# Raw text the pipeline is run on.
TEXT = """The son of Duke Leto Atreides and the Lady Jessica, Paul is the heir of House Atreides,
an aristocratic family that rules the planet Caladan, the rainy planet, since 10191."""

# Entity types the LLM should look for in the text. Three input forms are
# shown: a bare label, a label with a description, and a label with a list
# of properties to attach to the entity.
ENTITIES: list[EntityInputType] = [
    "Person",
    {
        "label": "House",
        "description": "Family the person belongs to",
    },
    {
        "label": "Planet",
        "properties": [
            {"name": "weather", "type": "STRING"},
        ],
    },
]

# Relationship types, declared with the same three input forms as entities.
RELATIONS: list[RelationInputType] = [
    "PARENT_OF",
    {
        "label": "HEIR_OF",
        "description": "Used for inheritor relationship between father and sons",
    },
    {
        "label": "RULES",
        "properties": [
            {"name": "fromYear", "type": "INTEGER"},
        ],
    },
]

# Triples built from the labels declared above; each one reads as
# (entity label, relation label, entity label).
POTENTIAL_SCHEMA = [
    ("Person", "PARENT_OF", "Person"),
    ("Person", "HEIR_OF", "House"),
    ("House", "RULES", "Planet"),
]
async def define_and_run_pipeline(
    neo4j_driver: neo4j.Driver,
    llm: LLMInterface,
) -> PipelineResult:
    """Build a SimpleKGPipeline from the module-level settings and run it on TEXT.

    Args:
        neo4j_driver: Driver handed to the pipeline as its `driver`.
        llm: LLM handed to the pipeline as its `llm`.

    Returns:
        The result of awaiting the pipeline's `run_async` call.
    """
    embedder = OpenAIEmbeddings()
    pipeline = SimpleKGPipeline(
        llm=llm,
        driver=neo4j_driver,
        embedder=embedder,
        entities=ENTITIES,
        relations=RELATIONS,
        potential_schema=POTENTIAL_SCHEMA,
        # The input is supplied as raw text below, not as a PDF file.
        from_pdf=False,
        neo4j_database=DATABASE,
    )
    result = await pipeline.run_async(text=TEXT)
    return result
async def main() -> PipelineResult:
    """Create the LLM and the Neo4j driver, run the pipeline, and clean up.

    The LLM is an OpenAILLM using the "gpt-4o" model, limited to 2000 tokens
    and asked for a JSON object response format. The driver is opened from
    URI and AUTH and closed by its context manager.

    Returns:
        The PipelineResult produced by `define_and_run_pipeline`.
    """
    llm = OpenAILLM(
        model_name="gpt-4o",
        model_params={
            "max_tokens": 2000,
            "response_format": {"type": "json_object"},
        },
    )
    try:
        with neo4j.GraphDatabase.driver(URI, auth=AUTH) as driver:
            return await define_and_run_pipeline(driver, llm)
    finally:
        # Close the async client even when the driver or the pipeline raises,
        # so a failed run does not leave the client open.
        await llm.async_client.close()
if __name__ == "__main__":
    # Script entry point: run the async main() to completion, then print
    # the pipeline result it returned.
    pipeline_result = asyncio.run(main())
    print(pipeline_result)