neo4j_vectorization
Loading the Embeddings
LOAD CSV WITH HEADERS
FROM 'https://data.neo4j.com/llm-fundamentals/openai-embeddings.csv'
AS row
MATCH (m:Movie {movieId: row.movieId})
CALL db.create.setNodeVectorProperty(m, 'plotEmbedding', apoc.convert.fromJsonList(row.embedding))
RETURN count(*)
MATCH (m:Movie {title: "Toy Story"})
RETURN m.title AS title, m.plot AS plot, m.plotEmbedding
Creating the Vector Index
CREATE VECTOR INDEX moviePlots IF NOT EXISTS
FOR (m:Movie)
ON m.plotEmbedding
OPTIONS {indexConfig: {
`vector.dimensions`: 1536,
`vector.similarity_function`: 'cosine'
}}
SHOW INDEXES YIELD id, name, type, state, populationPercent WHERE type = "VECTOR"
Querying Vector Indexes
MATCH (m:Movie {title: 'Toy Story'})
CALL db.index.vector.queryNodes('moviePlots', 6, m.plotEmbedding)
YIELD node, score
RETURN node.title AS title, node.plot AS plot, score
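Because the query uses the movie's own embedding, the movie itself is typically its closest match. A minimal variant of the query above that filters it out (same moviePlots index, nothing new assumed):
MATCH (m:Movie {title: 'Toy Story'})
CALL db.index.vector.queryNodes('moviePlots', 6, m.plotEmbedding)
YIELD node, score
WITH m, node, score WHERE node <> m
RETURN node.title AS title, node.plot AS plot, score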
-------------------------------------------------------------------------------------------------------------------------------------
```python
from langchain_openai import OpenAIEmbeddings
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector

# Service used to embed the query text
embedding_provider = OpenAIEmbeddings(
    openai_api_key="sk-..."
)

# Connection to the Neo4j database
graph = Neo4jGraph(
    url="bolt://localhost:7687",
    username="neo4j",
    password="pleaseletmein"
)

# Wrap the existing moviePlots vector index as a LangChain vector store
movie_plot_vector = Neo4jVector.from_existing_index(
    embedding_provider,
    graph=graph,
    index_name="moviePlots",
    embedding_node_property="plotEmbedding",
    text_node_property="plot",
)

result = movie_plot_vector.similarity_search("A movie where aliens land and attack earth.")
for doc in result:
    print(doc.metadata["title"], "-", doc.page_content)
```
You can pass an optional k argument to the similarity_search() method to specify the number of documents to return. The default is 4.
movie_plot_vector.similarity_search(query, k=1)
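If you also want the similarity score for each result, LangChain vector stores, including Neo4jVector, expose a similarity_search_with_score() method. A minimal sketch, reusing the movie_plot_vector store created above:
```python
# Returns (Document, score) tuples; assumes the movie_plot_vector
# store from the earlier example.
results = movie_plot_vector.similarity_search_with_score(
    "A movie where aliens land and attack earth.", k=4
)
for doc, score in results:
    print(score, doc.metadata["title"], "-", doc.page_content)
```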
----------------------------------------------------------------------------------------------------------------------------------------
Creating a new vector index
The Neo4jVector class can also generate embeddings and create vector indexes, which is useful when creating vectors programmatically or at runtime.
The following code would create embeddings and a new index called myVectorIndex in the database for Chunk nodes with a text property:
```python
from langchain_openai import OpenAIEmbeddings
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain.schema import Document

# A list of Documents
documents = [
    Document(
        page_content="Text to be indexed",
        metadata={"source": "local"}
    )
]

# Service used to create the embeddings
embedding_provider = OpenAIEmbeddings(
    openai_api_key="sk-..."
)

graph = Neo4jGraph(
    url="bolt://localhost:7687",
    username="neo4j",
    password="pleaseletmein"
)

new_vector = Neo4jVector.from_documents(
    documents,
    embedding_provider,
    graph=graph,
    index_name="myVectorIndex",
    node_label="Chunk",
    text_node_property="text",
    embedding_node_property="embedding",
    create_id_index=True,
)
```
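The returned vector store can be queried straight away. A minimal sketch, assuming the new_vector object created above:
```python
# Query the newly created myVectorIndex through the returned store
result = new_vector.similarity_search("Text to be indexed", k=1)
for doc in result:
    print(doc.page_content, doc.metadata)
```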
----------------------------------------------------------------------------------------------------------------------------------------
```python
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector

OPENAI_API_KEY = "sk-..."

llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY)
embedding_provider = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

graph = Neo4jGraph(
    url="bolt://localhost:7687",
    username="neo4j",
    password="pleaseletmein"
)

movie_plot_vector = Neo4jVector.from_existing_index(
    embedding_provider,
    graph=graph,
    index_name="moviePlots",
    embedding_node_property="plotEmbedding",
    text_node_property="plot",
)

plot_retriever = RetrievalQA.from_llm(
    llm=llm,
    retriever=movie_plot_vector.as_retriever()
)

response = plot_retriever.invoke(
    {"query": "A movie where a mission to the moon goes wrong"}
)
print(response)
```
Understanding the results
It can be difficult to understand how the model generated the response and how the retriever affected it.
By setting the optional verbose and return_source_documents arguments to True when creating the RetrievalQA chain, you can see the prompts the chain runs and the source documents the retriever passed to the LLM.
```python
plot_retriever = RetrievalQA.from_llm(
    llm=llm,
    retriever=movie_plot_vector.as_retriever(),
    verbose=True,
    return_source_documents=True
)
```
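With return_source_documents=True, the response dictionary contains the retrieved documents alongside the generated answer. A sketch of inspecting it, assuming the plot_retriever and query from the earlier example:
```python
response = plot_retriever.invoke(
    {"query": "A movie where a mission to the moon goes wrong"}
)
# The generated answer
print(response["result"])
# The documents the retriever supplied to the LLM
for doc in response["source_documents"]:
    print(doc.metadata.get("title"), "-", doc.page_content)
```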
-------------------------------------------------------------------------- Load into Neo4j
LOAD CSV WITH HEADERS
FROM 'https://data.neo4j.com/llm-vectors-unstructured/Quora-QuAD-1000-embeddings.csv' AS row
MERGE (q:Question {text: row.question})
WITH row, q
CALL db.create.setNodeVectorProperty(q, 'embedding', apoc.convert.fromJsonList(row.question_embedding))
MERGE (a:Answer {text: row.answer})
WITH row, a, q
CALL db.create.setNodeVectorProperty(a, 'embedding', apoc.convert.fromJsonList(row.answer_embedding))
MERGE (q)-[:ANSWERED_BY]->(a)
MATCH (q:Question)-[r:ANSWERED_BY]->(a:Answer)
RETURN q,r,a
LIMIT 100
------------------------------------------------------------------------------- Create the Question Index
To query embeddings, you need to create a vector index. A vector index significantly speeds up similarity searches by organizing the embeddings into a structure optimized for approximate nearest-neighbour search, so the query vector does not have to be compared against every vector in the database.
CREATE VECTOR INDEX questions IF NOT EXISTS
FOR (q:Question)
ON q.embedding
OPTIONS {indexConfig: {
`vector.dimensions`: 1536,
`vector.similarity_function`: 'cosine'
}}
CREATE VECTOR INDEX answers IF NOT EXISTS
FOR (a:Answer)
ON a.embedding
OPTIONS {indexConfig: {
`vector.dimensions`: 1536,
`vector.similarity_function`: 'cosine'
}}
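As with the movie plots index, you can confirm both indexes are online before querying them:
SHOW INDEXES YIELD id, name, type, state, populationPercent WHERE type = "VECTOR"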
------------------------------------------------------------------------------- Query a Vector Index
MATCH (q:Question {text: "What are the most touristic countries in the world?"})
CALL db.index.vector.queryNodes('questions', 6, q.embedding)
YIELD node, score
RETURN node.text, score
MATCH (q:Question {text: "What are the most touristic countries in the world?"})
CALL db.index.vector.queryNodes('questions', 6, q.embedding)
YIELD node, score
MATCH (node)-[:ANSWERED_BY]->(a)
RETURN a.text, score
You can generate a new embedding in Cypher using the genai.vector.encode function:
genai.vector.encode(
resource :: STRING,
provider :: STRING,
configuration :: MAP = {}
) :: LIST<FLOAT>
For example, you can use the OpenAI provider to generate an embedding, passing the API key as token in the configuration map:
WITH genai.vector.encode("Test", "OpenAI", { token: "sk-..." }) AS embedding
RETURN embedding
You can incorporate the embedding into your query to find similar questions:
WITH genai.vector.encode(
"What are good open source projects",
"OpenAI",
{ token: "sk-..." }) AS userEmbedding
CALL db.index.vector.queryNodes('questions', 6, userEmbedding)
YIELD node, score
RETURN node.text, score
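You can also combine the two patterns above: encode the user's question, query the index, and follow the ANSWERED_BY relationship to return answers. A sketch, reusing the questions index and relationships created earlier:
WITH genai.vector.encode(
    "What are good open source projects",
    "OpenAI",
    { token: "sk-..." }) AS userEmbedding
CALL db.index.vector.queryNodes('questions', 6, userEmbedding)
YIELD node, score
MATCH (node)-[:ANSWERED_BY]->(a:Answer)
RETURN node.text AS question, a.text AS answer, score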