Skip to content

Add sentence-transformers embeddings #40

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions src/neo4j_genai/embeddings/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .sentence_transformers import SentenceTransformerEmbeddings

# Names exported by `from neo4j_genai.embeddings import *`.
__all__ = [
"SentenceTransformerEmbeddings",
]
17 changes: 17 additions & 0 deletions src/neo4j_genai/embeddings/sentence_transformers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from neo4j_genai.embedder import Embedder


class SentenceTransformerEmbeddings(Embedder):
    """Embedder that delegates to a ``sentence_transformers.SentenceTransformer``.

    The ``sentence_transformers`` package is imported inside ``__init__``
    rather than at module level, so it is only needed when this class is
    actually instantiated.
    """

    def __init__(self, model: str = "all-MiniLM-L6-v2", *args, **kwargs) -> None:
        """Create the underlying ``SentenceTransformer`` instance.

        Args:
            model: Model identifier passed as the first argument to
                ``SentenceTransformer``.
            *args: Additional positional arguments forwarded unchanged to
                ``SentenceTransformer``.
            **kwargs: Additional keyword arguments forwarded unchanged to
                ``SentenceTransformer``.

        Raises:
            ImportError: If the ``sentence_transformers`` package cannot be
                imported. The original import failure is attached as the
                exception's cause.
        """
        try:
            from sentence_transformers import SentenceTransformer
        except ImportError as err:
            # Chain explicitly so the traceback reports the real import
            # failure as the direct cause instead of an unrelated error
            # "during handling" of another exception.
            raise ImportError(
                "Could not import sentence_transformers python package. "
                "Please install it with `pip install sentence-transformers`."
            ) from err

        self.model = SentenceTransformer(model, *args, **kwargs)

    def embed_query(self, text: str) -> list[float]:
        """Return the embedding of ``text`` as a flat list of floats.

        Args:
            text: The string to embed.

        Returns:
            The flattened output of ``self.model.encode`` converted to a
            Python list.
        """
        # The text is sent as a one-element batch; flatten() collapses the
        # batched result (presumably shape (1, dim) - confirm against the
        # sentence-transformers docs) into a single 1-D vector.
        return self.model.encode([text]).flatten().tolist()
38 changes: 38 additions & 0 deletions tests/unit/embeddings/test_sentence_transformers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest
from unittest.mock import patch
from neo4j_genai.embeddings import SentenceTransformerEmbeddings
from neo4j_genai.embedder import Embedder
import numpy as np


@patch("sentence_transformers.SentenceTransformer")
def test_initialization(MockSentenceTransformer):
    """Default construction yields an Embedder and loads the default model."""
    embedder = SentenceTransformerEmbeddings()

    assert isinstance(embedder, Embedder)
    MockSentenceTransformer.assert_called_with("all-MiniLM-L6-v2")


@patch("sentence_transformers.SentenceTransformer")
def test_initialization_with_custom_model(MockSentenceTransformer):
    """An explicit ``model`` argument is forwarded to SentenceTransformer."""
    model_name = "distilbert-base-nli-stsb-mean-tokens"

    SentenceTransformerEmbeddings(model=model_name)

    MockSentenceTransformer.assert_called_with(model_name)


@patch("sentence_transformers.SentenceTransformer")
def test_embed_query(MockSentenceTransformer):
    """embed_query encodes a one-item batch and returns a flat Python list."""
    expected = [0.1, 0.2, 0.3]
    encoder = MockSentenceTransformer.return_value
    # The mocked model answers with a single-row 2-D array.
    encoder.encode.return_value = np.array([expected])

    embedding = SentenceTransformerEmbeddings().embed_query("test query")

    assert isinstance(embedding, list)
    assert embedding == expected
    encoder.encode.assert_called_with(["test query"])


@patch("sentence_transformers.SentenceTransformer")
def test_import_error(MockSentenceTransformer):
    """A missing sentence_transformers package surfaces the install hint.

    The failure is simulated at import time, so the ``except ImportError``
    branch of ``SentenceTransformerEmbeddings.__init__`` is what gets
    exercised, not an error raised by the model constructor.
    """
    # A None entry in sys.modules makes `import sentence_transformers` raise
    # ImportError, which is what happens when the package is not installed.
    with patch.dict("sys.modules", {"sentence_transformers": None}):
        with pytest.raises(
            ImportError, match="Could not import sentence_transformers"
        ):
            SentenceTransformerEmbeddings()

    # The import failed first, so the model class must never have been called.
    MockSentenceTransformer.assert_not_called()
Loading