superlinear-ai · undo76 · Jan 13, 2025 · Jan 13, 2025 · Jan 14, 2025 · Jan 14, 2025
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -34,11 +34,11 @@ scipy = ">=1.5.0"
 wtpsplit-lite = ">=0.1.0"
 # Large Language Models:
 huggingface-hub = ">=0.22.0"
-litellm = ">=1.48.4,<1.56.10"
+litellm = ">=1.48.4,<1.56.2"
 llama-cpp-python = ">=0.3.2"
 pydantic = ">=2.7.0"
 # Approximate Nearest Neighbors:
-pynndescent = ">=0.5.12"
+pynndescent = ">=0.5.13"
 # Reranking:
 langdetect = ">=1.0.9"
 rerankers = { extras = ["flashrank"], version = ">=0.6.0" }

diff --git a/src/raglite/__init__.py b/src/raglite/__init__.py
@@ -15,7 +15,7 @@
     vector_search,
 )
 
-__all__ = [
+__all__ = [  # noqa: RUF022
     # Config
     "RAGLiteConfig",
     # Insert

diff --git a/src/raglite/_chainlit.py b/src/raglite/_chainlit.py
@@ -26,14 +26,14 @@ async def start_chat() -> None:
         llm=os.environ.get("RAGLITE_LLM", default_config.llm),
         embedder=os.environ.get("RAGLITE_EMBEDDER", default_config.embedder),
     )
-    settings = await cl.ChatSettings(  # type: ignore[no-untyped-call]
+    settings = await cl.ChatSettings(
         [
             TextInput(id="db_url", label="Database URL", initial=str(config.db_url)),
             TextInput(id="llm", label="LLM", initial=config.llm),
             TextInput(id="embedder", label="Embedder", initial=config.embedder),
             Switch(id="vector_search_query_adapter", label="Query adapter", initial=True),
         ]
-    ).send()
+    ).send()  # type: ignore[no-untyped-call]
     await update_config(settings)
 
 

diff --git a/src/raglite/_chatml_function_calling.py b/src/raglite/_chatml_function_calling.py
@@ -361,7 +361,7 @@ def chatml_function_calling_with_streaming(
     if isinstance(tool_choice, dict):
         tools = [t for t in tools if t["function"]["name"] == tool_choice["function"]["name"]]
         assert tools
-    function_names = " | ".join([f'''"functions.{t['function']['name']}:"''' for t in tools])
+    function_names = " | ".join([f'''"functions.{t["function"]["name"]}:"''' for t in tools])
     prompt = template_renderer.render(
         messages=messages, tools=tools, tool_calls=True, add_generation_prompt=True
     )
@@ -408,8 +408,7 @@ def chatml_function_calling_with_streaming(
 
     # Case 2 step 2B: One or more function calls
     follow_up_gbnf_tool_grammar = (
-        'root ::= functions | "</function_calls>" | "<|im_end|>"\n'
-        f"functions ::= {function_names}\n"
+        f'root ::= functions | "</function_calls>" | "<|im_end|>"\nfunctions ::= {function_names}\n'
     )
     prompt += "<function_calls>\n"
     if stream:
@@ -474,28 +473,31 @@ def chatml_function_calling_with_streaming(
         "created": completion["created"],
         "model": completion["model"],
         "choices": [
-            {
-                "finish_reason": "tool_calls",
-                "index": 0,
-                "logprobs": completion["choices"][0]["logprobs"],
-                "message": {
-                    "role": "assistant",
-                    "content": None,
-                    "tool_calls": [
-                        {
-                            "id": "call_" + f"_{i}_" + tool_name + "_" + completion["id"],
-                            "type": "function",
-                            "function": {
-                                "name": tool_name,
-                                "arguments": completion["choices"][0]["text"],
-                            },
-                        }
-                        for i, (tool_name, completion) in enumerate(
-                            zip(completions_tool_name, completions, strict=True)
-                        )
-                    ],
+            cast(
+                llama_types.ChatCompletionResponseChoice,
+                {
+                    "finish_reason": "tool_calls",
+                    "index": 0,
+                    "logprobs": completion["choices"][0]["logprobs"],
+                    "message": {
+                        "role": "assistant",
+                        "content": None,
+                        "tool_calls": [
+                            {
+                                "id": "call_" + f"_{i}_" + tool_name + "_" + completion["id"],
+                                "type": "function",
+                                "function": {
+                                    "name": tool_name,
+                                    "arguments": completion["choices"][0]["text"],
+                                },
+                            }
+                            for i, (tool_name, completion) in enumerate(
+                                zip(completions_tool_name, completions, strict=True)
+                            )
+                        ],
+                    },
                 },
-            }
+            )
         ],
         "usage": {
             "completion_tokens": sum(

diff --git a/src/raglite/_embed.py b/src/raglite/_embed.py
@@ -178,7 +178,7 @@ def _embed_string_batch(string_batch: list[str], *, config: RAGLiteConfig) -> Fl
     )
     batch_embeddings = [
         _embed_string_batch(sentence_windows[i : i + batch_size], config=config)
-        for i in batch_range(0, len(sentence_windows), batch_size)  # type: ignore[operator]
+        for i in batch_range(0, len(sentence_windows), batch_size)
     ]
     sentence_embeddings = np.vstack(batch_embeddings)
     return sentence_embeddings

diff --git a/src/raglite/_eval.py b/src/raglite/_eval.py
@@ -263,7 +263,7 @@ def embed_documents(self, texts: list[str]) -> list[list[float]]:
             verbose=llm.verbose,
         )
     else:
-        lc_llm = ChatLiteLLM(model=config.llm)  # type: ignore[call-arg]
+        lc_llm = ChatLiteLLM(model=config.llm)
     embedder = RAGLiteRagasEmbeddings(config=config)
     # Evaluate the answered evals with Ragas.
     evaluation_df = ragas_evaluate(

diff --git a/src/raglite/_extract.py b/src/raglite/_extract.py
@@ -34,8 +34,9 @@ class MyNameResponse(BaseModel):
     # Load the default config if not provided.
     config = config or RAGLiteConfig()
     # Check if the LLM supports the response format.
+    custom_llm_provider = "custom_openai" if config.llm.startswith("llama-cpp-python") else None
     llm_supports_response_format = "response_format" in (
-        get_supported_openai_params(model=config.llm) or []
+        get_supported_openai_params(model=config.llm, custom_llm_provider=custom_llm_provider) or []
     )
     # Update the system prompt with the JSON schema of the return type to help the LLM.
     system_prompt = getattr(return_type, "system_prompt", "").strip()
@@ -45,7 +46,6 @@ class MyNameResponse(BaseModel):
     # is disabled by default because it only supports a subset of JSON schema features [2].
     # [1] https://docs.litellm.ai/docs/completion/json_mode
     # [2] https://platform.openai.com/docs/guides/structured-outputs#some-type-specific-keywords-are-not-yet-supported
-    # TODO: Fall back to {"type": "json_object"} if JSON schema is not supported by the LLM.
     response_format: dict[str, Any] | None = (
         {
             "type": "json_schema",

diff --git a/src/raglite/_litellm.py b/src/raglite/_litellm.py
@@ -277,7 +277,7 @@ async def astreaming(  # type: ignore[misc,override]  # noqa: PLR0913
         litellm_params: dict[str, Any] | None = None,
         logger_fn: Callable | None = None,  # type: ignore[type-arg]
         headers: dict[str, Any] | None = None,
-        timeout: float | httpx.Timeout | None = None,  # noqa: ASYNC109
+        timeout: float | httpx.Timeout | None = None,
         client: AsyncHTTPHandler | None = None,
     ) -> AsyncIterator[GenericStreamingChunk]:
         # Start a synchronous stream.

diff --git a/tests/test_extract.py b/tests/test_extract.py
@@ -18,20 +18,20 @@ def test_extract(llm: str, strict: bool) -> None:  # noqa: FBT001
     config = RAGLiteConfig(llm=llm)
 
     # Define the JSON schema of the response.
-    class LoginResponse(BaseModel):
-        """The response to a login request."""
+    class UserProfileResponse(BaseModel):
+        """The response to a user profile extraction request."""
 
         model_config = ConfigDict(extra="forbid" if strict else "allow")
         username: str = Field(..., description="The username.")
-        password: str = Field(..., description="The password.")
-        system_prompt: ClassVar[str] = "Extract the username and password from the input."
+        email: str = Field(..., description="The email address.")
+        system_prompt: ClassVar[str] = "Extract the username and email from the input."
 
-    # Extract structured data.
-    username, password = "cypher", "steak"
-    login_response = extract_with_llm(
-        LoginResponse, f"username: {username}\npassword: {password}", strict=strict, config=config
+    # Example input data.
+    username, email = "cypher", "cypher@example.com"
+    profile_response = extract_with_llm(
+        UserProfileResponse, f"username: {username}\nemail: {email}", strict=strict, config=config
     )
     # Validate the response.
-    assert isinstance(login_response, LoginResponse)
-    assert login_response.username == username
-    assert login_response.password == password
+    assert isinstance(profile_response, UserProfileResponse)
+    assert profile_response.username == username
+    assert profile_response.email == email
diff --git a/tests/test_markdown.py b/tests/test_markdown.py
@@ -15,7 +15,7 @@ def test_pdf_with_missing_font_sizes() -> None:
     expected_heading = """
 # ON THE ELECTRODYNAMICS OF MOVING BODIES
 
-## By A. EINSTEIN  June 30, 1905
+## By A. EINSTEIN June 30, 1905
 
 It is known that Maxwell
     """.strip()

diff --git a/tests/test_rerank.py b/tests/test_rerank.py
@@ -52,7 +52,7 @@ def test_reranker(
     )
     # Search for a query.
     query = "What does it mean for two events to be simultaneous?"
-    chunk_ids, _ = hybrid_search(query, num_results=20, config=raglite_test_config)
+    chunk_ids, _ = hybrid_search(query, num_results=30, config=raglite_test_config)
     # Retrieve the chunks.
     chunks = retrieve_chunks(chunk_ids, config=raglite_test_config)
     assert all(isinstance(chunk, Chunk) for chunk in chunks)
@@ -67,4 +67,6 @@ def test_reranker(
             τ_search = kendall_tau(chunks, reranked_chunks)  # noqa: PLC2401
             τ_inverse = kendall_tau(chunks[::-1], reranked_chunks)  # noqa: PLC2401
             τ_random = kendall_tau(chunks_random, reranked_chunks)  # noqa: PLC2401
-            assert τ_search >= τ_random >= τ_inverse
+            assert τ_search >= τ_random >= τ_inverse, (
+                f"τ_search: {τ_search},  τ_random: {τ_random}, τ_inverse: {τ_inverse}"
+            )
diff --git a/tests/test_split_sentences.py b/tests/test_split_sentences.py
@@ -1,5 +1,6 @@
 """Test RAGLite's sentence splitting functionality."""
 
+import re
 from pathlib import Path
 
 from raglite._markdown import document_to_markdown
@@ -12,8 +13,8 @@ def test_split_sentences() -> None:
     doc = document_to_markdown(doc_path)
     sentences = split_sentences(doc)
     expected_sentences = [
-        "# ON THE ELECTRODYNAMICS OF MOVING BODIES\n\n",
-        "## By A. EINSTEIN  June 30, 1905\n\n",
+        "# ON THE ELECTRODYNAMICS OF MOVING BODIES\n",
+        "## By A. EINSTEIN June 30, 1905\n",
         "It is known that Maxwell’s electrodynamics—as usually understood at the\npresent time—when applied to moving bodies, leads to asymmetries which do\nnot appear to be inherent in the phenomena. ",  # noqa: RUF001
         "Take, for example, the recipro-\ncal electrodynamic action of a magnet and a conductor. ",
         "The observable phe-\nnomenon here depends only on the relative motion of the conductor and the\nmagnet, whereas the customary view draws a sharp distinction between the two\ncases in which either the one or the other of these bodies is in motion. ",
@@ -25,17 +26,17 @@ def test_split_sentences() -> None:
         "We will raise this conjecture (the purport\nof which will hereafter be called the “Principle of Relativity”) to the status\nof a postulate, and also introduce another postulate, which is only apparently\nirreconcilable with the former, namely, that light is always propagated in empty\nspace with a definite velocity c which is independent of the state of motion of the\nemitting body. ",
         "These two postulates suffice for the attainment of a simple and\nconsistent theory of the electrodynamics of moving bodies based on Maxwell’s\ntheory for stationary bodies. ",  # noqa: RUF001
         "The introduction of a “luminiferous ether” will\nprove to be superfluous inasmuch as the view here to be developed will not\nrequire an “absolutely stationary space” provided with special properties, nor\n",
-        "1The preceding memoir by Lorentz was not at this time known to the author.\n\n",
+        "1The preceding memoir by Lorentz was not at this time known to the author.\n",
         "assign a velocity-vector to a point of the empty space in which electromagnetic\nprocesses take place.\n",
         "The theory to be developed is based—like all electrodynamics—on the kine-\nmatics of the rigid body, since the assertions of any such theory have to do\nwith the relationships between rigid bodies (systems of co-ordinates), clocks,\nand electromagnetic processes. ",
-        "Insufficient consideration of this circumstance\nlies at the root of the difficulties which the electrodynamics of moving bodies\nat present encounters.\n\n",
-        "## I. KINEMATICAL PART  § **1. Definition of Simultaneity**\n\n",
+        "Insufficient consideration of this circumstance\nlies at the root of the difficulties which the electrodynamics of moving bodies\nat present encounters.\n",
+        "## I. KINEMATICAL PART § **1. Definition of Simultaneity**\n",
         "Let us take a system of co-ordinates in which the equations of Newtonian\nmechanics hold good.2 ",
     ]
     assert isinstance(sentences, list)
-    assert all(
-        sentence == expected_sentence
-        for sentence, expected_sentence in zip(
-            sentences[: len(expected_sentences)], expected_sentences, strict=True
-        )
-    )
+    # Remove repeated \n to make it more resilient to variations between pdftext versions
+    sentences = [re.sub(r"\n+", "\n", sentence) for sentence in sentences]
+    for sentence, expected_sentence in zip(
+        sentences[: len(expected_sentences)], expected_sentences, strict=True
+    ):
+        assert sentence == expected_sentence