-
Notifications
You must be signed in to change notification settings - Fork 60
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
chore: Update ruff, mypy and poetry.lock. #87
base: main
Are you sure you want to change the base?
Changes from all commits
1df8207
84d6860
3c9ab5e
aa705bf
4d01dd4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,7 +15,7 @@ | |
vector_search, | ||
) | ||
|
||
__all__ = [ | ||
__all__ = [ # noqa: RUF022 | ||
# Config | ||
"RAGLiteConfig", | ||
# Insert | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -361,7 +361,7 @@ def chatml_function_calling_with_streaming( | |
if isinstance(tool_choice, dict): | ||
tools = [t for t in tools if t["function"]["name"] == tool_choice["function"]["name"]] | ||
assert tools | ||
function_names = " | ".join([f'''"functions.{t['function']['name']}:"''' for t in tools]) | ||
function_names = " | ".join([f'''"functions.{t["function"]["name"]}:"''' for t in tools]) | ||
prompt = template_renderer.render( | ||
messages=messages, tools=tools, tool_calls=True, add_generation_prompt=True | ||
) | ||
|
@@ -408,8 +408,7 @@ def chatml_function_calling_with_streaming( | |
|
||
# Case 2 step 2B: One or more function calls | ||
follow_up_gbnf_tool_grammar = ( | ||
'root ::= functions | "</function_calls>" | "<|im_end|>"\n' | ||
f"functions ::= {function_names}\n" | ||
f'root ::= functions | "</function_calls>" | "<|im_end|>"\nfunctions ::= {function_names}\n' | ||
) | ||
prompt += "<function_calls>\n" | ||
if stream: | ||
|
@@ -474,28 +473,31 @@ def chatml_function_calling_with_streaming( | |
"created": completion["created"], | ||
"model": completion["model"], | ||
"choices": [ | ||
{ | ||
"finish_reason": "tool_calls", | ||
"index": 0, | ||
"logprobs": completion["choices"][0]["logprobs"], | ||
"message": { | ||
"role": "assistant", | ||
"content": None, | ||
"tool_calls": [ | ||
{ | ||
"id": "call_" + f"_{i}_" + tool_name + "_" + completion["id"], | ||
"type": "function", | ||
"function": { | ||
"name": tool_name, | ||
"arguments": completion["choices"][0]["text"], | ||
}, | ||
} | ||
for i, (tool_name, completion) in enumerate( | ||
zip(completions_tool_name, completions, strict=True) | ||
) | ||
], | ||
cast( | ||
llama_types.ChatCompletionResponseChoice, | ||
{ | ||
"finish_reason": "tool_calls", | ||
"index": 0, | ||
"logprobs": completion["choices"][0]["logprobs"], | ||
"message": { | ||
"role": "assistant", | ||
"content": None, | ||
"tool_calls": [ | ||
{ | ||
"id": "call_" + f"_{i}_" + tool_name + "_" + completion["id"], | ||
"type": "function", | ||
"function": { | ||
"name": tool_name, | ||
"arguments": completion["choices"][0]["text"], | ||
}, | ||
} | ||
for i, (tool_name, completion) in enumerate( | ||
zip(completions_tool_name, completions, strict=True) | ||
) | ||
], | ||
}, | ||
}, | ||
} | ||
) | ||
Comment on lines
+476
to
+500
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of a cast, it would be better to actually construct the object here. That way we benefit from proper type checking. |
||
], | ||
"usage": { | ||
"completion_tokens": sum( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,20 +18,20 @@ def test_extract(llm: str, strict: bool) -> None: # noqa: FBT001 | |
config = RAGLiteConfig(llm=llm) | ||
|
||
# Define the JSON schema of the response. | ||
class LoginResponse(BaseModel): | ||
"""The response to a login request.""" | ||
class UserProfileResponse(BaseModel): | ||
"""The response to a user profile extraction request.""" | ||
|
||
model_config = ConfigDict(extra="forbid" if strict else "allow") | ||
username: str = Field(..., description="The username.") | ||
password: str = Field(..., description="The password.") | ||
system_prompt: ClassVar[str] = "Extract the username and password from the input." | ||
email: str = Field(..., description="The email address.") | ||
system_prompt: ClassVar[str] = "Extract the username and email from the input." | ||
|
||
# Extract structured data. | ||
username, password = "cypher", "steak" | ||
login_response = extract_with_llm( | ||
LoginResponse, f"username: {username}\npassword: {password}", strict=strict, config=config | ||
# Example input data. | ||
username, email = "cypher", "[email protected]" | ||
profile_response = extract_with_llm( | ||
UserProfileResponse, f"username: {username}\nemail: {email}", strict=strict, config=config | ||
) | ||
# Validate the response. | ||
assert isinstance(login_response, LoginResponse) | ||
assert login_response.username == username | ||
assert login_response.password == password | ||
assert isinstance(profile_response, UserProfileResponse) | ||
assert profile_response.username == username | ||
assert profile_response.email == email | ||
Comment on lines
+21
to
+37
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For my information, why modify this test? Was it failing? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Llama was reluctant to write passwords as it considered it insecure. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,7 +52,7 @@ def test_reranker( | |
) | ||
# Search for a query. | ||
query = "What does it mean for two events to be simultaneous?" | ||
chunk_ids, _ = hybrid_search(query, num_results=20, config=raglite_test_config) | ||
chunk_ids, _ = hybrid_search(query, num_results=30, config=raglite_test_config) | ||
# Retrieve the chunks. | ||
chunks = retrieve_chunks(chunk_ids, config=raglite_test_config) | ||
assert all(isinstance(chunk, Chunk) for chunk in chunks) | ||
|
@@ -67,4 +67,6 @@ def test_reranker( | |
τ_search = kendall_tau(chunks, reranked_chunks) # noqa: PLC2401 | ||
τ_inverse = kendall_tau(chunks[::-1], reranked_chunks) # noqa: PLC2401 | ||
τ_random = kendall_tau(chunks_random, reranked_chunks) # noqa: PLC2401 | ||
assert τ_search >= τ_random >= τ_inverse | ||
assert τ_search >= τ_random >= τ_inverse, ( | ||
f"τ_search: {τ_search}, τ_random: {τ_random}, τ_inverse: {τ_inverse}" | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are these changes necessary? Does the test fail without them? If so, do you know why? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was added for diagnosing the problem. Sometimes, tau_random was larger than tau_search. This has been mitigated taking a larger sample. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
"""Test RAGLite's sentence splitting functionality.""" | ||
|
||
import re | ||
from pathlib import Path | ||
|
||
from raglite._markdown import document_to_markdown | ||
|
@@ -12,8 +13,8 @@ def test_split_sentences() -> None: | |
doc = document_to_markdown(doc_path) | ||
sentences = split_sentences(doc) | ||
expected_sentences = [ | ||
"# ON THE ELECTRODYNAMICS OF MOVING BODIES\n\n", | ||
"## By A. EINSTEIN June 30, 1905\n\n", | ||
"# ON THE ELECTRODYNAMICS OF MOVING BODIES\n", | ||
"## By A. EINSTEIN June 30, 1905\n", | ||
"It is known that Maxwell’s electrodynamics—as usually understood at the\npresent time—when applied to moving bodies, leads to asymmetries which do\nnot appear to be inherent in the phenomena. ", # noqa: RUF001 | ||
"Take, for example, the recipro-\ncal electrodynamic action of a magnet and a conductor. ", | ||
"The observable phe-\nnomenon here depends only on the relative motion of the conductor and the\nmagnet, whereas the customary view draws a sharp distinction between the two\ncases in which either the one or the other of these bodies is in motion. ", | ||
|
@@ -25,17 +26,17 @@ def test_split_sentences() -> None: | |
"We will raise this conjecture (the purport\nof which will hereafter be called the “Principle of Relativity”) to the status\nof a postulate, and also introduce another postulate, which is only apparently\nirreconcilable with the former, namely, that light is always propagated in empty\nspace with a definite velocity c which is independent of the state of motion of the\nemitting body. ", | ||
"These two postulates suffice for the attainment of a simple and\nconsistent theory of the electrodynamics of moving bodies based on Maxwell’s\ntheory for stationary bodies. ", # noqa: RUF001 | ||
"The introduction of a “luminiferous ether” will\nprove to be superfluous inasmuch as the view here to be developed will not\nrequire an “absolutely stationary space” provided with special properties, nor\n", | ||
"1The preceding memoir by Lorentz was not at this time known to the author.\n\n", | ||
"1The preceding memoir by Lorentz was not at this time known to the author.\n", | ||
"assign a velocity-vector to a point of the empty space in which electromagnetic\nprocesses take place.\n", | ||
"The theory to be developed is based—like all electrodynamics—on the kine-\nmatics of the rigid body, since the assertions of any such theory have to do\nwith the relationships between rigid bodies (systems of co-ordinates), clocks,\nand electromagnetic processes. ", | ||
"Insufficient consideration of this circumstance\nlies at the root of the difficulties which the electrodynamics of moving bodies\nat present encounters.\n\n", | ||
"## I. KINEMATICAL PART § **1. Definition of Simultaneity**\n\n", | ||
"Insufficient consideration of this circumstance\nlies at the root of the difficulties which the electrodynamics of moving bodies\nat present encounters.\n", | ||
"## I. KINEMATICAL PART § **1. Definition of Simultaneity**\n", | ||
"Let us take a system of co-ordinates in which the equations of Newtonian\nmechanics hold good.2 ", | ||
] | ||
assert isinstance(sentences, list) | ||
assert all( | ||
sentence == expected_sentence | ||
for sentence, expected_sentence in zip( | ||
sentences[: len(expected_sentences)], expected_sentences, strict=True | ||
) | ||
) | ||
# Remove repeated \n to make it more resilient to variations between pdftext versions | ||
sentences = [re.sub(r"\n+", "\n", sentence) for sentence in sentences] | ||
for sentence, expected_sentence in zip( | ||
sentences[: len(expected_sentences)], expected_sentences, strict=True | ||
): | ||
assert sentence == expected_sentence | ||
Comment on lines
+37
to
+42
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to be sure: is it a new version of pdftext that's causing the differences here, and not SaT? In addition: I'd prefer not to make changes to the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, PDF text is now returning slightly different strings. I understand what you mean, it makes sense to create a custom |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What I think we want to do here is:
poetry lock --no-update
to avoid updating any dependencies that we don't explicitly upgrade.Running a full
poetry lock
would significantly change the dependency set we run the tests against.In the future, we should test against both the 'oldest' solution to the specification, and the 'newest' solution to the specification. That's something we can do when we migrate from Poetry to uv.