Skip to content

Commit

Permalink
Improve import and example
Browse files Browse the repository at this point in the history
  • Loading branch information
carlesonielfa committed Jul 23, 2024
1 parent 60b2e2c commit 2434ee2
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 10 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
## 1.0.2 (unreleased)


- Nothing changed yet.
- Easier import of the REMi evaluator
- Improve example in README


## 1.0.1 (2024-07-23)
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ It has been finetuned by the team at [**nuclia**](nuclia.com) to evaluate the qu
```python
from nuclia_eval import REMi

evaluator = REMiEvaluator()
evaluator = REMi()

query = "By how many Octaves can I shift my OXYGEN PRO 49 keyboard?"

Expand All @@ -63,20 +63,20 @@ To change the transposition of the keyboard, press and hold Shift, and then use
The display will temporarily show TRANS and the current transposition (-12 to 12)."""
context2 ="""\
To change the octave of the keyboard, use the Key Octave –/+ buttons to lower or raise the octave, respectively
The display will temporarily show OCT and the current octave shift.\n\nOxygen Pro 25's keyboard can be shifted 4 octaves down or 5 octaves up""",
The display will temporarily show OCT and the current octave shift.\n\nOxygen Pro 25's keyboard can be shifted 4 octaves down or 5 octaves up"""
context3 = """\
If your DAW does not automatically configure your Oxygen Pro series keyboard, please follow the setup steps listed in the Oxygen Pro DAW Setup Guides.
To set the keyboard to operate in Preset Mode, press the DAW/Preset Button (on the Oxygen Pro 25) or Preset Button (on the Oxygen Pro 49 and 61).
On the Oxygen Pro 25 the DAW/Preset button LED will be off to show that Preset Mode is selected.
On the Oxygen Pro 49 and 61 the Preset button LED will be lit to show that Preset Mode is selected.""",
On the Oxygen Pro 49 and 61 the Preset button LED will be lit to show that Preset Mode is selected."""

answer = "Based on the context provided, The Oxygen Pro 49's keyboard can be shifted 3 octaves down or 4 octaves up."

result = evaluator.evaluate_rag(query=query, answer=answer, contexts=[context1, context2, context3])
answer_relevance, context_relevances, groundednesses = result

print(f"{answer_relevance.score}, {answer_relevance.reason}")
# 4, The response is relevant to the entire query and answers it completely, but it could be more specific about the limitations of the keyboard.
# 5, The response directly answers the query by specifying the range of octave shifts for the Oxygen Pro 49 keyboard.
print([cr.score for cr in context_relevances]) # [5, 1, 0]
print([g.score for g in groundednesses]) # [2, 0, 0]
```
Expand Down
4 changes: 4 additions & 0 deletions src/nuclia_eval/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,7 @@
import logging

logger = logging.getLogger(__name__)

from nuclia_eval.models.remi import REMiEvaluator as REMi # noqa: E402

__all__ = ["REMi"]
10 changes: 5 additions & 5 deletions tests/models/test_remi.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

import pytest

from nuclia_eval import REMi
from nuclia_eval.exceptions import InvalidToolCallException
from nuclia_eval.models.remi import REMiEvaluator
from nuclia_eval.settings import Settings

MANUAL_TEST = os.getenv("MANUAL_TEST", False)
Expand Down Expand Up @@ -36,7 +36,7 @@ def test_REMi_evaluator_mock(
nuclia_model_cache="my_cache/",
)
# Run code
evaluator = REMiEvaluator(settings=settings, device="my_device")
evaluator = REMi(settings=settings, device="my_device")
# Check evaluator variables are properly set
assert evaluator.settings == settings
# Check that the paths start with the cache path
Expand Down Expand Up @@ -69,7 +69,7 @@ def test_REMi_evaluator_mock(
fake_model.to.assert_called_once_with("my_device")

# Now load one with default settings
evaluator = REMiEvaluator()
evaluator = REMi()

# Configure mocks for a rag_evaluation call
tokenizer_mock.encode_chat_completion.return_value = [1, 2, 3]
Expand All @@ -95,7 +95,7 @@ def test_REMi_evaluator_mock(

# Create another evaluator, so that we can check that the model is not downloaded again
with patch("pathlib.Path.exists", return_value=True):
evaluator = REMiEvaluator(settings=settings, device="my_device")
evaluator = REMi(settings=settings, device="my_device")
# Check that the download calls were not made again
# assert len(snapshot_download_mock.mock_calls) == 2

Expand Down Expand Up @@ -135,7 +135,7 @@ def test_REMi_evaluator_mock(
def test_REMi_evaluator():
# Create an instance of REMi (the public alias of the REMiEvaluator class)
t0 = monotonic()
evaluator = REMiEvaluator()
evaluator = REMi()
t1 = monotonic()

query = "By how many Octaves can I shift my OXYGEN PRO 49 keyboard?"
Expand Down

0 comments on commit 2434ee2

Please sign in to comment.