FEAT: include scored_prompt_id in orchestrator_identifier of the system prompt (#725)
NicolePell authored Feb 25, 2025
1 parent d4a0e79 commit 3d0543c
Showing 6 changed files with 77 additions and 1 deletion.
1 change: 1 addition & 0 deletions pyrit/score/insecure_code_scorer.py
@@ -64,6 +64,7 @@ async def score_async(self, request_response: PromptRequestPiece, *, task: Optio
             scored_prompt_id=request_response.id,
             category=self._harm_category,
             task=task,
+            orchestrator_identifier=request_response.orchestrator_identifier,
         )
 
         # Modify the UnvalidatedScore parsing to check for 'score_value'
6 changes: 5 additions & 1 deletion pyrit/score/scorer.py
@@ -218,6 +218,7 @@ async def _score_value_with_llm(
         scored_prompt_id: str,
         category: str = None,
         task: str = None,
+        orchestrator_identifier: dict[str, str] = None,
     ) -> UnvalidatedScore:
         """
         Sends a request to a target, and takes care of retries.
@@ -242,10 +243,13 @@
 
         conversation_id = str(uuid.uuid4())
 
+        if orchestrator_identifier:
+            orchestrator_identifier["scored_prompt_id"] = str(scored_prompt_id)
+
         prompt_target.set_system_prompt(
             system_prompt=system_prompt,
             conversation_id=conversation_id,
-            orchestrator_identifier=None,
+            orchestrator_identifier=orchestrator_identifier,
         )
         prompt_metadata = {"response_format": "json"}
         scorer_llm_request = PromptRequestResponse(
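
For context, a minimal standalone sketch of the merge behaviour this scorer.py change introduces: the scored prompt's ID is written into the caller-supplied identifier dict in place, while a missing or empty identifier passes through unchanged. The function name and dict values below are illustrative only, not code from the commit.

# Sketch of the merge performed in Scorer._score_value_with_llm (illustrative values).
def merge_scored_prompt_id(orchestrator_identifier, scored_prompt_id):
    # Only annotate an identifier that was actually provided; None or {} passes
    # through, so set_system_prompt still receives no identifier in that case.
    if orchestrator_identifier:
        # The dict is mutated in place, mirroring the committed code.
        orchestrator_identifier["scored_prompt_id"] = str(scored_prompt_id)
    return orchestrator_identifier

assert merge_scored_prompt_id({"id": "orchestrator_id"}, "123") == {
    "id": "orchestrator_id",
    "scored_prompt_id": "123",
}
assert merge_scored_prompt_id(None, "123") is None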
1 change: 1 addition & 0 deletions pyrit/score/self_ask_category_scorer.py
@@ -106,6 +106,7 @@ async def score_async(self, request_response: PromptRequestPiece, *, task: Optio
             prompt_request_data_type=request_response.converted_value_data_type,
             scored_prompt_id=request_response.id,
             task=task,
+            orchestrator_identifier=request_response.orchestrator_identifier,
         )
 
         score = unvalidated_score.to_score(score_value=unvalidated_score.raw_score_value)
1 change: 1 addition & 0 deletions pyrit/score/self_ask_refusal_scorer.py
@@ -98,6 +98,7 @@ async def score_async(self, request_response: PromptRequestPiece, *, task: Optio
             scored_prompt_id=request_response.id,
             category=self._score_category,
             task=task,
+            orchestrator_identifier=request_response.orchestrator_identifier,
         )
 
         score = unvalidated_score.to_score(score_value=unvalidated_score.raw_score_value)
1 change: 1 addition & 0 deletions pyrit/score/self_ask_true_false_scorer.py
@@ -125,6 +125,7 @@ async def score_async(self, request_response: PromptRequestPiece, *, task: Optio
             scored_prompt_id=request_response.id,
             category=self._score_category,
             task=task,
+            orchestrator_identifier=request_response.orchestrator_identifier,
         )
 
         score = unvalidated_score.to_score(score_value=unvalidated_score.raw_score_value)
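
Each scorer above applies the same one-line change: score_async now forwards the scored piece's orchestrator_identifier into _score_value_with_llm. As a rough illustration of the observable effect on the scoring target, here is a self-contained sketch; StubChatTarget, register_scoring_system_prompt, and the literal values are invented for this example and are not PyRIT APIs.

# Standalone illustration (no PyRIT imports) of what the scoring target's
# set_system_prompt receives after this change; StubChatTarget is hypothetical.
import uuid


class StubChatTarget:
    """Stands in for a chat target and records the identifier it was given."""

    def __init__(self):
        self.recorded_identifier = None

    def set_system_prompt(self, *, system_prompt, conversation_id, orchestrator_identifier):
        self.recorded_identifier = orchestrator_identifier


def register_scoring_system_prompt(target, scored_prompt_id, orchestrator_identifier):
    # Mirrors the new flow: annotate the identifier, then register the system prompt.
    if orchestrator_identifier:
        orchestrator_identifier["scored_prompt_id"] = str(scored_prompt_id)
    target.set_system_prompt(
        system_prompt="scoring instructions",
        conversation_id=str(uuid.uuid4()),
        orchestrator_identifier=orchestrator_identifier,
    )


target = StubChatTarget()
register_scoring_system_prompt(target, "123", {"id": "orchestrator_id"})
assert target.recorded_identifier == {"id": "orchestrator_id", "scored_prompt_id": "123"}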
68 changes: 68 additions & 0 deletions tests/unit/score/test_scorer.py
@@ -110,6 +110,74 @@ async def test_scorer_score_value_with_llm_exception_display_prompt_id():
         )
 
 
+@pytest.mark.asyncio
+async def test_scorer_score_value_with_llm_use_provided_orchestrator_identifier(good_json):
+    scorer = MockScorer()
+    scorer.scorer_type = "true_false"
+
+    prompt_response = PromptRequestResponse(
+        request_pieces=[PromptRequestPiece(role="assistant", original_value=good_json)]
+    )
+    chat_target = MagicMock(PromptChatTarget)
+    chat_target.send_prompt_async = AsyncMock(return_value=prompt_response)
+    chat_target.set_system_prompt = AsyncMock()
+
+    expected_system_prompt = "system_prompt"
+    expected_orchestrator_id = "orchestrator_id"
+    expected_scored_prompt_id = "123"
+
+    await scorer._score_value_with_llm(
+        prompt_target=chat_target,
+        system_prompt=expected_system_prompt,
+        prompt_request_value="prompt_request_value",
+        prompt_request_data_type="text",
+        scored_prompt_id=expected_scored_prompt_id,
+        category="category",
+        task="task",
+        orchestrator_identifier={"id": expected_orchestrator_id},
+    )
+
+    chat_target.set_system_prompt.assert_called_once()
+
+    _, set_sys_prompt_args = chat_target.set_system_prompt.call_args
+    assert set_sys_prompt_args["system_prompt"] == expected_system_prompt
+    assert isinstance(set_sys_prompt_args["conversation_id"], str)
+    assert set_sys_prompt_args["orchestrator_identifier"]["id"] == expected_orchestrator_id
+    assert set_sys_prompt_args["orchestrator_identifier"]["scored_prompt_id"] == expected_scored_prompt_id
+
+
+@pytest.mark.asyncio
+async def test_scorer_score_value_with_llm_does_not_add_score_prompt_id_for_empty_orchestrator_identifier(good_json):
+    scorer = MockScorer()
+    scorer.scorer_type = "true_false"
+
+    prompt_response = PromptRequestResponse(
+        request_pieces=[PromptRequestPiece(role="assistant", original_value=good_json)]
+    )
+    chat_target = MagicMock(PromptChatTarget)
+    chat_target.send_prompt_async = AsyncMock(return_value=prompt_response)
+    chat_target.set_system_prompt = AsyncMock()
+
+    expected_system_prompt = "system_prompt"
+
+    await scorer._score_value_with_llm(
+        prompt_target=chat_target,
+        system_prompt=expected_system_prompt,
+        prompt_request_value="prompt_request_value",
+        prompt_request_data_type="text",
+        scored_prompt_id="123",
+        category="category",
+        task="task",
+    )
+
+    chat_target.set_system_prompt.assert_called_once()
+
+    _, set_sys_prompt_args = chat_target.set_system_prompt.call_args
+    assert set_sys_prompt_args["system_prompt"] == expected_system_prompt
+    assert isinstance(set_sys_prompt_args["conversation_id"], str)
+    assert not set_sys_prompt_args["orchestrator_identifier"]
+
+
 @pytest.mark.asyncio
 async def test_scorer_send_chat_target_async_good_response(good_json):
 
