FEAT: include scored_prompt_id in orchestrator_identifier of the system prompt (#725)
NicolePell authored Feb 25, 2025
1 parent d4a0e79 commit 3d0543c
Showing 6 changed files with 77 additions and 1 deletion.
1 change: 1 addition & 0 deletions pyrit/score/insecure_code_scorer.py
@@ -64,6 +64,7 @@ async def score_async(self, request_response: PromptRequestPiece, *, task: Optio
             scored_prompt_id=request_response.id,
             category=self._harm_category,
             task=task,
+            orchestrator_identifier=request_response.orchestrator_identifier,
         )
 
         # Modify the UnvalidatedScore parsing to check for 'score_value'
6 changes: 5 additions & 1 deletion pyrit/score/scorer.py
@@ -218,6 +218,7 @@ async def _score_value_with_llm(
         scored_prompt_id: str,
         category: str = None,
         task: str = None,
+        orchestrator_identifier: dict[str, str] = None,
     ) -> UnvalidatedScore:
         """
         Sends a request to a target, and takes care of retries.
@@ -242,10 +243,13 @@
 
         conversation_id = str(uuid.uuid4())
 
+        if orchestrator_identifier:
+            orchestrator_identifier["scored_prompt_id"] = str(scored_prompt_id)
+
         prompt_target.set_system_prompt(
             system_prompt=system_prompt,
             conversation_id=conversation_id,
-            orchestrator_identifier=None,
+            orchestrator_identifier=orchestrator_identifier,
         )
         prompt_metadata = {"response_format": "json"}
         scorer_llm_request = PromptRequestResponse(
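
For context, a minimal standalone sketch of the merge behaviour this scorer.py change introduces: the scored prompt's ID is written into the caller-supplied identifier dict in place, while a missing or empty identifier passes through unchanged. The function name and dict values below are illustrative only, not code from the commit.

# Sketch of the merge performed in Scorer._score_value_with_llm (illustrative values).
def merge_scored_prompt_id(orchestrator_identifier, scored_prompt_id):
    # Only annotate an identifier that was actually provided; None or {} passes
    # through, so set_system_prompt still receives no identifier in that case.
    if orchestrator_identifier:
        # The dict is mutated in place, mirroring the committed code.
        orchestrator_identifier["scored_prompt_id"] = str(scored_prompt_id)
    return orchestrator_identifier

assert merge_scored_prompt_id({"id": "orchestrator_id"}, "123") == {
    "id": "orchestrator_id",
    "scored_prompt_id": "123",
}
assert merge_scored_prompt_id(None, "123") is None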
1 change: 1 addition & 0 deletions pyrit/score/self_ask_category_scorer.py
@@ -106,6 +106,7 @@ async def score_async(self, request_response: PromptRequestPiece, *, task: Optio
             prompt_request_data_type=request_response.converted_value_data_type,
             scored_prompt_id=request_response.id,
             task=task,
+            orchestrator_identifier=request_response.orchestrator_identifier,
         )
 
         score = unvalidated_score.to_score(score_value=unvalidated_score.raw_score_value)
1 change: 1 addition & 0 deletions pyrit/score/self_ask_refusal_scorer.py
@@ -98,6 +98,7 @@ async def score_async(self, request_response: PromptRequestPiece, *, task: Optio
             scored_prompt_id=request_response.id,
             category=self._score_category,
             task=task,
+            orchestrator_identifier=request_response.orchestrator_identifier,
         )
 
         score = unvalidated_score.to_score(score_value=unvalidated_score.raw_score_value)
1 change: 1 addition & 0 deletions pyrit/score/self_ask_true_false_scorer.py
@@ -125,6 +125,7 @@ async def score_async(self, request_response: PromptRequestPiece, *, task: Optio
             scored_prompt_id=request_response.id,
             category=self._score_category,
             task=task,
+            orchestrator_identifier=request_response.orchestrator_identifier,
         )
 
         score = unvalidated_score.to_score(score_value=unvalidated_score.raw_score_value)
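
Each scorer above applies the same one-line change: score_async now forwards the scored piece's orchestrator_identifier into _score_value_with_llm. As a rough illustration of the observable effect on the scoring target, here is a self-contained sketch; StubChatTarget, register_scoring_system_prompt, and the literal values are invented for this example and are not PyRIT APIs.

# Standalone illustration (no PyRIT imports) of what the scoring target's
# set_system_prompt receives after this change; StubChatTarget is hypothetical.
import uuid


class StubChatTarget:
    """Stands in for a chat target and records the identifier it was given."""

    def __init__(self):
        self.recorded_identifier = None

    def set_system_prompt(self, *, system_prompt, conversation_id, orchestrator_identifier):
        self.recorded_identifier = orchestrator_identifier


def register_scoring_system_prompt(target, scored_prompt_id, orchestrator_identifier):
    # Mirrors the new flow: annotate the identifier, then register the system prompt.
    if orchestrator_identifier:
        orchestrator_identifier["scored_prompt_id"] = str(scored_prompt_id)
    target.set_system_prompt(
        system_prompt="scoring instructions",
        conversation_id=str(uuid.uuid4()),
        orchestrator_identifier=orchestrator_identifier,
    )


target = StubChatTarget()
register_scoring_system_prompt(target, "123", {"id": "orchestrator_id"})
assert target.recorded_identifier == {"id": "orchestrator_id", "scored_prompt_id": "123"}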
68 changes: 68 additions & 0 deletions tests/unit/score/test_scorer.py
@@ -110,6 +110,74 @@ async def test_scorer_score_value_with_llm_exception_display_prompt_id():
         )
 
 
+@pytest.mark.asyncio
+async def test_scorer_score_value_with_llm_use_provided_orchestrator_identifier(good_json):
+    scorer = MockScorer()
+    scorer.scorer_type = "true_false"
+
+    prompt_response = PromptRequestResponse(
+        request_pieces=[PromptRequestPiece(role="assistant", original_value=good_json)]
+    )
+    chat_target = MagicMock(PromptChatTarget)
+    chat_target.send_prompt_async = AsyncMock(return_value=prompt_response)
+    chat_target.set_system_prompt = AsyncMock()
+
+    expected_system_prompt = "system_prompt"
+    expected_orchestrator_id = "orchestrator_id"
+    expected_scored_prompt_id = "123"
+
+    await scorer._score_value_with_llm(
+        prompt_target=chat_target,
+        system_prompt=expected_system_prompt,
+        prompt_request_value="prompt_request_value",
+        prompt_request_data_type="text",
+        scored_prompt_id=expected_scored_prompt_id,
+        category="category",
+        task="task",
+        orchestrator_identifier={"id": expected_orchestrator_id},
+    )
+
+    chat_target.set_system_prompt.assert_called_once()
+
+    _, set_sys_prompt_args = chat_target.set_system_prompt.call_args
+    assert set_sys_prompt_args["system_prompt"] == expected_system_prompt
+    assert isinstance(set_sys_prompt_args["conversation_id"], str)
+    assert set_sys_prompt_args["orchestrator_identifier"]["id"] == expected_orchestrator_id
+    assert set_sys_prompt_args["orchestrator_identifier"]["scored_prompt_id"] == expected_scored_prompt_id
+
+
+@pytest.mark.asyncio
+async def test_scorer_score_value_with_llm_does_not_add_score_prompt_id_for_empty_orchestrator_identifier(good_json):
+    scorer = MockScorer()
+    scorer.scorer_type = "true_false"
+
+    prompt_response = PromptRequestResponse(
+        request_pieces=[PromptRequestPiece(role="assistant", original_value=good_json)]
+    )
+    chat_target = MagicMock(PromptChatTarget)
+    chat_target.send_prompt_async = AsyncMock(return_value=prompt_response)
+    chat_target.set_system_prompt = AsyncMock()
+
+    expected_system_prompt = "system_prompt"
+
+    await scorer._score_value_with_llm(
+        prompt_target=chat_target,
+        system_prompt=expected_system_prompt,
+        prompt_request_value="prompt_request_value",
+        prompt_request_data_type="text",
+        scored_prompt_id="123",
+        category="category",
+        task="task",
+    )
+
+    chat_target.set_system_prompt.assert_called_once()
+
+    _, set_sys_prompt_args = chat_target.set_system_prompt.call_args
+    assert set_sys_prompt_args["system_prompt"] == expected_system_prompt
+    assert isinstance(set_sys_prompt_args["conversation_id"], str)
+    assert not set_sys_prompt_args["orchestrator_identifier"]
+
+
 @pytest.mark.asyncio
 async def test_scorer_send_chat_target_async_good_response(good_json):
 
