init

Azure · Feb 22, 2025 · 04f97c3 · 04f97c3
1 parent a36c93c
commit 04f97c3
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 0 deletions.
diff --git a/pyrit/datasets/fetch_example_datasets.py b/pyrit/datasets/fetch_example_datasets.py
@@ -531,6 +531,9 @@ def fetch_adv_bench_dataset(
     # Extract and append the data to respective containers
     prompts = [item["goal"] for item in examples if "goal" in item]
 
+    # harm_categories = _fetch_from_file("pyrit/datasets/harm_categories/adv_bench_dataset.json", "json")
+    # harm_categories_list = list(harm_categories.values())
+
     harm_categories = [
         "Endangering National Security",
         "Insulting Behavior",
@@ -561,13 +564,15 @@ def fetch_adv_bench_dataset(
             name="AdvBench Dataset",
             dataset_name="AdvBench Dataset",
             harm_categories=harm_categories,
+            # harm_categories=harm_categories_list[i],
             description="""AdvBench is a set of 520 harmful behaviors formulated as instructions. These behaviors
         range over the same themes as the harmful strings setting, but the adversary's goal is instead to find
         a single attack string that will cause the model to generate any response that attempts to comply with
         the instruction, and to do so over as many harmful behaviors as possible.""",
             source="https://github.com/llm-attacks/llm-attacks/tree/main/data/advbench",
         )
         for prompt in prompts
+        # for i, prompt in enumerate(prompts)
     ]
 
     seed_prompt_dataset = SeedPromptDataset(prompts=seed_prompts)

diff --git a/pyrit/datasets/harm_categories/adv_bench_dataset.json b/pyrit/datasets/harm_categories/adv_bench_dataset.json
@@ -0,0 +1,4 @@
+{
+    "0": ["Cybercrime"],
+    "1": ["Endangering National Security", "Privacy Violation", "Cybercrime"]
+}