Hopefully more compact keys for known seeds

JasonGross · Nov 6, 2024 · 06b1c8d
1 parent df92705
commit 06b1c8d
Showing 1 changed file with 8 additions and 1 deletion.
diff --git a/notebooks_jason/max_of_K_all_models.py b/notebooks_jason/max_of_K_all_models.py
@@ -3266,7 +3266,13 @@ def do_linear_regression(X, Y):
         ]
         print(f"len after: {len(df)}")
 
-    sorted_known_seeds = np.unique(np.array(df["seed"].values), return_counts=True)
+    sorted_known_seeds, sorted_known_seeds_counts = np.unique(
+        np.array(df["seed"].values), return_counts=True
+    )
+    sorted_known_seeds = tuple(
+        zip(sorted_known_seeds.tolist(), sorted_known_seeds_counts.tolist())
+    )
+
     # Group by 'attention_error_handling' and calculate the max 'normalized-accuracy-bound' for sorting groups
     df = df[
         [
@@ -3774,6 +3780,7 @@ def do_linear_regression(X, Y):
     "all_tokens_datasets_lens",
     "sub_cfg_counts",
     "sorted_known_seeds",
+    "sorted_known_seeds_counts",
 ):
     if var in locals():
         del locals()[var]