huawei-csl · plex1 · Jan 15, 2026 · Jan 29, 2026 · Feb 2, 2026 · Feb 2, 2026
diff --git a/scripts/custom_scripts/check_flowy_data_records.py b/scripts/custom_scripts/check_flowy_data_records.py
@@ -0,0 +1,210 @@
+#!/usr/bin/env python3
+"""
+Scan synth_out/res_* folders, read flowy_data_record.parquet, and report:
+- number of unique run_identifier values (per file + histogram)
+- maximum step value (per file + global max)
+- number of res_* folders found
+
+Usage:
+  python check_flowy_data_records.py
+  python check_flowy_data_records.py --base /path/to/run
+"""
+
+from __future__ import annotations
+
+import argparse
+import glob
+import os
+from pathlib import Path
+from collections import Counter
+
+import pandas as pd
+
+# Root of the data tree, read from the environment (None when unset).
+DATA_DIR = os.getenv("DATA_DIR")
+
+# Default run folder scanned when --base is not given. When DATA_DIR is unset
+# (or empty) fall back to the current directory, so the default never starts
+# with the literal string "None/".
+DEFAULT_BASE = (
+    f"{DATA_DIR or '.'}/output/"
+    "multiplier_4bi_8bo_permuti_flowy/flowy_trans_run_12chains_3000steps_gen_iter0"
+)
+
+
+def text_hist(counter: Counter[int], *, title: str, bar_width: int = 40) -> str:
+    """Render ``counter`` as a plain-text histogram.
+
+    Keys are listed in ascending order, one per line, each with its occurrence
+    count and a ``#`` bar scaled so that the largest count spans ``bar_width``
+    characters.
+
+    Args:
+        counter: Mapping from an integer value to how often it occurred.
+        title: Heading printed on the first line.
+        bar_width: Length in characters of the bar drawn for the largest count.
+
+    Returns:
+        The rendered histogram, terminated by a newline. An empty counter
+        yields the title followed by an ``(empty)`` marker.
+    """
+    if not counter:
+        return f"{title}\n  (empty)\n"
+
+    max_count = max(counter.values())
+
+    lines = [title]
+    for k, c in sorted(counter.items()):
+        # Scale relative to the largest bucket; guard against all-zero counts.
+        bar_len = round((c / max_count) * bar_width) if max_count > 0 else 0
+        bar = "#" * bar_len
+        lines.append(f"  {k:>6}: {c:>6} occurrences {bar}")
+    return "\n".join(lines) + "\n"
+
+
+def _to_num(s: pd.Series) -> pd.Series:
+    """Convert ``s`` to a numeric Series; entries that cannot be parsed become NaN."""
+    numeric = pd.to_numeric(s, errors="coerce")
+    return numeric
+
+def main() -> int:
+    """Scan result folders and print summary statistics of their parquet records.
+
+    For every directory matching ``--pattern`` under ``--base``, the file named
+    by ``--parquet-name`` is read and the following are collected: number of
+    unique ``run_identifier`` values, row count, maximum ``step``, and all
+    numeric ``aig_count`` / ``nb_transistors`` values. Per-folder lines are
+    printed unless ``--quiet`` is given; histograms, the global maximum step
+    and the global aggregates are always printed.
+
+    Returns:
+        2 when no directory matches the pattern, 0 otherwise. Missing or
+        unreadable parquet files are counted and reported but are not fatal.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument(
+        "--base",
+        type=str,
+        default=DEFAULT_BASE,
+        help="Base folder containing synth_out/ (default is your GENIAL run path).",
+    )
+    ap.add_argument(
+        "--pattern",
+        type=str,
+        default="synth_out/res_*",
+        help="Glob pattern under base to find result folders.",
+    )
+    ap.add_argument(
+        "--parquet-name",
+        type=str,
+        default="flowy_data_record.parquet",
+        help="Parquet filename to read inside each res_* folder.",
+    )
+    ap.add_argument(
+        "--quiet",
+        action="store_true",
+        help="Only print aggregate statistics (no per-folder lines).",
+    )
+    args = ap.parse_args()
+
+    base = Path(args.base).expanduser()
+    res_glob = str(base / args.pattern)
+    res_dirs = [Path(p) for p in sorted(glob.glob(res_glob)) if Path(p).is_dir()]
+
+    # Always report how many folders matched, even when the answer is zero.
+    print(f"Found {len(res_dirs)} res_* folders matching: {res_glob}")
+
+    if not res_dirs:
+        print(f"[ERROR] No directories matched: {res_glob}")
+        return 2
+
+    # Columns every parquet file must provide; also used to limit what is read.
+    required_columns = ["run_identifier", "step", "aig_count", "nb_transistors"]
+
+    # Histograms keyed by a per-file statistic; each value counts files.
+    uniq_hist: Counter[int] = Counter()
+    maxstep_hist: Counter[int] = Counter()
+    rowcount_hist: Counter[int] = Counter()
+    global_max_step: int | None = None
+
+    missing_files = 0
+    bad_files = 0
+    processed = 0
+
+    if not args.quiet:
+        print(f"Base: {base}")
+        print("-" * 80)
+
+    # Aggregate accumulators across all rows / all files
+    all_aig = []
+    all_tr = []
+
+    for d in res_dirs:
+        pq = d / args.parquet_name
+        if not pq.exists():
+            missing_files += 1
+            if not args.quiet:
+                print(f"{d.name}: MISSING {args.parquet_name}")
+            continue
+
+        try:
+            # read only needed columns (fast)
+            df = pd.read_parquet(pq, columns=required_columns)
+        except Exception as e:
+            # Best effort: a single unreadable file must not abort the scan.
+            bad_files += 1
+            if not args.quiet:
+                print(f"{d.name}: ERROR reading parquet: {e}")
+            continue
+
+        if any(col not in df.columns for col in required_columns):
+            bad_files += 1
+            if not args.quiet:
+                print(f"{d.name}: ERROR missing required columns in parquet")
+            continue
+
+        rowcount = int(len(df))
+        uniq = int(df["run_identifier"].nunique(dropna=True))
+
+        # Non-numeric step values become NaN and are ignored by max().
+        step_series = _to_num(df["step"])
+        if step_series.notna().any():
+            max_step = int(step_series.max())
+        else:
+            max_step = None
+
+        uniq_hist[uniq] += 1
+        rowcount_hist[rowcount] += 1
+        if max_step is not None:
+            maxstep_hist[max_step] += 1
+            global_max_step = max_step if global_max_step is None else max(global_max_step, max_step)
+
+        aig = _to_num(df["aig_count"])
+        tr = _to_num(df["nb_transistors"])
+
+        # accumulate for global stats (ignore NaNs)
+        all_aig.extend(aig.dropna().tolist())
+        all_tr.extend(tr.dropna().tolist())
+
+        processed += 1
+
+        if not args.quiet:
+            ms = "NA" if max_step is None else str(max_step)
+            print(f"{d.name}: unique(run_identifier)={uniq:4d} | max(step)={ms}")
+
+    if not args.quiet:
+        print("-" * 80)
+
+    print(f"Processed: {processed}/{len(res_dirs)}")
+    if missing_files:
+        print(f"Missing parquet files: {missing_files}")
+    if bad_files:
+        print(f"Unreadable/invalid parquet files: {bad_files}")
+
+    print()
+    print(text_hist(uniq_hist, title="Unique run_identifier per flowy_data_record.parquet"))
+
+    # The module docstring promises a global maximum in addition to the
+    # per-file values, so report it here.
+    if global_max_step is None:
+        print("Global max(step): NA (no valid step values found)")
+    else:
+        print(f"Global max(step): {global_max_step}")
+
+    print()
+    print(text_hist(rowcount_hist, title="Row count per flowy_data_record.parquet"))
+
+    print()
+    print(text_hist(maxstep_hist, title="Max step per flowy_data_record.parquet"))
+
+    # Global aig/transistor summary
+    def safe_mean_min(vals: list[float]) -> tuple[float | None, float | None]:
+        """Return (mean, min) of ``vals``, or (None, None) when it is empty."""
+        if not vals:
+            return None, None
+        s = pd.Series(vals, dtype="float64")
+        return float(s.mean()), float(s.min())
+
+    aig_g_mean, aig_g_min = safe_mean_min(all_aig)
+    tr_g_mean, tr_g_min = safe_mean_min(all_tr)
+
+    print("Aggregate over ALL rows in ALL files:")
+    if aig_g_mean is None:
+        print("  aig_count     : mean=NA, min=NA")
+    else:
+        print(f"  aig_count     : mean={aig_g_mean:.3f}, min={aig_g_min:.3f}")
+
+    if tr_g_mean is None:
+        print("  nb_transistors : mean=NA, min=NA")
+    else:
+        print(f"  nb_transistors : mean={tr_g_mean:.3f}, min={tr_g_min:.3f}")
+
+    return 0
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/scripts/slurm_scripts/sbatch_dispatch_slurm_temp.sh b/scripts/slurm_scripts/sbatch_dispatch_slurm_temp.sh
@@ -3,7 +3,7 @@
 #SBATCH --output="/home/%u/slurm_logs/genial/sbatch_info/genial_flowy_%j_%N_$timestamp.log"
 #SBATCH --error="/home/%u/slurm_logs/genial/sbatch_error/genial_flowy_%j_%N_$timestamp.log"
 # Move to working directory
-cd $$HOME/proj/genial
+cd $$HOME/proj/GENIAL
-cd $$HOME/proj/GENIAL
+cd "$HOME/proj/genial"
-cd $$HOME/proj/GENIAL
+cd "$HOME/proj/genial"
 
 # Activate Python Environment
 # set -a

diff --git a/src/genial/ext_plugins/flowy/flowy_launcher_helper.py b/src/genial/ext_plugins/flowy/flowy_launcher_helper.py
@@ -266,6 +266,46 @@ def flowy_synthesis(self):
                 else:
                     logger.warning(f"Could not find {path} in {best_data_path}")
 
+        def collect_output_txts(temp_dir: str | Path, out_file: str | Path) -> None:
+            """
+            Concatenate every per-run `output.txt` into a single text file.
+
+            Files are looked up at:
+                temp_dir/output/db/genial/data_collection/<run>/output.txt
+            (direct sub-directories only) and processed in sorted path order.
+            Each one is written to `out_file` as:
+
+                output.txt content of run <run>:
+                <content>
+                <blank line>
+
+            Args:
+                temp_dir: Root directory containing the `output/db/...` tree.
+                out_file: Destination file; its parent directories are created
+                    and an existing file is overwritten.
+            """
+            data_collection_dir = Path(temp_dir) / "output" / "db" / "genial" / "data_collection"
+            out_file = Path(out_file)
+
+            # One directory level only: <run>/output.txt
+            files = sorted(data_collection_dir.glob("*/output.txt"))
+
+            out_file.parent.mkdir(parents=True, exist_ok=True)
+
+            with out_file.open("w", encoding="utf-8") as out:
+                for f in files:
+                    # Label each entry with its run directory name; the file
+                    # name itself is always "output.txt" and carries no information.
+                    label = "output.txt content of run " + f.parent.name
+
+                    try:
+                        content = f.read_text(encoding="utf-8", errors="replace")
+                    except OSError as e:
+                        # Keep going: record the failure in place of the content.
+                        content = f"[ERROR reading file: {e}]"
+
+                    out.write(f"{label}:\n")
+                    out.write(content.rstrip("\n"))
+                    out.write("\n\n")  # blank line between entries
+
+        collect_output_txts(flowy_tmp_dir.name, self.design_output_dir_path / "all_outputs.txt")
+
         # Restoring environment (in case this was not launched from a subprocess)
         os.environ["SRC_DIR"] = original_src_dir
         if original_data_dir is not None: