From c6ccb3951b101c5ec6845e57d76d0b463020ce88 Mon Sep 17 00:00:00 2001 From: RudraCodesForU Date: Wed, 10 Sep 2025 17:39:44 +0530 Subject: [PATCH 1/2] Updated the app.js --- web/app.js | 329 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 233 insertions(+), 96 deletions(-) diff --git a/web/app.js b/web/app.js index 7eada7f..fe33ada 100644 --- a/web/app.js +++ b/web/app.js @@ -1,14 +1,57 @@ +const DGM_META = { + "linear_gaussian": { + name: "Linear Gaussian", + equation: `\\begin{align} + X &= \\alpha^T Z + \\varepsilon_1 \\\\ + Y &= \\beta^T Z + \\text{effect\\_size} \\cdot X + \\varepsilon_2 + \\end{align} + \\text{where } \\varepsilon_1, \\varepsilon_2 \\sim \\mathcal{N}(0, \\sigma^2) \\text{ (Gaussian noise)}` + }, + "nonlinear_gaussian": { + name: "Nonlinear Gaussian", + equation: `\\begin{align} + X &= \\sin(\\text{effect\\_size} \\cdot \\sum_j Z_j) + \\varepsilon_1 \\\\ + Y &= \\exp(\\text{effect\\_size} \\cdot \\sum_j Z_j \\cdot 0.2) + \\varepsilon_2 + \\end{align} + \\text{where } \\varepsilon_1, \\varepsilon_2 \\sim \\mathcal{N}(0, \\sigma^2) \\text{ (Gaussian noise)}` + }, + "discrete_categorical": { + name: "Discrete Categorical", + equation: `\\begin{align} + Z_j &\\sim \\text{DiscreteUniform}(0, n_{\\text{categories}}-1) \\\\ + X &= \\sum_j Z_j + \\varepsilon_1 \\\\ + Y &= \\sum_j Z_j + \\varepsilon_2 + \\end{align} + \\text{where } \\varepsilon_1, \\varepsilon_2 \\sim \\mathcal{N}(0, \\sigma^2) \\text{ (Gaussian noise)}` + }, + "mixed_data": { + name: "Mixed Data", + equation: `\\begin{align} + X &= Z^T \\alpha + \\varepsilon_1 \\\\ + Y &= Z^T \\beta + \\text{effect\\_size} \\cdot X + \\varepsilon_2 + \\end{align} + \\text{where } \\varepsilon_1, \\varepsilon_2 \\sim \\mathcal{N}(0, \\sigma^2) \\text{ (Gaussian noise)}` + }, + "non_gaussian_continuous": { + name: "Non-Gaussian Continuous", + equation: `\\begin{align} + X &= |Z^T \\alpha| + \\varepsilon_1 \\\\ + Y &= (Z^T \\beta)^2 + \\text{effect\\_size} \\cdot X + \\varepsilon_2 + \\end{align} + \\text{where } \\varepsilon_1, \\varepsilon_2 \\sim \\text{Exponential}(\\lambda = 1.0) \\text{ (Exponential noise)}` + } +}; + let allData = []; let dgmMap = {}; let chartCalibration, chartPower; const dgmSelect = document.getElementById('dgm-select'); -const ciTestSelect = document.getElementById('ci-test-select'); -const effectSizeSelect = document.getElementById('effect-size-select'); -const significanceSelect = document.getElementById('significance-level-select'); +const sampleSizeSelect = document.getElementById('sample-size-select'); const csvInput = document.getElementById('csv-input'); +const dgmEquationEl = document.getElementById('dgm-equation'); -const DEFAULT_CSV = "results/default_ci_benchmark_summaries.csv"; +const DEFAULT_CSV = "results/ci_benchmark_summaries.csv"; window.addEventListener('DOMContentLoaded', () => { fetch(DEFAULT_CSV) @@ -21,20 +64,15 @@ window.addEventListener('DOMContentLoaded', () => { buildDGMMap(allData); populateDropdowns(); renderCharts(); - // Optional: show a small message to user - showStatus("Loaded default benchmark results.", "success"); }) - .catch(err => { - showStatus("Default results file not found. 
Please upload a CSV.", "warning"); + .catch(() => { + // file not found, don't crash }); }); csvInput.addEventListener('change', handleCSVUpload); - -dgmSelect.addEventListener('change', refreshControls); -ciTestSelect.addEventListener('change', refreshControls); -effectSizeSelect.addEventListener('change', refreshControls); -significanceSelect.addEventListener('change', refreshControls); +dgmSelect.addEventListener('change', onDGMChange); +sampleSizeSelect.addEventListener('change', renderCharts); function handleCSVUpload(event) { const file = event.target.files[0]; @@ -58,6 +96,7 @@ function parseCSV(csvText) { ) ); } + function parseValue(v) { if (!isNaN(parseFloat(v)) && isFinite(v)) return parseFloat(v); if (v === 'True') return true; @@ -68,131 +107,229 @@ function parseValue(v) { function buildDGMMap(data) { dgmMap = {}; data.forEach(row => { - const { dgm, ci_test, effect_size, significance_level } = row; - if (!dgmMap[dgm]) dgmMap[dgm] = {}; - if (!dgmMap[dgm][ci_test]) dgmMap[dgm][ci_test] = {}; - if (!dgmMap[dgm][ci_test][effect_size]) dgmMap[dgm][ci_test][effect_size] = {}; - dgmMap[dgm][ci_test][effect_size][significance_level] = true; + const { dgm, sample_size } = row; + if (!dgmMap[dgm]) dgmMap[dgm] = new Set(); + dgmMap[dgm].add(row.sample_size); + }); + Object.keys(dgmMap).forEach(dgm => { + dgmMap[dgm] = Array.from(dgmMap[dgm]).sort((a, b) => a - b); }); } +function setSelectOptions(select, list) { + select.innerHTML = ''; + for (const obj of list) { + const option = document.createElement('option'); + option.value = obj.value; + option.textContent = obj.label; + select.appendChild(option); + } +} + function populateDropdowns() { - setSelectOptions(dgmSelect, Object.keys(dgmMap)); + setSelectOptions(dgmSelect, Object.keys(dgmMap).map(d => ({ value: d, label: DGM_META[d]?.name || d }))); onDGMChange(); } function onDGMChange() { const dgm = dgmSelect.value; - const tests = dgm ? Object.keys(dgmMap[dgm]) : []; - setSelectOptions(ciTestSelect, tests); - onCITestChange(); -} -function onCITestChange() { - const dgm = dgmSelect.value, test = ciTestSelect.value; - const effects = dgm && test ? Object.keys(dgmMap[dgm][test]) : []; - setSelectOptions(effectSizeSelect, effects); - onEffectSizeChange(); -} -function onEffectSizeChange() { - const dgm = dgmSelect.value, test = ciTestSelect.value, effect = effectSizeSelect.value; - const sigs = dgm && test && effect ? Object.keys(dgmMap[dgm][test][effect]) : []; - setSelectOptions(significanceSelect, sigs); + const sizes = dgm ? 
dgmMap[dgm] : []; + setSelectOptions(sampleSizeSelect, sizes.map(s => ({ value: s, label: s }))); + updateDGMEquation(); + renderCharts(); } -dgmSelect.addEventListener('change', onDGMChange); -ciTestSelect.addEventListener('change', onCITestChange); -effectSizeSelect.addEventListener('change', onEffectSizeChange); - -function setSelectOptions(select, list) { - select.innerHTML = ''; - for (const v of list) { - const option = document.createElement('option'); - option.value = v; - option.textContent = v; - select.appendChild(option); - } +function updateDGMEquation() { + const dgm = dgmSelect.value; + dgmEquationEl.innerHTML = DGM_META[dgm]?.equation || ''; + if (window.MathJax) MathJax.typesetPromise([dgmEquationEl]); } -function refreshControls() { - renderCharts(); +function computeStats(data, groupBy, valueCol) { + const grouped = {}; + data.forEach(row => { + const key = groupBy(row); + if (!grouped[key]) grouped[key] = []; + grouped[key].push(row[valueCol]); + }); + + return Object.entries(grouped).map(([key, values]) => { + const mean = values.reduce((a, b) => a + b, 0) / values.length; + const variance = values.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / values.length; + const stderr = Math.sqrt(variance / values.length); + return { + key: parseFloat(key), + mean, + stderr, + upper: mean + stderr, + lower: Math.max(0, mean - stderr) + }; + }).sort((a, b) => a.key - b.key); } function renderCharts() { if (!allData.length) return; - const dgm = dgmSelect.value, test = ciTestSelect.value; - const effect = parseFloat(effectSizeSelect.value); - const significance = parseFloat(significanceSelect.value); - - const filtered = allData.filter(row => - row.dgm === dgm && row.ci_test === test - ); - - // Calibration Plot (Type I error vs Significance Level, effect_size == 0) - const calibData = filtered.filter(r => r.effect_size === 0); - const calibSLs = calibData.map(r => r.significance_level); - const calibT1 = calibData.map(r => r.type1_error); + const dgm = dgmSelect.value; + const sampleSize = Number(sampleSizeSelect.value); - // Power Plot (Power vs Sample Size, user-selected effect size & significance level) - const powerData = allData.filter(row => - row.dgm === dgm && - row.ci_test === test && - row.effect_size === effect && - row.significance_level === significance - ); - const sampleSizes = powerData.map(r => r.sample_size); - const powers = powerData.map(r => r.power); + // Calibration plot: Type I error vs significance_level, effect_size == 0 + const calibData = allData.filter(r => r.dgm === dgm && r.sample_size === sampleSize && r.effect_size === 0); + const ciTestsCalib = Array.from(new Set(calibData.map(r => r.ci_test))); + + const calibDatasets = ciTestsCalib.map((ciTest, idx) => { + const testData = calibData.filter(r => r.ci_test === ciTest); + const stats = computeStats(testData, row => row.significance_level, 'type1_error'); + + return [ + { + label: `${ciTest}`, + data: stats.map(s => ({ x: s.key, y: s.mean })), + borderColor: chartColor(idx, 1), + backgroundColor: chartColor(idx, 0.2), + pointRadius: 4, + fill: false, + tension: 0.2 + }, + // Added ribbon plot for standard error + { + label: `${ciTest} - Error Band`, + data: stats.map(s => ({ x: s.key, y: s.upper })), + borderColor: chartColor(idx, 0), + backgroundColor: chartColor(idx, 0.15), + fill: '+1', + pointRadius: 0, + tension: 0.2 + }, + { + label: `${ciTest} - Lower`, + data: stats.map(s => ({ x: s.key, y: s.lower })), + borderColor: chartColor(idx, 0), + backgroundColor: chartColor(idx, 0.15), + 
fill: false, + pointRadius: 0, + tension: 0.2 + } + ]; + }).flat(); - // Rendering Calibration Plot if (chartCalibration) chartCalibration.destroy(); chartCalibration = new Chart(document.getElementById('calibration-plot').getContext('2d'), { type: 'line', data: { - labels: calibSLs, - datasets: [{ - label: 'Type I Error', - data: calibT1, - borderColor: 'rgba(54, 162, 235, 1)', - backgroundColor: 'rgba(54, 162, 235, 0.2)', - pointRadius: 4, - fill: true, - }] + datasets: calibDatasets }, options: { responsive: true, - plugins: { - legend: { display: false } + maintainAspectRatio: true, + aspectRatio: 1.6, + plugins: { + legend: { + display: true, + filter: (legendItem) => !legendItem.text.includes('Band') && !legendItem.text.includes('Lower') + } }, scales: { - x: { title: { display: true, text: 'Significance Level' } }, - y: { title: { display: true, text: 'Type I Error' }, min: 0, max: 1 } + x: { + title: { display: true, text: 'Significance Level' }, + min: 0, + max: 1, + type: 'linear' + }, + y: { + title: { display: true, text: 'Type I Error' }, + min: 0, + max: 1 + } } } }); - // Rendering Power Plot + // Power plot: Power vs effect size, significance_level == 0.05 + const powerData = allData.filter(row => + row.dgm === dgm && + row.sample_size === sampleSize && + row.significance_level === 0.05 + ); + + const ciTestsPower = Array.from(new Set(powerData.map(r => r.ci_test))); + const powerDatasets = ciTestsPower.map((ciTest, idx) => { + const testData = powerData.filter(r => r.ci_test === ciTest); + const stats = computeStats(testData, row => row.effect_size, 'power'); + + return [ + { + label: `${ciTest}`, + data: stats.map(s => ({ x: s.key, y: s.mean })), + borderColor: chartColor(idx, 1), + backgroundColor: chartColor(idx, 0.2), + pointRadius: 3, + fill: false, + tension: 0.2 + }, + { + label: `${ciTest} - Error Band`, + data: stats.map(s => ({ x: s.key, y: s.upper })), + borderColor: chartColor(idx, 0), + backgroundColor: chartColor(idx, 0.15), + fill: '+1', + pointRadius: 0, + tension: 0.2 + }, + { + label: `${ciTest} - Lower`, + data: stats.map(s => ({ x: s.key, y: s.lower })), + borderColor: chartColor(idx, 0), + backgroundColor: chartColor(idx, 0.15), + fill: false, + pointRadius: 0, + tension: 0.2 + } + ]; + }).flat(); + if (chartPower) chartPower.destroy(); chartPower = new Chart(document.getElementById('power-plot').getContext('2d'), { type: 'line', data: { - labels: sampleSizes, - datasets: [{ - label: 'Power', - data: powers, - borderColor: 'rgba(255,99,132,1)', - backgroundColor: 'rgba(255,99,132,0.2)', - pointRadius: 4, - fill: true, - }] + datasets: powerDatasets }, options: { responsive: true, - plugins: { - legend: { display: false } + maintainAspectRatio: true, + aspectRatio: 1.6, + plugins: { + legend: { + display: true, + filter: (legendItem) => !legendItem.text.includes('Band') && !legendItem.text.includes('Lower') + } }, scales: { - x: { title: { display: true, text: 'Sample Size' } }, - y: { title: { display: true, text: 'Power' }, min: 0, max: 1 } + x: { + title: { display: true, text: 'Effect Size' }, + min: 0, + max: 1, + type: 'linear' + }, + y: { + title: { display: true, text: 'Power' }, + min: 0, + max: 1 + } } } }); +} + +function chartColor(idx, alpha = 1) { + const colors = [ + `rgba(54, 162, 235, ${alpha})`, // blue + `rgba(255,99,132,${alpha})`, // red + `rgba(255, 205, 86, ${alpha})`, // yellow + `rgba(75, 192, 192, ${alpha})`, // teal + `rgba(153, 102, 255, ${alpha})`, // purple + `rgba(201, 203, 207, ${alpha})`, // grey + `rgba(0, 200, 83, 
${alpha})`, // green + `rgba(255, 87, 34, ${alpha})`, // deep orange + ]; + return colors[idx % colors.length]; } \ No newline at end of file From 70ca190e09d0267ad2a3a1955e0dec6188524769 Mon Sep 17 00:00:00 2001 From: RudraCodesForU Date: Wed, 10 Sep 2025 17:41:09 +0530 Subject: [PATCH 2/2] Updated the ci_benchmark.py --- benchmarks/ci_benchmark.py | 172 +++++++++++++++++++++++++++---------- 1 file changed, 127 insertions(+), 45 deletions(-) diff --git a/benchmarks/ci_benchmark.py b/benchmarks/ci_benchmark.py index d9ee660..748306c 100644 --- a/benchmarks/ci_benchmark.py +++ b/benchmarks/ci_benchmark.py @@ -47,7 +47,7 @@ def run_benchmark( effect_sizes=np.linspace(0, 1, 6), n_repeats=10, ): - + results = [] dgm_pbar = tqdm(dgms.items()) @@ -57,7 +57,6 @@ def run_benchmark( compatible_tests = dgm_to_citests[dgm_name] for n_cond_var in tqdm(n_cond_vars, desc="No. of conditional variables", leave=False): for n in tqdm(sample_sizes, desc="Sample Size", leave=False): - # Null case (conditionally independent, effect size = 0) for rep in range(n_repeats): df = dgm(n_samples=n, effect_size=0.0, @@ -70,16 +69,23 @@ def run_benchmark( for test_name in compatible_tests: ci_func = ci_tests[test_name] result = ci_func("X", "Y", z_cols, df, boolean=False) - # Robust extraction of p-value + if isinstance(result, tuple): - # Heuristic: p-value is usually last in tuple - if isinstance(result[-1], float): - p_val = result[-1] + if len(result) >= 2 and isinstance(result[-1], (float, np.floating)): + p_val = float(result[-1]) + elif isinstance(result[0], (float, np.floating)): + p_val = float(result[0]) else: - # fallback to first item - p_val = result[0] + float_vals = [x for x in result if isinstance(x, (float, np.floating))] + if float_vals: + p_val = float(float_vals[0]) + else: + raise ValueError(f"No valid float p-value found in result: {result}") else: - p_val = result + p_val = float(result) + + if not (0 <= p_val <= 1): + raise ValueError(f"Invalid p-value {p_val} for {test_name} - must be between 0 and 1") results.append( { @@ -90,10 +96,10 @@ def run_benchmark( "repeat": rep, "ci_test": test_name, "cond_independent": True, - "p_value": p_val, + "p_value": p_val, } ) - # Alternative case (conditionally dependent, effect size > 0) + for eff in effect_sizes: if eff == 0.0: continue @@ -108,14 +114,24 @@ def run_benchmark( for test_name in compatible_tests: ci_func = ci_tests[test_name] result = ci_func("X", "Y", z_cols, df, boolean=False) - # Robust extraction of p-value + if isinstance(result, tuple): - if isinstance(result[-1], float): - p_val = result[-1] + if len(result) >= 2 and isinstance(result[-1], (float, np.floating)): + p_val = float(result[-1]) + elif isinstance(result[0], (float, np.floating)): + p_val = float(result[0]) else: - p_val = result[0] + float_vals = [x for x in result if isinstance(x, (float, np.floating))] + if float_vals: + p_val = float(float_vals[0]) + else: + raise ValueError(f"No valid float p-value found in result: {result}") else: - p_val = result + p_val = float(result) + + if not (0 <= p_val <= 1): + raise ValueError(f"Invalid p-value {p_val} for {test_name} - must be between 0 and 1") + results.append( { @@ -126,7 +142,7 @@ def run_benchmark( "repeat": rep, "ci_test": test_name, "cond_independent": False, - "p_value": p_val, + "p_value": p_val, } ) @@ -134,24 +150,40 @@ def run_benchmark( def compute_summary(df_results, significance_levels=[0.001, 0.01, 0.05, 0.1]): - """ - Computes Type I/II errors and power at multiple significance levels using collected 
p-values. - """ + if df_results.empty: + raise ValueError("No benchmark results to compute summary from!") + + if 'p_value' not in df_results.columns: + raise ValueError("p_value column missing from results!") + summary_rows = [] group_cols = ["dgm", "sample_size", "n_cond_vars", "effect_size", "ci_test"] + for keys, group in df_results.groupby(group_cols): null_group = group[group["cond_independent"]] alt_group = group[~group["cond_independent"]] + for sl in significance_levels: + null_valid = null_group.dropna(subset=['p_value']) + alt_valid = alt_group.dropna(subset=['p_value']) + + null_loss_rate = 1 - (len(null_valid) / len(null_group)) if len(null_group) > 0 else 0 + alt_loss_rate = 1 - (len(alt_valid) / len(alt_group)) if len(alt_group) > 0 else 0 + + if null_loss_rate > 0.5: + print(f"WARNING: Lost {null_loss_rate:.1%} of null data for {keys} due to invalid p-values") + if alt_loss_rate > 0.5: + print(f"WARNING: Lost {alt_loss_rate:.1%} of alternative data for {keys} due to invalid p-values") + type1 = ( - (null_group["p_value"] < sl).mean() if not null_group.empty else np.nan + (null_valid["p_value"] < sl).mean() if len(null_valid) > 0 else np.nan ) type2 = ( - 1 - (alt_group["p_value"] < sl).mean() - if not alt_group.empty - else np.nan + 1 - (alt_valid["p_value"] < sl).mean() + if len(alt_valid) > 0 else np.nan ) power = 1 - type2 if not np.isnan(type2) else np.nan + summary_rows.append( dict( zip(group_cols, keys), @@ -159,11 +191,27 @@ def compute_summary(df_results, significance_levels=[0.001, 0.01, 0.05, 0.1]): type1_error=type1, type2_error=type2, power=power, - N_null=len(null_group), - N_alt=len(alt_group), + N_null=len(null_valid), + N_alt=len(alt_valid), + N_null_invalid=len(null_group) - len(null_valid), + N_alt_invalid=len(alt_group) - len(alt_valid), ) ) + df_summary = pd.DataFrame(summary_rows) + total_invalid = df_summary['N_null_invalid'].sum() + df_summary['N_alt_invalid'].sum() + total_tests = len(df_results) + invalid_rate = total_invalid / total_tests if total_tests > 0 else 0 + + print(f"Data Quality Report:") + print(f" Total tests run: {total_tests}") + print(f" Invalid p-values: {total_invalid} ({invalid_rate:.1%})") + + if invalid_rate > 0.3: + raise ValueError(f"Too many test failures: {invalid_rate:.1%} of tests produced invalid p-values") + elif invalid_rate > 0.1: + print(f" WARNING: High failure rate of {invalid_rate:.1%}") + return df_summary @@ -183,7 +231,7 @@ def plot_benchmarks(df_summary, plot_dir="plots"): fig, axes = plt.subplots( len(sample_sizes), len(n_cond_vars_list), - figsize=(4 * len(n_cond_vars_list), 2.5 * len(sample_sizes)), + figsize=(5 * len(n_cond_vars_list), 3 * len(sample_sizes)), sharex=True, sharey=True, ) @@ -202,13 +250,21 @@ def plot_benchmarks(df_summary, plot_dir="plots"): ] for method, color in zip(methods, palette): s = subset[subset["ci_test"] == method] - if not s.empty: + if not s.empty and not s["type2_error"].isna().all(): x_vals = np.log10(s["significance_level"]) y_vals = np.log10(s["type2_error"]) - sort_idx = np.argsort(x_vals) + valid_mask = ~(np.isnan(x_vals) | np.isnan(y_vals) | np.isinf(y_vals)) + if not valid_mask.any(): + print(f"WARNING: No valid data points for {method} in {dgm} plot") + continue + if valid_mask.sum() < len(x_vals) * 0.8: + lost_pct = (1 - valid_mask.sum() / len(x_vals)) * 100 + print(f"WARNING: Lost {lost_pct:.1f}% of data points for {method} due to invalid values") + + sort_idx = np.argsort(x_vals[valid_mask]) ax.plot( - x_vals.iloc[sort_idx], - y_vals.iloc[sort_idx], + 
x_vals[valid_mask].iloc[sort_idx], + y_vals[valid_mask].iloc[sort_idx], marker="o", linestyle="-", label=method, @@ -221,7 +277,17 @@ def plot_benchmarks(df_summary, plot_dir="plots"): if i == len(sample_sizes) - 1: ax.set_xlabel("log10 Significance Level") ax.grid(True, alpha=0.4) - handles, labels = axes[0, 0].get_legend_handles_labels() + ax.set_aspect('auto') + + handles, labels = [], [] + for ax_row in axes: + for ax in ax_row: + h, l = ax.get_legend_handles_labels() + for handle, label in zip(h, l): + if label not in labels: + handles.append(handle) + labels.append(label) + fig.legend( handles, labels, @@ -235,15 +301,18 @@ def plot_benchmarks(df_summary, plot_dir="plots"): ) plt.tight_layout(rect=[0, 0, 1, 0.97]) fname1 = f"{plot_dir}/{dgm}_effect{eff}_typeII_vs_signif.png" - plt.savefig(fname1, bbox_inches="tight") + plt.savefig(fname1, bbox_inches="tight", dpi=150) plt.close(fig) # ---Power vs Sample Size, all significance levels in plot --- for eff in sorted(df_dgm["effect_size"].unique()): + if eff == 0.0: + continue + fig, axes = plt.subplots( len(n_cond_vars_list), 1, - figsize=(7, 2.5 * len(n_cond_vars_list)), + figsize=(8, 3 * len(n_cond_vars_list)), sharex=True, sharey=True, ) @@ -259,12 +328,21 @@ def plot_benchmarks(df_summary, plot_dir="plots"): & (df_dgm["significance_level"] == sl) & (df_dgm["ci_test"] == method) ] - if not subset.empty: - sort_idx = np.argsort(subset["sample_size"]) + if not subset.empty and not subset["power"].isna().all(): + + valid_subset = subset.dropna(subset=['power']) + if len(valid_subset) == 0: + print(f"WARNING: No valid power data for {method} at significance level {sl}") + continue + if len(valid_subset) < len(subset) * 0.8: + lost_pct = (1 - len(valid_subset) / len(subset)) * 100 + print(f"WARNING: Lost {lost_pct:.1f}% of power data for {method} at sl={sl}") + + sort_idx = np.argsort(valid_subset["sample_size"]) linestyle = ["-", "--", "-.", ":"][idx % 4] ax.plot( - subset["sample_size"].iloc[sort_idx], - subset["power"].iloc[sort_idx], + valid_subset["sample_size"].iloc[sort_idx], + valid_subset["power"].iloc[sort_idx], marker="o", linestyle=linestyle, color=color, @@ -277,6 +355,8 @@ def plot_benchmarks(df_summary, plot_dir="plots"): if j == len(n_cond_vars_list) - 1: ax.set_xlabel("Sample Size") ax.grid(True, alpha=0.4) + ax.set_aspect('auto') + handles, labels = [], [] for ax in axes: handles_ax, labels_ax = ax.get_legend_handles_labels() @@ -296,31 +376,33 @@ def plot_benchmarks(df_summary, plot_dir="plots"): ) plt.tight_layout(rect=[0, 0, 1, 0.97]) fname2 = f"{plot_dir}/{dgm}_effect{eff}_power_vs_samplesize_allSL.png" - plt.savefig(fname2, bbox_inches="tight") + plt.savefig(fname2, bbox_inches="tight", dpi=150) plt.close(fig) if __name__ == "__main__": os.makedirs("results", exist_ok=True) + print("Starting benchmark execution...") df_results = run_benchmark() + print(f"Benchmark completed. 
Generated {len(df_results)} test results.") df_results.to_csv("results/ci_benchmark_raw_result.csv", index=False) df_summary = compute_summary(df_results) + print(f"Summary computed with {len(df_summary)} summary rows.") df_summary.to_csv("results/ci_benchmark_summaries.csv", index=False) - print(df_summary) + print(df_summary.head()) print( "\nDetailed results and summary saved to ci_benchmark_raw_result.csv and ci_benchmark_summaries.csv" ) - raw_csv_path = "results/ci_benchmark_raw_result.csv" - if os.path.exists(raw_csv_path): - os.remove(raw_csv_path) + print("Generating plots...") plot_benchmarks(df_summary) + print("Plots generated successfully.") - ## Making a copy of the result to the web directory + # Making a copy of the result to the web directory web_results_dir = os.path.join("web", "results") os.makedirs(web_results_dir, exist_ok=True) src = os.path.join("results", "ci_benchmark_summaries.csv") - dst = os.path.join(web_results_dir, "default_ci_benchmark_summaries.csv") + dst = os.path.join(web_results_dir, "ci_benchmark_summaries.csv") shutil.copyfile(src, dst) print(f"Copied summary CSV to {dst} for web UI.")
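
Follow-up note (not part of the patch itself): the p-value extraction block added in run_benchmark is duplicated verbatim in the null and alternative branches. A minimal sketch of how it could be factored into a single helper, using exactly the heuristics the patch already applies (tuple results searched for a float, last element preferred, values outside [0, 1] rejected); the helper name is illustrative only:

    import numpy as np

    def _extract_p_value(result, test_name=""):
        """Return a p-value from a CI-test result, mirroring the patch's heuristics."""
        if isinstance(result, tuple):
            # Prefer the last element when it is a float (the common (stat, p_value) shape).
            if len(result) >= 2 and isinstance(result[-1], (float, np.floating)):
                p_val = float(result[-1])
            elif isinstance(result[0], (float, np.floating)):
                p_val = float(result[0])
            else:
                # Fall back to the first float found anywhere in the tuple.
                float_vals = [x for x in result if isinstance(x, (float, np.floating))]
                if not float_vals:
                    raise ValueError(f"No valid float p-value found in result: {result}")
                p_val = float(float_vals[0])
        else:
            p_val = float(result)
        if not (0 <= p_val <= 1):
            raise ValueError(f"Invalid p-value {p_val} for {test_name} - must be between 0 and 1")
        return p_val

Both call sites would then reduce to p_val = _extract_p_value(ci_func("X", "Y", z_cols, df, boolean=False), test_name), keeping the validation behaviour identical in the null and alternative cases.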