From feab3fdfe22c86b8160b74b18e1720b0e26c9640 Mon Sep 17 00:00:00 2001 From: Alban Puech Date: Fri, 16 Jan 2026 18:49:52 +0100 Subject: [PATCH 1/3] fix of seed + gen cost permutation + pv to pq --- README.md | 2 + gridfm_datakit/generate.py | 124 ++++++++------ gridfm_datakit/interactive.py | 1 + gridfm_datakit/network.py | 87 +++++++++- .../perturbations/generator_perturbation.py | 56 +++++-- .../perturbations/load_perturbation.py | 44 ++--- gridfm_datakit/process/process_network.py | 110 +++++++++---- gridfm_datakit/utils/random_seed.py | 47 ++++++ scripts/compare_parquet_files.py | 25 +++ .../config/Texas2k_case1_2016summerpeak.yaml | 1 + scripts/config/default.yaml | 5 +- scripts/user_config.yaml | 39 +++++ tests/config/consistency_test.yaml | 1 + tests/config/default_opf_mode_test.yaml | 1 + tests/config/default_pf_mode_test.yaml | 1 + .../default_without_perturbation_test.yaml | 1 + tests/test_generate.py | 118 ++++++++++++-- tests/test_generator_perturbation.py | 153 ++++++++++++++---- tests/test_load_perturbation.py | 16 +- tests/test_random_seed.py | 105 ++++++++++++ tests/test_verify_network.py | 47 ++++++ 21 files changed, 823 insertions(+), 161 deletions(-) create mode 100644 gridfm_datakit/utils/random_seed.py create mode 100644 scripts/user_config.yaml create mode 100644 tests/test_random_seed.py diff --git a/README.md b/README.md index 61c3edb6b..ed919c5cd 100644 --- a/README.md +++ b/README.md @@ -187,6 +187,8 @@ settings: pf_fast: true # Whether to use fast PF solver by default (compute_ac_pf from powermodels.jl); if false, uses Ipopt-based PF. Some networks (typically large ones e.g. case10000_goc) do not work with pf_fast: true. pf_fast is faster and more accurate than the Ipopt-based PF. dcpf_fast: true # Whether to use fast DCPF solver by default (compute_dc_pf from PowerModels.jl) max_iter: 200 # Max iterations for Ipopt-based solvers + seed: null # Seed for random number generation. If null, a random seed is generated (RECOMMENDED). To get the same data across runs, set the seed and note that ALL OTHER PARAMETERS IN THE CONFIG FILE MUST BE THE SAME. + ```
diff --git a/gridfm_datakit/generate.py b/gridfm_datakit/generate.py index 157a23e3d..37c82a3be 100644 --- a/gridfm_datakit/generate.py +++ b/gridfm_datakit/generate.py @@ -36,11 +36,12 @@ import sys from gridfm_datakit.network import Network from gridfm_datakit.process.process_network import init_julia +from gridfm_datakit.utils.random_seed import custom_seed def _setup_environment( config: Union[str, Dict[str, Any], NestedNamespace], -) -> Tuple[NestedNamespace, str, Dict[str, str]]: +) -> Tuple[NestedNamespace, str, Dict[str, str], int]: """Setup the environment for data generation. Args: @@ -50,7 +51,7 @@ def _setup_environment( 3. NestedNamespace object (NestedNamespace) Returns: - Tuple of (args, base_path, file_paths) + Tuple of (args, base_path, file_paths, seed) """ # Load config from file if a path is provided if isinstance(config, str): @@ -63,6 +64,25 @@ def _setup_environment( else: args = config + # Set global seed if provided, otherwise generate a unique seed for this generation + if ( + hasattr(args.settings, "seed") + and args.settings.seed is not None + and args.settings.seed != "" + ): + seed = args.settings.seed + print(f"Global random seed set to: {seed}") + + else: + # Generate a unique seed for non-reproducible but independent scenarios + # This ensures scenarios are i.i.d. within a run, but different across runs + import secrets + + seed = secrets.randbelow(50_000) + # chunk_seed = seed * 20000 + start_idx + 1 < 2^31 - 1 + # seed < (2,147,483,647 - n_scenarios) / 20,000 ~= 100_000 so taking 50_000 to be safe + print(f"No seed provided. Using seed={seed}") + # Setup output directory base_path = os.path.join(args.settings.data_dir, args.network.name, "raw") if os.path.exists(base_path) and args.settings.overwrite: @@ -115,18 +135,20 @@ def _setup_environment( if log_file == file_paths["args_log"]: yaml.safe_dump(args.to_dict(), f) - return args, base_path, file_paths + return args, base_path, file_paths, seed def _prepare_network_and_scenarios( args: NestedNamespace, file_paths: Dict[str, str], + seed: int, ) -> Tuple[Network, np.ndarray]: """Prepare the network and generate load scenarios. Args: args: Configuration object file_paths: Dictionary of file paths + seed: Global random seed for reproducibility. Returns: Tuple of (network, scenarios) @@ -147,6 +169,7 @@ def _prepare_network_and_scenarios( args.load.scenarios, file_paths["scenarios_log"], max_iter=args.settings.max_iter, + seed=seed, ) scenarios_df = load_scenarios_to_df(scenarios) scenarios_df.to_parquet(file_paths["scenarios"], index=False, engine="pyarrow") @@ -230,10 +253,10 @@ def generate_power_flow_data( """ # Setup environment - args, base_path, file_paths = _setup_environment(config) + args, base_path, file_paths, seed = _setup_environment(config) # Prepare network and scenarios - net, scenarios = _prepare_network_and_scenarios(args, file_paths) + net, scenarios = _prepare_network_and_scenarios(args, file_paths, seed) # Initialize topology generator topology_generator = initialize_topology_generator(args.topology_perturbation, net) @@ -254,48 +277,50 @@ def generate_power_flow_data( processed_data = [] - # Process scenarios sequentially - with open(file_paths["tqdm_log"], "a") as f: - with tqdm( - total=args.load.scenarios, - desc="Processing scenarios", - file=Tee(sys.stdout, f), - miniters=5, - ) as pbar: - for scenario_index in range(args.load.scenarios): - # Process the scenario - if args.settings.mode == "opf": - processed_data = process_scenario_opf_mode( - net, - scenarios, - scenario_index, - topology_generator, - generation_generator, - admittance_generator, - processed_data, - file_paths["error_log"], - args.settings.include_dc_res, - jl, - ) - elif args.settings.mode == "pf": - processed_data = process_scenario_pf_mode( - net, - scenarios, - scenario_index, - topology_generator, - generation_generator, - admittance_generator, - processed_data, - file_paths["error_log"], - args.settings.include_dc_res, - args.settings.pf_fast, - args.settings.dcpf_fast, - jl, - ) - else: - raise ValueError("Invalid mode!") - - pbar.update(1) + # Process scenarios sequentially with deterministic seed + # Use custom_seed to control randomness for reproducibility + with custom_seed(seed + 1): + with open(file_paths["tqdm_log"], "a") as f: + with tqdm( + total=args.load.scenarios, + desc="Processing scenarios", + file=Tee(sys.stdout, f), + miniters=5, + ) as pbar: + for scenario_index in range(args.load.scenarios): + # Process the scenario + if args.settings.mode == "opf": + processed_data = process_scenario_opf_mode( + net, + scenarios, + scenario_index, + topology_generator, + generation_generator, + admittance_generator, + processed_data, + file_paths["error_log"], + args.settings.include_dc_res, + jl, + ) + elif args.settings.mode == "pf": + processed_data = process_scenario_pf_mode( + net, + scenarios, + scenario_index, + topology_generator, + generation_generator, + admittance_generator, + processed_data, + file_paths["error_log"], + args.settings.include_dc_res, + args.settings.pf_fast, + args.settings.dcpf_fast, + jl, + ) + else: + raise ValueError("Invalid mode!") + + pbar.update(1) # Save final data _save_generated_data( @@ -339,14 +364,14 @@ def generate_power_flow_data_distributed( - scenarios_{generator}.log: Load scenario generation notes """ # Setup environment - args, base_path, file_paths = _setup_environment(config) + args, base_path, file_paths, seed = _setup_environment(config) # check if mode is valid if args.settings.mode not in ["opf", "pf"]: raise ValueError("Invalid mode!") # Prepare network and scenarios - net, scenarios = _prepare_network_and_scenarios(args, file_paths) + net, scenarios = _prepare_network_and_scenarios(args, file_paths, seed) # Initialize topology generator topology_generator = initialize_topology_generator(args.topology_perturbation, net) @@ -405,6 +430,7 @@ def generate_power_flow_data_distributed( args.settings.dcpf_fast, file_paths["solver_log_dir"], args.settings.max_iter, + seed, ) for chunk in scenario_chunks ] diff --git a/gridfm_datakit/interactive.py b/gridfm_datakit/interactive.py index 17e9f9db0..96d21b996 100644 --- a/gridfm_datakit/interactive.py +++ b/gridfm_datakit/interactive.py @@ -97,6 +97,7 @@ def create_config() -> Dict[str, Any]: "dcpf_fast": dcpf_fast.value, "enable_solver_logs": enable_solver_logs.value, "max_iter": max_iter.value, + "seed": None, # seed is not used in the interactive interface }, } return config diff --git a/gridfm_datakit/network.py b/gridfm_datakit/network.py index 2881aaaea..b2220e1c5 100644 --- a/gridfm_datakit/network.py +++ b/gridfm_datakit/network.py @@ -22,6 +22,8 @@ VM, VA, REF, + PV, + PQ, ) from gridfm_datakit.utils.idx_gen import GEN_BUS, GEN_STATUS, PG, QG from gridfm_datakit.utils.idx_brch import ( @@ -36,12 +38,71 @@ BR_R_ASYM, BR_X_ASYM, ) -from gridfm_datakit.utils.idx_cost import NCOST +from gridfm_datakit.utils.idx_cost import NCOST, MODEL, POLYNOMIAL import warnings import networkx as nx import numpy as np import copy from typing import Dict, Tuple, Any +import tempfile +from juliapkg.state import STATE +from juliapkg.deps import run_julia, executable + + +def correct_network(network_path: str, force: bool = False) -> str: + """ + Load a MATPOWER network using PowerModels via run_julia + and save a corrected version. + + Args: + network_path: Path to the original MATPOWER .m file. + force: If True, regenerate the corrected file even if it exists. + + Returns: + Path to the corrected network file. + + Raises: + FileNotFoundError: If input file does not exist. + RuntimeError: If PowerModels fails. + """ + if not os.path.exists(network_path): + raise FileNotFoundError(f"Network file not found: {network_path}") + + base_path, ext = os.path.splitext(network_path) + corrected_path = f"{base_path}_corrected{ext}" + + if os.path.exists(corrected_path) and not force: + return corrected_path + + # Use temporary file for atomic replace + tmp_fd, tmp_path = tempfile.mkstemp(suffix=".m") + os.close(tmp_fd) + + try: + project = STATE["project"] + jl_exe = executable() + + # Julia script as a list of lines + julia_code = [ + "using PowerModels", + f'data = PowerModels.parse_file("{network_path}")', + f'PowerModels.export_matpower("{tmp_path}", data)', + ] + + # Run Julia + run_julia(julia_code, project=project, executable=jl_exe) + + # Sanity check + if not os.path.exists(tmp_path) or os.path.getsize(tmp_path) == 0: + raise RuntimeError("Julia produced empty MATPOWER file") + + # Atomically replace target file + os.replace(tmp_path, corrected_path) + return corrected_path + + finally: + if os.path.exists(tmp_path): + os.unlink(tmp_path) def numpy_to_matlab_matrix(array: np.ndarray, name: str) -> str: @@ -132,6 +193,11 @@ def __init__(self, mpc: Dict[str, Any]) -> None: assert np.all(np.isin(self.gens[:, GEN_BUS], self.buses[:, BUS_I])), ( "All generator buses should be in bus IDs" ) + + assert np.all(self.gencosts[:, MODEL] == POLYNOMIAL), ( + "MODEL should be POLYNOMIAL" + ) + # assert all generators have the same number of cost coefficients assert np.all(self.gencosts[:, NCOST] == self.gencosts[:, NCOST][0]), ( "All generators must have the same number of cost coefficients" @@ -345,6 +411,21 @@ def deactivate_gens(self, idx_gens: np.ndarray) -> None: ) self.gens[idx_gens, GEN_STATUS] = 0 + # ----------------------------- + # Update PV buses that lost all generators → PQ + # ----------------------------- + n_buses = self.buses.shape[0] + + # Count in-service generators per bus + gens_on = self.gens[self.idx_gens_in_service] + gen_count = np.bincount(gens_on[:, GEN_BUS].astype(int), minlength=n_buses) + + # Boolean mask: PV buses with no in-service generator + pv_no_gen = (self.buses[:, BUS_TYPE] == PV) & (gen_count == 0) + + # Set them to PQ + self.buses[pv_no_gen, BUS_TYPE] = PQ + def check_single_connected_component(self) -> bool: """ Check that the network forms a single connected component. @@ -541,6 +622,7 @@ def load_net_from_file(network_path: str) -> Network: ValueError: If the file format is invalid. """ # Load network using matpowercaseframes + network_path = correct_network(network_path) mpc_frames = CaseFrames(network_path) mpc = { key: mpc_frames.__getattribute__(key) @@ -569,6 +651,7 @@ def load_net_from_pglib(grid_name: str) -> Network: FileNotFoundError: If the file cannot be found after download. ValueError: If the file format is invalid. """ + # Construct file paths file_path = str( resources.files("gridfm_datakit.grids").joinpath(f"pglib_opf_{grid_name}.m"), @@ -586,6 +669,8 @@ def load_net_from_pglib(grid_name: str) -> Network: with open(file_path, "wb") as f: f.write(response.content) + file_path = correct_network(file_path) + # Load network using matpowercaseframes mpc_frames = CaseFrames(file_path) mpc = { diff --git a/gridfm_datakit/perturbations/generator_perturbation.py b/gridfm_datakit/perturbations/generator_perturbation.py index 5492bff66..b1a079e63 100644 --- a/gridfm_datakit/perturbations/generator_perturbation.py +++ b/gridfm_datakit/perturbations/generator_perturbation.py @@ -58,8 +58,10 @@ class PermuteGenCostGenerator(GenerationGenerator): """Class for permuting generator costs. This class is for generating different generation scenarios - by permuting all the coeffiecient costs between and among - generators of power grid networks. + by permuting cost coefficients between and among generators + of power grid networks. Only generators with non-zero linear (c1) + or quadratic (c2) cost coefficients are permuted, preserving + zero-cost and constant-only generators. """ def __init__(self, base_net: Network) -> None: @@ -75,6 +77,16 @@ def __init__(self, base_net: Network) -> None: "All generators must have the same number of cost coefficients" ) + # Identify generators to permute (skip zero-cost and constant-only cost) + costs = base_net.gencosts[ + :, + COST:, + ] # all cost coefficients [c2, c1, c0] for NCOST=3 + # Keep only generators with non-constant terms (c1, c2, ...) non-zero + # costs[:, :-1] excludes the last coefficient (c0, the constant term) + self.permutable_mask = np.any(costs[:, :-1] != 0, axis=1) + self.permutable_indices = np.where(self.permutable_mask)[0] + def generate( self, example_generator: Generator[Network, None, None], @@ -88,12 +100,16 @@ def generate( Yields: An example scenario with cost coeffiecients in the - poly_cost table permuted + poly_cost table permuted (only for dispatchable generators) """ for scenario in example_generator: - new_idx = np.random.permutation(self.num_gens) + # Only permute the selected generators + new_idx = np.random.permutation(self.permutable_indices) # Permute the rows (generators) of the cost coefficients (and NCOST, although we assume it is the same for all generators) - scenario.gencosts[:, NCOST:] = scenario.gencosts[:, NCOST:][new_idx] + scenario.gencosts[self.permutable_indices, NCOST:] = scenario.gencosts[ + new_idx, + NCOST:, + ] yield scenario @@ -101,9 +117,11 @@ class PerturbGenCostGenerator(GenerationGenerator): """Class for perturbing generator cost. This class is for generating different generation scenarios - by randomly perturbing all the cost coeffiecient of generators - in a power network by multiplying with a scaling factor sampled - from a uniform distribution. + by randomly perturbing cost coefficients of generators in a + power network by multiplying with a scaling factor sampled + from a uniform distribution. Only generators with non-zero + linear (c1) or quadratic (c2) cost coefficients are perturbed, + preserving zero-cost and constant-only generators. """ def __init__(self, base_net: Network, sigma: float) -> None: @@ -126,7 +144,19 @@ def __init__(self, base_net: Network, sigma: float) -> None: n_costs = int(base_net.gencosts[:, NCOST][0]) self.lower = np.max([0.0, 1.0 - sigma]) self.upper = 1.0 + sigma - self.sample_size = [self.num_gens, n_costs] + + # Mask generators to perturb (skip zero-cost or constant-only) + costs = base_net.gencosts[ + :, + COST:, + ] # all cost coefficients [c2, c1, c0] for NCOST=3 + # Keep only generators with non-constant terms (c1, c2, ...) non-zero + # costs[:, :-1] excludes the last coefficient (c0, the constant term) + self.perturb_mask = np.any(costs[:, :-1] != 0, axis=1) + self.n_perturbable = np.sum(self.perturb_mask) + + # Sample size for only the perturbable generators + self.sample_size = [self.n_perturbable, n_costs] def generate( self, @@ -140,13 +170,17 @@ def generate( Yields: An example scenario with cost coeffiecients in the poly_cost - table perturbed by multiplying with a scaling factor. + table perturbed by multiplying with a scaling factor (only for dispatchable generators). """ for example in example_generator: + # Generate scaling factors only for generators that can be perturbed scale_fact = np.random.uniform( low=self.lower, high=self.upper, size=self.sample_size, ) - example.gencosts[:, COST:] = example.gencosts[:, COST:] * scale_fact + # Apply scaling only to selected generators + example.gencosts[self.perturb_mask, COST:] = ( + example.gencosts[self.perturb_mask, COST:] * scale_fact + ) yield example diff --git a/gridfm_datakit/perturbations/load_perturbation.py b/gridfm_datakit/perturbations/load_perturbation.py index b25a73c3c..0d2b2e335 100644 --- a/gridfm_datakit/perturbations/load_perturbation.py +++ b/gridfm_datakit/perturbations/load_perturbation.py @@ -9,6 +9,7 @@ from multiprocessing import Pool from gridfm_datakit.network import Network from gridfm_datakit.process.process_network import init_julia +from gridfm_datakit.utils.random_seed import custom_seed from typing import Tuple, Any @@ -172,6 +173,7 @@ def __call__( n_scenarios: int, scenario_log: str, max_iter: int, + seed: int, ) -> np.ndarray: """Generates load scenarios for a power network. @@ -180,7 +182,7 @@ def __call__( n_scenarios: Number of scenarios to generate. scenario_log: Path to log file for scenario generation details. max_iter: Maximum iterations for the OPF solver. - + seed: Global random seed for reproducibility. Returns: numpy.ndarray: Array of shape (n_loads, n_scenarios, 2) containing p_mw and q_mvar values. """ @@ -352,6 +354,7 @@ def __call__( n_scenarios: int, scenarios_log: str, max_iter: int, + seed: int, ) -> np.ndarray: """Generates load profiles based on aggregated load data. @@ -360,7 +363,7 @@ def __call__( n_scenarios: Number of scenarios to generate. scenarios_log: Path to log file for scenario generation details. max_iter: Maximum iterations for the OPF solver. - + seed: Global random seed for reproducibility. Returns: numpy.ndarray: Array of shape (n_loads, n_scenarios, 2) containing p_mw and q_mvar values. @@ -430,25 +433,29 @@ def __call__( ) ref_curve = self.interpolate_row(ref_curve, data_points=n_scenarios) - load_profile_pmw = p_mw_array[:, np.newaxis] * ref_curve - noise = np.random.uniform( - 1 - self.sigma, - 1 + self.sigma, - size=load_profile_pmw.shape, - ) # Add uniform noise - load_profile_pmw *= noise - - if self.change_reactive_power: - load_profile_qmvar = q_mvar_array[:, np.newaxis] * ref_curve + # Use custom_seed context manager to temporarily set seed for noise generation + with custom_seed(seed): + load_profile_pmw = p_mw_array[:, np.newaxis] * ref_curve noise = np.random.uniform( 1 - self.sigma, 1 + self.sigma, - size=load_profile_qmvar.shape, + size=load_profile_pmw.shape, ) # Add uniform noise - load_profile_qmvar *= noise - else: - load_profile_qmvar = q_mvar_array[:, np.newaxis] * np.ones_like(ref_curve) - print("No change in reactive power across scenarios") + load_profile_pmw *= noise + + if self.change_reactive_power: + load_profile_qmvar = q_mvar_array[:, np.newaxis] * ref_curve + noise = np.random.uniform( + 1 - self.sigma, + 1 + self.sigma, + size=load_profile_qmvar.shape, + ) # Add uniform noise + load_profile_qmvar *= noise + else: + load_profile_qmvar = q_mvar_array[:, np.newaxis] * np.ones_like( + ref_curve, + ) + print("No change in reactive power across scenarios") # Stack profiles along the last dimension load_profiles = np.stack((load_profile_pmw, load_profile_qmvar), axis=-1) @@ -508,6 +515,7 @@ def __call__( n_scenarios: int, scenario_log: str, max_iter: int, + seed: int, ) -> np.ndarray: """Generates load profiles based on aggregated load data. @@ -516,7 +524,7 @@ def __call__( n_scenarios: Number of scenarios to generate. scenario_log: Path to log file for scenario generation details. max_iter: Maximum iterations for the OPF solver (unused for Powergraph). - + seed: Global random seed for reproducibility. Returns: numpy.ndarray: Array of shape (n_loads, n_scenarios, 2) containing p_mw and q_mvar values. """ diff --git a/gridfm_datakit/process/process_network.py b/gridfm_datakit/process/process_network.py index 1dcd6f0c6..3b52dc028 100644 --- a/gridfm_datakit/process/process_network.py +++ b/gridfm_datakit/process/process_network.py @@ -23,6 +23,7 @@ from gridfm_datakit.network import makeYbus, branch_vectors import copy from gridfm_datakit.process.solvers import run_opf, run_pf, run_dcpf, run_dcopf +from gridfm_datakit.utils.random_seed import custom_seed from gridfm_datakit.utils.idx_bus import ( GS, BS, @@ -694,6 +695,7 @@ def pf_post_processing( assert np.all(np.isin(net.buses[:, BUS_TYPE], [PQ, PV, REF])), ( "Bus type should be PQ, PV, or REF, no disconnected buses (4)" ) + X_bus[np.arange(n_buses), 8 + net.buses[:, BUS_TYPE].astype(int) - 1] = ( 1 # because type is 1, 2, 3, not 0, 1, 2 ) @@ -725,7 +727,11 @@ def pf_post_processing( X_bus[:, 17] = np.nan # --- Generator data --- - assert np.all(net.gencosts[:, NCOST] == 3), "NCOST should be 3" + + n_cost = net.gencosts[0, NCOST] + assert np.all(net.gencosts[:, NCOST] == n_cost), ( + "NCOST should be the same for all generators" + ) n_gens = net.gens.shape[0] n_cols = ( len(GEN_COLUMNS) + len(DC_GEN_COLUMNS) if include_dc_res else len(GEN_COLUMNS) @@ -741,9 +747,22 @@ def pf_post_processing( X_gen[:, 6] = net.gens[:, PMAX] X_gen[:, 7] = net.gens[:, QMIN] X_gen[:, 8] = net.gens[:, QMAX] - X_gen[:, 9] = net.gencosts[:, COST + 2] - X_gen[:, 10] = net.gencosts[:, COST + 1] - X_gen[:, 11] = net.gencosts[:, COST] + + if n_cost == 3: # order in .m file is c2, c1, c0 + X_gen[:, 9] = net.gencosts[:, COST + 2] + X_gen[:, 10] = net.gencosts[:, COST + 1] + X_gen[:, 11] = net.gencosts[:, COST] + + if n_cost == 2: # order in .m file is c1, c0, and there is no cp2 cost + X_gen[:, 9] = net.gencosts[:, COST + 1] + X_gen[:, 10] = net.gencosts[:, COST] + X_gen[:, 11] = 0 # no cp2 cost for linear cost function + + if n_cost == 1: # order in .m file is c0, and there is no cp1 or cp2 cost + X_gen[:, 9] = net.gencosts[:, COST] + X_gen[:, 10] = 0 # no cp1 cost for constant cost function + X_gen[:, 11] = 0 # no cp2 cost for constant cost function + X_gen[net.idx_gens_in_service, 12] = 1 # slack gen (can be any generator connected to the ref node) @@ -833,6 +852,9 @@ def process_scenario_pf_mode( Returns: Updated list of processed data (bus, gen, branch, Y_bus arrays) + + Note: + Random seed is controlled by the calling context (process_scenario_chunk). """ net = copy.deepcopy(net) @@ -923,6 +945,7 @@ def process_scenario_chunk( dcpf_fast: bool, solver_log_dir: str, max_iter: int, + seed: int, ) -> Tuple[ Union[None, Exception], Union[None, str], @@ -945,6 +968,11 @@ def process_scenario_chunk( admittance_generator: Generator for line admittance perturbations. error_log_path: Path to error log file for recording failures. include_dc_res: Whether to include DC power flow results in output. + pf_fast: Whether to use fast AC PF solver. + dcpf_fast: Whether to use fast DC PF solver. + solver_log_dir: Directory for solver logs. + max_iter: Maximum iterations for the solver. + seed: Global random seed for reproducibility. Returns: Tuple containing: @@ -956,37 +984,47 @@ def process_scenario_chunk( try: jl = init_julia(max_iter, solver_log_dir) local_processed_data = [] - for scenario_index in range(start_idx, end_idx): - if mode == "opf": - local_processed_data = process_scenario_opf_mode( - net, - scenarios, - scenario_index, - topology_generator, - generation_generator, - admittance_generator, - local_processed_data, - error_log_path, - include_dc_res, - jl, - ) - elif mode == "pf": - local_processed_data = process_scenario_pf_mode( - net, - scenarios, - scenario_index, - topology_generator, - generation_generator, - admittance_generator, - local_processed_data, - error_log_path, - include_dc_res, - pf_fast, - dcpf_fast, - jl, - ) - progress_queue.put(1) # update queue + # Use custom_seed to set seed based on start_idx for this chunk + # This ensures each chunk gets a unique but deterministic seed + # we multiply by 20_000 to ensure there is no collision with other runs where the seed would be close to each other + # example (assuming we have chunks of length 1, hence an increment of 1 between start indices) + # Run A: base seed = 42 → scenario seeds = 42, 43, 44, …, 10041 (for 10,000 scenarios) + # Run B: base seed = 120 → scenario seeds = 120, 121, 122, …, 10119 + # These sets overlap on seeds 120..10041 (so 9,922 overlapping seeds). + # we also add 1 in case the seed is 0, to not have collision witht he seed used for the load perturbations + with custom_seed(seed * 20_000 + start_idx + 1): + for scenario_index in range(start_idx, end_idx): + if mode == "opf": + local_processed_data = process_scenario_opf_mode( + net, + scenarios, + scenario_index, + topology_generator, + generation_generator, + admittance_generator, + local_processed_data, + error_log_path, + include_dc_res, + jl, + ) + elif mode == "pf": + local_processed_data = process_scenario_pf_mode( + net, + scenarios, + scenario_index, + topology_generator, + generation_generator, + admittance_generator, + local_processed_data, + error_log_path, + include_dc_res, + pf_fast, + dcpf_fast, + jl, + ) + + progress_queue.put(1) # update queue return ( None, @@ -1031,9 +1069,13 @@ def process_scenario_opf_mode( local_processed_data: List to accumulate processed data tuples. error_log_file: Path to error log file for recording failures. include_dc_res: Whether to include DC power flow results in output. + jl: Julia interface object for running power flow calculations. Returns: Updated list of processed data (bus, gen, branch, Y_bus arrays) + + Note: + Random seed is controlled by the calling context (process_scenario_chunk). """ # apply the load scenario to the network diff --git a/gridfm_datakit/utils/random_seed.py b/gridfm_datakit/utils/random_seed.py new file mode 100644 index 000000000..84b30977a --- /dev/null +++ b/gridfm_datakit/utils/random_seed.py @@ -0,0 +1,47 @@ +"""Context manager for temporary random seed management.""" + +import numpy as np +from typing import Optional + + +class custom_seed: + """Context manager to temporarily set a custom random seed. + + This context manager saves the current numpy random state, sets a new seed, + and restores the previous state upon exit. This is useful for ensuring + reproducibility in specific code blocks while maintaining the overall + random state flow. + + Example: + >>> import numpy as np + >>> np.random.seed(42) + >>> print(np.random.rand()) # Will use seed 42 + >>> with custom_seed(100): + ... print(np.random.rand()) # Will use seed 100 + >>> print(np.random.rand()) # Will continue from seed 42's sequence + + Args: + seed: The seed value to use within the context. If None, no seed is set. + """ + + def __init__(self, seed: Optional[int] = None): + """Initialize the context manager with a custom seed. + + Args: + seed: The seed value to use. If None, state is saved but no new seed is set. + """ + self.seed = seed + self.saved_state = None + + def __enter__(self): + """Save current random state and set the custom seed.""" + self.saved_state = np.random.get_state() + if self.seed is not None: + np.random.seed(self.seed) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Restore the previously saved random state.""" + if self.saved_state is not None: + np.random.set_state(self.saved_state) + return False # Don't suppress exceptions diff --git a/scripts/compare_parquet_files.py b/scripts/compare_parquet_files.py index 7d87d9467..821ddf0e0 100644 --- a/scripts/compare_parquet_files.py +++ b/scripts/compare_parquet_files.py @@ -55,6 +55,8 @@ pf_fast: true # Whether to use fast PF solver by default (compute_ac_pf from powermodels.jl); if false, uses Ipopt-based PF. Some networks (typically large ones e.g. case10000_goc) do not work with pf_fast: true. pf_fast is faster and more accurate than the Ipopt-based PF. dcpf_fast: true # Whether to use fast DCPF solver by default (compute_dc_pf from PowerModels.jl) max_iter: 200 # Max iterations for Ipopt-based solvers + seed: null # Seed for random number generation. If null, a random seed is generated (RECOMMENDED). To get the same data across runs, set the seed and note that ALL OTHER PARAMETERS IN THE CONFIG FILE MUST BE THE SAME. + """ @@ -178,6 +180,29 @@ def compare_parquet_files(dir1: str, dir2: str, verbose: bool = False) -> None: df1 = pd.read_parquet(files_dir1[filename]) df2 = pd.read_parquet(files_dir2[filename]) + # sort by load_scenario_idx if exists + if "load_scenario_idx" in df1.columns: + print(f"Sorting {filename} by load_scenario_idx") + df1 = df1.sort_values( + by="load_scenario_idx", + kind="stable", + ).reset_index(drop=True) + df2 = df2.sort_values( + by="load_scenario_idx", + kind="stable", + ).reset_index(drop=True) + + else: + print( + f"No load_scenario_idx column found in {filename}, using sort by scenario", + ) + df1 = df1.sort_values(by="scenario", kind="stable").reset_index( + drop=True, + ) + df2 = df2.sort_values(by="scenario", kind="stable").reset_index( + drop=True, + ) + cols1 = set(df1.columns) cols2 = set(df2.columns) diff --git a/scripts/config/Texas2k_case1_2016summerpeak.yaml b/scripts/config/Texas2k_case1_2016summerpeak.yaml index a0cfef664..308f07588 100644 --- a/scripts/config/Texas2k_case1_2016summerpeak.yaml +++ b/scripts/config/Texas2k_case1_2016summerpeak.yaml @@ -45,3 +45,4 @@ settings: pf_fast: true # Whether to use fast PF solver by default (compute_ac_pf from powermodels.jl); if false, uses Ipopt-based PF. Some networks (typically large ones e.g. case10000_goc) do not work with pf_fast: true. pf_fast is faster and more accurate than the Ipopt-based PF. dcpf_fast: true # Whether to use fast DCPF solver by default (compute_dc_pf from PowerModels.jl) max_iter: 200 # Max iterations for Ipopt-based solvers + seed: null # Seed for random number generation. If null, a random seed is generated (RECOMMENDED). To get the same data across runs, set the seed and note that ALL OTHER PARAMETERS IN THE CONFIG FILE MUST BE THE SAME. diff --git a/scripts/config/default.yaml b/scripts/config/default.yaml index 17dbaa680..37e8d56c0 100644 --- a/scripts/config/default.yaml +++ b/scripts/config/default.yaml @@ -7,7 +7,7 @@ network: load: generator: "agg_load_profile" # Name of the load generator; options: agg_load_profile, powergraph agg_profile: "default" # Name of the aggregated load profile - scenarios: 10000 # Number of different load scenarios to generate + scenarios: 100 # Number of different load scenarios to generate # WARNING: the following parameters are only used if generator is "agg_load_profile" # if using generator "powergraph", these parameters are ignored sigma: 0.2 # max local noise @@ -36,7 +36,7 @@ admittance_perturbation: settings: num_processes: 16 # Number of parallel processes to use - data_dir: "./data_out" # Directory to save generated data relative to the project root + data_dir: "./data_out_seed_none2" # Directory to save generated data relative to the project root large_chunk_size: 1000 # Number of load scenarios processed before saving overwrite: true # If true, overwrites existing files, if false, appends to files mode: "pf" # Mode of the script; options: pf, opf. pf: power flow data where one or more operating limits – the inequality constraints defined in OPF, e.g., voltage magnitude or branch limits – may be violated. opf: generates datapoints for training OPF solvers, with cost-optimal dispatches that satisfy all operating limits (OPF-feasible) @@ -45,3 +45,4 @@ settings: pf_fast: true # Whether to use fast PF solver by default (compute_ac_pf from powermodels.jl); if false, uses Ipopt-based PF. Some networks (typically large ones e.g. case10000_goc) do not work with pf_fast: true. pf_fast is faster and more accurate than the Ipopt-based PF. dcpf_fast: true # Whether to use fast DCPF solver by default (compute_dc_pf from PowerModels.jl) max_iter: 200 # Max iterations for Ipopt-based solvers + seed: null # Seed for random number generation. If null, a random seed is generated (RECOMMENDED). To get the same data across runs, set the seed and note that ALL OTHER PARAMETERS IN THE CONFIG FILE MUST BE THE SAME. diff --git a/scripts/user_config.yaml b/scripts/user_config.yaml new file mode 100644 index 000000000..037bc6100 --- /dev/null +++ b/scripts/user_config.yaml @@ -0,0 +1,39 @@ +admittance_perturbation: + sigma: 0.2 + type: random_perturbation +generation_perturbation: + sigma: 1.0 + type: cost_permutation +load: + agg_profile: default + change_reactive_power: true + generator: agg_load_profile + global_range: 0.4 + max_scaling_factor: 4.0 + scenarios: 10000 + sigma: 0.2 + start_scaling_factor: 1.0 + step_size: 0.1 +network: + name: case24_ieee_rts + network_dir: none + source: pglib +settings: + data_dir: ./data_out + dcpf_fast: true + enable_solver_logs: true + include_dc_res: true + large_chunk_size: 1000 + max_iter: 200 + mode: pf + num_processes: 16 + overwrite: true + pf_fast: true + seed: null +topology_perturbation: + elements: + - branch + - gen + k: 1 + n_topology_variants: 20 + type: random diff --git a/tests/config/consistency_test.yaml b/tests/config/consistency_test.yaml index 351edaf28..95d9f3be8 100644 --- a/tests/config/consistency_test.yaml +++ b/tests/config/consistency_test.yaml @@ -45,3 +45,4 @@ settings: pf_fast: true # Whether to use fast PF solver by default (compute_ac_pf from powermodels.jl); if false, uses Ipopt-based PF. Some networks (typically large ones e.g. case10000_goc) do not work with pf_fast: true. pf_fast is faster and more accurate than the Ipopt-based PF. dcpf_fast: true # Whether to use fast DCPF solver by default (compute_dc_pf from PowerModels.jl) max_iter: 200 # Max iterations for Ipopt-based solvers + seed: null # Seed for random number generation. If null, a random seed is generated (RECOMMENDED). To get the same data across runs, set the seed and note that ALL OTHER PARAMETERS IN THE CONFIG FILE MUST BE THE SAME. diff --git a/tests/config/default_opf_mode_test.yaml b/tests/config/default_opf_mode_test.yaml index 87318b41f..75fe384bf 100644 --- a/tests/config/default_opf_mode_test.yaml +++ b/tests/config/default_opf_mode_test.yaml @@ -45,3 +45,4 @@ settings: pf_fast: true # Whether to use fast PF solver by default (compute_ac_pf from powermodels.jl); if false, uses Ipopt-based PF. Some networks (typically large ones e.g. case10000_goc) do not work with pf_fast: true. pf_fast is faster and more accurate than the Ipopt-based PF. dcpf_fast: true # Whether to use fast DCPF solver by default (compute_dc_pf from PowerModels.jl) max_iter: 200 # Max iterations for Ipopt-based solvers + seed: null # Seed for random number generation. If null, a random seed is generated (RECOMMENDED). To get the same data across runs, set the seed and note that ALL OTHER PARAMETERS IN THE CONFIG FILE MUST BE THE SAME. diff --git a/tests/config/default_pf_mode_test.yaml b/tests/config/default_pf_mode_test.yaml index c84864f45..dfc057784 100644 --- a/tests/config/default_pf_mode_test.yaml +++ b/tests/config/default_pf_mode_test.yaml @@ -45,3 +45,4 @@ settings: pf_fast: true # Whether to use fast PF solver by default (compute_ac_pf from powermodels.jl); if false, uses Ipopt-based PF. Some networks (typically large ones e.g. case10000_goc) do not work with pf_fast: true. pf_fast is faster and more accurate than the Ipopt-based PF. dcpf_fast: true # Whether to use fast DCPF solver by default (compute_dc_pf from PowerModels.jl) max_iter: 200 # Max iterations for Ipopt-based solvers + seed: null # Seed for random number generation. If null, a random seed is generated (RECOMMENDED). To get the same data across runs, set the seed and note that ALL OTHER PARAMETERS IN THE CONFIG FILE MUST BE THE SAME. diff --git a/tests/config/default_without_perturbation_test.yaml b/tests/config/default_without_perturbation_test.yaml index a3bee11c3..989f1fcbe 100644 --- a/tests/config/default_without_perturbation_test.yaml +++ b/tests/config/default_without_perturbation_test.yaml @@ -45,3 +45,4 @@ settings: pf_fast: true # Whether to use fast PF solver by default (compute_ac_pf from powermodels.jl); if false, uses Ipopt-based PF. Some networks (typically large ones e.g. case10000_goc) do not work with pf_fast: true. pf_fast is faster and more accurate than the Ipopt-based PF. dcpf_fast: true # Whether to use fast DCPF solver by default (compute_dc_pf from PowerModels.jl) max_iter: 200 # Max iterations for Ipopt-based solvers + seed: null # Seed for random number generation. If null, a random seed is generated (RECOMMENDED). To get the same data across runs, set the seed and note that ALL OTHER PARAMETERS IN THE CONFIG FILE MUST BE THE SAME. diff --git a/tests/test_generate.py b/tests/test_generate.py index c2a44d884..4a1bebcb5 100644 --- a/tests/test_generate.py +++ b/tests/test_generate.py @@ -45,7 +45,7 @@ def test_setup_environment(conf): """ Tests if environment setup works correctly """ - args, base_path, file_paths = _setup_environment(conf) + args, base_path, file_paths, seed = _setup_environment(conf) assert isinstance(file_paths, dict), "File paths should be a dictionary" assert "y_bus_data" in file_paths, ( "Y-bus data file path should be in the dictionary" @@ -66,7 +66,7 @@ def test_fail_setup_environment(): """ # Test with a non-existent configuration file with pytest.raises(FileNotFoundError): - args, base_path, file_paths = _setup_environment( + args, base_path, file_paths, seed = _setup_environment( "scripts/config/non_existent_config.yaml", ) @@ -77,8 +77,8 @@ def test_prepare_network_and_scenarios(conf): Tests if network and scenarios are prepared correctly """ # Ensure the configuration is valid - args, base_path, file_paths = _setup_environment(conf) - net, scenarios = _prepare_network_and_scenarios(args, file_paths) + args, base_path, file_paths, seed = _setup_environment(conf) + net, scenarios = _prepare_network_and_scenarios(args, file_paths, seed) assert isinstance(net, Network), "Network should be a Network object" assert len(scenarios) > 0, "There should be at least one scenario" @@ -91,18 +91,18 @@ def test_fail_prepare_network_and_scenarios(): # Test with a non-existent configuration file config = "scripts/config/non_existent_config.yaml" with pytest.raises(FileNotFoundError): - args, base_path, file_paths = _setup_environment(config) - net, scenarios = _prepare_network_and_scenarios(args, file_paths) + args, base_path, file_paths, seed = _setup_environment(config) + net, scenarios = _prepare_network_and_scenarios(args, file_paths, seed) def test_fail_prepare_network_and_scenarios_config(conf): """ Tests if preparing network and scenarios fails with an invalid grid source in the configuration file """ - args, base_path, file_paths = _setup_environment(conf) + args, base_path, file_paths, seed = _setup_environment(conf) conf.network.source = "invalid_source" # Set invalid source with pytest.raises(ValueError, match="Invalid grid source!"): - net, scenarios = _prepare_network_and_scenarios(conf, file_paths) + net, scenarios = _prepare_network_and_scenarios(conf, file_paths, seed) # Test save network function @@ -221,7 +221,7 @@ def test_setup_environment_overwrite_behavior(): } # First setup creates directories - args, base_path, file_paths = _setup_environment(config) + args, base_path, file_paths, seed = _setup_environment(config) marker = os.path.join(base_path, "marker.txt") os.makedirs(base_path, exist_ok=True) with open(marker, "w") as f: @@ -229,13 +229,13 @@ def test_setup_environment_overwrite_behavior(): # overwrite=False should keep existing contents config["settings"]["overwrite"] = False - _args2, base_path2, _ = _setup_environment(config) + _args2, base_path2, _, _ = _setup_environment(config) assert base_path2 == base_path assert os.path.exists(marker), "Marker should persist when overwrite is False" # overwrite=True should remove and recreate directory config["settings"]["overwrite"] = True - _args3, base_path3, _ = _setup_environment(config) + _args3, base_path3, _, _ = _setup_environment(config) assert base_path3 == base_path assert not os.path.exists(marker), ( "Marker should be removed when overwrite is True" @@ -287,5 +287,99 @@ def test_parquet_append_vs_overwrite(): ) +def test_reproducibility_with_same_seed(): + """ + Test that generating data with the same seed produces identical results. + Uses case14_ieee network with 10 scenarios and 2 processes. + """ + # Load base config + config_path = "tests/config/default_pf_mode_test.yaml" + with open(config_path, "r") as f: + config = yaml.safe_load(f) + + # Modify config for this test + config["network"]["name"] = "case14_ieee" + config["load"]["scenarios"] = 6 + config["settings"]["num_processes"] = 2 + config["settings"]["seed"] = 42 # Fixed seed for reproducibility + config["settings"]["overwrite"] = True + + # Use temporary directories for both runs + with ( + tempfile.TemporaryDirectory() as tmpdir1, + tempfile.TemporaryDirectory() as tmpdir2, + ): + # First run + config["settings"]["data_dir"] = tmpdir1 + config1 = NestedNamespace(**config) + file_paths_1 = generate_power_flow_data_distributed(config1) + + # Second run with same seed + config["settings"]["data_dir"] = tmpdir2 + config2 = NestedNamespace(**config) + file_paths_2 = generate_power_flow_data_distributed(config2) + + # Load all generated parquet files and compare + data_files = ["bus_data", "gen_data", "branch_data", "y_bus_data"] + + for data_type in data_files: + df1 = pd.read_parquet(file_paths_1[data_type], engine="pyarrow") + df2 = pd.read_parquet(file_paths_2[data_type], engine="pyarrow") + + # sort by load_scenario_idx + df1 = df1.sort_values(by="load_scenario_idx", kind="stable").reset_index( + drop=True, + ) + df2 = df2.sort_values(by="load_scenario_idx", kind="stable").reset_index( + drop=True, + ) + + # Check that shapes match + assert df1.shape == df2.shape, ( + f"{data_type}: Shape mismatch - Run 1: {df1.shape}, Run 2: {df2.shape}" + ) + + # Check that columns match + assert list(df1.columns) == list(df2.columns), ( + f"{data_type}: Column mismatch - Run 1: {df1.columns}, Run 2: {df2.columns}" + ) + + # Check that all values are identical + # Use pandas testing to handle floating point comparison properly + pd.testing.assert_frame_equal( + df1, + df2, + check_dtype=True, + check_exact=False, + rtol=1e-10, + atol=1e-10, + obj=f"{data_type}", + ) + + print( + f" ✓ {data_type}: Both runs produced identical data ({df1.shape[0]} rows)", + ) + + # Also check the scenarios file + scenarios_1 = pd.read_parquet(file_paths_1["scenarios"], engine="pyarrow") + scenarios_2 = pd.read_parquet(file_paths_2["scenarios"], engine="pyarrow") + + os.makedirs("tests/test_data", exist_ok=True) + + pd.testing.assert_frame_equal( + scenarios_1, + scenarios_2, + check_dtype=True, + check_exact=False, + rtol=1e-10, + atol=1e-10, + ) + + print( + f" ✓ scenarios: Both runs produced identical data ({scenarios_1.shape[0]} rows)", + ) + print("\n✓ All data files are identical across runs with the same seed!") + + if __name__ == "__main__": - test_parquet_append_vs_overwrite() + test_reproducibility_with_same_seed() diff --git a/tests/test_generator_perturbation.py b/tests/test_generator_perturbation.py index 67d5bf7eb..6e5b21604 100644 --- a/tests/test_generator_perturbation.py +++ b/tests/test_generator_perturbation.py @@ -50,53 +50,52 @@ def example_generator(): ) def test_generator_cost_perturbation_changes_values(self): - """Test that PerturbGenCostGenerator changes cost values""" + """Test that PerturbGenCostGenerator changes ALL cost coefficients for non-constant generators""" # Load the original network original_network = load_net_from_pglib("case24_ieee_rts") # Store original cost values original_gencosts = original_network.gencosts.copy() - # Create perturbation generator with sigma=0.1 + # Identify generators with non-constant costs (c1 or c2 != 0) + # MATPOWER cost order: [c2, c1, c0] for NCOST=3 + costs = original_gencosts[:, COST:] + non_constant_mask = np.any(costs[:, :-1] != 0, axis=1) + + # Ensure there are actually non-constant generators in the test + assert np.sum(non_constant_mask) > 0, ( + "Test requires generators with non-constant costs" + ) + + # Create perturbation generator with significant sigma perturb_generator = PerturbGenCostGenerator( base_net=original_network, sigma=0.1, ) - # Create a simple generator that yields a copy of the original network - def example_generator(): - yield copy.deepcopy(original_network) + # Generate one perturbation + test_network = copy.deepcopy(original_network) - # Generate perturbed networks - perturbed_networks = list(perturb_generator.generate(example_generator())) + def example_gen(): + yield test_network - # Verify we got exactly one network - assert len(perturbed_networks) == 1, ( - f"Expected 1 network, got {len(perturbed_networks)}" - ) + perturbed_networks = list(perturb_generator.generate(example_gen())) + perturbed_gencosts = perturbed_networks[0].gencosts - perturbed_network = perturbed_networks[0] - perturbed_gencosts = perturbed_network.gencosts + # Check that ALL cost coefficients have changed for each non-constant generator + for gen_idx in np.where(non_constant_mask)[0]: + original_costs = original_gencosts[gen_idx, COST:] + perturbed_costs = perturbed_gencosts[gen_idx, COST:] - # Check that cost values are actually different - # This might not always be true due to randomness, so we'll check multiple times - different_perturbations = 0 - for _ in range(10): # Try multiple perturbations - test_network = copy.deepcopy(original_network) - - def example_gen(): - yield test_network - - perturbed_networks = list(perturb_generator.generate(example_gen())) - perturbed_gencosts = perturbed_networks[0].gencosts - - if not np.array_equal(original_gencosts, perturbed_gencosts): - different_perturbations += 1 - - # We expect at least some perturbations to be different - assert different_perturbations > 0, ( - "Perturbation should change the generator cost values" - ) + # Check each coefficient individually (c2, c1, c0) + for coeff_idx in range(len(original_costs)): + if original_costs[coeff_idx] != 0: + assert original_costs[coeff_idx] != perturbed_costs[coeff_idx], ( + f"Generator {gen_idx}, coefficient {coeff_idx}: " + f"Expected value to change from {original_costs[coeff_idx]}, " + f"but it remained the same. All non-constant generators should have " + f"all their cost coefficients changed by perturbation." + ) def test_generator_cost_perturbation_preserves_structure(self): """Test that PerturbGenCostGenerator preserves the structure of cost coefficients""" @@ -301,3 +300,93 @@ def gen_net(): assert np.allclose(net_out.gencosts, costs0, rtol=0.0, atol=0.0), ( "gencosts should be unchanged when sigma=0" ) + + +def test_permute_skips_zero_and_constant_cost_generators(): + """PermuteGenCostGenerator should skip generators with zero or constant-only costs.""" + net = load_net_from_pglib("case24_ieee_rts") + + # Modify some generators to have zero or constant-only costs + # MATPOWER cost order: [c2, c1, c0] for NCOST=3 + + # Generator 0: zero cost (all coefficients = 0) + net.gencosts[0, COST:] = 0 + + # Generator 1: constant only (c2=0, c1=0, c0=100) + net.gencosts[1, COST : COST + 2] = 0 # Set c2 and c1 to 0 + net.gencosts[1, COST + 2] = 100 # Set c0 to 100 + + # Save original costs for these generators + original_zero_cost = net.gencosts[0, COST:].copy() + original_constant_cost = net.gencosts[1, COST:].copy() + + # Create permutation generator + permute_gen = PermuteGenCostGenerator(base_net=net) + + # Test multiple permutations + for _ in range(10): + test_net = copy.deepcopy(net) + + def gen_net(): + yield test_net + + [perturbed_net] = list(permute_gen.generate(gen_net())) + + # Verify zero-cost generator is unchanged + np.testing.assert_array_equal( + perturbed_net.gencosts[0, COST:], + original_zero_cost, + "Zero-cost generator should not be permuted", + ) + + # Verify constant-only generator is unchanged + np.testing.assert_array_equal( + perturbed_net.gencosts[1, COST:], + original_constant_cost, + "Constant-only generator should not be permuted", + ) + + +def test_perturb_skips_zero_and_constant_cost_generators(): + """PerturbGenCostGenerator should skip generators with zero or constant-only costs.""" + net = load_net_from_pglib("case24_ieee_rts") + + # Modify some generators to have zero or constant-only costs + # MATPOWER cost order: [c2, c1, c0] for NCOST=3 + + # Generator 0: zero cost (all coefficients = 0) + net.gencosts[0, COST:] = 0 + + # Generator 1: constant only (c2=0, c1=0, c0=100) + net.gencosts[1, COST : COST + 2] = 0 # Set c2 and c1 to 0 + net.gencosts[1, COST + 2] = 100 # Set c0 to 100 + + # Save original costs for these generators + original_zero_cost = net.gencosts[0, COST:].copy() + original_constant_cost = net.gencosts[1, COST:].copy() + + # Create perturbation generator with significant sigma + perturb_gen = PerturbGenCostGenerator(base_net=net, sigma=0.5) + + # Test multiple perturbations + for _ in range(10): + test_net = copy.deepcopy(net) + + def gen_net(): + yield test_net + + [perturbed_net] = list(perturb_gen.generate(gen_net())) + + # Verify zero-cost generator is unchanged + np.testing.assert_array_equal( + perturbed_net.gencosts[0, COST:], + original_zero_cost, + "Zero-cost generator should not be perturbed", + ) + + # Verify constant-only generator is unchanged + np.testing.assert_array_equal( + perturbed_net.gencosts[1, COST:], + original_constant_cost, + "Constant-only generator should not be perturbed", + ) diff --git a/tests/test_load_perturbation.py b/tests/test_load_perturbation.py index ff3f8620d..03a010273 100644 --- a/tests/test_load_perturbation.py +++ b/tests/test_load_perturbation.py @@ -23,7 +23,13 @@ def test_agg_load_no_variation_when_sigma_and_range_zero_change_q_true(): step_size=0.1, start_scaling_factor=1.0, ) - scenarios = gen(net, n_scenarios=5, scenarios_log=scenario_log, max_iter=200) + scenarios = gen( + net, + n_scenarios=5, + scenarios_log=scenario_log, + max_iter=200, + seed=0, + ) # shape: (n_loads, n_scenarios, 2) # All scenarios across axis=1 should be equal to the first scenario p_first = scenarios[:, 0, 0] @@ -47,7 +53,13 @@ def test_agg_load_no_variation_when_sigma_and_range_zero_change_q_false(): step_size=0.1, start_scaling_factor=1.0, ) - scenarios = gen(net, n_scenarios=4, scenarios_log=scenario_log, max_iter=200) + scenarios = gen( + net, + n_scenarios=4, + scenarios_log=scenario_log, + max_iter=200, + seed=0, + ) # p columns identical across scenarios p_first = scenarios[:, 0, 0] assert np.allclose(scenarios[:, :, 0], p_first[:, None]) diff --git a/tests/test_random_seed.py b/tests/test_random_seed.py new file mode 100644 index 000000000..c7f663d2c --- /dev/null +++ b/tests/test_random_seed.py @@ -0,0 +1,105 @@ +"""Test suite for custom_seed context manager.""" + +import numpy as np +import pytest +from gridfm_datakit.utils.random_seed import custom_seed + + +def test_seed_set_inside_with_block(): + """Verify that the seed is properly set inside the with block.""" + # Get expected value with seed=100 + np.random.seed(100) + expected_value = np.random.rand() + + # Now test with context manager from a different seed + np.random.seed(42) + with custom_seed(100): + actual_value = np.random.rand() + + assert np.isclose(actual_value, expected_value), ( + "Seed not set correctly inside with block" + ) + + +def test_state_restored_after_with_block(): + """Verify that the random state is restored after exiting the with block.""" + # Set up: seed=42, consume one value + np.random.seed(42) + _ = np.random.rand() # Skip first value + + # Use different seed temporarily + with custom_seed(999): + _ = np.random.rand() # Use seed=999 inside + + # After exiting, should continue from seed=42 sequence (2nd value) + second_value = np.random.rand() + + # Verify it matches expected 2nd value from seed=42 sequence + np.random.seed(42) + _ = np.random.rand() # skip first + expected_second = np.random.rand() + + assert np.isclose(second_value, expected_second), ( + "State not restored correctly after exiting with block" + ) + + +def test_reproducibility_with_same_seed(): + """Verify that using the same seed produces identical results.""" + + def generate_data_with_seed(seed): + with custom_seed(seed): + return np.random.randn(10) + + # Generate data twice with same seed but different outer states + np.random.seed(999) + data1 = generate_data_with_seed(100) + + np.random.seed(777) + data2 = generate_data_with_seed(100) + + assert np.allclose(data1, data2), "Same seed didn't produce identical results" + + +def test_none_seed_preserves_state(): + """Verify that passing None as seed just saves/restores state without setting a new seed.""" + np.random.seed(42) + values_before = [np.random.rand() for _ in range(3)] + + np.random.seed(42) + values_with_none = [] + with custom_seed(None): + values_with_none = [np.random.rand() for _ in range(3)] + + assert np.allclose(values_before, values_with_none), ( + "None seed should not change the random sequence" + ) + + +def test_exception_still_restores_state(): + """Verify that state is restored even if an exception occurs inside the with block.""" + np.random.seed(42) + _ = np.random.rand() # Skip first value + + try: + with custom_seed(999): + _ = np.random.rand() + raise ValueError("Test exception") + except ValueError: + pass + + # State should still be restored + value_after = np.random.rand() + + # Verify it matches expected sequence + np.random.seed(42) + _ = np.random.rand() # skip first + expected_value = np.random.rand() + + assert np.isclose(value_after, expected_value), ( + "State not restored after exception in with block" + ) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/test_verify_network.py b/tests/test_verify_network.py index 7bb53d970..a1b19ce10 100644 --- a/tests/test_verify_network.py +++ b/tests/test_verify_network.py @@ -12,7 +12,9 @@ T_BUS, GEN_BUS, ) +from gridfm_datakit.utils.idx_bus import BUS_TYPE, PV, PQ from gridfm_datakit.process.process_network import init_julia +import numpy as np def verify_deactivated_branches(): @@ -167,6 +169,48 @@ def verify_generator_bus_assignment(): print(f" ✓ All {net.gens.shape[0]} generators have matching bus assignments") +def verify_pv_to_pq_on_gen_deactivation(): + """Check that PV buses become PQ when all their generators are deactivated.""" + print("\n5. Testing PV→PQ bus type change on generator deactivation...") + + case_name = "case24_ieee_rts" + net = load_net_from_pglib(case_name) + + # Find the continuous index for original bus 21 + original_bus_21 = 21 + if original_bus_21 not in net.bus_index_mapping: + raise ValueError(f"Bus {original_bus_21} not found in network") + + continuous_bus_21_idx = net.bus_index_mapping[original_bus_21] + + # Assert bus 21 is initially PV type + initial_bus_type = net.buses[continuous_bus_21_idx, BUS_TYPE] + assert initial_bus_type == PV, ( + f"Bus {original_bus_21} (continuous index {continuous_bus_21_idx}) should be PV type, " + f"but has type {initial_bus_type}" + ) + print(f" ✓ Bus {original_bus_21} is initially PV type") + + # Find all generators connected to bus 21 + gens_at_bus_21 = np.where(net.gens[:, GEN_BUS] == continuous_bus_21_idx)[0] + assert len(gens_at_bus_21) > 0, f"No generators found at bus {original_bus_21}" + print( + f" ✓ Found {len(gens_at_bus_21)} generator(s) at bus {original_bus_21}: {gens_at_bus_21}", + ) + + # Deactivate all generators at bus 21 + net.deactivate_gens(gens_at_bus_21) + print(f" ✓ Deactivated generator(s): {gens_at_bus_21}") + + # Check that bus 21 has changed to PQ type + final_bus_type = net.buses[continuous_bus_21_idx, BUS_TYPE] + assert final_bus_type == PQ, ( + f"Bus {original_bus_21} should have changed to PQ type after generator deactivation, " + f"but has type {final_bus_type}" + ) + print(f" ✓ Bus {original_bus_21} successfully changed from PV to PQ type") + + class TestVerifyNetwork: @classmethod def setup_class(cls): @@ -184,3 +228,6 @@ def test_deactivated_generators(self): def test_generator_bus_assignment(self): verify_generator_bus_assignment() + + def test_pv_to_pq_on_gen_deactivation(self): + verify_pv_to_pq_on_gen_deactivation() From b03b63110268a7cf8860b0304815c3b3c30eda06 Mon Sep 17 00:00:00 2001 From: Alban Puech Date: Fri, 16 Jan 2026 13:37:11 -0500 Subject: [PATCH 2/3] fixed default --- gridfm_datakit/network.py | 5 +++-- scripts/config/default.yaml | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/gridfm_datakit/network.py b/gridfm_datakit/network.py index b2220e1c5..9364e9de7 100644 --- a/gridfm_datakit/network.py +++ b/gridfm_datakit/network.py @@ -6,6 +6,7 @@ """ import os +import shutil import requests from importlib import resources import pandas as pd @@ -96,8 +97,8 @@ def correct_network(network_path: str, force: bool = False) -> str: if not os.path.exists(tmp_path) or os.path.getsize(tmp_path) == 0: raise RuntimeError("Julia produced empty MATPOWER file") - # Atomically replace target file - os.replace(tmp_path, corrected_path) + # Atomically replace target file (use shutil.move to allow cross-device) + shutil.move(tmp_path, corrected_path) return corrected_path finally: diff --git a/scripts/config/default.yaml b/scripts/config/default.yaml index 37e8d56c0..cecfd31c6 100644 --- a/scripts/config/default.yaml +++ b/scripts/config/default.yaml @@ -7,7 +7,7 @@ network: load: generator: "agg_load_profile" # Name of the load generator; options: agg_load_profile, powergraph agg_profile: "default" # Name of the aggregated load profile - scenarios: 100 # Number of different load scenarios to generate + scenarios: 10000 # Number of different load scenarios to generate # WARNING: the following parameters are only used if generator is "agg_load_profile" # if using generator "powergraph", these parameters are ignored sigma: 0.2 # max local noise @@ -36,7 +36,7 @@ admittance_perturbation: settings: num_processes: 16 # Number of parallel processes to use - data_dir: "./data_out_seed_none2" # Directory to save generated data relative to the project root + data_dir: "./data_out" # Directory to save generated data relative to the project root large_chunk_size: 1000 # Number of load scenarios processed before saving overwrite: true # If true, overwrites existing files, if false, appends to files mode: "pf" # Mode of the script; options: pf, opf. pf: power flow data where one or more operating limits – the inequality constraints defined in OPF, e.g., voltage magnitude or branch limits – may be violated. opf: generates datapoints for training OPF solvers, with cost-optimal dispatches that satisfy all operating limits (OPF-feasible) From 9aa34ba60d1821674d2951b61b21dcc999e05e59 Mon Sep 17 00:00:00 2001 From: PUECH Alban <72336171+albanpuech@users.noreply.github.com> Date: Fri, 16 Jan 2026 19:47:31 +0100 Subject: [PATCH 3/3] Delete scripts/user_config.yaml Signed-off-by: PUECH Alban <72336171+albanpuech@users.noreply.github.com> --- scripts/user_config.yaml | 39 --------------------------------------- 1 file changed, 39 deletions(-) delete mode 100644 scripts/user_config.yaml diff --git a/scripts/user_config.yaml b/scripts/user_config.yaml deleted file mode 100644 index 037bc6100..000000000 --- a/scripts/user_config.yaml +++ /dev/null @@ -1,39 +0,0 @@ -admittance_perturbation: - sigma: 0.2 - type: random_perturbation -generation_perturbation: - sigma: 1.0 - type: cost_permutation -load: - agg_profile: default - change_reactive_power: true - generator: agg_load_profile - global_range: 0.4 - max_scaling_factor: 4.0 - scenarios: 10000 - sigma: 0.2 - start_scaling_factor: 1.0 - step_size: 0.1 -network: - name: case24_ieee_rts - network_dir: none - source: pglib -settings: - data_dir: ./data_out - dcpf_fast: true - enable_solver_logs: true - include_dc_res: true - large_chunk_size: 1000 - max_iter: 200 - mode: pf - num_processes: 16 - overwrite: true - pf_fast: true - seed: null -topology_perturbation: - elements: - - branch - - gen - k: 1 - n_topology_variants: 20 - type: random