Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ settings:
pf_fast: true # Whether to use fast PF solver by default (compute_ac_pf from powermodels.jl); if false, uses Ipopt-based PF. Some networks (typically large ones e.g. case10000_goc) do not work with pf_fast: true. pf_fast is faster and more accurate than the Ipopt-based PF.
dcpf_fast: true # Whether to use fast DCPF solver by default (compute_dc_pf from PowerModels.jl)
max_iter: 200 # Max iterations for Ipopt-based solvers
seed: null # Seed for random number generation. If null, a random seed is generated (RECOMMENDED). To get the same data across runs, set the seed and note that ALL OTHER PARAMETERS IN THE CONFIG FILE MUST BE THE SAME.

```

<br>
Expand Down
124 changes: 75 additions & 49 deletions gridfm_datakit/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,12 @@
import sys
from gridfm_datakit.network import Network
from gridfm_datakit.process.process_network import init_julia
from gridfm_datakit.utils.random_seed import custom_seed


def _setup_environment(
config: Union[str, Dict[str, Any], NestedNamespace],
) -> Tuple[NestedNamespace, str, Dict[str, str]]:
) -> Tuple[NestedNamespace, str, Dict[str, str], int]:
"""Setup the environment for data generation.

Args:
Expand All @@ -50,7 +51,7 @@ def _setup_environment(
3. NestedNamespace object (NestedNamespace)

Returns:
Tuple of (args, base_path, file_paths)
Tuple of (args, base_path, file_paths, seed)
"""
# Load config from file if a path is provided
if isinstance(config, str):
Expand All @@ -63,6 +64,25 @@ def _setup_environment(
else:
args = config

# Set global seed if provided, otherwise generate a unique seed for this generation
if (
hasattr(args.settings, "seed")
and args.settings.seed is not None
and args.settings.seed != ""
):
seed = args.settings.seed
print(f"Global random seed set to: {seed}")

else:
# Generate a unique seed for non-reproducible but independent scenarios
# This ensures scenarios are i.i.d. within a run, but different across runs
import secrets

seed = secrets.randbelow(50_000)
# chunk_seed = seed * 20000 + start_idx + 1 < 2^31 - 1
# seed < (2,147,483,647 - n_scenarios) / 20,000 ~= 100_000 so taking 50_000 to be safe
print(f"No seed provided. Using seed={seed}")

# Setup output directory
base_path = os.path.join(args.settings.data_dir, args.network.name, "raw")
if os.path.exists(base_path) and args.settings.overwrite:
Expand Down Expand Up @@ -115,18 +135,20 @@ def _setup_environment(
if log_file == file_paths["args_log"]:
yaml.safe_dump(args.to_dict(), f)

return args, base_path, file_paths
return args, base_path, file_paths, seed


def _prepare_network_and_scenarios(
args: NestedNamespace,
file_paths: Dict[str, str],
seed: int,
) -> Tuple[Network, np.ndarray]:
"""Prepare the network and generate load scenarios.

Args:
args: Configuration object
file_paths: Dictionary of file paths
seed: Global random seed for reproducibility.

Returns:
Tuple of (network, scenarios)
Expand All @@ -147,6 +169,7 @@ def _prepare_network_and_scenarios(
args.load.scenarios,
file_paths["scenarios_log"],
max_iter=args.settings.max_iter,
seed=seed,
)
scenarios_df = load_scenarios_to_df(scenarios)
scenarios_df.to_parquet(file_paths["scenarios"], index=False, engine="pyarrow")
Expand Down Expand Up @@ -230,10 +253,10 @@ def generate_power_flow_data(
"""

# Setup environment
args, base_path, file_paths = _setup_environment(config)
args, base_path, file_paths, seed = _setup_environment(config)

# Prepare network and scenarios
net, scenarios = _prepare_network_and_scenarios(args, file_paths)
net, scenarios = _prepare_network_and_scenarios(args, file_paths, seed)

# Initialize topology generator
topology_generator = initialize_topology_generator(args.topology_perturbation, net)
Expand All @@ -254,48 +277,50 @@ def generate_power_flow_data(

processed_data = []

# Process scenarios sequentially
with open(file_paths["tqdm_log"], "a") as f:
with tqdm(
total=args.load.scenarios,
desc="Processing scenarios",
file=Tee(sys.stdout, f),
miniters=5,
) as pbar:
for scenario_index in range(args.load.scenarios):
# Process the scenario
if args.settings.mode == "opf":
processed_data = process_scenario_opf_mode(
net,
scenarios,
scenario_index,
topology_generator,
generation_generator,
admittance_generator,
processed_data,
file_paths["error_log"],
args.settings.include_dc_res,
jl,
)
elif args.settings.mode == "pf":
processed_data = process_scenario_pf_mode(
net,
scenarios,
scenario_index,
topology_generator,
generation_generator,
admittance_generator,
processed_data,
file_paths["error_log"],
args.settings.include_dc_res,
args.settings.pf_fast,
args.settings.dcpf_fast,
jl,
)
else:
raise ValueError("Invalid mode!")

pbar.update(1)
# Process scenarios sequentially with deterministic seed
# Use custom_seed to control randomness for reproducibility
with custom_seed(seed + 1):
with open(file_paths["tqdm_log"], "a") as f:
with tqdm(
total=args.load.scenarios,
desc="Processing scenarios",
file=Tee(sys.stdout, f),
miniters=5,
) as pbar:
for scenario_index in range(args.load.scenarios):
# Process the scenario
if args.settings.mode == "opf":
processed_data = process_scenario_opf_mode(
net,
scenarios,
scenario_index,
topology_generator,
generation_generator,
admittance_generator,
processed_data,
file_paths["error_log"],
args.settings.include_dc_res,
jl,
)
elif args.settings.mode == "pf":
processed_data = process_scenario_pf_mode(
net,
scenarios,
scenario_index,
topology_generator,
generation_generator,
admittance_generator,
processed_data,
file_paths["error_log"],
args.settings.include_dc_res,
args.settings.pf_fast,
args.settings.dcpf_fast,
jl,
)
else:
raise ValueError("Invalid mode!")

pbar.update(1)

# Save final data
_save_generated_data(
Expand Down Expand Up @@ -339,14 +364,14 @@ def generate_power_flow_data_distributed(
- scenarios_{generator}.log: Load scenario generation notes
"""
# Setup environment
args, base_path, file_paths = _setup_environment(config)
args, base_path, file_paths, seed = _setup_environment(config)

# check if mode is valid
if args.settings.mode not in ["opf", "pf"]:
raise ValueError("Invalid mode!")

# Prepare network and scenarios
net, scenarios = _prepare_network_and_scenarios(args, file_paths)
net, scenarios = _prepare_network_and_scenarios(args, file_paths, seed)

# Initialize topology generator
topology_generator = initialize_topology_generator(args.topology_perturbation, net)
Expand Down Expand Up @@ -405,6 +430,7 @@ def generate_power_flow_data_distributed(
args.settings.dcpf_fast,
file_paths["solver_log_dir"],
args.settings.max_iter,
seed,
)
for chunk in scenario_chunks
]
Expand Down
1 change: 1 addition & 0 deletions gridfm_datakit/interactive.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def create_config() -> Dict[str, Any]:
"dcpf_fast": dcpf_fast.value,
"enable_solver_logs": enable_solver_logs.value,
"max_iter": max_iter.value,
"seed": None, # seed is not used in the interactive interface
},
}
return config
Expand Down
88 changes: 87 additions & 1 deletion gridfm_datakit/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

import os
import shutil
import requests
from importlib import resources
import pandas as pd
Expand All @@ -22,6 +23,8 @@
VM,
VA,
REF,
PV,
PQ,
)
from gridfm_datakit.utils.idx_gen import GEN_BUS, GEN_STATUS, PG, QG
from gridfm_datakit.utils.idx_brch import (
Expand All @@ -36,12 +39,71 @@
BR_R_ASYM,
BR_X_ASYM,
)
from gridfm_datakit.utils.idx_cost import NCOST
from gridfm_datakit.utils.idx_cost import NCOST, MODEL, POLYNOMIAL
import warnings
import networkx as nx
import numpy as np
import copy
from typing import Dict, Tuple, Any
import tempfile
from juliapkg.state import STATE
from juliapkg.deps import run_julia, executable


def correct_network(network_path: str, force: bool = False) -> str:
    """Load a MATPOWER network with PowerModels.jl and save a corrected copy.

    The file is parsed by PowerModels (which normalizes common MATPOWER-file
    inconsistencies) and re-exported next to the original as
    ``<name>_corrected<ext>``. The result is cached: if the corrected file
    already exists it is reused unless ``force`` is set.

    Args:
        network_path: Path to the original MATPOWER .m file.
        force: If True, regenerate the corrected file even if it exists.

    Returns:
        Path to the corrected network file.

    Raises:
        FileNotFoundError: If input file does not exist.
        RuntimeError: If PowerModels fails or produces an empty file.
    """
    if not os.path.exists(network_path):
        raise FileNotFoundError(f"Network file not found: {network_path}")

    base_path, ext = os.path.splitext(network_path)
    corrected_path = f"{base_path}_corrected{ext}"

    # Reuse the cached corrected file unless regeneration is forced.
    if os.path.exists(corrected_path) and not force:
        return corrected_path

    # Use temporary file for atomic replace: a half-written corrected file
    # must never be observed at corrected_path.
    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".m")
    os.close(tmp_fd)

    def _jl_str_literal(s: str) -> str:
        # Escape backslashes and double quotes so the path survives being
        # embedded in a Julia string literal (e.g. Windows paths contain '\',
        # which Julia would otherwise treat as an escape sequence).
        return '"' + s.replace("\\", "\\\\").replace('"', '\\"') + '"'

    try:
        project = STATE["project"]
        jl_exe = executable()

        # Julia script as a list of lines
        julia_code = [
            "using PowerModels",
            f"data = PowerModels.parse_file({_jl_str_literal(network_path)})",
            f"PowerModels.export_matpower({_jl_str_literal(tmp_path)}, data)",
        ]

        # Run Julia in the juliapkg-managed project
        run_julia(julia_code, project=project, executable=jl_exe)

        # Sanity check: Julia can exit cleanly yet write nothing useful.
        if not os.path.exists(tmp_path) or os.path.getsize(tmp_path) == 0:
            raise RuntimeError("Julia produced empty MATPOWER file")

        # Atomically replace target file (use shutil.move to allow cross-device)
        shutil.move(tmp_path, corrected_path)
        return corrected_path

    finally:
        # Best-effort cleanup; after a successful move tmp_path no longer exists.
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)


def numpy_to_matlab_matrix(array: np.ndarray, name: str) -> str:
Expand Down Expand Up @@ -132,6 +194,11 @@ def __init__(self, mpc: Dict[str, Any]) -> None:
assert np.all(np.isin(self.gens[:, GEN_BUS], self.buses[:, BUS_I])), (
"All generator buses should be in bus IDs"
)

assert np.all(self.gencosts[:, MODEL] == POLYNOMIAL), (
"MODEL should be POLYNOMIAL"
)

# assert all generators have the same number of cost coefficients
assert np.all(self.gencosts[:, NCOST] == self.gencosts[:, NCOST][0]), (
"All generators must have the same number of cost coefficients"
Expand Down Expand Up @@ -345,6 +412,21 @@ def deactivate_gens(self, idx_gens: np.ndarray) -> None:
)
self.gens[idx_gens, GEN_STATUS] = 0

# -----------------------------
# Update PV buses that lost all generators → PQ
# -----------------------------
n_buses = self.buses.shape[0]

# Count in-service generators per bus
gens_on = self.gens[self.idx_gens_in_service]
gen_count = np.bincount(gens_on[:, GEN_BUS].astype(int), minlength=n_buses)

# Boolean mask: PV buses with no in-service generator
pv_no_gen = (self.buses[:, BUS_TYPE] == PV) & (gen_count == 0)

# Set them to PQ
self.buses[pv_no_gen, BUS_TYPE] = PQ

def check_single_connected_component(self) -> bool:
"""
Check that the network forms a single connected component.
Expand Down Expand Up @@ -541,6 +623,7 @@ def load_net_from_file(network_path: str) -> Network:
ValueError: If the file format is invalid.
"""
# Load network using matpowercaseframes
network_path = correct_network(network_path)
mpc_frames = CaseFrames(network_path)
mpc = {
key: mpc_frames.__getattribute__(key)
Expand Down Expand Up @@ -569,6 +652,7 @@ def load_net_from_pglib(grid_name: str) -> Network:
FileNotFoundError: If the file cannot be found after download.
ValueError: If the file format is invalid.
"""

# Construct file paths
file_path = str(
resources.files("gridfm_datakit.grids").joinpath(f"pglib_opf_{grid_name}.m"),
Expand All @@ -586,6 +670,8 @@ def load_net_from_pglib(grid_name: str) -> Network:
with open(file_path, "wb") as f:
f.write(response.content)

file_path = correct_network(file_path)

# Load network using matpowercaseframes
mpc_frames = CaseFrames(file_path)
mpc = {
Expand Down
Loading