Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/lib/data/adaptors/species_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ def apply_list(self, data: List) -> List:
if info is None:
available = sorted(data.metadata.species.keys())
raise ValueError(f"unknown species {self.species_key!r}; available: {available}")
data = data.assign_metadata(subject=info.display)
if len(data.metadata.species) == 1:
# Dataset is already single-species (e.g. BP per-species file, or H5
# with one species). Row filter is a no-op; skip to avoid requiring
# q/m columns that BP data doesn't have.
return data
df = data.data
df = df[(df["q"] == info.q) & (df["m"] == info.m)]
return data.assign_data(df).assign_metadata(subject=info.display)
Expand Down
8 changes: 7 additions & 1 deletion src/lib/data/loader_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,16 @@
# Global registry mapping a data-file prefix to the DataSource subclass that
# loads it. Entries are written by register_loader() (directly, or through the
# @loader decorator); later registrations for the same prefix replace earlier
# ones.
LOADERS: dict[str, type[DataSource]] = {}


def register_loader(prefix: str, cls: type[DataSource]) -> None:
    """Bind `cls` to `prefix` in the global LOADERS registry.

    This is the imperative counterpart of the @loader decorator. If `prefix`
    is already registered, the previous class is silently replaced.
    """
    LOADERS.update({prefix: cls})


def loader(*prefixes: str):
def decorator[T: type[DataSource]](cls: T) -> T:
for p in prefixes:
LOADERS[p] = cls
register_loader(p, cls)
return cls

return decorator
127 changes: 127 additions & 0 deletions src/lib/data/loaders/particle_bp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import pathlib
import re

import dask.dataframe as dd
import numpy as np
import xarray as xr

from lib.config import CONFIG
from lib.data.data_with_attrs import LazyList, ListMetadata
from lib.data.loader_registry import register_loader
from lib.data.source import DataSource
from lib.file_util import get_available_steps
from lib.species import SpeciesInfo, build_species_display
from lib.var_info_registry import lookup

# Matches particle BP entry names of the form "prt.<species_key>.<digits>.bp".
# Group 1 captures <species_key>, which may not itself contain a ".".
_DISCOVER_PARTICLE_BP_PREFIX_RE = re.compile(r"^prt\.([^.]+)\.\d+\.bp$")


def discover_particle_bp_loaders(data_dir: pathlib.Path) -> None:
    """Register ParticleLoaderBp for every particle species with BP output in `data_dir`.

    Each entry of `data_dir` whose name looks like "prt.<species_key>.<step>.bp"
    contributes the prefix "prt.<species_key>". Entries with any other name
    are ignored.

    Args:
        data_dir: Directory to scan (not recursively).
    """
    # note that BP "files" are actually directories, so entries are matched by
    # name only, without checking whether they are regular files.
    matches = (_DISCOVER_PARTICLE_BP_PREFIX_RE.match(entry.name) for entry in data_dir.iterdir())
    species_keys = {m.group(1) for m in matches if m is not None}

    # A species has one entry per output step and iterdir() yields entries in
    # arbitrary order: register each species exactly once, in sorted order, so
    # the registry contents and ordering are reproducible between runs.
    for species_key in sorted(species_keys):
        register_loader(f"prt.{species_key}", ParticleLoaderBp)


def _get_path(prefix: str, step: int) -> pathlib.Path:
    """Return the path of the BP output for `prefix` at `step` inside CONFIG.data_dir.

    The step number is rendered with a minimum width of nine characters,
    zero-padded.
    """
    entry_name = f"{prefix}.{step:09}.bp"
    return CONFIG.data_dir / entry_name


def _read_attrs(path: pathlib.Path) -> dict:
    """Return a plain-dict copy of the attributes stored in the BP file at `path`.

    The dataset is closed again before returning; only the copied attrs
    outlive the call.
    """
    with xr.open_dataset(path) as dataset:
        attrs_copy = dict(dataset.attrs)
    return attrs_copy


def _load_step_df(path: pathlib.Path, time: float) -> dd.DataFrame:
    """Lazily load one BP step as a dask DataFrame carrying a constant `t` column.

    The column that BP assigns for the particle dimension index is dropped
    from the result.

    Note: `t` is attached with map_partitions on purpose. dd.DataFrame.assign
    would create a broadcast-scalar column, and calling `to_dask_array()` on
    such a column trips an IndexError in dask-expr's optimizer when the
    dataframe originates from xarray's to_dask_dataframe. map_partitions
    yields a real per-row column that survives the optimizer.
    """
    # First pass: a short-lived open, only to learn which dimension has more
    # than one entry. That dimension is treated as the particle dimension.
    with xr.open_dataset(path) as probe:
        particle_dim = next(name for name, size in probe.sizes.items() if size > 1)

    # Second pass: chunked open along the particle dimension.
    chunked = xr.open_dataset(path, chunks={particle_dim: CONFIG.dask_chunk_size})
    squeezed = chunked.squeeze(drop=True)
    frame = squeezed.to_dask_dataframe().drop(columns=[particle_dim])
    return frame.map_partitions(lambda part, t: part.assign(t=t), np.float64(time))


# Splits a species key into three groups: (1) leading letters, (2) a possibly
# empty run of "+"/"-" characters, (3) possibly empty trailing digits.
_SPECIES_KEY_RE = re.compile(r"^([a-zA-Z]+)([+-]*)(\d*)$")


class ParticleLoaderBp(DataSource):
    """Particle data source for ADIOS2 BP output, bound to one prt.<species_key> prefix.

    This class is deliberately not decorated with @loader: which prefixes are
    valid depends on the files found in the data directory at run time, so it
    is registered dynamically by lib.parsing.parse._get_parser instead.
    """

    def __init__(self, prefix: str, active_key: str | None):
        """Store the prefix, derive the species key, and list the available steps."""
        self.prefix = prefix
        # The species key is everything after the first "." of the prefix.
        self.species_key = prefix.split(".", 1)[1]
        self.active_key = active_key
        self.steps = get_available_steps(f"{prefix}.", ".bp")

    def get_data(self) -> LazyList:
        """Build the lazy particle list spanning all available steps.

        Per-step attrs supply the time axis; charge, mass and grid geometry
        are read from the first step's attrs only.
        """
        attrs_per_step = [_read_attrs(_get_path(self.prefix, step)) for step in self.steps]
        times = np.array([float(attrs["time"]) for attrs in attrs_per_step])

        first = attrs_per_step[0]
        q = float(first["q"])
        m = float(first["m"])

        # Charge / mass decorate the display label only when the species key
        # itself carries sign characters / trailing digits.
        key_parts = _SPECIES_KEY_RE.match(self.species_key)
        has_sign_part = bool(key_parts and key_parts.group(2))
        has_mass_part = bool(key_parts and key_parts.group(3))
        display = build_species_display(
            "Electrons" if q < 0 else "Ions",
            q if has_sign_part else None,
            m if has_mass_part else None,
        )
        info = SpeciesInfo(self.species_key, display, q, m)

        step_frames = [
            _load_step_df(_get_path(self.prefix, step), step_time)
            for step, step_time in zip(self.steps, times)
        ]
        df = dd.concat(step_frames)

        # One evenly spaced axis per spatial dimension, starting at the corner
        # and excluding the far edge, followed by the time axis.
        axis_specs = zip(
            ("x", "y", "z"),
            np.asarray(first["corner"]),
            np.asarray(first["length"]),
            np.asarray(first["gdims"]),
        )
        coordss = {}
        for axis, origin, extent, count in axis_specs:
            coordss[axis] = np.linspace(origin, origin + extent, count, endpoint=False)
        coordss["t"] = times

        data = LazyList(
            df,
            ListMetadata(
                weight_key="w",
                coordss=coordss,
                species={self.species_key: info},
                subject=info.display,
            ),
        )

        # The var_info registry is keyed by plain "prt" rather than by the
        # per-species prefix, so look columns up without the species suffix.
        var_infos = {dim: lookup("prt", dim) for dim in data.dims}
        return data.assign_metadata(
            name_fragments=self._get_name_fragments(),
            active_key=self.active_key,
            var_infos=var_infos,
        )

    def _get_name_fragments(self) -> list[str]:
        """Return [prefix], followed by active_key when one is set."""
        if self.active_key is None:
            return [self.prefix]
        return [self.prefix, self.active_key]
24 changes: 12 additions & 12 deletions src/lib/data/loaders/particle_h5.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from lib.data.source import DataSource
from lib.file_util import get_available_steps
from lib.latex import Latex
from lib.species import SpeciesInfo
from lib.species import SpeciesInfo, build_species_display
from lib.var_info_registry import lookup

PRT_PARTICLES_KEY = "particles/p0/1d"
Expand Down Expand Up @@ -132,33 +132,33 @@ def _build_species_dict(qm: dict[SpeciesIdx, tuple[Charge, Mass]]) -> dict[str,
result: dict[str, SpeciesInfo] = {}
for sign, qms in sign_groups.items():
base, subject = ("i", "Ions") if sign > 0 else ("e", "Electrons")
if len(qms) == 1:
# just one species in sign-group; trivial key

masses_are_unique = len(set(qm[1] for qm in qms)) == len(qms)
charges_are_unique = len(set(qm[0] for qm in qms)) == len(qms)

if charges_are_unique and masses_are_unique:
q, m = qms[0]
key = base
display = Latex(rf"\text{{{subject}}}")
display = build_species_display(subject)
result[key] = SpeciesInfo(species_key=key, display=display, q=q, m=m)
elif len(set(qm[0] for qm in qms)) == len(qms):
# each species in sign-group has unique charge; label keys by charge only
elif charges_are_unique:
q_char = "+" if sign > 0 else "-"
for q, m in qms:
n_signs = int(abs(q))
key = base + q_char * n_signs
display = Latex(rf"\text{{{subject}}}^{{{n_signs if n_signs > 1 else ""}{q_char}}}")
display = build_species_display(subject, show_q=q)
result[key] = SpeciesInfo(species_key=key, display=display, q=q, m=m)
elif len(set(qm[1] for qm in qms)) == len(qms):
# each species in sign-group has unique mass; disambiguate by mass only
elif masses_are_unique:
for q, m in qms:
key = f"{base}{m:g}"
display = Latex(rf"\text{{{subject}}}_{{{m:g}}}")
display = build_species_display(subject, show_m=m)
result[key] = SpeciesInfo(species_key=key, display=display, q=q, m=m)
else:
# worst-case scenario: conflicting charges and masses
q_char = "+" if sign > 0 else "-"
for q, m in qms:
n_signs = int(abs(q))
key = f"{base}{q_char * n_signs}{m:g}"
display = Latex(rf"\text{{{subject}}}^{{{n_signs if n_signs > 1 else ""}{q_char}}}_{{{m:g}}}")
display = build_species_display(subject, show_q=q, show_m=m)
result[key] = SpeciesInfo(species_key=key, display=display, q=q, m=m)
return result

Expand Down
4 changes: 4 additions & 0 deletions src/lib/parsing/parse.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import argparse
from pathlib import Path

from lib.config import CONFIG
from lib.data.loader_registry import LOADERS
from lib.data.loaders.particle_bp import discover_particle_bp_loaders
from lib.parsing.args import Args
from lib.parsing.args_registry import CUSTOM_ARGS


def _get_parser() -> argparse.ArgumentParser:
# FIXME: this is a hack. Shouldn't modify a global for this.
discover_particle_bp_loaders(CONFIG.data_dir)
parser = argparse.ArgumentParser(prog="psc-plot")

parser.add_argument("prefix", choices=LOADERS.keys(), help="data file prefix")
Expand Down
12 changes: 12 additions & 0 deletions src/lib/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,15 @@ class SpeciesInfo:
display: Latex
q: float
m: float


def build_species_display(subject: str, show_q: float | None = None, show_m: float | None = None) -> Latex:
    r"""Build the LaTeX label for a species.

    `subject` is wrapped in `\text{...}`. When `show_q` is given, a charge
    superscript is appended: the sign, preceded by the integer magnitude only
    if that magnitude exceeds 1. When `show_m` is given, the mass is appended
    as a subscript in `g` format.
    """
    pieces = [rf"\text{{{subject}}}"]

    if show_q is not None:
        if show_q > 0:
            sign = "+"
        else:
            sign = "-"
        magnitude = int(abs(show_q))
        # A magnitude of 0 or 1 is implied by the bare sign.
        count = str(magnitude) if magnitude > 1 else ""
        pieces.append(rf"^{{{count}{sign}}}")

    if show_m is not None:
        pieces.append(rf"_{{{show_m:g}}}")

    return Latex("".join(pieces))
4 changes: 0 additions & 4 deletions tests/data/test-2d/gauss.000000000.bp/profiling.json

This file was deleted.

4 changes: 0 additions & 4 deletions tests/data/test-2d/gauss.000000001.bp/profiling.json

This file was deleted.

4 changes: 0 additions & 4 deletions tests/data/test-2d/gauss.000000002.bp/profiling.json

This file was deleted.

4 changes: 0 additions & 4 deletions tests/data/test-2d/gauss.000000003.bp/profiling.json

This file was deleted.

4 changes: 0 additions & 4 deletions tests/data/test-2d/gauss.000000004.bp/profiling.json

This file was deleted.

Loading
Loading