Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
b24e967
adding _prepared_data to ProcessStep that does not get reset after ru…
toqduj Nov 23, 2025
d98b7f8
adding calculators for X-ray scattering geometry, which add Q, Q0, Q1…
toqduj Nov 23, 2025
9b21584
small fixes to the XS Geometry class
toqduj Nov 25, 2025
3f6c7de
fixing basedata to maintain auxiliary properties in math operations
toqduj Nov 25, 2025
db4092f
updating reduce_dimensionality to also maintain all basedata information
toqduj Nov 25, 2025
e3726ac
fixing units case of 1d data
toqduj Nov 25, 2025
977610c
Merge branch 'calculate_geometry' into required_improvements
toqduj Nov 25, 2025
abbac45
improving process_step_registry
toqduj Nov 25, 2025
2dda60a
adding a copy method to BaseData
toqduj Nov 26, 2025
06aca7b
improving BaseData __repr__
toqduj Nov 26, 2025
acdf1aa
validation of the 'axes' metadata shape in BaseData
toqduj Nov 26, 2025
dc721fd
simplifying _VarianceDict
toqduj Nov 26, 2025
23210ab
fix for non-multiplicative units handling - now raises error
toqduj Nov 26, 2025
e75a65a
updated MessageHandler with use and test in reduce_dimensionality pro…
toqduj Nov 26, 2025
d8a8c66
added an indexer for basedata and associated test
toqduj Nov 26, 2025
f4d7a67
simplifying xs_geometry
toqduj Nov 26, 2025
fcc8ae2
correcting data dimensions to [..., y, x]
toqduj Nov 26, 2025
1c57f16
combining two methods for conciseness
toqduj Nov 28, 2025
1ed3568
forgot tests
toqduj Nov 28, 2025
d56d32b
now with MessageHandler messages
toqduj Nov 28, 2025
700b558
added the required_arguments documentation to xs_geometry
toqduj Nov 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 130 additions & 35 deletions src/modacor/dataclasses/basedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@


# make a varianceDict that quacks like a dict, but is secretly a view on the uncertainties dict
class _VarianceDict(MutableMapping, dict):
class _VarianceDict(MutableMapping):
def __init__(self, parent: BaseData):
self._parent = parent

Expand All @@ -50,15 +50,6 @@ def __iter__(self):
def __len__(self):
return len(self._parent.uncertainties)

def keys(self):
# Return keys of the underlying uncertainties dict
return self._parent.uncertainties.keys()

def items(self):
# Return items as (key, variance) pairs
tmp = {key: self[key] for key in self._parent.uncertainties}
return tmp.items()

def __contains__(self, x) -> bool:
return x in self._parent.uncertainties

Expand Down Expand Up @@ -134,6 +125,28 @@ def _copy_uncertainties(unc_dict: Dict[str, np.ndarray]) -> Dict[str, np.ndarray
return {k: np.array(v, copy=True) for k, v in unc_dict.items()}


def _inherit_metadata(source: BaseData, result: BaseData) -> BaseData:
    """
    Propagate metadata-style attributes (axes, rank_of_data, weights) from
    ``source`` onto ``result``, leaving the numerical content of ``result``
    (signal, units, uncertainties) untouched.

    Returns the mutated ``result`` for call-chaining convenience.
    """
    # Fresh list object so later mutation of result.axes cannot alias the
    # source list; the axis objects themselves remain shared (shallow copy).
    result.axes = source.axes[:]

    # Carry rank_of_data over unchanged; the attrs on_setattr validation on
    # BaseData re-checks it against the new signal.
    result.rank_of_data = source.rank_of_data

    # Best-effort weights propagation: only adopt the source weights when they
    # broadcast cleanly against the result signal, otherwise keep defaults.
    try:
        source_weights = np.asarray(source.weights)
        validate_broadcast(result.signal, source_weights, "weights")
        result.weights = np.broadcast_to(source_weights, result.signal.shape).copy()
    except ValueError:
        logger.debug("Could not broadcast source weights to result shape; leaving default weights on result BaseData.")

    return result


def _binary_basedata_op(
left: BaseData,
right: BaseData,
Expand Down Expand Up @@ -217,7 +230,8 @@ def _binary_basedata_op(

result.uncertainties[key] = sigma

return result
# Inherit metadata (axes, rank_of_data, weights) from the left operand
return _inherit_metadata(left, result)


def _unary_basedata_op(
Expand Down Expand Up @@ -258,7 +272,8 @@ def _unary_basedata_op(
sigma_y[valid] = deriv[valid] * np.abs(err_b[valid])
result.uncertainties[key] = sigma_y

return result
# Preserve metadata from the original element
return _inherit_metadata(element, result)


class UncertaintyOpsMixin:
Expand Down Expand Up @@ -357,11 +372,7 @@ def __rtruediv__(self, other: Any) -> BaseData:
# ---- unary dunder + convenience methods ----

def __neg__(self) -> BaseData:
return BaseData(
signal=-self.signal,
units=self.units,
uncertainties=_copy_uncertainties(self.uncertainties),
)
return negate_basedata_element(self)

def __pow__(self, exponent: float, modulo=None) -> BaseData:
if modulo is not None:
Expand Down Expand Up @@ -452,14 +463,14 @@ class BaseData(UncertaintyOpsMixin):
"""

# required:
# Core data array stored as an xarray DataArray
# Core data array stored as a numpy ndarray
signal: np.ndarray = field(
converter=signal_converter, validator=v.instance_of(np.ndarray), on_setattr=setters.validate
)
# Unit of signal*scaling+offset - required input 'dimensionless' for dimensionless data
units: pint.Unit = field(validator=v.instance_of(pint.Unit), converter=ureg.Unit, on_setattr=setters.validate) # type: ignore
# optional:
# Dict of variances represented as xarray DataArray objects; defaulting to an empty dict
# Dict of variances represented as numpy ndarray objects; defaulting to an empty dict
uncertainties: Dict[str, np.ndarray] = field(
factory=dict, converter=dict_signal_converter, validator=v.instance_of(dict), on_setattr=setters.validate
)
Expand Down Expand Up @@ -491,6 +502,14 @@ def __attrs_post_init__(self):
# Validate weights
validate_broadcast(self.signal, self.weights, "weights")

# Warn if axes length does not match signal.ndim
if self.axes and len(self.axes) != self.signal.ndim:
logger.debug(
"BaseData.axes length (%d) does not match signal.ndim (%d).",
len(self.axes),
self.signal.ndim,
)

@property
def variances(self) -> _VarianceDict:
"""
Expand Down Expand Up @@ -565,27 +584,102 @@ def to_units(self, new_units: pint.Unit, multiplicative_conversion=True) -> None
logger.debug("No unit conversion needed, units are the same.")
return

if not multiplicative_conversion:
# This path is subtle for offset units (e.g. degC <-> K) and we
# don't want to silently get uncertainties wrong.
raise NotImplementedError(
"Non-multiplicative unit conversions are not yet implemented for BaseData.\n"
"If you need this, we should design explicit rules (e.g. using delta units)."
)

logger.debug(f"Converting from {self.units} to {new_units}.")

if multiplicative_conversion:
# simple unit conversion, can be done to scalar
# Convert signal
cfact = new_units.m_from(self.units)
self.signal *= cfact
self.units = new_units
# Convert uncertainty
for key in self.uncertainties: # fastest as far as my limited testing goes against iterating over items():
self.uncertainties[key] *= cfact
# simple unit conversion, can be done to scalar
# Convert signal
cfact = new_units.m_from(self.units)
self.signal *= cfact
self.units = new_units
# Convert uncertainty
for key in self.uncertainties: # fastest as far as my limited testing goes against iterating over items():
self.uncertainties[key] *= cfact

def indexed(self, indexer: Any, *, rank_of_data: int | None = None) -> "BaseData":
"""
Return a new BaseData corresponding to ``self`` indexed by ``indexer``.

Parameters
----------
indexer :
Any valid NumPy indexer (int, slice, tuple of slices, boolean mask, ...),
applied consistently to ``signal`` and all uncertainty / weight arrays.
rank_of_data :
Optional explicit rank_of_data for the returned BaseData. If omitted,
it will default to ``min(self.rank_of_data, result.signal.ndim)``.

Notes
-----
- Units are preserved.
- Uncertainties and weights are sliced with the same indexer where possible.
- Axes handling is conservative: existing axes are kept unchanged. If you
want axes to track slicing semantics more strictly, a higher-level
helper can be added later.
"""
sig = np.asarray(self.signal)[indexer]

# Slice uncertainties with the same indexer
new_uncs: Dict[str, np.ndarray] = {}
for k, u in self.uncertainties.items():
u_arr = np.asarray(u, dtype=float)
# broadcast to signal shape, then apply the same indexer
u_full = np.broadcast_to(u_arr, self.signal.shape)
new_uncs[k] = u_full[indexer].copy()

# Try to slice weights; if shapes don't line up, fall back to scalar 1.0
try:
w_arr = np.asarray(self.weights, dtype=float)
new_weights = w_arr[indexer].copy()
except Exception:
new_weights = np.array(1.0, dtype=float)

# Decide rank_of_data for the result
if rank_of_data is None:
new_rank = min(self.rank_of_data, np.ndim(sig))
else:
new_signal = ureg.Quantity(self.signal, self.units).to(new_units).magnitude
# Convert uncertainties
for key in self.uncertainties:
# I am not sure but I think this would be the right way for non-multiplicative conversions
self.uncertainties[key] *= new_signal / self.signal
new_rank = int(rank_of_data)

result = BaseData(
signal=np.asarray(sig, dtype=float),
units=self.units,
uncertainties=new_uncs,
weights=new_weights,
# For now we keep axes as-is; more sophisticated axis handling can be
# added once the usage patterns are clear.
axes=list(self.axes),
rank_of_data=new_rank,
)
return result

def copy(self, with_axes: bool = True) -> "BaseData":
    """
    Create an independent BaseData with deep-copied signal, uncertainties
    and weights arrays.

    Parameters
    ----------
    with_axes :
        When True (default) the axes list is shallow-copied: the list object
        is new, but the axis objects inside it are still shared with this
        instance. When False the copy gets an empty axes list.
    """
    copied_axes = self.axes[:] if with_axes else []
    return BaseData(
        signal=np.copy(self.signal),
        units=self.units,
        uncertainties=_copy_uncertainties(self.uncertainties),
        weights=np.copy(self.weights),
        axes=copied_axes,
        rank_of_data=self.rank_of_data,
    )

def __repr__(self):
return f"BaseData(signal={self.signal}, uncertainties={self.uncertainties}, units={self.units})"
return (
f"BaseData(shape={self.signal.shape}, dtype={self.signal.dtype}, units={self.units}, "
f"n_uncertainties={len(self.uncertainties)}, rank_of_data={self.rank_of_data})"
)

def __str__(self):
return f'{self.signal} {self.units} ± {[f"{u} ({k})" for k, u in self.uncertainties.items()]}'
Expand All @@ -598,11 +692,12 @@ def __str__(self):

def negate_basedata_element(element: BaseData) -> BaseData:
"""Negate a BaseData element with uncertainty and units propagation."""
return BaseData(
result = BaseData(
signal=-element.signal,
units=element.units,
uncertainties=_copy_uncertainties(element.uncertainties),
)
return _inherit_metadata(element, result)


def sqrt_basedata_element(element: BaseData) -> BaseData:
Expand Down
32 changes: 21 additions & 11 deletions src/modacor/dataclasses/messagehandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,18 @@

import logging

# logger = logging.getLogger(__name__)
# Module-level singleton; lazily created by get_default_handler().
_default_handler: MessageHandler | None = None


def get_default_handler(level: int = logging.INFO) -> MessageHandler:
    """
    Return the MoDaCor-wide default MessageHandler, creating it on first use.

    Useful for overarching logging like in the pipeline runner. For specific
    modules or classes, it's better to create dedicated named MessageHandler
    instances.

    Parameters
    ----------
    level :
        Logging level applied only when the singleton is first created;
        subsequent calls ignore it and return the existing handler unchanged.
    """
    global _default_handler
    handler = _default_handler
    if handler is None:
        handler = MessageHandler(level=level, name="MoDaCor")
        _default_handler = handler
    return handler


class MessageHandler:
Expand All @@ -24,6 +35,7 @@ class MessageHandler:

Args:
level (int): The logging level to use. Defaults to logging.INFO.
name (str): Logger name (typically __name__).
"""

def __init__(self, level: int = logging.INFO, name: str = "MoDaCor", **kwargs):
Expand All @@ -33,20 +45,18 @@ def __init__(self, level: int = logging.INFO, name: str = "MoDaCor", **kwargs):
self.logger = logging.getLogger(name)
self.logger.setLevel(level)

self.consoleLogHandler = logging.StreamHandler()
self.consoleLogHandler.setLevel(level)
# Avoid adding multiple console handlers if this handler is created multiple times
if not any(isinstance(h, logging.StreamHandler) for h in self.logger.handlers):
console_handler = logging.StreamHandler()
console_handler.setLevel(level)

formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
self.consoleLogHandler.setFormatter(formatter)
self.logger.addHandler(self.consoleLogHandler)
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
console_handler.setFormatter(formatter)
self.logger.addHandler(console_handler)

def log(self, message: str, level: int = None) -> None: # , name: str = None):
def log(self, message: str, level: int = None) -> None:
if level is None:
level = self.level

# if name is None:
# name = self.name
# does not take a name: # self.logger = logging.getLogger(name)
self.logger.log(msg=message, level=level)

def info(self, message: str):
Expand Down
3 changes: 3 additions & 0 deletions src/modacor/dataclasses/process_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ class ProcessStep:

# if the process produces intermediate arrays, they are stored here, optionally cached
produced_outputs: dict[str, Any] = field(factory=dict)
# intermediate prepared data for the process step
_prepared_data: dict[str, Any] = field(factory=dict)

# a message handler, supporting logging, warnings, errors, etc. emitted by the process
# during execution
Expand Down Expand Up @@ -129,6 +131,7 @@ def reset(self):
self.__prepared = False
self.executed = False
self.produced_outputs = {}
self._prepared_data = {}

def modify_config_by_dict(self, by_dict: dict = {}) -> None:
"""Modify the configuration of the process step by a dictionary"""
Expand Down
31 changes: 31 additions & 0 deletions src/modacor/modules/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# SPDX-License-Identifier: BSD-3-Clause
# /usr/bin/env python3
# -*- coding: utf-8 -*-

from __future__ import annotations

__coding__ = "utf-8"
__authors__ = ["Brian R. Pauw"] # add names to the list as appropriate
__copyright__ = "Copyright 2025, The MoDaCor team"
__date__ = "25/11/2025"
__status__ = "Development" # "Development", "Production"
# end of header and standard imports

# official steps are imported here for ease
from modacor.modules.base_modules.divide import Divide
from modacor.modules.base_modules.multiply import Multiply
from modacor.modules.base_modules.poisson_uncertainties import PoissonUncertainties
from modacor.modules.base_modules.reduce_dimensionality import ReduceDimensionality
from modacor.modules.base_modules.subtract import Subtract
from modacor.modules.base_modules.subtract_databundles import SubtractDatabundles
from modacor.modules.technique_modules.xs_geometry import XSGeometry

__all__ = [
"Divide",
"Multiply",
"PoissonUncertainties",
"ReduceDimensionality",
"SubtractDatabundles",
"Subtract",
"XSGeometry",
]
Loading