Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DRAFT] Redesign/datasets ICA addition #56

Open
wants to merge 35 commits into
base: develop-eeg
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
66faeaa
initial test for ica dynamic item inclusion
vmcru Mar 4, 2025
4b62e60
exposed method for ica, used mne BIDSPath, path needs checking for pr…
vmcru Mar 5, 2025
c3ec3dc
Apply suggestions from code review
bruAristimunha Mar 5, 2025
77a8c59
Apply suggestions from code review
bruAristimunha Mar 5, 2025
a222c7c
Merge branch 'redesign/datasets' into redesign/datasets
vmcru Mar 7, 2025
3a78e1d
modifications to ica and validate to pass merge
vmcru Mar 7, 2025
86af6be
removed unused imports from all files and added credits
vmcru Mar 7, 2025
f29636c
updated validate_ica.py
vmcru Mar 7, 2025
30ff4be
Apply suggestions from code review
bruAristimunha Mar 18, 2025
f3330d9
support for ica as process added.
vmcru Mar 23, 2025
4e8bb55
Added hashing, setting check, and fixed caching bug.
vmcru Mar 23, 2025
b592c60
precommit mods
vmcru Mar 23, 2025
e2973aa
formatting fix
vmcru Mar 23, 2025
3f8b646
optional filtering added
vmcru Mar 23, 2025
73d1e93
added hashing to description name.
vmcru Mar 23, 2025
ade6b8d
added python-picard dependency in extra-requirements.txt for ica pica…
vmcru Mar 23, 2025
8b6633e
format fix extra-requirements.txt
vmcru Mar 23, 2025
a1031d2
Update benchmarks/MOABB/dataio/ica.py
vmcru Mar 25, 2025
282c016
Update benchmarks/MOABB/dataio/ica.py
vmcru Mar 25, 2025
ad2e39d
Update benchmarks/MOABB/dataio/datasets.py
vmcru Mar 26, 2025
822dc46
renamic critical to base and removing unnecessary comments
vmcru Mar 26, 2025
c524d11
tests upgrading pytest to see if it fixes breaks
vmcru Mar 26, 2025
a6fb933
added docstrings to process andn dynamic items functions.
vmcru Mar 26, 2025
544e638
formatting fixes
vmcru Mar 26, 2025
77daeeb
docstring fix
vmcru Mar 26, 2025
84e09dc
docstring adaptations for validate_ica.py
vmcru Mar 26, 2025
96e2546
precommit fixes
vmcru Mar 26, 2025
87ef955
Merge branch 'develop-eeg' into redesign/datasets
vmcru Mar 28, 2025
6e22fe5
rework of the metadata checking and storing
vmcru Mar 28, 2025
ab42aa9
metadata changes
vmcru Mar 28, 2025
2652128
precommit fixes
vmcru Mar 28, 2025
f2263e4
updates to the test files and minor tqeat to ica parameters.
vmcru Mar 28, 2025
210dd9e
adapted hashing for consistency and reproducibility. removed optional…
vmcru Mar 28, 2025
d787c0b
shpeechbrain changes.
vmcru Mar 28, 2025
4b03501
removed validate_ica.py from tracked files
vmcru Mar 28, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions benchmarks/MOABB/dataio/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,12 @@ def __init__(
data,
preload=False,
verbose=None,
# ica_processor: Optional[ICAProcessor] = None,
dynamic_items=(),
output_keys=(),
):
self.verbose = verbose
# self.ica_processor = ica_processor
dynamic_items = [self._make_load_raw_dynamic_item(preload)] + list(
dynamic_items
)
Expand Down
183 changes: 183 additions & 0 deletions benchmarks/MOABB/dataio/ica.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
"""Module for handling ICA computation and application for EEG data.
Author
------
Victor Cruz, 2025
"""
from pathlib import Path
from typing import Union, Optional, Dict, Any
import json
import hashlib

import mne
from mne.preprocessing import ICA
from mne_bids import get_bids_path_from_fname

from speechbrain.utils.data_pipeline import provides, takes


class ICAProcessor:
"""Handles ICA computation and application for EEG data.

Arguments
---------
n_components : int | float | None
Number of components to keep during ICA decomposition
method : str
The ICA method to use. Can be 'fastica', 'infomax' or 'picard'.
Defaults to 'fastica'.
random_state : int | None
Random state for reproducibility
fit_params : dict | None
Additional parameters to pass to the ICA fit method.
See mne.preprocessing.ICA for details.
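filter_params : dict | None
Keyword arguments for the filter applied to a copy of the data before
fitting ICA. None (or an empty dict) selects the default
{'l_freq': 1.0, 'h_freq': None}, i.e. a 1 Hz high-pass, so passing None
does not currently disable filtering.
use_hash : bool
If True (default), a short hash of the ICA parameters is included in the
derivatives folder name and in the file description. Defaults to True.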
"""

def __init__(
    self,
    n_components=None,
    method="fastica",
    random_state=42,
    fit_params: Optional[Dict[str, Any]] = None,
    filter_params: Optional[Dict[str, Any]] = None,
    use_hash: bool = True,
):
    """Store the ICA configuration on the instance.

    A falsy ``fit_params`` (None or empty) is stored as an empty dict, and
    a falsy ``filter_params`` is stored as
    ``{"l_freq": 1.0, "h_freq": None}``.
    """
    # Resolve the optional dictionaries first; falsy values get defaults.
    if not fit_params:
        fit_params = {}
    if not filter_params:
        filter_params = {"l_freq": 1.0, "h_freq": None}

    self.n_components = n_components
    self.method = method
    self.random_state = random_state
    self.fit_params = fit_params
    self.filter_params = filter_params
    self.use_hash = use_hash

def _get_params_hash(self) -> str:
    """Return an 8-character hex digest identifying the ICA configuration.

    Only ``n_components``, ``method`` and ``filter_params`` contribute to
    the digest; ``random_state`` and ``fit_params`` are not part of it.
    """
    hashed_fields = ("n_components", "method", "filter_params")
    payload = {field: getattr(self, field) for field in hashed_fields}
    # sort_keys keeps the serialised form independent of dict ordering.
    serialised = json.dumps(payload, sort_keys=True)
    digest = hashlib.md5(serialised.encode())
    # The first eight hex characters are used as the short identifier.
    return digest.hexdigest()[:8]

def get_ica_metadata(self) -> Dict:
    """Return the ICA configuration of this processor as a dictionary.

    The keys are ``n_components``, ``method``, ``random_state``,
    ``filter_params`` and ``fit_params``, in that order.
    """
    field_names = (
        "n_components",
        "method",
        "random_state",
        "filter_params",
        "fit_params",
    )
    return {name: getattr(self, name) for name in field_names}

def get_ica_path(self, raw_path: Union[str, Path]) -> tuple[Path, Path]:
"""Generate path where ICA solution should be stored.

Creates a derivatives folder to store ICA solutions, following BIDS conventions.
Returns
-------
tuple[Path, Path]
Returns (ica_path, metadata_path)
"""
bids_path = get_bids_path_from_fname(raw_path)

if self.use_hash:
param_hash = self._get_params_hash()
folder_name = f"ica-{self.method}-{param_hash}"
desc = f"ica{param_hash}"
else:
folder_name = f"ica{self.method}"
desc = f"ica"

# For derivatives, you can put them in a derivatives folder:
bids_path.root = bids_path.root / ".." / "derivatives" / folder_name
# Keep the same base entities:
bids_path.update(
suffix="eeg", # override or confirm suffix
extension=".fif",
description=desc, # <-- This sets a desc=ica entity
check=True, # If you do not want BIDSPath to fail on derivative checks
)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
bids_path.root = bids_path.root / ".." / "derivatives" / folder_name
# Keep the same base entities:
bids_path.update(
suffix="eeg", # override or confirm suffix
extension=".fif",
description=desc, # <-- This sets a desc=ica entity
check=True, # If you do not want BIDSPath to fail on derivative checks
)
ica_path = bids_path.update(
processing="ica", suffix="ica"
)

# Make sure the folder is created
bids_path.fpath.parent.mkdir(parents=True, exist_ok=True)
Copy link
Collaborator

@bruAristimunha bruAristimunha Mar 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
bids_path.fpath.parent.mkdir(parents=True, exist_ok=True)
ica_path.mkdir(parents=True, exist_ok=True)


ica_path = bids_path.fpath
metadata_path = ica_path.with_suffix(".json")
Comment on lines +245 to +246
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
ica_path = bids_path.fpath
metadata_path = ica_path.with_suffix(".json")
metadata_path = bids_path.update(
suffix="metaica", extension=".json"
)


return ica_path, metadata_path

def save_ica(self, ica: ICA, ica_path: Path, metadata_path: Path):
    """Write the ICA solution and its parameter metadata to disk.

    The solution is saved to ``ica_path`` with ``overwrite=True``, and the
    dictionary returned by ``get_ica_metadata`` is written as JSON to
    ``metadata_path``.
    """
    # Persist the fitted decomposition first.
    ica.save(ica_path, overwrite=True)

    # Then record the parameters that produced it.
    with metadata_path.open("w") as handle:
        payload = self.get_ica_metadata()
        json.dump(payload, handle)

def check_ica_metadata(self, metadata_path: Path) -> bool:
    """Check if existing ICA metadata matches current parameters.

    Arguments
    ---------
    metadata_path : Path
        Location of the JSON metadata file to compare against.

    Returns
    -------
    bool
        True when the file exists, parses as JSON and equals the current
        output of ``get_ica_metadata``. False when the file is missing,
        cannot be decoded, or describes different parameters.
    """
    try:
        with metadata_path.open() as f:
            saved_metadata = json.load(f)
    except FileNotFoundError:
        return False
    except (json.JSONDecodeError, UnicodeDecodeError):
        # A truncated or corrupt metadata file cannot vouch for the cached
        # solution, so report a mismatch instead of raising.
        return False

    # Round-trip the current parameters through JSON so both sides use the
    # types json.load produces (tuples become lists, non-string keys become
    # strings). Comparing the raw dict would never match for such values.
    current_metadata = json.loads(json.dumps(self.get_ica_metadata()))
    return saved_metadata == current_metadata

def compute_ica(self, raw: mne.io.RawArray, ica_path: Path) -> ICA:
    """Compute ICA solution and save to disk.

    When ``self.filter_params`` is not None, a copy of ``raw`` is filtered
    with those keyword arguments and ICA is fitted on the copy; otherwise
    ICA is fitted on ``raw`` directly. The input ``raw`` is not filtered
    in place.

    Arguments
    ---------
    raw : mne.io.RawArray
        Recording on which the decomposition is fitted.
    ica_path : Path
        Destination file for the fitted solution. An existing file at this
        location is replaced.

    Returns
    -------
    ICA
        The fitted ICA object.
    """
    if self.filter_params is not None:
        # Filter a copy so the caller's data is left untouched.
        raw_filtered = raw.copy()
        raw_filtered.filter(**self.filter_params)
    else:
        # Use raw data directly if no filtering is needed
        raw_filtered = raw

    # fit_params is expanded as extra keyword arguments of the constructor.
    ica = ICA(
        n_components=self.n_components,
        method=self.method,
        random_state=self.random_state,
        **self.fit_params,
    )
    ica.fit(raw_filtered)
    # overwrite=True: this method is also reached when a solution already
    # exists at ica_path but its metadata is missing or stale. Saving
    # without it would fail on the existing file instead of refreshing it.
    ica.save(ica_path, overwrite=True)
    return ica

@property
def dynamic_item(self):
    """Return the pipeline item that applies this processor's ICA.

    The returned callable takes ``raw`` and ``fpath`` and provides ``raw``
    (an ICA-applied copy of the input) and ``ica_path``.
    """

    @takes("raw", "fpath")
    @provides("raw", "ica_path")
    def process(raw: mne.io.RawArray, fpath: Union[str, Path]):
        """Apply ICA to ``raw``, reusing a stored solution when it is valid."""
        ica_path, metadata_path = self.get_ica_path(fpath)

        # A stored solution is reused only if the file exists and its
        # metadata matches the current parameters.
        cache_is_valid = ica_path.exists() and self.check_ica_metadata(
            metadata_path
        )
        if not cache_is_valid:
            ica = self.compute_ica(raw, ica_path)
            self.save_ica(ica, ica_path, metadata_path)
        else:
            ica = mne.preprocessing.read_ica(ica_path, verbose="ERROR")

        # Apply the decomposition to a copy so the input stays unchanged.
        cleaned = raw.copy()
        ica.apply(cleaned)

        yield cleaned
        yield ica_path

    return process
1 change: 1 addition & 0 deletions benchmarks/MOABB/extra-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
git+https://github.com/braindecode/braindecode
moabb
orion[profet]
python-picard
scikit-learn
Loading