From 66faeaaf9605946f3fa34d9b33c65f7157fe833a Mon Sep 17 00:00:00 2001 From: vmcru Date: Tue, 4 Mar 2025 09:20:46 -0500 Subject: [PATCH 1/8] initial test for ica dynamic item inclusion --- benchmarks/MOABB/dataio/datasets.py | 7 +++ benchmarks/MOABB/dataio/ica.py | 59 +++++++++++++++++ benchmarks/MOABB/validate_ica.py | 98 +++++++++++++++++++++++++++++ 3 files changed, 164 insertions(+) create mode 100644 benchmarks/MOABB/dataio/ica.py create mode 100644 benchmarks/MOABB/validate_ica.py diff --git a/benchmarks/MOABB/dataio/datasets.py b/benchmarks/MOABB/dataio/datasets.py index 0016c3f96..d20fd5049 100644 --- a/benchmarks/MOABB/dataio/datasets.py +++ b/benchmarks/MOABB/dataio/datasets.py @@ -23,6 +23,8 @@ from torch.utils.data import Dataset +from .ica import ICAProcessor + class RawEEGSample(TypedDict, total=False): """Default dictionary keys provided by `~RawEEGDataset`. @@ -94,10 +96,12 @@ def __init__( data, preload=False, verbose=None, + ica_processor: Optional[ICAProcessor] = None, dynamic_items=(), output_keys=(), ): self.verbose = verbose + self.ica_processor = ica_processor dynamic_items = [self._make_load_raw_dynamic_item(preload)] + list( dynamic_items ) @@ -297,6 +301,9 @@ def _make_load_raw_dynamic_item(self, preload: bool): @provides("info", "raw") def _load_raw(fpath: str): raw = self._read_raw_bids_cached(fpath, preload) + + if self.ica_processor is not None: + raw = self.ica_processor.process(raw, fpath) yield raw.info yield raw diff --git a/benchmarks/MOABB/dataio/ica.py b/benchmarks/MOABB/dataio/ica.py new file mode 100644 index 000000000..0cc3c220e --- /dev/null +++ b/benchmarks/MOABB/dataio/ica.py @@ -0,0 +1,59 @@ +from pathlib import Path +from typing import Union + +import mne +from mne.preprocessing import ICA + + +class ICAProcessor: + """Handles ICA computation and application for EEG data. + + Arguments + --------- + n_components : int | float | None + Number of components to keep during ICA decomposition + random_state : int | None + Random state for reproducibility + """ + + def __init__(self, n_components=None, random_state=42): + self.n_components = n_components + self.random_state = random_state + + def get_ica_path(self, raw_path: Union[str, Path]) -> Path: + """Generate path where ICA solution should be stored.""" + path = Path(raw_path) + return path.parent / f"{path.stem}_ica.fif" + + def compute_ica(self, raw: mne.io.RawArray, ica_path: Path) -> ICA: + """Compute ICA solution and save to disk.""" + # High-pass filter for ICA + raw_filtered = raw.copy() + raw_filtered.filter(l_freq=1.0, h_freq=None) + + ica = ICA( + n_components=self.n_components, + random_state=self.random_state + ) + ica.fit(raw) + ica.save(ica_path) + return ica + + + def process(self, raw: mne.io.RawArray, raw_path: Union[str, Path]) -> mne.io.RawArray: + """Process raw data with ICA, computing or loading from cache.""" + if not raw.preload: + raw.load_data() + + ica_path = self.get_ica_path(raw_path) + + if not ica_path.exists(): + ica = self.compute_ica(raw, ica_path) + else: + ica = mne.preprocessing.read_ica(ica_path) + + # Create a copy of the raw data before applying ICA + raw_ica = raw.copy() + ica.apply(raw_ica) + + return raw_ica \ No newline at end of file diff --git a/benchmarks/MOABB/validate_ica.py b/benchmarks/MOABB/validate_ica.py new file mode 100644 index 000000000..82a54037b --- /dev/null +++ b/benchmarks/MOABB/validate_ica.py @@ -0,0 +1,98 @@ +import logging +import os +from pathlib import Path +import time +import mne +import moabb +from moabb.datasets import BNCI2014_001 +from memory_profiler import profile + +from dataio.datasets import EpochedEEGDataset, RawEEGDataset, InMemoryDataset +from dataio.ica import ICAProcessor + +# Set up logging +mne.set_log_level(verbose=False) +moabb.set_log_level(level="ERROR") + +def test_ica_processing(): + # Test without ICA first + print("\nTesting without ICA:") + dataset_no_ica = EpochedEEGDataset.from_moabb( + BNCI2014_001(), + "data/MNE-BIDS-bnci2014-001-epoched.json", + save_path="data", + tmin=0, + tmax=4.0, + output_keys=["label", "subject", "session", "epoch"], + ) + + # Time iteration + start = time.time() + for _ in dataset_no_ica: + pass + print(f"Time without ICA: {time.time() - start:.2f}s") + + # Test with ICA + print("\nTesting with ICA:") + ica_processor = ICAProcessor(n_components=15) + dataset_with_ica = EpochedEEGDataset.from_moabb( + BNCI2014_001(), + "data/MNE-BIDS-bnci2014-001-epoched-ica.json", + save_path="data", + tmin=0, + tmax=4.0, + preload=True, + output_keys=["label", "subject", "session", "epoch"], # Removed ica_path + ica_processor=ica_processor + ) + + # First run - ICA computation and caching + print("First run (computing ICA):") + start = time.time() + for _ in dataset_with_ica: + pass + print(f"Time with ICA (first run): {time.time() - start:.2f}s") + + # Second run - should use cached ICA + print("\nSecond run (using cached ICA):") + start = time.time() + for _ in dataset_with_ica: + pass + print(f"Time with ICA (cached): {time.time() - start:.2f}s") + + # Test with InMemoryDataset wrapper + print("\nTesting with InMemoryDataset wrapper:") + dataset_with_ica_cached = InMemoryDataset(dataset_with_ica) + + start = time.time() + for _ in dataset_with_ica_cached: + pass + print(f"Time with ICA (in-memory cache): {time.time() - start:.2f}s") + + # Print some sample info + sample = dataset_with_ica[0] + print("\nSample info:") + print(f"Epoch shape: {sample['epoch'].shape}") + +@profile +def profile_memory_usage(): + ica_processor = ICAProcessor(n_components=15) + dataset = EpochedEEGDataset.from_moabb( + BNCI2014_001(), + "data/MNE-BIDS-bnci2014-001-epoched-ica.json", + save_path="data", + tmin=0, + tmax=4.0, + output_keys=["label", "subject", "session", "epoch"], # Removed ica_path + ica_processor=ica_processor + ) + + for _ in dataset: + pass + +if __name__ == "__main__": + print("Running performance tests...") + test_ica_processing() + + print("\nRunning memory profile...") + profile_memory_usage() \ No newline at end of file From 4b62e604d23fd117a2d6066588ed2a1232101f4f Mon Sep 17 00:00:00 2001 From: vmcru Date: Wed, 5 Mar 2025 00:14:46 -0500 Subject: [PATCH 2/8] exposed method for ica, used mne BIDSPath, path needs checking for proper caching usage. ica still not leveraging caching correctly --- benchmarks/MOABB/dataio/ica.py | 56 +++++++++--- benchmarks/MOABB/validate_ica.py | 143 ++++++++++++++++++++----------- 2 files changed, 141 insertions(+), 58 deletions(-) diff --git a/benchmarks/MOABB/dataio/ica.py b/benchmarks/MOABB/dataio/ica.py index 0cc3c220e..362988152 100644 --- a/benchmarks/MOABB/dataio/ica.py +++ b/benchmarks/MOABB/dataio/ica.py @@ -1,8 +1,9 @@ from pathlib import Path -from typing import Union +from typing import Union, Optional, Dict, Any import mne from mne.preprocessing import ICA +from mne_bids import get_bids_path_from_fname, BIDSPath class ICAProcessor: @@ -12,34 +13,69 @@ class ICAProcessor: --------- n_components : int | float | None Number of components to keep during ICA decomposition + method : str + The ICA method to use. Can be 'fastica', 'infomax' or 'picard'. + Defaults to 'fastica'. random_state : int | None Random state for reproducibility + fit_params : dict | None + Additional parameters to pass to the ICA fit method. + See mne.preprocessing.ICA for details. + filter_params : dict | None + Parameters for the high-pass filter applied before ICA. + Defaults to {'l_freq': 1.0, 'h_freq': None} """ - def __init__(self, n_components=None, random_state=42): + def __init__( + self, + n_components=None, + method='fastica', + random_state=42, + fit_params: Optional[Dict[str, Any]] = None, + filter_params: Optional[Dict[str, Any]] = None, + ): self.n_components = n_components + self.method = method self.random_state = random_state + self.fit_params = fit_params or {} + self.filter_params = filter_params or {'l_freq': 1.0, 'h_freq': None} def get_ica_path(self, raw_path: Union[str, Path]) -> Path: - """Generate path where ICA solution should be stored.""" - path = Path(raw_path) - return path.parent / f"{path.stem}_ica.fif" + """Generate path where ICA solution should be stored. + + Creates a derivatives folder to store ICA solutions, following BIDS conventions. + """ + bids_path = get_bids_path_from_fname(raw_path) + # For derivatives, you can put them in a derivatives folder: + bids_path.root = (bids_path.root / ".." / "derivatives" / f"ica-{self.method}") + # Keep the same base entities: + bids_path.update( + suffix='eeg', # override or confirm suffix + extension='.fif', + description='ica', # <-- This sets a desc=ica entity + check=True, # If you do not want BIDSPath to fail on derivative checks + ) + # Make sure the folder is created + bids_path.fpath.parent.mkdir(parents=True, exist_ok=True) + + return bids_path.fpath def compute_ica(self, raw: mne.io.RawArray, ica_path: Path) -> ICA: """Compute ICA solution and save to disk.""" # High-pass filter for ICA raw_filtered = raw.copy() - raw_filtered.filter(l_freq=1.0, h_freq=None) + raw_filtered.filter(**self.filter_params) ica = ICA( n_components=self.n_components, - random_state=self.random_state + method=self.method, + random_state=self.random_state, + **self.fit_params ) - ica.fit(raw) + ica.fit(raw_filtered) ica.save(ica_path) return ica - def process(self, raw: mne.io.RawArray, raw_path: Union[str, Path]) -> mne.io.RawArray: """Process raw data with ICA, computing or loading from cache.""" if not raw.preload: @@ -50,7 +86,7 @@ def process(self, raw: mne.io.RawArray, raw_path: Union[str, Path]) -> mne.io.Ra if not ica_path.exists(): ica = self.compute_ica(raw, ica_path) else: - ica = mne.preprocessing.read_ica(ica_path) + ica = mne.preprocessing.read_ica(ica_path, verbose='ERROR') # Create a copy of the raw data before applying ICA raw_ica = raw.copy() diff --git a/benchmarks/MOABB/validate_ica.py b/benchmarks/MOABB/validate_ica.py index 82a54037b..8e2d3eaa3 100644 --- a/benchmarks/MOABB/validate_ica.py +++ b/benchmarks/MOABB/validate_ica.py @@ -14,85 +14,132 @@ mne.set_log_level(verbose=False) moabb.set_log_level(level="ERROR") -def test_ica_processing(): - # Test without ICA first - print("\nTesting without ICA:") - dataset_no_ica = EpochedEEGDataset.from_moabb( - BNCI2014_001(), - "data/MNE-BIDS-bnci2014-001-epoched.json", - save_path="data", - tmin=0, - tmax=4.0, - output_keys=["label", "subject", "session", "epoch"], +def test_ica_method(method: str, n_components: int = 15, **kwargs): + """Test a specific ICA method and return timing results.""" + print(f"\nTesting ICA method: {method}") + ica_processor = ICAProcessor( + n_components=n_components, + method=method, + **kwargs ) - # Time iteration - start = time.time() - for _ in dataset_no_ica: - pass - print(f"Time without ICA: {time.time() - start:.2f}s") - - # Test with ICA - print("\nTesting with ICA:") - ica_processor = ICAProcessor(n_components=15) - dataset_with_ica = EpochedEEGDataset.from_moabb( + dataset = EpochedEEGDataset.from_moabb( BNCI2014_001(), - "data/MNE-BIDS-bnci2014-001-epoched-ica.json", + f"data/MNE-BIDS-bnci2014-001-epoched-{method}.json", save_path="data", tmin=0, tmax=4.0, preload=True, - output_keys=["label", "subject", "session", "epoch"], # Removed ica_path + output_keys=["label", "subject", "session", "epoch"], ica_processor=ica_processor ) - # First run - ICA computation and caching + # First run - ICA computation print("First run (computing ICA):") start = time.time() - for _ in dataset_with_ica: + for _ in dataset: pass - print(f"Time with ICA (first run): {time.time() - start:.2f}s") + computation_time = time.time() - start + print(f"Time with {method} ICA (first run): {computation_time:.2f}s") - # Second run - should use cached ICA + # Second run - using cached ICA print("\nSecond run (using cached ICA):") start = time.time() - for _ in dataset_with_ica: + for _ in dataset: pass - print(f"Time with ICA (cached): {time.time() - start:.2f}s") + cached_time = time.time() - start + print(f"Time with {method} ICA (cached): {cached_time:.2f}s") - # Test with InMemoryDataset wrapper + # Memory-cached version print("\nTesting with InMemoryDataset wrapper:") - dataset_with_ica_cached = InMemoryDataset(dataset_with_ica) - + dataset_cached = InMemoryDataset(dataset) start = time.time() - for _ in dataset_with_ica_cached: + for _ in dataset_cached: pass - print(f"Time with ICA (in-memory cache): {time.time() - start:.2f}s") + memory_cached_time = time.time() - start + print(f"Time with {method} ICA (in-memory cache): {memory_cached_time:.2f}s") - # Print some sample info - sample = dataset_with_ica[0] - print("\nSample info:") - print(f"Epoch shape: {sample['epoch'].shape}") + return { + 'method': method, + 'computation_time': computation_time, + 'cached_time': cached_time, + 'memory_cached_time': memory_cached_time + } -@profile -def profile_memory_usage(): - ica_processor = ICAProcessor(n_components=15) - dataset = EpochedEEGDataset.from_moabb( +def compare_ica_methods(): + # Test without ICA first as baseline + print("\nTesting without ICA (baseline):") + dataset_no_ica = EpochedEEGDataset.from_moabb( BNCI2014_001(), - "data/MNE-BIDS-bnci2014-001-epoched-ica.json", + "data/MNE-BIDS-bnci2014-001-epoched.json", save_path="data", tmin=0, tmax=4.0, - output_keys=["label", "subject", "session", "epoch"], # Removed ica_path - ica_processor=ica_processor + output_keys=["label", "subject", "session", "epoch"], ) - - for _ in dataset: + + start = time.time() + for _ in dataset_no_ica: pass + baseline_time = time.time() - start + print(f"Time without ICA: {baseline_time:.2f}s") + + # Test different ICA methods + results = [] + + # Test Picard + results.append(test_ica_method( + 'picard', + n_components=15, + fit_params={'max_iter': 500} + )) + + # Test Infomax + results.append(test_ica_method( + 'infomax', + n_components=15, + fit_params={'max_iter': 1000} + )) + + # Print comparison + print("\nComparison Summary:") + print("-" * 50) + print(f"Baseline (no ICA): {baseline_time:.2f}s") + print("-" * 50) + for result in results: + print(f"Method: {result['method']}") + print(f" Computation time: {result['computation_time']:.2f}s") + print(f" Cached access time: {result['cached_time']:.2f}s") + print(f" In-memory cached time: {result['memory_cached_time']:.2f}s") + print("-" * 50) + +@profile +def profile_memory_usage(): + # Profile memory usage for both methods + for method in ['picard', 'infomax']: + print(f"\nProfiling {method} ICA:") + ica_processor = ICAProcessor( + n_components=15, + method=method, + fit_params={'max_iter': 500} if method == 'picard' else {'iteration': 1000} + ) + dataset = EpochedEEGDataset.from_moabb( + BNCI2014_001(), + f"data/MNE-BIDS-bnci2014-001-epoched-{method}.json", + save_path="data", + tmin=0, + tmax=4.0, + preload=True, + output_keys=["label", "subject", "session", "epoch"], + ica_processor=ica_processor + ) + + for _ in dataset: + pass if __name__ == "__main__": - print("Running performance tests...") - test_ica_processing() + print("Running ICA method comparison...") + compare_ica_methods() print("\nRunning memory profile...") profile_memory_usage() \ No newline at end of file From c3ec3dc5710949037a7ed73bf85709d75b0dbe0c Mon Sep 17 00:00:00 2001 From: Bru Date: Wed, 5 Mar 2025 18:09:37 +0000 Subject: [PATCH 3/8] Apply suggestions from code review --- benchmarks/MOABB/dataio/ica.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/benchmarks/MOABB/dataio/ica.py b/benchmarks/MOABB/dataio/ica.py index 362988152..82d1a7fe6 100644 --- a/benchmarks/MOABB/dataio/ica.py +++ b/benchmarks/MOABB/dataio/ica.py @@ -78,8 +78,6 @@ def compute_ica(self, raw: mne.io.RawArray, ica_path: Path) -> ICA: def process(self, raw: mne.io.RawArray, raw_path: Union[str, Path]) -> mne.io.RawArray: """Process raw data with ICA, computing or loading from cache.""" - if not raw.preload: - raw.load_data() ica_path = self.get_ica_path(raw_path) From 77a8c59d77ac48afe2d241bc1810d26ba16b5831 Mon Sep 17 00:00:00 2001 From: Bru Date: Wed, 5 Mar 2025 18:10:19 +0000 Subject: [PATCH 4/8] Apply suggestions from code review --- benchmarks/MOABB/dataio/ica.py | 2 +- benchmarks/MOABB/validate_ica.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/MOABB/dataio/ica.py b/benchmarks/MOABB/dataio/ica.py index 82d1a7fe6..59a43ed6f 100644 --- a/benchmarks/MOABB/dataio/ica.py +++ b/benchmarks/MOABB/dataio/ica.py @@ -90,4 +90,4 @@ def process(self, raw: mne.io.RawArray, raw_path: Union[str, Path]) -> mne.io.Ra raw_ica = raw.copy() ica.apply(raw_ica) - return raw_ica \ No newline at end of file + return raw_ica diff --git a/benchmarks/MOABB/validate_ica.py b/benchmarks/MOABB/validate_ica.py index 8e2d3eaa3..26a18582b 100644 --- a/benchmarks/MOABB/validate_ica.py +++ b/benchmarks/MOABB/validate_ica.py @@ -142,4 +142,4 @@ def profile_memory_usage(): compare_ica_methods() print("\nRunning memory profile...") - profile_memory_usage() \ No newline at end of file + profile_memory_usage() From 3a78e1d3e0a7267c5bda62f671c7a0b375518883 Mon Sep 17 00:00:00 2001 From: vmcru Date: Thu, 6 Mar 2025 21:25:28 -0500 Subject: [PATCH 5/8] modifications to ica and validate to pass merge --- benchmarks/MOABB/dataio/ica.py | 2 +- benchmarks/MOABB/validate_ica.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/MOABB/dataio/ica.py b/benchmarks/MOABB/dataio/ica.py index 59a43ed6f..4ad893006 100644 --- a/benchmarks/MOABB/dataio/ica.py +++ b/benchmarks/MOABB/dataio/ica.py @@ -3,7 +3,7 @@ import mne from mne.preprocessing import ICA -from mne_bids import get_bids_path_from_fname, BIDSPath +from mne_bids import get_bids_path_from_fname class ICAProcessor: diff --git a/benchmarks/MOABB/validate_ica.py b/benchmarks/MOABB/validate_ica.py index 26a18582b..11152dbed 100644 --- a/benchmarks/MOABB/validate_ica.py +++ b/benchmarks/MOABB/validate_ica.py @@ -121,7 +121,7 @@ def profile_memory_usage(): ica_processor = ICAProcessor( n_components=15, method=method, - fit_params={'max_iter': 500} if method == 'picard' else {'iteration': 1000} + fit_params={'max_iter': 500} if method == 'picard' else {'max_iter': 1000} ) dataset = EpochedEEGDataset.from_moabb( BNCI2014_001(), From 86af6be7f7c2d7de17cd131679e8f7c5136d810f Mon Sep 17 00:00:00 2001 From: vmcru Date: Thu, 6 Mar 2025 21:42:10 -0500 Subject: [PATCH 6/8] removed unused imports from all files and added credits --- benchmarks/MOABB/dataio/datasets.py | 2 +- benchmarks/MOABB/dataio/ica.py | 43 +++++++++++------- benchmarks/MOABB/validate_ica.py | 70 ++++++++++++++++------------- 3 files changed, 65 insertions(+), 50 deletions(-) diff --git a/benchmarks/MOABB/dataio/datasets.py b/benchmarks/MOABB/dataio/datasets.py index 51fb84609..e7bfb348a 100644 --- a/benchmarks/MOABB/dataio/datasets.py +++ b/benchmarks/MOABB/dataio/datasets.py @@ -300,7 +300,7 @@ def _make_load_raw_dynamic_item(self, preload: bool): @provides("info", "raw") def _load_raw(fpath: str): raw = self._read_raw_bids_cached(fpath, preload) - + if self.ica_processor is not None: raw = self.ica_processor.process(raw, fpath) diff --git a/benchmarks/MOABB/dataio/ica.py b/benchmarks/MOABB/dataio/ica.py index 4ad893006..618446847 100644 --- a/benchmarks/MOABB/dataio/ica.py +++ b/benchmarks/MOABB/dataio/ica.py @@ -1,3 +1,8 @@ +"""Module for handling ICA computation and application for EEG data. +Author +------ +Victor Cruz, 2025 +""" from pathlib import Path from typing import Union, Optional, Dict, Any @@ -27,9 +32,9 @@ class ICAProcessor: """ def __init__( - self, - n_components=None, - method='fastica', + self, + n_components=None, + method="fastica", random_state=42, fit_params: Optional[Dict[str, Any]] = None, filter_params: Optional[Dict[str, Any]] = None, @@ -38,22 +43,24 @@ def __init__( self.method = method self.random_state = random_state self.fit_params = fit_params or {} - self.filter_params = filter_params or {'l_freq': 1.0, 'h_freq': None} + self.filter_params = filter_params or {"l_freq": 1.0, "h_freq": None} def get_ica_path(self, raw_path: Union[str, Path]) -> Path: """Generate path where ICA solution should be stored. - + Creates a derivatives folder to store ICA solutions, following BIDS conventions. """ bids_path = get_bids_path_from_fname(raw_path) # For derivatives, you can put them in a derivatives folder: - bids_path.root = (bids_path.root / ".." / "derivatives" / f"ica-{self.method}") + bids_path.root = ( + bids_path.root / ".." / "derivatives" / f"ica-{self.method}" + ) # Keep the same base entities: bids_path.update( - suffix='eeg', # override or confirm suffix - extension='.fif', - description='ica', # <-- This sets a desc=ica entity - check=True, # If you do not want BIDSPath to fail on derivative checks + suffix="eeg", # override or confirm suffix + extension=".fif", + description="ica", # <-- This sets a desc=ica entity + check=True, # If you do not want BIDSPath to fail on derivative checks ) # Make sure the folder is created bids_path.fpath.parent.mkdir(parents=True, exist_ok=True) @@ -70,24 +77,26 @@ def compute_ica(self, raw: mne.io.RawArray, ica_path: Path) -> ICA: n_components=self.n_components, method=self.method, random_state=self.random_state, - **self.fit_params + **self.fit_params, ) ica.fit(raw_filtered) ica.save(ica_path) return ica - def process(self, raw: mne.io.RawArray, raw_path: Union[str, Path]) -> mne.io.RawArray: + def process( + self, raw: mne.io.RawArray, raw_path: Union[str, Path] + ) -> mne.io.RawArray: """Process raw data with ICA, computing or loading from cache.""" - + ica_path = self.get_ica_path(raw_path) - + if not ica_path.exists(): ica = self.compute_ica(raw, ica_path) else: - ica = mne.preprocessing.read_ica(ica_path, verbose='ERROR') - + ica = mne.preprocessing.read_ica(ica_path, verbose="ERROR") + # Create a copy of the raw data before applying ICA raw_ica = raw.copy() ica.apply(raw_ica) - + return raw_ica diff --git a/benchmarks/MOABB/validate_ica.py b/benchmarks/MOABB/validate_ica.py index 11152dbed..1e569f7f7 100644 --- a/benchmarks/MOABB/validate_ica.py +++ b/benchmarks/MOABB/validate_ica.py @@ -1,28 +1,29 @@ -import logging -import os -from pathlib import Path +'''File for testing ICA computation and application for EEG data. +Authors +------- +Victor Cruz, 2025 +''' import time import mne import moabb from moabb.datasets import BNCI2014_001 from memory_profiler import profile -from dataio.datasets import EpochedEEGDataset, RawEEGDataset, InMemoryDataset -from dataio.ica import ICAProcessor +from dataio.datasets import EpochedEEGDataset, InMemoryDataset +from dataio.ica import ICAProcessor # Set up logging mne.set_log_level(verbose=False) moabb.set_log_level(level="ERROR") + def test_ica_method(method: str, n_components: int = 15, **kwargs): """Test a specific ICA method and return timing results.""" print(f"\nTesting ICA method: {method}") ica_processor = ICAProcessor( - n_components=n_components, - method=method, - **kwargs + n_components=n_components, method=method, **kwargs ) - + dataset = EpochedEEGDataset.from_moabb( BNCI2014_001(), f"data/MNE-BIDS-bnci2014-001-epoched-{method}.json", @@ -31,7 +32,7 @@ def test_ica_method(method: str, n_components: int = 15, **kwargs): tmax=4.0, preload=True, output_keys=["label", "subject", "session", "epoch"], - ica_processor=ica_processor + ica_processor=ica_processor, ) # First run - ICA computation @@ -57,15 +58,18 @@ def test_ica_method(method: str, n_components: int = 15, **kwargs): for _ in dataset_cached: pass memory_cached_time = time.time() - start - print(f"Time with {method} ICA (in-memory cache): {memory_cached_time:.2f}s") + print( + f"Time with {method} ICA (in-memory cache): {memory_cached_time:.2f}s" + ) return { - 'method': method, - 'computation_time': computation_time, - 'cached_time': cached_time, - 'memory_cached_time': memory_cached_time + "method": method, + "computation_time": computation_time, + "cached_time": cached_time, + "memory_cached_time": memory_cached_time, } + def compare_ica_methods(): # Test without ICA first as baseline print("\nTesting without ICA (baseline):") @@ -77,7 +81,7 @@ def compare_ica_methods(): tmax=4.0, output_keys=["label", "subject", "session", "epoch"], ) - + start = time.time() for _ in dataset_no_ica: pass @@ -86,20 +90,18 @@ def compare_ica_methods(): # Test different ICA methods results = [] - + # Test Picard - results.append(test_ica_method( - 'picard', - n_components=15, - fit_params={'max_iter': 500} - )) - + results.append( + test_ica_method("picard", n_components=15, fit_params={"max_iter": 500}) + ) + # Test Infomax - results.append(test_ica_method( - 'infomax', - n_components=15, - fit_params={'max_iter': 1000} - )) + results.append( + test_ica_method( + "infomax", n_components=15, fit_params={"max_iter": 1000} + ) + ) # Print comparison print("\nComparison Summary:") @@ -113,15 +115,18 @@ def compare_ica_methods(): print(f" In-memory cached time: {result['memory_cached_time']:.2f}s") print("-" * 50) + @profile def profile_memory_usage(): # Profile memory usage for both methods - for method in ['picard', 'infomax']: + for method in ["picard", "infomax"]: print(f"\nProfiling {method} ICA:") ica_processor = ICAProcessor( n_components=15, method=method, - fit_params={'max_iter': 500} if method == 'picard' else {'max_iter': 1000} + fit_params={"max_iter": 500} + if method == "picard" + else {"max_iter": 1000}, ) dataset = EpochedEEGDataset.from_moabb( BNCI2014_001(), @@ -131,15 +136,16 @@ def profile_memory_usage(): tmax=4.0, preload=True, output_keys=["label", "subject", "session", "epoch"], - ica_processor=ica_processor + ica_processor=ica_processor, ) for _ in dataset: pass + if __name__ == "__main__": print("Running ICA method comparison...") compare_ica_methods() - + print("\nRunning memory profile...") profile_memory_usage() From f29636ce7b0d90eebb62ecc33b65e26edd2b7fae Mon Sep 17 00:00:00 2001 From: vmcru Date: Thu, 6 Mar 2025 21:44:44 -0500 Subject: [PATCH 7/8] updated validate_ica.py --- benchmarks/MOABB/validate_ica.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/MOABB/validate_ica.py b/benchmarks/MOABB/validate_ica.py index 1e569f7f7..8c62b422c 100644 --- a/benchmarks/MOABB/validate_ica.py +++ b/benchmarks/MOABB/validate_ica.py @@ -1,8 +1,8 @@ -'''File for testing ICA computation and application for EEG data. +"""File for testing ICA computation and application for EEG data. Authors ------- Victor Cruz, 2025 -''' +""" import time import mne import moabb From 30ff4be7b91dc5be973b309a1d00414ab1ade224 Mon Sep 17 00:00:00 2001 From: Bru Date: Tue, 18 Mar 2025 14:49:39 +0100 Subject: [PATCH 8/8] Apply suggestions from code review Co-authored-by: Drew Wagner <33100250+Drew-Wagner@users.noreply.github.com> --- benchmarks/MOABB/dataio/datasets.py | 3 --- benchmarks/MOABB/dataio/ica.py | 32 +++++++++++++++++------------ 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/benchmarks/MOABB/dataio/datasets.py b/benchmarks/MOABB/dataio/datasets.py index e7bfb348a..85923f5f4 100644 --- a/benchmarks/MOABB/dataio/datasets.py +++ b/benchmarks/MOABB/dataio/datasets.py @@ -301,9 +301,6 @@ def _make_load_raw_dynamic_item(self, preload: bool): def _load_raw(fpath: str): raw = self._read_raw_bids_cached(fpath, preload) - if self.ica_processor is not None: - raw = self.ica_processor.process(raw, fpath) - yield raw.info yield raw diff --git a/benchmarks/MOABB/dataio/ica.py b/benchmarks/MOABB/dataio/ica.py index 618446847..1df2b7603 100644 --- a/benchmarks/MOABB/dataio/ica.py +++ b/benchmarks/MOABB/dataio/ica.py @@ -83,20 +83,26 @@ def compute_ica(self, raw: mne.io.RawArray, ica_path: Path) -> ICA: ica.save(ica_path) return ica - def process( - self, raw: mne.io.RawArray, raw_path: Union[str, Path] - ) -> mne.io.RawArray: - """Process raw data with ICA, computing or loading from cache.""" + @property + def dynamic_item(self): + @takes("raw", "fpath") + @provides("raw", "ica_path") + def process( + raw: mne.io.RawArray, fpath: Union[str, Path] + ): + """Process raw data with ICA, computing or loading from cache.""" - ica_path = self.get_ica_path(raw_path) + ica_path = self.get_ica_path(fpath) - if not ica_path.exists(): - ica = self.compute_ica(raw, ica_path) - else: - ica = mne.preprocessing.read_ica(ica_path, verbose="ERROR") + if not ica_path.exists(): + ica = self.compute_ica(raw, ica_path) + else: + ica = mne.preprocessing.read_ica(ica_path, verbose="ERROR") - # Create a copy of the raw data before applying ICA - raw_ica = raw.copy() - ica.apply(raw_ica) + # Create a copy of the raw data before applying ICA + raw_ica = raw.copy() + ica.apply(raw_ica) - return raw_ica + yield raw_ica + yield ica_path + return process