From 6e68339601513ada459154e932089d9bce3becd7 Mon Sep 17 00:00:00 2001
From: kushalkolar
Date: Thu, 29 Sep 2022 18:31:45 -0400
Subject: [PATCH] add caiman.backup_dataframe(), change index parsing to not allow int, and allow pandas.Series

---
Reviewer notes (below the cut line, so not part of the commit message):
 * _index_parser: the keyword branch now reads kwargs["identifier"]; it
   previously tested for "identifier" but read the removed "index" key.
 * backup_dataframe: the `filename` argument is now honoured, and the
   docstring states the default name the code really produces.
 * get_parent: `identifier` is annotated and documented as pd.Series,
   matching what _index_parser accepts.
 * All fixes only touch added lines and keep their count, so the hunk
   headers and the diffstat are unchanged. The blob ids on the `index`
   lines are therefore stale.
 * Indentation and blank context lines were rebuilt from the hunk counts;
   verify with `git apply --check` before applying.
 * Follow-up, not addressed here: remove_item() still takes `index` and is
   still wrapped by _index_parser, so remove_item(index=...) is no longer
   parsed. The default backup name contains ':' which is not a legal file
   name character on Windows.

 mesmerize_core/caiman_extensions/_utils.py | 40 ++++++++----
 mesmerize_core/caiman_extensions/common.py | 73 ++++++++++++++++------
 2 files changed, 82 insertions(+), 31 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/_utils.py b/mesmerize_core/caiman_extensions/_utils.py
index e473856..9029011 100644
--- a/mesmerize_core/caiman_extensions/_utils.py
+++ b/mesmerize_core/caiman_extensions/_utils.py
@@ -1,6 +1,7 @@
 from functools import wraps
 from typing import Union
 from uuid import UUID
+import pandas as pd
 
 from mesmerize_core.caiman_extensions._batch_exceptions import BatchItemNotRunError, BatchItemUnsuccessfulError, \
     WrongAlgorithmExtensionError
@@ -30,27 +31,40 @@ def wrapper(self, *args, **kwargs):
 
 
 def _index_parser(func):
+    """
+    Parses uuid identifier that can be passed in various ways and returns it as a UUID string regardless of input type.
+    """
     @wraps(func)
     def _parser(instance, *args, **kwargs):
-        if "index" in kwargs.keys():
-            index: Union[int, str, UUID] = kwargs["index"]
+        if "identifier" in kwargs.keys():
+            u: Union[pd.Series, str, UUID] = kwargs["identifier"]
         elif len(args) > 0:
-            index = args[0]  # always first positional arg
+            u = args[0]  # always first positional arg
 
-        if isinstance(index, (UUID, str)):
-            _index = instance._df[instance._df["uuid"] == str(index)].index
-            if _index.size == 0:
-                raise ValueError(f"No batch item found with uuid: {index}")
+        if not isinstance(u, (pd.Series, UUID, str)):
+            raise TypeError(
+                "Passed index must be one of the following types:\n"
+                "`pandas.Series`, `UUID`, `str`"
+            )
+
+        # if the batch item itself was passed
+        if isinstance(u, pd.Series):
+            u = u["uuid"]
 
-            index = _index.item()
+        # if the passed `identifier` is already a UUID
+        if isinstance(u, (UUID, str)):
+            _index = instance._df[instance._df["uuid"] == str(u)].index
+
+            # make sure it exists in the dataframe
+            if _index.size == 0:
+                raise ValueError(f"No batch item found with uuid: {u}")
 
-        if not isinstance(index, int):
-            raise TypeError(f"`index` argument must be of type `int`, `str`, or `UUID`")
+        u = str(u)
 
-        if "index" in kwargs.keys():
-            kwargs["index"] = index
+        if "identifier" in kwargs.keys():
+            kwargs["identifier"] = u
         else:
-            args = (index, *args[1:])
+            args = (u, *args[1:])
 
         return func(instance, *args, **kwargs)
     return _parser
diff --git a/mesmerize_core/caiman_extensions/common.py b/mesmerize_core/caiman_extensions/common.py
index a9c10e0..8484a15 100644
--- a/mesmerize_core/caiman_extensions/common.py
+++ b/mesmerize_core/caiman_extensions/common.py
@@ -8,6 +8,7 @@
 from itertools import chain
 from collections import Counter
 from time import time
+from datetime import datetime
 
 import numpy as np
 import pandas as pd
@@ -126,6 +127,32 @@ def save_to_disk(self):
             shutil.copyfile(bak, path)
             raise IOError(f"Could not save dataframe to disk.")
 
+    def backup_dataframe(self, filename: str = None) -> Path:
+        """
+        Backup the DataFrame to a new pickle file within the same batch directory.
+        By default the backup is named ``<batch file name>.bak.YYYY-MM-DD-HH:MM:SS``
+
+        .. warning:: **This only backs up the DataFrame pickle file. It DOES NOT backup the mcorr & cnmf data files!**
+
+        Parameters
+        ----------
+        filename: Optional[str]
+            If provided, the backup is written under this file name (in the batch directory) instead of the timestamped default
+
+        Returns
+        -------
+        Path
+            Path to the backup file.
+
+        """
+        path = self._df.paths.get_batch_path()
+        timestamp = datetime.now().strftime("%Y-%m-%d-%H:%M:%S")
+        # honour an explicit ``filename``; otherwise append ``.bak.`` plus the timestamp to the batch file name
+        back_path = path.with_name(filename or f"{path.name}.bak.{timestamp}")
+        shutil.copyfile(path, back_path)
+
+        return back_path
+
     @_index_parser
     def remove_item(self, index: Union[int, str, UUID], remove_data: bool = True, safe_removal: bool = True):
         """
@@ -230,34 +257,43 @@ def get_params_diffs(self, algo: str, item_name: str) -> pd.Series:
 
     @warning_experimental()
     @_index_parser
-    def get_children(self, index: Union[int, str, UUID]) -> List[UUID]:
+    def get_children(self, identifier: Union[pd.Series, str, UUID]) -> pd.DataFrame:
         """
-        For the *motion correction* batch item at the provided ``index``,
-        returns a list of UUIDs for *CNMF(E)* batch items that use the
+        For the provided **motion correction** batch item identifier,
+        returns a DataFrame of *CNMF(E)* batch items that use the
         output of this motion correction batch item.
 
+        | Provide the batch item directly as a pandas Series (row), or the batch item's uuid.
+
         | Note: Only Motion Correction items have children, CNMF(E) items do not have children.
 
         Parameters
         ----------
-        index: Union[int, str, UUID]
-            the index of the mcorr item to get the children of
+        identifier: Union[pd.Series, str, UUID]
+            mcorr batch item as a pandas Series, or the uuid of the mcorr item to get the children of
 
         Returns
         -------
-        List[UUID]
-            List of UUIDs of child CNMF items
+        pd.DataFrame
+            DataFrame of child CNMF(E) batch items
 
         """
 
-        if not self._df.iloc[index]["algo"] == "mcorr":
+        item = self.uloc(identifier)
+
+        if not item["algo"] == "mcorr":
             raise TypeError(
                 "`caiman.get_children()` extension maybe only be used with "
                 "mcorr batch items, CNMF(E) items do not have children."
             )
 
         # get the output path for this mcorr item
-        output_path = self._df.iloc[index].mcorr.get_output_path()
+        try:
+            output_path = item.mcorr.get_output_path()
+        except BatchItemNotRunError:
+            raise BatchItemNotRunError("Batch item was not run, therefore it cannot have children.")
+        except BatchItemUnsuccessfulError:
+            raise BatchItemUnsuccessfulError("Batch item was unsuccessful, therefore it cannot have children.")
 
         # see if this output path shows up in the input_movie_path of any other batch item
         children = list()
@@ -268,32 +304,33 @@ def get_children(self, index: Union[int, str, UUID]) -> List[UUID]:
                 continue
             if _potential_child == output_path:
                 children.append(r["uuid"])
-        return children
+
+        return self._df[self._df["uuid"].isin(children)]
 
     @warning_experimental()
     @_index_parser
-    def get_parent(self, index: Union[int, str, UUID]) -> Union[UUID, None]:
+    def get_parent(self, identifier: Union[pd.Series, str, UUID]) -> Union[pd.Series, None]:
         """
-        Get the UUID of the batch item whose output was used as
+        Get the row of the batch item whose output was used as
         the input for the batch item at the provided ``index``.
 
         | If a parent exists, it is always an mcorr batch item
 
         Parameters
         ----------
-        index: Union[int, str, UUID]
+        identifier: Union[pd.Series, str, UUID]
             the index of the batch item to get the parent of
 
         Returns
         -------
-        Union[UUID, None]
-            | if ``UUID``, this is the UUID of the batch item whose output was used for the input of the batch item at
-            the provided ``index``
+        Union[pd.Series, None]
+            | if ``pd.Series``, this is the row of the batch item whose output was used
+            for the input of the batch item at the provided ``index``
 
             | if ``None``, the batch item at the provided ``index`` has no parent within the batch dataframe.
 
         """
 
-        input_movie_path = self._df.iloc[index].caiman.get_input_movie_path()
+        input_movie_path = self.uloc(identifier).caiman.get_input_movie_path()
 
         for i, r in self._df.iterrows():
             if not r["algo"] == "mcorr":
@@ -304,7 +341,7 @@ def get_parent(self, index: Union[int, str, UUID]) -> Union[UUID, None]:
                 continue  # can't be a parent if it was unsuccessful
 
             if _potential_parent == input_movie_path:
-                return r["uuid"]
+                return r
 
 
 @pd.api.extensions.register_series_accessor("caiman")