Skip to content

add caiman.backup_dataframe(), change index parsing #119

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 27 additions & 13 deletions mesmerize_core/caiman_extensions/_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from functools import wraps
from typing import Union
from uuid import UUID
import pandas as pd

from mesmerize_core.caiman_extensions._batch_exceptions import BatchItemNotRunError, BatchItemUnsuccessfulError, \
WrongAlgorithmExtensionError
Expand Down Expand Up @@ -30,27 +31,40 @@ def wrapper(self, *args, **kwargs):


def _index_parser(func):
    """
    Decorator that normalizes the batch item identifier passed to ``func``.

    The identifier is read from the ``identifier`` keyword argument if it is present,
    otherwise from the first positional argument. It may be given as the batch item
    itself (a ``pandas.Series`` row with a ``"uuid"`` entry), a ``UUID``, or a ``str``.
    ``func`` always receives it as a ``str`` uuid, in the same position or keyword
    in which it was passed.

    Raises
    ------
    TypeError
        If no identifier is passed, or it is not a ``pandas.Series``, ``UUID`` or ``str``

    ValueError
        If no row of ``instance._df`` has the given uuid

    """
    @wraps(func)
    def _parser(instance, *args, **kwargs):
        passed_by_keyword = "identifier" in kwargs

        if passed_by_keyword:
            u: Union[pd.Series, str, UUID] = kwargs["identifier"]
        elif args:
            u = args[0]  # always first positional arg
        else:
            # fail with a clear message instead of an unbound local further down
            raise TypeError(
                "A batch item identifier must be passed, either as the first "
                "positional argument or as the `identifier` keyword argument"
            )

        if not isinstance(u, (pd.Series, UUID, str)):
            raise TypeError(
                "Passed index must be one of the following types:\n"
                "`pandas.Series`, `UUID`, `str`"
            )

        # if the batch item itself was passed
        if isinstance(u, pd.Series):
            u = u["uuid"]

        # the "uuid" column is compared against the string form of the identifier
        u = str(u)

        # make sure it exists in the dataframe
        if not (instance._df["uuid"] == u).any():
            raise ValueError(f"No batch item found with uuid: {u}")

        # hand the normalized uuid back the same way it came in
        if passed_by_keyword:
            kwargs["identifier"] = u
        else:
            args = (u, *args[1:])

        return func(instance, *args, **kwargs)
    return _parser
73 changes: 55 additions & 18 deletions mesmerize_core/caiman_extensions/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from itertools import chain
from collections import Counter
from time import time
from datetime import datetime

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -126,6 +127,32 @@ def save_to_disk(self):
shutil.copyfile(bak, path)
raise IOError(f"Could not save dataframe to disk.")

def backup_dataframe(self, filename: str = None) -> Path:
    """
    Backup the DataFrame to a new pickle file within the same batch directory.
    By default the backup is named ``<orig_filename>.bak.YYYY-MM-DD-HH:MM:SS``

    .. warning:: **This only backs up the DataFrame pickle file. It DOES NOT backup the mcorr & cnmf data files!**

    Parameters
    ----------
    filename: Optional[str]
        If provided, the backup is written under this file name, next to the batch file,
        instead of the default name with the datetimestamp

    Returns
    -------
    Path
        Path to the backup file.

    """
    path = self._df.paths.get_batch_path()

    if filename is not None:
        # keep the backup in the batch directory; `with_name` rejects names containing path separators
        back_path = path.with_name(filename)
    else:
        # NOTE(review): ":" is not a valid file name character on Windows - confirm target platforms
        timestamp = datetime.now().strftime("%Y-%m-%d-%H:%M:%S")
        back_path = path.with_suffix(path.suffix + f".bak.{timestamp}")

    shutil.copyfile(path, back_path)

    return back_path

@_index_parser
def remove_item(self, index: Union[int, str, UUID], remove_data: bool = True, safe_removal: bool = True):
"""
Expand Down Expand Up @@ -230,34 +257,43 @@ def get_params_diffs(self, algo: str, item_name: str) -> pd.Series:

@warning_experimental()
@_index_parser
def get_children(self, index: Union[int, str, UUID]) -> List[UUID]:
def get_children(self, identifier: Union[pd.Series, str, UUID]) -> pd.DataFrame:
"""
For the *motion correction* batch item at the provided ``index``,
returns a list of UUIDs for *CNMF(E)* batch items that use the
For the provided **motion correction** batch item identifier,
returns a DataFrame of *CNMF(E)* batch items that use the
output of this motion correction batch item.

| Provide the batch item directly as a pandas Series (row), or the batch item's uuid.

| Note: Only Motion Correction items have children, CNMF(E) items do not have children.

Parameters
----------
index: Union[int, str, UUID]
the index of the mcorr item to get the children of
identifier: Union[pd.Series, str, UUID]
mcorr batch item as a pandas Series, or the uuid of the mcorr item to get the children of

Returns
-------
List[UUID]
List of UUIDs of child CNMF items
pd.DataFrame
DataFrame of child CNMF(E) batch items

"""

if not self._df.iloc[index]["algo"] == "mcorr":
item = self.uloc(identifier)

if not item["algo"] == "mcorr":
raise TypeError(
"`caiman.get_children()` extension maybe only be used with "
"mcorr batch items, CNMF(E) items do not have children."
)

# get the output path for this mcorr item
output_path = self._df.iloc[index].mcorr.get_output_path()
try:
output_path = item.mcorr.get_output_path()
except BatchItemNotRunError:
raise BatchItemNotRunError("Batch item was not run, therefore it cannot have children.")
except BatchItemUnsuccessfulError:
raise BatchItemUnsuccessfulError("Batch item was unsuccessful, therefore it cannot have children.")

# see if this output path shows up in the input_movie_path of any other batch item
children = list()
Expand All @@ -268,32 +304,33 @@ def get_children(self, index: Union[int, str, UUID]) -> List[UUID]:
continue
if _potential_child == output_path:
children.append(r["uuid"])
return children

return self._df[self._df["uuid"].isin(children)]

@warning_experimental()
@_index_parser
def get_parent(self, index: Union[int, str, UUID]) -> Union[UUID, None]:
def get_parent(self, identifier: Union[pd.DataFrame, str, UUID]) -> Union[pd.Series, None]:
"""
Get the UUID of the batch item whose output was used as
Get the row of the batch item whose output was used as
the input for the batch item at the provided ``index``.

| If a parent exists, it is always an mcorr batch item

Parameters
----------
index: Union[int, str, UUID]
identifier: Union[pd.Series, str, UUID]
batch item as a pandas Series, or the uuid of the batch item to get the parent of

Returns
-------
Union[UUID, None]
| if ``UUID``, this is the UUID of the batch item whose output was used for the input of the batch item at
the provided ``index``
Union[pd.Series, None]
| if ``pd.Series``, this is the row of the batch item whose output was used
for the input of the batch item at the provided ``index``

| if ``None``, the batch item at the provided ``index`` has no parent within the batch dataframe.

"""
input_movie_path = self._df.iloc[index].caiman.get_input_movie_path()
input_movie_path = self.uloc(identifier).caiman.get_input_movie_path()

for i, r in self._df.iterrows():
if not r["algo"] == "mcorr":
Expand All @@ -304,7 +341,7 @@ def get_parent(self, index: Union[int, str, UUID]) -> Union[UUID, None]:
continue # can't be a parent if it was unsuccessful

if _potential_parent == input_movie_path:
return r["uuid"]
return r


@pd.api.extensions.register_series_accessor("caiman")
Expand Down