Saving to a subdirectory of a folder registered with DataManager is not working.
Target folder: /opt/data/data/processed
Notebook folder: /opt/data/notebooks
DataManager ls output:
data/
formatted/
1 csv items
old/
5 mixed items
original/
5 mixed items
processed/
1 csv items
Code:
# Data management (only run first time, then it's automatic)
# DataManager.register_project('schupbach', '/opt/data/data/')
# Load data manager
dm = DataManager('schupbach')
# Import SPo2 database
spo2 = dm['original']['SPO2.csv'].load(sep=',',
nrows=100000,
encoding='utf-16',
error_bad_lines=True)
# Preformat the dataset
spo2 = ingestion.preformat(spo2)
dm['formatted'].save(spo2, 'test.csv', ingestion.preformat)
Stack trace:
---------------------------------------------------------------------------
InvalidGitRepositoryError Traceback (most recent call last)
<ipython-input-3-2a9f3b79d4da> in <module>
14 spo2 = ingestion.preformat(spo2)
15
---> 16 dm['formatted'].save(spo2, 'test.csv', ingestion.preformat)
~/code/tools/data-traffic-control/datatc/data_directory.py in save(self, data, file_name, transformer_func, enforce_clean_git, get_git_hash_from, **kwargs)
86 self.save_file(data, file_name, **kwargs)
87 else:
---> 88 self.transform_and_save(data, transformer_func, file_name, enforce_clean_git, get_git_hash_from, **kwargs)
89
90 def save_file(self, data: Any, file_name: str, **kwargs) -> None:
~/code/tools/data-traffic-control/datatc/data_directory.py in transform_and_save(self, data, transformer_func, file_name, enforce_clean_git, get_git_hash_from, **kwargs)
95 def transform_and_save(self, data: Any, transformer_func: Callable, file_name: str, enforce_clean_git=True,
96 get_git_hash_from: Any = None, **kwargs) -> None:
---> 97 new_transform_dir_path = TransformedDataInterface.save(data, transformer_func, parent_path=self.path,
98 file_name=file_name, enforce_clean_git=enforce_clean_git,
99 get_git_hash_from=get_git_hash_from, **kwargs)
~/code/tools/data-traffic-control/datatc/data_transformer.py in save(cls, data, transformer_func, parent_path, file_name, enforce_clean_git, get_git_hash_from, **kwargs)
106 if enforce_clean_git:
107 if transformer_func_in_repo:
--> 108 check_for_uncommitted_git_changes_at_path(transformer_func_file_repo_path)
109 else:
110 raise RuntimeError('`transformer_func` is not tracked in a git repo.'
~/code/tools/data-traffic-control/datatc/git_utilities.py in check_for_uncommitted_git_changes_at_path(repo_path)
63 True: uncommitted changes found. Repo is not valid.
64 """
---> 65 repo = Repo(repo_path, search_parent_directories=True)
66
67 try:
/usr/local/lib/python3.8/dist-packages/git/repo/base.py in __init__(self, path, odbt, search_parent_directories, expand_vars)
179
180 if self.git_dir is None:
--> 181 raise InvalidGitRepositoryError(epath)
182
183 self._bare = False
InvalidGitRepositoryError: /opt/data/notebooks
Saving to a subdirectory of a folder registered with DataManager is not working.
Target folder: /opt/data/data/processed
Notebook folder: /opt/data/notebooks
DataManager ls output:
data/ formatted/ 1 csv items old/ 5 mixed items original/ 5 mixed items processed/ 1 csv items
Code:
Stack trace: