Skip to content

Commit 5378d93

Browse files
authored
Merge pull request #45 from brain-bican/advanced_git
save CAS zip support
2 parents b84b48d + fbbe7ee commit 5378d93

File tree

2 files changed

+41
-1
lines changed

2 files changed

+41
-1
lines changed

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
setup(
1010
name="tdta",
11-
version="0.1.0.dev15",
11+
version="0.1.0.dev16",
1212
description="The aim of this project is to provide taxonomy development tools custom actions.",
1313
long_description=README,
1414
long_description_content_type="text/markdown",

src/tdta/tdt_export.py

+40
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,15 @@
33
import ast
44
import json
55
import typing
6+
import zipfile
7+
import shutil
68
from typing import Union, List
79
from contextlib import closing
810
from pathlib import Path
911
from datetime import datetime
1012

1113
from tdta.utils import read_project_config
14+
from tdta.command_line_utils import runcmd
1215
from cas.model import (CellTypeAnnotation, Annotation, Labelset, AnnotationTransfer, AutomatedAnnotation, Review)
1316
from cas.file_utils import write_json_file
1417
from cas.matrix_file.resolver import resolve_matrix_file
@@ -18,6 +21,9 @@
1821

1922
cas_table_names = ["annotation", "labelset", "metadata", "annotation_transfer", "review"]
2023

24+
GITHUB_SIZE_LIMIT = 50 * 1000 * 1000 # 50 MB
25+
# GITHUB_SIZE_LIMIT = 2 * 1000
26+
2127

2228
def export_cas_data(sqlite_db: str, output_file: str, dataset_cache_folder: str = None):
2329
"""
@@ -62,9 +68,43 @@ def export_cas_data(sqlite_db: str, output_file: str, dataset_cache_folder: str
6268
write_json_file(cta, output_file, False)
6369

6470
print("CAS json successfully created at: {}".format(output_file))
71+
ensure_file_size_limit(output_file)
6572
return cta
6673

6774

75+
def ensure_file_size_limit(file_path):
    """
    Checks if the file size exceeds the GitHub size limit and zips the file if needed.
    If the file lives inside a git work tree, the oversized original is unstaged and
    the zip archive is staged in its place.
    Parameters:
        file_path: file path to check
    """
    import shlex  # local import so this fix stays self-contained

    if os.path.getsize(file_path) > GITHUB_SIZE_LIMIT:
        zip_path = zip_file(file_path)
        # dirname is "" for a bare relative filename; fall back to "." so `cd` works.
        folder = os.path.dirname(file_path) or "."
        # Quote every path before shell interpolation: unquoted paths containing
        # spaces or shell metacharacters would break (or inject into) the command.
        quoted_folder = shlex.quote(folder)
        is_git_repo = runcmd("cd {dir} && git rev-parse --is-inside-work-tree".format(dir=quoted_folder)).strip()
        if is_git_repo == "true":
            # Unstage the oversized original and stage the zip instead.
            runcmd("cd {dir} && git reset {file_path}".format(dir=quoted_folder, file_path=shlex.quote(file_path)))
            runcmd("cd {dir} && git add {zip_path}".format(dir=quoted_folder, zip_path=shlex.quote(zip_path)))
88+
89+
90+
def zip_file(file_path):
    """
    Compresses the given file into a single zip archive placed next to it.
    (Note: despite the GitHub size-limit motivation, this does NOT split the
    archive into multiple parts; it produces one `<name>.zip` file.)
    Parameters:
        file_path: file path to zip
    Returns: zipped file path
    """
    folder = os.path.dirname(file_path)
    base_name = os.path.basename(file_path)
    zip_base = os.path.splitext(base_name)[0]

    single_zip_path = os.path.join(folder, f"{zip_base}.zip")
    with zipfile.ZipFile(single_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        # Store under the bare file name (no folder prefix) so the archive
        # extracts flat, next to wherever it is unzipped.
        zipf.write(file_path, base_name)
    print("File zipped due to GitHub size limits: " + single_zip_path)
    return single_zip_path
106+
107+
68108
def parse_metadata_data(cta, sqlite_db, table_name):
69109
"""
70110
Reads 'Metadata' table data into the CAS object

0 commit comments

Comments
 (0)