|
3 | 3 | import ast
|
4 | 4 | import json
|
5 | 5 | import typing
|
| 6 | +import zipfile |
| 7 | +import shutil |
6 | 8 | from typing import Union, List
|
7 | 9 | from contextlib import closing
|
8 | 10 | from pathlib import Path
|
9 | 11 | from datetime import datetime
|
10 | 12 |
|
11 | 13 | from tdta.utils import read_project_config
|
| 14 | +from tdta.command_line_utils import runcmd |
12 | 15 | from cas.model import (CellTypeAnnotation, Annotation, Labelset, AnnotationTransfer, AutomatedAnnotation, Review)
|
13 | 16 | from cas.file_utils import write_json_file
|
14 | 17 | from cas.matrix_file.resolver import resolve_matrix_file
|
|
18 | 21 |
|
# Tables in the TDT SQLite database that carry CAS content; each is read
# into the corresponding section of the CellTypeAnnotation object on export.
cas_table_names = ["annotation", "labelset", "metadata", "annotation_transfer", "review"]
|
20 | 23 |
|
# GitHub rejects files larger than this; oversized exports are zipped instead.
# (Removed the commented-out 2 KB debug override — dead code.)
GITHUB_SIZE_LIMIT = 50 * 1000 * 1000  # 50 MB
21 | 27 |
|
22 | 28 | def export_cas_data(sqlite_db: str, output_file: str, dataset_cache_folder: str = None):
|
23 | 29 | """
|
@@ -62,9 +68,43 @@ def export_cas_data(sqlite_db: str, output_file: str, dataset_cache_folder: str
|
62 | 68 | write_json_file(cta, output_file, False)
|
63 | 69 |
|
64 | 70 | print("CAS json successfully created at: {}".format(output_file))
|
| 71 | + ensure_file_size_limit(output_file) |
65 | 72 | return cta
|
66 | 73 |
|
67 | 74 |
|
def ensure_file_size_limit(file_path):
    """
    Checks if the file size exceeds the GitHub size limit and zips the file if needed.

    If the oversized file lives inside a git work tree, the original file is
    un-staged and the zip archive is staged in its place, so the commit stays
    under the limit.

    Parameters:
        file_path: file path to check
    """
    # Local import: quoting protects against paths containing spaces or
    # shell metacharacters being interpolated into the shell commands below.
    import shlex

    if os.path.getsize(file_path) > GITHUB_SIZE_LIMIT:
        zip_path = zip_file(file_path)
        folder = os.path.dirname(file_path)
        # `git rev-parse --is-inside-work-tree` prints "true" only inside a work tree.
        is_git_repo = runcmd("cd {dir} && git rev-parse --is-inside-work-tree".format(
            dir=shlex.quote(folder))).strip()
        if is_git_repo == "true":
            # Swap the staged oversized file for its zipped counterpart.
            runcmd("cd {dir} && git reset {file_path}".format(
                dir=shlex.quote(folder), file_path=shlex.quote(file_path)))
            runcmd("cd {dir} && git add {zip_path}".format(
                dir=shlex.quote(folder), zip_path=shlex.quote(zip_path)))
| 89 | + |
def zip_file(file_path):
    """
    Compress a single file into a sibling ``<name>.zip`` archive.

    Used when a generated file exceeds the GitHub size limit. The archive is
    written next to the original file and stores only the file's base name
    (no directory components), so unzipping recreates it in place.
    Note: despite earlier wording, this does NOT split the file into parts —
    it produces exactly one zip archive.

    Parameters:
        file_path: path of the file to compress
    Returns:
        Path of the created zip archive.
    """
    folder = os.path.dirname(file_path)
    base_name = os.path.basename(file_path)
    zip_base = os.path.splitext(base_name)[0]

    single_zip_path = os.path.join(folder, f"{zip_base}.zip")
    with zipfile.ZipFile(single_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        # Archive under the bare name so the extracted file lands beside the zip.
        zipf.write(file_path, base_name)
    print("File zipped due to GitHub size limits: " + single_zip_path)
    return single_zip_path
| 106 | + |
| 107 | + |
68 | 108 | def parse_metadata_data(cta, sqlite_db, table_name):
|
69 | 109 | """
|
70 | 110 | Reads 'Metadata' table data into the CAS object
|
|
0 commit comments