1- """Functions for generating manifest CSV files from File entities."""
1+ """Services for reading and writing Synapse manifest CSV files.
22
3+ This includes reading a manifest CSV file and preparing it for upload, as well
4+ as writing a manifest CSV file from a list of File entities.
5+ """
6+
7+ from __future__ import annotations
8+
9+ import ast
10+ import asyncio
311import csv
412import datetime
513import io
614import os
7- from typing import TYPE_CHECKING , Any , Union
15+ import re
16+ from dataclasses import dataclass
17+ from typing import TYPE_CHECKING , Any , Iterable , NamedTuple , Union
818
19+ from synapseclient import Synapse
920from synapseclient .core import utils
21+ from synapseclient .core .exceptions import (
22+ SynapseFileNotFoundError ,
23+ SynapseHTTPError ,
24+ SynapseProvenanceError ,
25+ )
26+ from synapseclient .core .utils import (
27+ bool_or_none ,
28+ datetime_or_none ,
29+ get_synid_and_version ,
30+ is_synapse_id_str ,
31+ is_url ,
32+ test_import_pandas ,
33+ topolgical_sort ,
34+ )
35+ from synapseclient .operations .factory_operations import FileOptions
36+ from synapseclient .operations .factory_operations import get_async as factory_get_async
1037
1138if TYPE_CHECKING :
12- from synapseclient .models import File
39+ from pandas import DataFrame , Series
40+
41+ from synapseclient .models import UsedEntity , UsedURL
42+ from synapseclient .models .file import File
43+
1344
1445MANIFEST_CSV_FILENAME = "manifest.csv"
1546DEFAULT_GENERATED_MANIFEST_CSV_KEYS = [
2455 "activityName" ,
2556 "activityDescription" ,
2657]
58+ #: Scalar types that Synapse supports as annotation values.
59+ SynapseAnnotationType = datetime .datetime | float | int | bool | str
60+
61+ # Columns that are NOT annotations — stripped before building File.annotations.
62+ # Covers the standard manifest columns plus the extra metadata columns produced
63+ # by the Synapse UI download cart and synapse get-download-list CLI.
64+ NON_ANNOTATION_COLUMNS = frozenset (
65+ [
66+ # Standard manifest columns used directly during upload
67+ "path" ,
68+ "parentId" ,
69+ "ID" ,
70+ "name" ,
71+ "synapseStore" ,
72+ "contentType" ,
73+ "activityName" ,
74+ "activityDescription" ,
75+ "forceVersion" ,
76+ "used" ,
77+ "executed" ,
78+ # Download-list / Synapse UI informational columns — ignore for upload
79+ "error" ,
80+ "versionNumber" ,
81+ "dataFileSizeBytes" ,
82+ "createdBy" ,
83+ "createdOn" ,
84+ "modifiedBy" ,
85+ "modifiedOn" ,
86+ "synapseURL" ,
87+ "dataFileMD5Hex" ,
88+ ]
89+ )
90+
91+ # Regex patterns used when parsing annotation cell values.
92+ # Matches a cell that is a bracket-delimited list, e.g. "[a, b, c]".
93+ # Disallows ']' inside to avoid matching adjacent lists like "[a][b]".
94+ _ARRAY_BRACKET_PATTERN = re .compile (r"^\[[^\]]*\]$" )
95+ # Matches commas that fall outside double-quoted substrings; see https://stackoverflow.com/questions/18893390/splitting-on-comma-outside-quotes
96+ _COMMAS_OUTSIDE_DOUBLE_QUOTES_PATTERN = re .compile (r",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)" )
97+ # Matches a valid Synapse file name: 1–256 characters, each a word character, space, backtick, hyphen, plus sign, period, or parenthesis.
98+ _FILE_NAME_PATTERN = re .compile (r"^[`\w \-\+\.\(\)]{1,256}$" )
2799
28100
29101def _manifest_csv_filename (path : str ) -> str :
@@ -218,7 +290,7 @@ def _write_manifest_data_csv(path: str, keys: list[str], data: list[dict]) -> No
218290 writer .writerow (_convert_manifest_data_row_to_dict (row , keys ))
219291
220292
221- def generate_manifest_csv (all_files : list [" File" ], path : str ) -> None :
293+ def generate_manifest_csv (all_files : list [File ], path : str ) -> None :
222294 """Generates a manifest.csv file based on a list of File entities.
223295
224296 The generated file uses CSV format with comma delimiter and is interoperable
@@ -233,83 +305,6 @@ def generate_manifest_csv(all_files: list["File"], path: str) -> None:
233305 filename = _manifest_csv_filename (path = path )
234306 keys , data = _extract_entity_metadata_for_manifest_csv (all_files = all_files )
235307 _write_manifest_data_csv (filename , keys , data )
236- """Services for reading a Synapse manifest CSV file and preparing it for upload."""
237-
238- from __future__ import annotations
239-
240- import ast
241- import asyncio
242- import datetime
243- import os
244- import re
245- from dataclasses import dataclass
246- from typing import TYPE_CHECKING , Iterable , NamedTuple
247-
248- from synapseclient import Synapse
249- from synapseclient .core .exceptions import (
250- SynapseFileNotFoundError ,
251- SynapseHTTPError ,
252- SynapseProvenanceError ,
253- )
254- from synapseclient .core .utils import (
255- bool_or_none ,
256- datetime_or_none ,
257- get_synid_and_version ,
258- is_synapse_id_str ,
259- is_url ,
260- test_import_pandas ,
261- topolgical_sort ,
262- )
263- from synapseclient .operations .factory_operations import FileOptions
264- from synapseclient .operations .factory_operations import get_async as factory_get_async
265-
266- if TYPE_CHECKING :
267- from pandas import DataFrame , Series
268-
269- from synapseclient .models import UsedEntity , UsedURL
270- from synapseclient .models .file import File
271-
272- #: Scalar types that Synapse supports as annotation values.
273- SynapseAnnotationType = datetime .datetime | float | int | bool | str
274-
275- # Columns that are NOT annotations — stripped before building File.annotations.
276- # Covers the standard manifest columns plus the extra metadata columns produced
277- # by the Synapse UI download cart and synapse get-download-list CLI.
278- NON_ANNOTATION_COLUMNS = frozenset (
279- [
280- # Standard manifest columns used directly during upload
281- "path" ,
282- "parentId" ,
283- "ID" ,
284- "name" ,
285- "synapseStore" ,
286- "contentType" ,
287- "activityName" ,
288- "activityDescription" ,
289- "forceVersion" ,
290- "used" ,
291- "executed" ,
292- # Download-list / Synapse UI informational columns — ignore for upload
293- "error" ,
294- "versionNumber" ,
295- "dataFileSizeBytes" ,
296- "createdBy" ,
297- "createdOn" ,
298- "modifiedBy" ,
299- "modifiedOn" ,
300- "synapseURL" ,
301- "dataFileMD5Hex" ,
302- ]
303- )
304-
305- # Regex patterns used when parsing annotation cell values.
306- # Matches a cell that is a bracket-delimited list, e.g. "[a, b, c]".
307- # Disallows ']' inside to avoid matching adjacent lists like "[a][b]".
308- _ARRAY_BRACKET_PATTERN = re .compile (r"^\[[^\]]*\]$" )
309- # https://stackoverflow.com/questions/18893390/splitting-on-comma-outside-quotes
310- _COMMAS_OUTSIDE_DOUBLE_QUOTES_PATTERN = re .compile (r",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)" )
311- # Valid Synapse file name characters (1–256 chars).
312- _FILE_NAME_PATTERN = re .compile (r"^[`\w \-\+\.\(\)]{1,256}$" )
313308
314309
315310class UploadSyncFile (NamedTuple ):
0 commit comments