Skip to content

Commit 9426c91

Browse files
committed
Merge branch 'main' into develop
Contains critical pushes that are needed in develop
2 parents 929319b + 218e1bd commit 9426c91

File tree

4 files changed

+140
-5
lines changed

4 files changed

+140
-5
lines changed

bin/input_to_database.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@
8484
```
8585
"""
8686
import argparse
87-
from datetime import date
8887
import logging
88+
from datetime import date
8989

9090
from genie import (
9191
config,
@@ -183,9 +183,9 @@ def main(
183183
today = date.today()
184184
table_name = f"Narrow MAF Database - {today}"
185185
# filetype = "vcf2maf"
186-
# syn7208886 is the GENIE staging project to archive maf table
186+
# save maf table to testing or production project as the mode
187187
new_tables = process_functions.create_new_fileformat_table(
188-
syn, "vcf2maf", table_name, project_id, "syn7208886"
188+
syn, "vcf2maf", table_name, project_id, project_id
189189
)
190190
syn.setPermissions(new_tables["newdb_ent"].id, 3326313, [])
191191
genie_config["vcf2maf"] = new_tables["newdb_ent"].id

genie/consortium_to_public.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import sys
66

77
import pandas as pd
8+
import synapseclient
89
import synapseutils
910
from genie import (
1011
create_case_lists,
@@ -18,6 +19,9 @@
1819
stdout_handler = logging.StreamHandler(stream=sys.stdout)
1920
stdout_handler.setLevel(logging.INFO)
2021
logger.addHandler(stdout_handler)
22+
from typing import Dict
23+
24+
from genie.load import _copyRecursive
2125

2226

2327
# TODO: Add to transform.py
@@ -338,7 +342,7 @@ def consortiumToPublic(
338342
)
339343
else:
340344
ent = syn.get(entId, followLink=True, downloadFile=False)
341-
copiedId = synapseutils.copy(
345+
copiedId = _copyRecursive(
342346
syn,
343347
ent,
344348
public_release_preview,

genie/load.py

Lines changed: 129 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55

66
import logging
77
import os
8-
import time
98
import tempfile
9+
import time
1010
from typing import Dict, List, Optional, Union
1111

1212
import pandas as pd
@@ -17,6 +17,9 @@
1717

1818
logger = logging.getLogger(__name__)
1919

20+
import synapseutils as synu
21+
from synapseclient import Entity, File, Folder, Link, Project, Schema
22+
2023

2124
# TODO Edit docstring
2225
def store_file(
@@ -248,3 +251,128 @@ def _update_table(
248251
syn.store(synapseclient.Table(database_synid, update_all_file.name))
249252
# Delete the update file
250253
os.unlink(update_all_file.name)
254+
255+
256+
def _copyRecursive(
257+
syn: synapseclient.Synapse,
258+
entity: str,
259+
destinationId: str,
260+
mapping: Dict[str, str] = None,
261+
skipCopyAnnotations: bool = False,
262+
**kwargs,
263+
) -> Dict[str, str]:
264+
"""
265+
NOTE: This is a copy of the function found here: https://github.com/Sage-Bionetworks/synapsePythonClient/blob/develop/synapseutils/copy_functions.py#L409
266+
This was copied because there is a restriction that doesn't allow for copying entities with access requirements
267+
268+
Recursively copies synapse entites, but does not copy the wikis
269+
270+
Arguments:
271+
syn: A Synapse object with user's login
272+
entity: A synapse entity ID
273+
destinationId: Synapse ID of a folder/project that the copied entity is being copied to
274+
mapping: A mapping of the old entities to the new entities
275+
skipCopyAnnotations: Skips copying the annotations
276+
Default is False
277+
278+
Returns:
279+
a mapping between the original and copied entity: {'syn1234':'syn33455'}
280+
"""
281+
282+
version = kwargs.get("version", None)
283+
setProvenance = kwargs.get("setProvenance", "traceback")
284+
excludeTypes = kwargs.get("excludeTypes", [])
285+
updateExisting = kwargs.get("updateExisting", False)
286+
if mapping is None:
287+
mapping = dict()
288+
# Check that passed in excludeTypes is file, table, and link
289+
if not isinstance(excludeTypes, list):
290+
raise ValueError("Excluded types must be a list")
291+
elif not all([i in ["file", "link", "table"] for i in excludeTypes]):
292+
raise ValueError(
293+
"Excluded types can only be a list of these values: file, table, and link"
294+
)
295+
296+
ent = syn.get(entity, downloadFile=False)
297+
if ent.id == destinationId:
298+
raise ValueError("destinationId cannot be the same as entity id")
299+
300+
if (isinstance(ent, Project) or isinstance(ent, Folder)) and version is not None:
301+
raise ValueError("Cannot specify version when copying a project of folder")
302+
303+
if not isinstance(ent, (Project, Folder, File, Link, Schema, Entity)):
304+
raise ValueError("Not able to copy this type of file")
305+
306+
permissions = syn.restGET("/entity/{}/permissions".format(ent.id))
307+
# Don't copy entities without DOWNLOAD permissions
308+
if not permissions["canDownload"]:
309+
syn.logger.warning(
310+
"%s not copied - this file lacks download permission" % ent.id
311+
)
312+
return mapping
313+
314+
# HACK: These lines of code were removed to allow for data with access requirements to be copied
315+
# https://github.com/Sage-Bionetworks/synapsePythonClient/blob/2909fa778e814f62f6fe6ce2d951ce58c0080a4e/synapseutils/copy_functions.py#L464-L470
316+
317+
copiedId = None
318+
319+
if isinstance(ent, Project):
320+
project = syn.get(destinationId)
321+
if not isinstance(project, Project):
322+
raise ValueError(
323+
"You must give a destinationId of a new project to copy projects"
324+
)
325+
copiedId = destinationId
326+
# Projects include Docker repos, and Docker repos cannot be copied
327+
# with the Synapse rest API. Entity views currently also aren't
328+
# supported
329+
entities = syn.getChildren(
330+
entity, includeTypes=["folder", "file", "table", "link"]
331+
)
332+
for i in entities:
333+
mapping = _copyRecursive(
334+
syn,
335+
i["id"],
336+
destinationId,
337+
mapping=mapping,
338+
skipCopyAnnotations=skipCopyAnnotations,
339+
**kwargs,
340+
)
341+
342+
if not skipCopyAnnotations:
343+
project.annotations = ent.annotations
344+
syn.store(project)
345+
elif isinstance(ent, Folder):
346+
copiedId = synu.copy_functions._copyFolder(
347+
syn,
348+
ent.id,
349+
destinationId,
350+
mapping=mapping,
351+
skipCopyAnnotations=skipCopyAnnotations,
352+
**kwargs,
353+
)
354+
elif isinstance(ent, File) and "file" not in excludeTypes:
355+
copiedId = synu.copy_functions._copyFile(
356+
syn,
357+
ent.id,
358+
destinationId,
359+
version=version,
360+
updateExisting=updateExisting,
361+
setProvenance=setProvenance,
362+
skipCopyAnnotations=skipCopyAnnotations,
363+
)
364+
elif isinstance(ent, Link) and "link" not in excludeTypes:
365+
copiedId = synu.copy_functions._copyLink(
366+
syn, ent.id, destinationId, updateExisting=updateExisting
367+
)
368+
elif isinstance(ent, Schema) and "table" not in excludeTypes:
369+
copiedId = synu.copy_functions._copyTable(
370+
syn, ent.id, destinationId, updateExisting=updateExisting
371+
)
372+
# This is currently done because copyLink returns None sometimes
373+
if copiedId is not None:
374+
mapping[ent.id] = copiedId
375+
syn.logger.info("Copied %s to %s" % (ent.id, copiedId))
376+
else:
377+
syn.logger.info("%s not copied" % ent.id)
378+
return mapping

genie/process_mutation.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,9 @@ def process_mutation_workflow(
197197
Annotated Maf Path. None if there are no valid mutation files.
198198
199199
"""
200+
# setting maf table upload timeout time
201+
syn.table_query_timeout = 50000
202+
200203
# Get valid files
201204
mutation_files = validfiles["fileType"].isin(["maf", "vcf"])
202205
valid_mutation_files = validfiles["path"][mutation_files].tolist()

0 commit comments

Comments
 (0)