Skip to content

Commit d6fd341

Browse files
committed
Add support for external URLs in File storage and update tests
1 parent 8f59e4d commit d6fd341

File tree

3 files changed

+145
-12
lines changed

3 files changed

+145
-12
lines changed

synapseclient/models/file.py

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -656,9 +656,16 @@ def _cannot_store(self) -> bool:
656656
return (
657657
not (
658658
self.id is not None
659-
and (self.path is not None or self.data_file_handle_id is not None)
659+
and (
660+
self.path is not None
661+
or self.external_url is not None
662+
or self.data_file_handle_id is not None
663+
)
664+
)
665+
and not (
666+
(self.path is not None or self.external_url is not None)
667+
and self.parent_id is not None
660668
)
661-
and not (self.path is not None and self.parent_id is not None)
662669
and not (
663670
self.parent_id is not None and self.data_file_handle_id is not None
664671
)
@@ -766,12 +773,12 @@ async def store_async(
766773
File object.
767774
768775
If no Name is specified this will be derived from the file name. This is the
769-
reccommended way to store a file in Synapse.
776+
recommended way to store a file in Synapse.
770777
771778
Please note:
772779
The file, as it appears on disk, will be the file that is downloaded from
773780
Synapse. The name of the actual File is different from the name of the File
774-
Entity in Synapse. It is generally not reccommended to specify a different
781+
Entity in Synapse. It is generally not recommended to specify a different
775782
name for the Entity and the file as it will cause confusion and potential
776783
conflicts later on.
777784
@@ -787,8 +794,9 @@ async def store_async(
787794
The file object.
788795
789796
Raises:
790-
ValueError: If the file does not have an ID and a path, or a path and a
791-
parent ID, or a data file handle ID and a parent ID.
797+
ValueError: If the file does not have an ID and a (path or external_url),
798+
or a (path or external_url) and a parent ID, or a data file handle ID
799+
and a parent ID.
792800
793801
Example: Using this function
794802
File with the ID `syn123` at path `path/to/file.txt`:
@@ -803,6 +811,10 @@ async def store_async(
803811
804812
file_instance = await File(path="path/to/file.txt").store_async(parent=Folder(id="syn456"))
805813
814+
File with an external URL and a parent folder with the ID `syn456`:
815+
816+
file_instance = await File(name="my_file.txt", external_url="s3://bucket/key", parent_id="syn456", synapse_store=False).store_async()
817+
806818
File with a parent and existing file handle (This allows multiple entities to reference the underlying file):
807819
808820
file_instance = await File(data_file_handle_id="123", parent_id="syn456").store_async()
@@ -814,7 +826,7 @@ async def store_async(
814826
await file_instance.change_metadata_async(name="my_new_name_file.txt")
815827
816828
Rename a file, and the name of the file as downloaded
817-
(Does not update the file on disk). Is is reccommended that `name` and
829+
(Does not update the file on disk). Is is recommended that `name` and
818830
`download_as` match to prevent confusion later on:
819831
820832
file_instance = await File(id="syn123", download_file=False).get_async()
@@ -825,11 +837,15 @@ async def store_async(
825837
self.parent_id = parent.id if parent else self.parent_id
826838
if self._cannot_store():
827839
raise ValueError(
828-
"The file must have an (ID with a (path or `data_file_handle_id`)), or a "
829-
"(path with a (`parent_id` or parent with an id)), or a "
840+
"The file must have an (ID with a (path, external_url, or `data_file_handle_id`)), or a "
841+
"((path or external_url) with a (`parent_id` or parent with an id)), or a "
830842
"(data_file_handle_id with a (`parent_id` or parent with an id)) to store."
831843
)
832-
self.name = self.name or (guess_file_name(self.path) if self.path else None)
844+
self.name = self.name or (
845+
guess_file_name(self.path)
846+
if self.path
847+
else (guess_file_name(self.external_url) if self.external_url else None)
848+
)
833849
client = Synapse.get_client(synapse_client=synapse_client)
834850

835851
if existing_file := await self._find_existing_file(synapse_client=client):
@@ -847,8 +863,9 @@ async def store_async(
847863
}
848864
)
849865

850-
if self.path:
851-
self.path = os.path.expanduser(self.path)
866+
if self.path or self.external_url:
867+
if self.path:
868+
self.path = os.path.expanduser(self.path)
852869
async with client._get_parallel_file_transfer_semaphore(
853870
asyncio_event_loop=asyncio.get_running_loop()
854871
):

tests/integration/synapseclient/models/async/test_file_async.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,64 @@ async def test_store_as_external_url_with_content_size_and_md5(
846846
assert file.file_handle.external_url == BOGUS_URL
847847
assert file.file_handle.content_md5 == BOGUS_MD5
848848

849+
async def test_store_external_url_then_update(
850+
self, project_model: Project, file: File
851+
) -> None:
852+
# GIVEN a file with an external URL
853+
file.name = str(uuid.uuid4())
854+
file.synapse_store = False
855+
file.external_url = BOGUS_URL
856+
857+
# WHEN I store the file
858+
file = await file.store_async(parent=project_model, synapse_client=self.syn)
859+
self.schedule_for_cleanup(file.id)
860+
861+
# THEN I expect the file to be stored with the external URL
862+
assert file.id is not None
863+
assert file.external_url == BOGUS_URL
864+
assert file.version_number == 1
865+
assert (
866+
file.file_handle.concrete_type
867+
== "org.sagebionetworks.repo.model.file.ExternalFileHandle"
868+
)
869+
assert file.file_handle.external_url == BOGUS_URL
870+
original_file_handle_id = file.file_handle.id
871+
872+
# WHEN I update the external URL
873+
new_external_url = "https://www.example.com/updated"
874+
file.external_url = new_external_url
875+
file = await file.store_async(synapse_client=self.syn)
876+
877+
# THEN I expect the file to be updated with the new external URL
878+
assert file.external_url == new_external_url
879+
assert file.version_number == 2
880+
assert file.file_handle.external_url == new_external_url
881+
assert file.file_handle.id != original_file_handle_id
882+
883+
async def test_store_external_url_without_synapse_store_false(
884+
self, project_model: Project, file: File
885+
) -> None:
886+
# GIVEN a file with an external URL but synapse_store not explicitly set to False
887+
file.name = str(uuid.uuid4())
888+
file.external_url = BOGUS_URL
889+
890+
# WHEN I store the file
891+
file = await file.store_async(parent=project_model, synapse_client=self.syn)
892+
self.schedule_for_cleanup(file.id)
893+
894+
# THEN I expect the file to be stored with the external URL
895+
# and synapse_store should be implicitly set to False
896+
assert file.id is not None
897+
assert file.external_url == BOGUS_URL
898+
assert file.synapse_store is False
899+
assert (
900+
file.file_handle.concrete_type
901+
== "org.sagebionetworks.repo.model.file.ExternalFileHandle"
902+
)
903+
assert file.file_handle.external_url == BOGUS_URL
904+
assert file.file_handle.bucket_name is None
905+
assert file.file_handle.key is None
906+
849907
async def test_store_conflict_with_existing_object(
850908
self, project_model: Project, file: File
851909
) -> None:

tests/integration/synapseclient/models/synchronous/test_file.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -819,6 +819,64 @@ def test_store_as_external_url_with_content_size_and_md5(
819819
assert file.file_handle.external_url == BOGUS_URL
820820
assert file.file_handle.content_md5 == BOGUS_MD5
821821

822+
def test_store_external_url_then_update(
823+
self, project_model: Project, file: File
824+
) -> None:
825+
# GIVEN a file with an external URL
826+
file.name = str(uuid.uuid4())
827+
file.synapse_store = False
828+
file.external_url = BOGUS_URL
829+
830+
# WHEN I store the file
831+
file = file.store(parent=project_model, synapse_client=self.syn)
832+
self.schedule_for_cleanup(file.id)
833+
834+
# THEN I expect the file to be stored with the external URL
835+
assert file.id is not None
836+
assert file.external_url == BOGUS_URL
837+
assert file.version_number == 1
838+
assert (
839+
file.file_handle.concrete_type
840+
== "org.sagebionetworks.repo.model.file.ExternalFileHandle"
841+
)
842+
assert file.file_handle.external_url == BOGUS_URL
843+
original_file_handle_id = file.file_handle.id
844+
845+
# WHEN I update the external URL
846+
new_external_url = "https://www.example.com/updated"
847+
file.external_url = new_external_url
848+
file = file.store(synapse_client=self.syn)
849+
850+
# THEN I expect the file to be updated with the new external URL
851+
assert file.external_url == new_external_url
852+
assert file.version_number == 2
853+
assert file.file_handle.external_url == new_external_url
854+
assert file.file_handle.id != original_file_handle_id
855+
856+
def test_store_external_url_without_synapse_store_false(
857+
self, project_model: Project, file: File
858+
) -> None:
859+
# GIVEN a file with an external URL but synapse_store not explicitly set to False
860+
file.name = str(uuid.uuid4())
861+
file.external_url = BOGUS_URL
862+
863+
# WHEN I store the file
864+
file = file.store(parent=project_model, synapse_client=self.syn)
865+
self.schedule_for_cleanup(file.id)
866+
867+
# THEN I expect the file to be stored with the external URL
868+
# and synapse_store should be implicitly set to False
869+
assert file.id is not None
870+
assert file.external_url == BOGUS_URL
871+
assert file.synapse_store is False
872+
assert (
873+
file.file_handle.concrete_type
874+
== "org.sagebionetworks.repo.model.file.ExternalFileHandle"
875+
)
876+
assert file.file_handle.external_url == BOGUS_URL
877+
assert file.file_handle.bucket_name is None
878+
assert file.file_handle.key is None
879+
822880
def test_store_conflict_with_existing_object(
823881
self, project_model: Project, file: File
824882
) -> None:

0 commit comments

Comments
 (0)