diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 3bda77e11..3d4adb4f3 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -19,12 +19,10 @@ import numpy as np import pandas as pd import synapseclient -import synapseutils from opentelemetry import trace from synapseclient import Annotations as OldAnnotations from synapseclient import ( Column, - Entity, EntityViewSchema, EntityViewType, File, @@ -705,13 +703,16 @@ def getFilesInStorageDataset( ValueError: Dataset ID not found. """ file_list = [] - + # HACK: must requery the fileview to get new files, since SynapseStorage will query the last state + # of the fileview which may not contain any new folders in the fileview. + # This is a workaround to fileviews not always containing the latest information + # self.query_fileview(force_requery=True) + # Get path to dataset folder by using childern to avoid cases where the dataset is the scope of the view if self.storageFileviewTable.empty: raise ValueError( f"Fileview {self.storageFileview} is empty, please check the table and the provided synID and try again." ) - # Get path to dataset folder by using children to avoid cases where the dataset is the scope of the view child_path = self.storageFileviewTable.loc[ self.storageFileviewTable["parentId"] == datasetId, "path" ] diff --git a/tests/integration/test_commands.py b/tests/integration/test_commands.py index 5d244f141..4367d817b 100644 --- a/tests/integration/test_commands.py +++ b/tests/integration/test_commands.py @@ -95,14 +95,14 @@ def test_validate_valid_manifest(self, runner: CliRunner) -> None: # command has no (python) errors, has exit code 0 assert result.exit_code == 0 # command output has success message - assert result.output.split("\n")[4] == ( + result_list = result.output.split("\n") + assert ( "Your manifest has been validated successfully. " "There are no errors in your manifest, " "and it can be submitted without any modifications." - ) + ) in result_list # command output has no validation errors - for line in result.output.split("\n")[4]: - assert not line.startswith("error") + errors = [errors for result in result_list if result.startswith("error")] def test_validate_invalid_manifest(self, runner: CliRunner) -> None: """ @@ -504,9 +504,10 @@ def test_generate_empty_excel_manifest( os.remove("tests/data/example.Biospecimen.schema.json") # command output has excel file creation message + result_list = result.output.split("\n") assert ( - result.output.split("\n")[7] - == "Find the manifest template using this Excel file path: ./CLI_empty_excel.xlsx" + "Find the manifest template using this Excel file path: ./CLI_empty_excel.xlsx" + in result_list ) sheet1 = workbook["Sheet1"] @@ -665,18 +666,19 @@ def test_generate_bulk_rna_google_sheet_manifest( # Reset config to it's default values CONFIG.load_config("config_example.yml") - assert result.output.split("\n")[8] == ( - "Find the manifest template using this Google Sheet URL:" - ) - assert result.output.split("\n")[9].startswith( - "https://docs.google.com/spreadsheets/d/" - ) - assert result.output.split("\n")[10] == ( + result_list = result.output.split("\n") + assert "Find the manifest template using this Google Sheet URL:" in result_list + assert ( "Find the manifest template using this CSV file path: " "./CLI_gs_bulk_rna.csv" - ) - - google_sheet_url = result.output.split("\n")[9] + ) in result_list + google_sheet_result = [ + result + for result in result_list + if result.startswith("https://docs.google.com/spreadsheets/d/") + ] + assert len(google_sheet_result) == 1 + google_sheet_url = google_sheet_result[0] # Download the Google Sheets content as an Excel file and load into openpyxl export_url = f"{google_sheet_url}/export?format=xlsx" @@ -908,18 +910,19 @@ def test_generate_bulk_rna_google_sheet_manifest_with_annotations( os.remove("tests/data/example.BulkRNA-seqAssay.schema.json") os.remove("./CLI_gs_bulk_rna_annos.csv") - assert result.output.split("\n")[11] == ( - "Find the manifest template using this Google Sheet URL:" - ) - assert result.output.split("\n")[12].startswith( - "https://docs.google.com/spreadsheets/d/" - ) - assert result.output.split("\n")[13] == ( + result_list = result.output.split("\n") + assert "Find the manifest template using this Google Sheet URL:" in result_list + assert ( "Find the manifest template using this CSV file path: " "./CLI_gs_bulk_rna_annos.csv" - ) - - google_sheet_url = result.output.split("\n")[12] + ) in result_list + google_sheet_result = [ + result + for result in result_list + if result.startswith("https://docs.google.com/spreadsheets/d/") + ] + assert len(google_sheet_result) == 1 + google_sheet_url = google_sheet_result[0] # Download the Google Sheets content as an Excel file and load into openpyxl export_url = f"{google_sheet_url}/export?format=xlsx" @@ -1177,10 +1180,11 @@ def test_generate_mock_component_excel_manifest(self, runner: CliRunner) -> None # TODO: remove with https://sagebionetworks.jira.com/browse/SCHEMATIC-202 # Reset config to it's default values CONFIG.load_config("config_example.yml") - # Command output has excel file message - assert result.output.split("\n")[8] == ( + result_list = result.output.split("\n") + assert ( "Find the manifest template using this Excel file path: ./CLI_mock_comp.xlsx" + in result_list ) sheet1 = workbook["Sheet1"] diff --git a/tests/integration/test_metadata_model.py b/tests/integration/test_metadata_model.py index 2178a83b8..2f604ed5d 100644 --- a/tests/integration/test_metadata_model.py +++ b/tests/integration/test_metadata_model.py @@ -139,7 +139,6 @@ async def test_submit_filebased_manifest_file_and_entities_valid_manifest_submit dir=create_temp_folder(path=tempfile.gettempdir()), ) as tmp_file: df.to_csv(tmp_file.name, index=False) - # WHEN the manifest is submitted (Assertions are handled in the helper method) self._submit_and_verify_manifest( helpers=helpers, @@ -229,7 +228,6 @@ async def test_submit_filebased_manifest_file_and_entities_mock_filename( dir=create_temp_folder(path=tempfile.gettempdir()), ) as tmp_file: df.to_csv(tmp_file.name, index=False) - # WHEN the manifest is submitted (Assertions are handled in the helper method) self._submit_and_verify_manifest( helpers=helpers, @@ -450,6 +448,11 @@ def _submit_and_verify_manifest( raise ValueError( "expected_manifest_id or expected_manifest_name must be provided" ) + # HACK: must requery the fileview to get new files, since SynapseStorage will query the last state + # of the fileview which may not contain any new folders in the fileview. + # This is a workaround to fileviews not always containing the latest information + # Since the tests don't always follow a similar process as testing resources are created and destroyed + synapse_store.query_fileview(force_requery=True) # Spies if already_spied: diff --git a/tests/integration/test_store_synapse.py b/tests/integration/test_store_synapse.py index f9883dc8f..0deaf8ae5 100644 --- a/tests/integration/test_store_synapse.py +++ b/tests/integration/test_store_synapse.py @@ -400,38 +400,31 @@ def test_mock_get_files_in_storage_dataset( with patch( "schematic.store.synapse.CONFIG", return_value=TEST_CONFIG ) as mock_config: - with patch.object(synapse_store, "syn") as mock_synapse_client: - # AND the appropriate asset view - mock_config.synapse_master_fileview_id = asset_view - # AND the appropriately filtered filenames - if filenames: - files_to_remove = [] - for f in expected_files: - retain = False - for name in filenames: - if name in f[1]: - retain = True - if not retain: - files_to_remove.append(f) + # AND the appropriate asset view + mock_config.synapse_master_fileview_id = asset_view + # AND the appropriately filtered filenames + if filenames: + files_to_remove = [] + for f in expected_files: + retain = False + for name in filenames: + if name in f[1]: + retain = True + if not retain: + files_to_remove.append(f) - for file in files_to_remove: - expected_files.remove(file) + for file in files_to_remove: + expected_files.remove(file) - mock_table_dataFrame = pd.DataFrame( - expected_files, columns=["id", "path"] - ) - mock_table = build_table("Mock Table", "syn123", mock_table_dataFrame) - mock_synapse_client.tableQuery.return_value = mock_table - - # WHEN getFilesInStorageDataset is called for the given dataset - dataset_files = synapse_store.getFilesInStorageDataset( - datasetId=dataset_id, fileNames=filenames - ) + # WHEN getFilesInStorageDataset is called for the given dataset + dataset_files = synapse_store.getFilesInStorageDataset( + datasetId=dataset_id, fileNames=filenames + ) - # THEN the expected files are returned - # AND there are no unexpected files - assert dataset_files == expected_files - # AND the (synId, path) order is correct - synapse_id_regex = re_compile(SYN_ID_REGEX) - if dataset_files: - assert synapse_id_regex.fullmatch(dataset_files[0][0]) + # THEN the expected files are returned + # AND there are no unexpected files + assert dataset_files == expected_files + # AND the (synId, path) order is correct + synapse_id_regex = re_compile(SYN_ID_REGEX) + if dataset_files: + assert synapse_id_regex.fullmatch(dataset_files[0][0]) diff --git a/tests/test_store.py b/tests/test_store.py index c11854b0d..01a99489f 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -11,7 +11,7 @@ import uuid from contextlib import nullcontext as does_not_raise from typing import Any, Callable, Generator -from unittest.mock import AsyncMock, patch +from unittest.mock import AsyncMock, MagicMock, patch import pandas as pd import pytest @@ -464,49 +464,45 @@ def test_getDatasetProject(self, dataset_id, synapse_store): ( True, [ - ("syn126", "schematic - main/parent_folder/test_file"), + ("syn126", "syn_mock", "schematic - main/parent_folder/test_file"), ( "syn125", + "syn_mock", "schematic - main/parent_folder/test_folder/test_file_2", ), ], ), - (False, [("syn126", "test_file"), ("syn125", "test_file_2")]), + ( + False, + [ + ("syn126", "syn_mock", "test_file"), + ("syn125", "syn_mock", "test_file_2"), + ], + ), ], ) def test_getFilesInStorageDataset(self, synapse_store, full_path, expected): - mock_table_dataFrame_initial = pd.DataFrame( - { - "id": ["syn_mock"], - "path": ["schematic - main/parent_folder"], - } - ) - - mock_table_dataFrame_return = pd.DataFrame( + mock_table_dataframe_return = pd.DataFrame( { "id": ["syn126", "syn125"], + "parentId": ["syn_mock", "syn_mock"], "path": [ "schematic - main/parent_folder/test_file", "schematic - main/parent_folder/test_folder/test_file_2", ], } ) - mock_table_return = build_table( - "Mock Table", "syn123", mock_table_dataFrame_return - ) - with patch.object(synapse_store, "syn") as mocked_synapse_client: - with patch.object( - synapse_store, "storageFileviewTable" - ) as mocked_fileview_table: - mocked_fileview_table.storageFileviewTable.return_value = ( - mock_table_dataFrame_initial - ) - mocked_synapse_client.tableQuery.return_value = mock_table_return - file_list = synapse_store.getFilesInStorageDataset( - datasetId="syn_mock", fileNames=None, fullpath=full_path - ) - assert file_list == expected + with patch.object( + synapse_store, "storageFileviewTable", mock_table_dataframe_return + ), patch.object(synapse_store, "query_fileview") as mocked_query: + # query_fileview is the function called to get the fileview + mocked_query.return_value = mock_table_dataframe_return + + file_list = synapse_store.getFilesInStorageDataset( + datasetId="syn_mock", fileNames=None, fullpath=full_path + ) + assert file_list == expected @pytest.mark.parametrize( "full_path", @@ -516,27 +512,25 @@ def test_getFilesInStorageDataset(self, synapse_store, full_path, expected): ], ) def test_get_files_in_storage_dataset_exception(self, synapse_store, full_path): - mock_table_dataFrame_initial = pd.DataFrame( + mock_table_dataframe_return = pd.DataFrame( { "id": ["child_syn_mock"], "path": ["schematic - main/parent_folder/child_entity"], "parentId": ["wrong_syn_mock"], } ) + with patch.object( + synapse_store, "storageFileviewTable", mock_table_dataframe_return + ), patch.object(synapse_store, "query_fileview") as mocked_query: + # query_fileview is the function called to get the fileview + mocked_query.return_value = mock_table_dataframe_return - with patch.object(synapse_store, "syn") as mocked_synapse_client: - with patch.object( - synapse_store, "storageFileviewTable" - ) as mocked_fileview_table: - mocked_fileview_table.storageFileviewTable.return_value = ( - mock_table_dataFrame_initial + with pytest.raises( + LookupError, match="Dataset syn_mock could not be found" + ): + synapse_store.getFilesInStorageDataset( + datasetId="syn_mock", fileNames=None, fullpath=full_path ) - with pytest.raises( - LookupError, match="Dataset syn_mock could not be found" - ): - file_list = synapse_store.getFilesInStorageDataset( - datasetId="syn_mock", fileNames=None, fullpath=full_path - ) @pytest.mark.parametrize("downloadFile", [True, False]) def test_getDatasetManifest(self, synapse_store, downloadFile):