Merged
9 changes: 5 additions & 4 deletions schematic/store/synapse.py
@@ -19,12 +19,10 @@
import numpy as np
import pandas as pd
import synapseclient
import synapseutils
Member Author

If you're using VS Code, you may see unused imports "grayed out". It's OK to remove them in PRs; the intent is that each PR we contribute should leave the codebase in a better place.

Contributor

Pylint would also catch this, but this module hasn't yet been cleaned up so that it passes, so it isn't included in the pylint check.

from opentelemetry import trace
from synapseclient import Annotations as OldAnnotations
from synapseclient import (
Column,
Entity,
EntityViewSchema,
EntityViewType,
File,
@@ -705,13 +703,16 @@ def getFilesInStorageDataset(
ValueError: Dataset ID not found.
"""
file_list = []
Member Author

Work with Bryan to compare the difference in speed between the dev branch and the prod branch within SigNoz.

Member

#1552 is necessary to filter out the difference between branches in GitHub runs. It's possible to get the data now, it's just a bit more difficult to filter. As of now, the average duration of this function plotted over time:

[screenshot: average duration of getFilesInStorageDataset over time]

Member

Feel free to pull develop into this feature branch, and we can compare the develop branch's performance against this feature branch's.

Member Author

Thanks @BryanFauble. Done

Member

@BryanFauble Nov 26, 2024

@thomasyu888 These are the results over the past 5 days. We can see your branch has a much better average execution time for this function, although for some reason it is similar to bwmac/SCHEMATIC-163/error-message-update.

[screenshot: average execution time by branch over the past 5 days]

We can select on a few fields to filter for what we want, average the duration of the function, then group by a few fields to get these results.

Some other tests:
[screenshots omitted]
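
For context, a minimal sketch of how such durations can end up in SigNoz, assuming spans are created with the OpenTelemetry tracer that synapse.py already imports; the span and attribute names below are illustrative, not the project's actual instrumentation:

from opentelemetry import trace

tracer = trace.get_tracer(__name__)

def traced_get_files(store, dataset_id):
    # Wrap the call in a span so a backend such as SigNoz can average its
    # duration and group the results by attributes like branch or CI run.
    with tracer.start_as_current_span("getFilesInStorageDataset") as span:
        span.set_attribute("git.branch", "develop")  # hypothetical attribute
        return store.getFilesInStorageDataset(datasetId=dataset_id, fileNames=None)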


# HACK: must requery the fileview to get new files, since SynapseStorage will query the last state
# of the fileview which may not contain any new folders in the fileview.
# This is a workaround to fileviews not always containing the latest information
# self.query_fileview(force_requery=True)
# Get path to dataset folder by using childern to avoid cases where the dataset is the scope of the view
if self.storageFileviewTable.empty:
raise ValueError(
f"Fileview {self.storageFileview} is empty, please check the table and the provided synID and try again."
)

# Get path to dataset folder by using children to avoid cases where the dataset is the scope of the view
child_path = self.storageFileviewTable.loc[
self.storageFileviewTable["parentId"] == datasetId, "path"
]
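
As a rough illustration of the children-based lookup above (all values invented): even if the dataset folder has no row of its own because it is the scope of the view, its children's parentId still points back to it, so the folder path can be recovered from a child's path.

import pandas as pd

# Toy stand-in for the Synapse fileview table
fileview = pd.DataFrame(
    {
        "id": ["syn126", "syn125"],
        "parentId": ["syn_mock", "syn_mock"],
        "path": [
            "project/parent_folder/test_file",
            "project/parent_folder/test_folder/test_file_2",
        ],
    }
)

# Find the paths of the dataset's children, then derive the folder path from one
# of them; this mirrors the idea of the lookup, not the exact production logic.
child_path = fileview.loc[fileview["parentId"] == "syn_mock", "path"]
dataset_folder = child_path.iloc[0].rsplit("/", 1)[0]
print(dataset_folder)  # project/parent_folder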
60 changes: 32 additions & 28 deletions tests/integration/test_commands.py
@@ -95,14 +95,14 @@ def test_validate_valid_manifest(self, runner: CliRunner) -> None:
# command has no (python) errors, has exit code 0
assert result.exit_code == 0
# command output has success message
assert result.output.split("\n")[4] == (
result_list = result.output.split("\n")
assert (
"Your manifest has been validated successfully. "
"There are no errors in your manifest, "
"and it can be submitted without any modifications."
)
) in result_list
# command output has no validation errors
for line in result.output.split("\n")[4]:
assert not line.startswith("error")
errors = [line for line in result_list if line.startswith("error")]
assert not errors

def test_validate_invalid_manifest(self, runner: CliRunner) -> None:
"""
@@ -504,9 +504,10 @@ def test_generate_empty_excel_manifest(
os.remove("tests/data/example.Biospecimen.schema.json")

# command output has excel file creation message
result_list = result.output.split("\n")
assert (
result.output.split("\n")[7]
== "Find the manifest template using this Excel file path: ./CLI_empty_excel.xlsx"
"Find the manifest template using this Excel file path: ./CLI_empty_excel.xlsx"
in result_list
)

sheet1 = workbook["Sheet1"]
@@ -665,18 +666,19 @@ def test_generate_bulk_rna_google_sheet_manifest(
# Reset config to its default values
CONFIG.load_config("config_example.yml")

assert result.output.split("\n")[8] == (
"Find the manifest template using this Google Sheet URL:"
)
assert result.output.split("\n")[9].startswith(
"https://docs.google.com/spreadsheets/d/"
)
assert result.output.split("\n")[10] == (
result_list = result.output.split("\n")
assert "Find the manifest template using this Google Sheet URL:" in result_list
assert (
"Find the manifest template using this CSV file path: "
"./CLI_gs_bulk_rna.csv"
)

google_sheet_url = result.output.split("\n")[9]
) in result_list
google_sheet_result = [
result
for result in result_list
if result.startswith("https://docs.google.com/spreadsheets/d/")
]
assert len(google_sheet_result) == 1
google_sheet_url = google_sheet_result[0]

# Download the Google Sheets content as an Excel file and load into openpyxl
export_url = f"{google_sheet_url}/export?format=xlsx"
@@ -908,18 +910,19 @@ def test_generate_bulk_rna_google_sheet_manifest_with_annotations(
os.remove("tests/data/example.BulkRNA-seqAssay.schema.json")
os.remove("./CLI_gs_bulk_rna_annos.csv")

assert result.output.split("\n")[11] == (
"Find the manifest template using this Google Sheet URL:"
)
assert result.output.split("\n")[12].startswith(
"https://docs.google.com/spreadsheets/d/"
)
assert result.output.split("\n")[13] == (
result_list = result.output.split("\n")
assert "Find the manifest template using this Google Sheet URL:" in result_list
assert (
"Find the manifest template using this CSV file path: "
"./CLI_gs_bulk_rna_annos.csv"
)

google_sheet_url = result.output.split("\n")[12]
) in result_list
google_sheet_result = [
result
for result in result_list
if result.startswith("https://docs.google.com/spreadsheets/d/")
]
assert len(google_sheet_result) == 1
google_sheet_url = google_sheet_result[0]

# Download the Google Sheets content as an Excel file and load into openpyxl
export_url = f"{google_sheet_url}/export?format=xlsx"
@@ -1177,10 +1180,11 @@ def test_generate_mock_component_excel_manifest(self, runner: CliRunner) -> None
# TODO: remove with https://sagebionetworks.jira.com/browse/SCHEMATIC-202
# Reset config to its default values
CONFIG.load_config("config_example.yml")

# Command output has excel file message
assert result.output.split("\n")[8] == (
result_list = result.output.split("\n")
assert (
"Find the manifest template using this Excel file path: ./CLI_mock_comp.xlsx"
in result_list
)

sheet1 = workbook["Sheet1"]
7 changes: 5 additions & 2 deletions tests/integration/test_metadata_model.py
@@ -139,7 +139,6 @@ async def test_submit_filebased_manifest_file_and_entities_valid_manifest_submit
dir=create_temp_folder(path=tempfile.gettempdir()),
) as tmp_file:
df.to_csv(tmp_file.name, index=False)

# WHEN the manifest is submitted (Assertions are handled in the helper method)
self._submit_and_verify_manifest(
helpers=helpers,
@@ -229,7 +228,6 @@ async def test_submit_filebased_manifest_file_and_entities_mock_filename(
dir=create_temp_folder(path=tempfile.gettempdir()),
) as tmp_file:
df.to_csv(tmp_file.name, index=False)

# WHEN the manifest is submitted (Assertions are handled in the helper method)
self._submit_and_verify_manifest(
helpers=helpers,
@@ -450,6 +448,11 @@ def _submit_and_verify_manifest(
raise ValueError(
"expected_manifest_id or expected_manifest_name must be provided"
)
# HACK: must requery the fileview to get new files, since SynapseStorage will query the last state
# of the fileview, which may not contain newly created folders.
# This is a workaround for fileviews not always containing the latest information,
# since the tests don't always follow the same process as testing resources are created and destroyed.
synapse_store.query_fileview(force_requery=True)

# Spies
if already_spied:
57 changes: 25 additions & 32 deletions tests/integration/test_store_synapse.py
@@ -400,38 +400,31 @@ def test_mock_get_files_in_storage_dataset(
with patch(
"schematic.store.synapse.CONFIG", return_value=TEST_CONFIG
) as mock_config:
with patch.object(synapse_store, "syn") as mock_synapse_client:
Member Author

This is me misleading Gianna, but there are times when we want a connection with Synapse and there are times we don't.

This is probably a case where it is OK for there to be a direct connection with Synapse to simplify the mocking, since you've included the mocked version in your other test, tests/test_store.py.
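
A loose sketch of the two routes, assuming a synapse_store fixture like the one used in these tests; the data and assertions are illustrative rather than the exact tests in this PR:

import pandas as pd
from unittest.mock import patch

def test_files_with_mocked_fileview(synapse_store):
    # Mocked route: patch the cached fileview and the query method so nothing
    # reaches Synapse; the rows below are invented for illustration.
    mock_df = pd.DataFrame(
        {
            "id": ["syn126"],
            "parentId": ["syn_mock"],
            "path": ["schematic - main/parent_folder/test_file"],
        }
    )
    with patch.object(synapse_store, "storageFileviewTable", mock_df), patch.object(
        synapse_store, "query_fileview", return_value=mock_df
    ):
        files = synapse_store.getFilesInStorageDataset(datasetId="syn_mock", fileNames=None)
        assert files  # exact tuple shape follows the cases in tests/test_store.py

def test_files_with_live_connection(synapse_store):
    # Direct route: the fixture's real Synapse client answers the query; simpler
    # to set up, at the cost of needing credentials and a real dataset ID.
    real_dataset_id = "syn12345678"  # hypothetical ID; substitute a real dataset
    files = synapse_store.getFilesInStorageDataset(datasetId=real_dataset_id, fileNames=None)
    assert isinstance(files, list)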

# AND the appropriate asset view
mock_config.synapse_master_fileview_id = asset_view
# AND the appropriately filtered filenames
if filenames:
files_to_remove = []
for f in expected_files:
retain = False
for name in filenames:
if name in f[1]:
retain = True
if not retain:
files_to_remove.append(f)
# AND the appropriate asset view
mock_config.synapse_master_fileview_id = asset_view
# AND the appropriately filtered filenames
if filenames:
files_to_remove = []
for f in expected_files:
retain = False
for name in filenames:
if name in f[1]:
retain = True
if not retain:
files_to_remove.append(f)

for file in files_to_remove:
expected_files.remove(file)
for file in files_to_remove:
expected_files.remove(file)

mock_table_dataFrame = pd.DataFrame(
expected_files, columns=["id", "path"]
)
mock_table = build_table("Mock Table", "syn123", mock_table_dataFrame)
mock_synapse_client.tableQuery.return_value = mock_table

# WHEN getFilesInStorageDataset is called for the given dataset
dataset_files = synapse_store.getFilesInStorageDataset(
datasetId=dataset_id, fileNames=filenames
)
# WHEN getFilesInStorageDataset is called for the given dataset
dataset_files = synapse_store.getFilesInStorageDataset(
datasetId=dataset_id, fileNames=filenames
)

# THEN the expected files are returned
# AND there are no unexpected files
assert dataset_files == expected_files
# AND the (synId, path) order is correct
synapse_id_regex = re_compile(SYN_ID_REGEX)
if dataset_files:
assert synapse_id_regex.fullmatch(dataset_files[0][0])
# THEN the expected files are returned
# AND there are no unexpected files
assert dataset_files == expected_files
# AND the (synId, path) order is correct
synapse_id_regex = re_compile(SYN_ID_REGEX)
if dataset_files:
assert synapse_id_regex.fullmatch(dataset_files[0][0])
72 changes: 33 additions & 39 deletions tests/test_store.py
@@ -11,7 +11,7 @@
import uuid
from contextlib import nullcontext as does_not_raise
from typing import Any, Callable, Generator
from unittest.mock import AsyncMock, patch
from unittest.mock import AsyncMock, MagicMock, patch

import pandas as pd
import pytest
@@ -464,49 +464,45 @@ def test_getDatasetProject(self, dataset_id, synapse_store):
(
True,
[
("syn126", "schematic - main/parent_folder/test_file"),
("syn126", "syn_mock", "schematic - main/parent_folder/test_file"),
(
"syn125",
"syn_mock",
"schematic - main/parent_folder/test_folder/test_file_2",
),
],
),
(False, [("syn126", "test_file"), ("syn125", "test_file_2")]),
(
False,
[
("syn126", "syn_mock", "test_file"),
("syn125", "syn_mock", "test_file_2"),
],
),
],
)
def test_getFilesInStorageDataset(self, synapse_store, full_path, expected):
mock_table_dataFrame_initial = pd.DataFrame(
{
"id": ["syn_mock"],
"path": ["schematic - main/parent_folder"],
}
)

mock_table_dataFrame_return = pd.DataFrame(
mock_table_dataframe_return = pd.DataFrame(
{
"id": ["syn126", "syn125"],
"parentId": ["syn_mock", "syn_mock"],
"path": [
"schematic - main/parent_folder/test_file",
"schematic - main/parent_folder/test_folder/test_file_2",
],
}
)
mock_table_return = build_table(
"Mock Table", "syn123", mock_table_dataFrame_return
)

with patch.object(synapse_store, "syn") as mocked_synapse_client:
with patch.object(
synapse_store, "storageFileviewTable"
) as mocked_fileview_table:
mocked_fileview_table.storageFileviewTable.return_value = (
mock_table_dataFrame_initial
)
mocked_synapse_client.tableQuery.return_value = mock_table_return
file_list = synapse_store.getFilesInStorageDataset(
datasetId="syn_mock", fileNames=None, fullpath=full_path
)
assert file_list == expected
with patch.object(
synapse_store, "storageFileviewTable", mock_table_dataframe_return
), patch.object(synapse_store, "query_fileview") as mocked_query:
# query_fileview is the function called to get the fileview
mocked_query.return_value = mock_table_dataframe_return

file_list = synapse_store.getFilesInStorageDataset(
datasetId="syn_mock", fileNames=None, fullpath=full_path
)
assert file_list == expected

@pytest.mark.parametrize(
"full_path",
@@ -516,27 +512,25 @@ def test_getFilesInStorageDataset(self, synapse_store, full_path, expected):
],
)
def test_get_files_in_storage_dataset_exception(self, synapse_store, full_path):
mock_table_dataFrame_initial = pd.DataFrame(
mock_table_dataframe_return = pd.DataFrame(
{
"id": ["child_syn_mock"],
"path": ["schematic - main/parent_folder/child_entity"],
"parentId": ["wrong_syn_mock"],
}
)
with patch.object(
synapse_store, "storageFileviewTable", mock_table_dataframe_return
), patch.object(synapse_store, "query_fileview") as mocked_query:
# query_fileview is the function called to get the fileview
mocked_query.return_value = mock_table_dataframe_return

with patch.object(synapse_store, "syn") as mocked_synapse_client:
with patch.object(
synapse_store, "storageFileviewTable"
) as mocked_fileview_table:
mocked_fileview_table.storageFileviewTable.return_value = (
mock_table_dataFrame_initial
with pytest.raises(
LookupError, match="Dataset syn_mock could not be found"
):
synapse_store.getFilesInStorageDataset(
datasetId="syn_mock", fileNames=None, fullpath=full_path
)
with pytest.raises(
LookupError, match="Dataset syn_mock could not be found"
):
file_list = synapse_store.getFilesInStorageDataset(
datasetId="syn_mock", fileNames=None, fullpath=full_path
)

@pytest.mark.parametrize("downloadFile", [True, False])
def test_getDatasetManifest(self, synapse_store, downloadFile):