1+ import pandas as pd
2+ from pandas .testing import assert_frame_equal
13import pytest
4+ from unittest import mock
25
36from genie import dashboard_table_updater as dash_update
47
58
@pytest.fixture
def mock_syn(tmp_path):
    """Provide a mocked Synapse client backed by a small clinical file on disk.

    ``syn.get`` returns an entity whose ``path`` points at that file, and
    ``syn.getTableColumns`` reports a table that only has a ``SAMPLE_ID``
    column. Individual tests may override either return value.
    """
    # Write the fake clinical file into pytest's per-test temporary directory.
    sample_file = tmp_path / "clinical.txt"
    sample_file.write_text("SAMPLE_ID\n S1\n S2\n S3\n ")

    # The entity only needs to expose the on-disk location of that file.
    entity = mock.MagicMock(path=str(sample_file))

    client = mock.MagicMock()
    client.configure_mock(
        **{
            # Every syn.get() call resolves to the clinical entity.
            "get.return_value": entity,
            # Default columns of the existing table: just the key column.
            "getTableColumns.return_value": [{"name": "SAMPLE_ID"}],
        }
    )
    return client
29+
30+
631@pytest .mark .parametrize (
732 "input_string_time, expected_output_time" ,
833 [
@@ -17,3 +42,117 @@ def test_that_string_to_unix_epoch_time_milliseconds_gives_expected_time(
1742):
1843 output = dash_update .string_to_unix_epoch_time_milliseconds (input_string_time )
1944 assert output == expected_output_time
45+
46+
def test_that_update_samples_in_release_table_adds_column_and_calls_update(
    tmp_path, mock_syn
):
    """Check the case where the table does not yet list the release column.

    The default ``mock_syn`` table only reports ``SAMPLE_ID``. The test
    verifies the Synapse calls that are made, the query used to read the
    existing table, and the dataframe handed to ``load._update_table``.
    """
    release = "5.3-consortium"
    samples_in_release_synid = "syn999"
    file_mapping = {"sample": "syn123"}

    # Clinical file contents vs. what is already stored in the table.
    clinical_samples = pd.DataFrame({"SAMPLE_ID": ["S1", "S2", "S3"]})
    table_samples = pd.DataFrame({"SAMPLE_ID": ["S1"], release: [1]})

    with (
        mock.patch.object(pd, "read_csv", return_value=clinical_samples) as read_csv_mock,
        mock.patch.object(
            dash_update.extract, "get_syntabledf", return_value=table_samples
        ) as query_mock,
        mock.patch.object(dash_update.load, "_update_table") as update_mock,
    ):
        dash_update.update_samples_in_release_table(
            syn=mock_syn,
            file_mapping=file_mapping,
            release=release,
            samples_in_release_synid=samples_in_release_synid,
        )

    # Synapse: the clinical file is fetched first, then the table entity.
    expected_get_calls = [
        mock.call(file_mapping["sample"], followLink=True),
        mock.call(samples_in_release_synid),
    ]
    mock_syn.get.assert_has_calls(expected_get_calls)
    mock_syn.getTableColumns.assert_called_once_with(samples_in_release_synid)

    # pd.read_csv: called once, and the frame it returned still has one column.
    read_csv_mock.assert_called_once()
    assert list(read_csv_mock.return_value.columns) == ["SAMPLE_ID"]

    # extract.get_syntabledf: queried for the key column plus the release column.
    # NOTE(review): this literal is compared verbatim with the query string the
    # updater passes in — confirm the whitespace matches the production query.
    query_mock.assert_called_once_with(
        syn=mock_syn,
        query_string=f'SELECT SAMPLE_ID, "{ release } " FROM { samples_in_release_synid } ',
    )

    # load._update_table: inspect the positional arguments of the single call.
    update_mock.assert_called_once()
    positional = update_mock.call_args.args
    _, _, samples_in_releasedf, target_synid, key_columns = positional

    assert target_synid == samples_in_release_synid
    assert key_columns == ["SAMPLE_ID"]

    # New samples (S2, S3) are expected ahead of the pre-existing one (S1).
    expected_df = pd.DataFrame(
        {"SAMPLE_ID": ["S2", "S3", "S1"], "5.3-consortium": [1, 1, 1]}
    )
    assert_frame_equal(samples_in_releasedf.reset_index(drop=True), expected_df)
109+
110+
def test_that_update_samples_in_release_table_existing_column_calls_update_directly(
    mock_syn,
):
    """If the release column already exists, we skip creating new column but still call _update_table.

    The table columns are overridden to include the release column. The test
    then checks that the table entity is not fetched through ``syn.get`` and
    that ``load._update_table`` receives the expected dataframe.
    """
    release = "5.3-consortium"
    file_mapping = {"sample": "syn123"}
    samples_in_release_synid = "syn999"

    # pre-existing release column
    mock_syn.getTableColumns.return_value = [{"name": "SAMPLE_ID"}, {"name": release}]

    clinical_df = pd.DataFrame({"SAMPLE_ID": ["S1", "S2"]})
    existing_df = pd.DataFrame({"SAMPLE_ID": ["S2", "S1"], release: [1, 1]})

    with (
        mock.patch.object(pd, "read_csv", return_value=clinical_df),
        mock.patch.object(
            dash_update.extract, "get_syntabledf", return_value=existing_df
        ),
        mock.patch.object(dash_update.load, "_update_table") as mock_update,
    ):
        dash_update.update_samples_in_release_table(
            syn=mock_syn,
            file_mapping=file_mapping,
            release=release,
            samples_in_release_synid=samples_in_release_synid,
        )

    # the clinical file is still fetched ...
    mock_syn.get.assert_has_calls(
        [
            mock.call(file_mapping["sample"], followLink=True),
        ]
    )
    # ... but the samples_in_release schema is not retrieved by syn.get.
    # assert_has_calls alone cannot prove this because it ignores extra calls,
    # so the absence of the table fetch is checked explicitly.
    assert mock.call(samples_in_release_synid) not in mock_syn.get.call_args_list

    # Fail with a clear message if _update_table was never called, instead of
    # a TypeError when unpacking a None call_args below.
    mock_update.assert_called_once()

    # extract positional arguments of the _update_table call
    _, _, samples_in_releasedf, synid_arg, key_cols = mock_update.call_args.args

    # ensure correct Synapse ID and key columns
    assert synid_arg == samples_in_release_synid
    assert key_cols == ["SAMPLE_ID"]

    # check that new samples and old samples are in expected order
    assert_frame_equal(
        samples_in_releasedf.reset_index(drop=True),
        pd.DataFrame({"SAMPLE_ID": ["S1", "S2"], "5.3-consortium": [1, 1]}),
    )
0 commit comments