Skip to content

Commit 654643f

Browse files
committed
switch to list
1 parent a1fda61 commit 654643f

File tree

2 files changed

+129
-2
lines changed

2 files changed

+129
-2
lines changed

genie/dashboard_table_updater.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,8 @@ def update_oncotree_code_tables(syn, database_mappingdf):
312312

313313
# DISTRIBUTION OF ONCOTREE CODE TABLE UPDATE
314314
oncotree_code_distributiondf = pd.DataFrame(
315-
columns=set(clinicaldf["CENTER"]), index=set(clinicaldf["ONCOTREE_CODE"])
315+
columns=list(set(clinicaldf["CENTER"])),
316+
index=list(set(clinicaldf["ONCOTREE_CODE"])),
316317
)
317318
for center in oncotree_code_distributiondf.columns:
318319
onc_counts = clinicaldf["ONCOTREE_CODE"][
@@ -365,7 +366,8 @@ def update_oncotree_code_tables(syn, database_mappingdf):
365366

366367
# ### DISTRIBUTION OF PRIMARY ONCOTREE CODE TABLE UPDATE
367368
primary_code_distributiondf = pd.DataFrame(
368-
columns=set(clinicaldf["CENTER"]), index=set(clinicaldf["PRIMARY_CODES"])
369+
columns=sorted(set(clinicaldf["CENTER"])),
370+
index=sorted(set(clinicaldf["PRIMARY_CODES"])),
369371
)
370372

371373
for center in primary_code_distributiondf.columns:

tests/test_dashboard_table_updater.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,3 +155,128 @@ def test_that_update_samples_in_release_table_existing_column_calls_update_direc
155155
samples_in_releasedf.reset_index(drop=True),
156156
pd.DataFrame({"SAMPLE_ID": ["S1", "S2"], "5.3-consortium": [1, 1]}),
157157
)
158+
159+
160+
def test_update_oncotree_code_tables_calls_update_with_expected_dataframes():
161+
"""Ensure both _update_table calls receive correctly formatted data."""
162+
syn = mock.MagicMock()
163+
164+
db_map = pd.DataFrame(
165+
{
166+
"Database": ["oncotree", "oncotreeLink", "primaryCode"],
167+
"Id": ["syn111", "syn222", "syn333"],
168+
}
169+
)
170+
171+
# clinicaldf returned from first extract.get_syntabledf
172+
clinicaldf = pd.DataFrame(
173+
{
174+
"SAMPLE_ID": ["A1", "A2", "A3", "A4"],
175+
"CENTER": ["DFCI", "DFCI", "MSK", "MSK"],
176+
"ONCOTREE_CODE": ["BRCA", "LUNG", "LUNG", "SKIN"],
177+
}
178+
)
179+
180+
# Mock oncotree mapping from external URL
181+
oncotree_mapping = {
182+
"BRCA": {"ONCOTREE_PRIMARY_NODE": "BREAST"},
183+
"LUNG": {"ONCOTREE_PRIMARY_NODE": "LUNG"},
184+
"SKIN": {"ONCOTREE_PRIMARY_NODE": "SKIN"},
185+
}
186+
187+
# mock entity returned by syn.get for oncotreeLinkSynId
188+
oncotree_ent = mock.MagicMock()
189+
oncotree_ent.externalURL = "http://mock-oncotree.org"
190+
191+
with (
192+
mock.patch.object(dash_update.extract, "get_syntabledf") as mock_extract,
193+
mock.patch.object(dash_update.load, "_update_table") as mock_update,
194+
mock.patch.object(
195+
dash_update.process_functions,
196+
"get_oncotree_code_mappings",
197+
return_value=oncotree_mapping,
198+
),
199+
mock.patch.object(syn, "get", return_value=oncotree_ent),
200+
):
201+
# Configure get_syntabledf to return clinicaldf first, then mock DB snapshots later
202+
mock_extract.side_effect = [
203+
clinicaldf, # first call: select * from syn7517674
204+
pd.DataFrame(
205+
columns=["Oncotree_Code", "DFCI", "MSK", "Total"]
206+
), # second: oncotree DB
207+
pd.DataFrame(
208+
columns=["Oncotree_Code", "DFCI", "MSK", "Total"]
209+
), # third: primaryCode DB
210+
]
211+
212+
dash_update.update_oncotree_code_tables(syn, db_map)
213+
214+
# Two calls to load._update_table
215+
assert mock_update.call_count == 2
216+
217+
# First call = oncotree_code_distributiondf update
218+
args1, kwargs1 = mock_update.call_args_list[0]
219+
(
220+
passed_syn1,
221+
existing_df1,
222+
new_df1,
223+
synid1,
224+
key_cols1,
225+
) = args1
226+
227+
assert synid1 == "syn111"
228+
assert key_cols1 == ["Oncotree_Code"]
229+
assert passed_syn1 is syn
230+
231+
# expected oncotree_code_distributiondf
232+
expected_df1 = pd.DataFrame(
233+
{
234+
"Oncotree_Code": ["BRCA", "LUNG", "SKIN"],
235+
"DFCI": [1, 1, 0],
236+
"MSK": [0, 1, 1],
237+
"Total": [1, 2, 1],
238+
}
239+
).set_index("Oncotree_Code")
240+
expected_df1 = expected_df1.reset_index() # restore Oncotree_Code as a regular column for comparison with new_df1
241+
242+
# sort by Oncotree_Code to ensure deterministic order
243+
assert_frame_equal(
244+
new_df1.sort_values("Oncotree_Code").reset_index(drop=True),
245+
expected_df1.sort_values("Oncotree_Code").reset_index(drop=True),
246+
check_dtype=False,
247+
)
248+
249+
# Second call = primary_code_distributiondf update
250+
args2, kwargs2 = mock_update.call_args_list[1]
251+
(
252+
passed_syn2,
253+
existing_df2,
254+
new_df2,
255+
synid2,
256+
key_cols2,
257+
) = args2
258+
259+
assert synid2 == "syn333"
260+
assert key_cols2 == ["Oncotree_Code"]
261+
assert passed_syn2 is syn
262+
263+
# expected primary_code_distributiondf
264+
expected_df2 = pd.DataFrame(
265+
{
266+
"Oncotree_Code": ["BREAST", "LUNG", "SKIN"],
267+
"DFCI": [1, 1, 0],
268+
"MSK": [0, 1, 1],
269+
"Total": [1, 2, 1],
270+
}
271+
).set_index("Oncotree_Code")
272+
expected_df2 = expected_df2.reset_index()
273+
274+
assert_frame_equal(
275+
new_df2.sort_values("Oncotree_Code").reset_index(drop=True),
276+
expected_df2.sort_values("Oncotree_Code").reset_index(drop=True),
277+
check_dtype=False,
278+
)
279+
280+
# Verify _update_table was called with to_delete=True
281+
for _, kwargs in mock_update.call_args_list:
282+
assert kwargs["to_delete"] is True

0 commit comments

Comments
 (0)