|
3 | 3 | import os |
4 | 4 | from unittest import mock |
5 | 5 | from unittest.mock import patch |
| 6 | +import pytest |
6 | 7 |
|
7 | 8 | import pandas as pd |
| 9 | +from pandas.testing import assert_frame_equal |
8 | 10 | import synapseclient |
9 | 11 |
|
10 | 12 | from genie import database_to_staging, extract, load |
@@ -106,3 +108,43 @@ def test_store_assay_info_files(syn): |
106 | 108 | used=f"{FILEVIEW_SYNID}.2", |
107 | 109 | ) |
108 | 110 | assert wes_ids == ["A"] |
| 111 | + |
| 112 | + |
@pytest.mark.parametrize(
    ("input_data", "filter_col", "expected_result"),
    [
        # Every row is GERMLINE: the expected frame keeps the columns but no rows.
        pytest.param(
            pd.DataFrame(
                {
                    "SV_Status": ["GERMLINE", "GERMLINE"],
                    "Sample_ID": ["GENIE-1", "GENIE-2"],
                }
            ),
            "SV_Status",
            pd.DataFrame(columns=["SV_Status", "Sample_ID"]),
            id="all_germline",
        ),
        # Mixed statuses: only the SOMATIC row is expected back.
        pytest.param(
            pd.DataFrame(
                {
                    "SV_Status": ["GERMLINE", "SOMATIC"],
                    "Sample_ID": ["GENIE-1", "GENIE-2"],
                }
            ),
            "SV_Status",
            pd.DataFrame({"SV_Status": ["SOMATIC"], "Sample_ID": ["GENIE-2"]}),
            id="some_germline",
        ),
        # No GERMLINE rows: the expected frame equals the input.
        pytest.param(
            pd.DataFrame(
                {
                    "SV_Status": ["SOMATIC", "SOMATIC"],
                    "Sample_ID": ["GENIE-1", "GENIE-2"],
                }
            ),
            "SV_Status",
            pd.DataFrame(
                {
                    "SV_Status": ["SOMATIC", "SOMATIC"],
                    "Sample_ID": ["GENIE-1", "GENIE-2"],
                }
            ),
            id="no_germline",
        ),
    ],
)
def test_that_filter_out_germline_variants_returns_expected(
    input_data, filter_col, expected_result
):
    """Compare filter_out_germline_variants output with the expected frame.

    Each case passes a frame and the name of its status column to
    ``database_to_staging.filter_out_germline_variants`` and asserts that
    the returned frame matches ``expected_result``.
    """
    filtered = database_to_staging.filter_out_germline_variants(
        input_data, filter_col
    )
    # check_index_type=False relaxes only the index class/dtype comparison.
    assert_frame_equal(filtered, expected_result, check_index_type=False)
0 commit comments