|
4 | 4 | from ontograph.utils import ( |
5 | 5 | _create_reverse_mapping, |
6 | 6 | _read_mapping_file, |
| 7 | + load_mapping_lut, |
7 | 8 | ) |
8 | 9 |
|
9 | 10 |
|
@@ -183,3 +184,102 @@ def test_create_reverse_mapping_only_target_column(df_with_only_target_column): |
183 | 184 |
|
184 | 185 | assert db_names == set() |
185 | 186 | assert reverse_map == {} |
| 187 | + |
| 188 | + |
| 189 | +# ---- Unit tests for load_mapping_lut |
def test_load_mapping_lut_success(sample_mapping_file):
    """Check that a well-formed mapping file produces the expected LUT."""
    # According to the original note, the fixture file contains a header row
    # (col1, col2, col3) followed by the rows (1, 2, 3) and (a, b, c), all
    # tab-separated. Every non-target cell should map back to its col1 value.
    db_names, lut = load_mapping_lut(
        filepath=sample_mapping_file,
        delimiter='\t',
        target_column='col1',
    )

    assert db_names == {'col2', 'col3'}
    assert lut == {'2': '1', '3': '1', 'b': 'a', 'c': 'a'}
| 209 | + |
| 210 | + |
def test_load_mapping_lut_file_not_found(tmp_path):
    """Test that FileNotFoundError is raised for a non-existent file.

    The missing path is built under pytest's per-test ``tmp_path`` directory,
    so it cannot exist no matter which working directory the suite runs from.
    """
    # Never created on disk: tmp_path starts out empty for every test.
    missing_file = tmp_path / 'non_existent_file.tsv'

    with pytest.raises(FileNotFoundError):
        load_mapping_lut(
            filepath=str(missing_file),
            delimiter='\t',
            target_column='col1',
        )
| 219 | + |
| 220 | + |
def test_load_mapping_lut_empty_file(empty_mapping_file):
    """An empty mapping file must surface pandas' EmptyDataError."""
    call_kwargs = {
        'filepath': empty_mapping_file,
        'delimiter': '\t',
        'target_column': 'col1',
    }
    with pytest.raises(pd.errors.EmptyDataError):
        load_mapping_lut(**call_kwargs)
| 227 | + |
| 228 | + |
def test_load_mapping_lut_non_existent_target_column(sample_mapping_file):
    """Requesting a target column absent from the file must raise KeyError."""
    missing_column = 'non_existent_col'
    with pytest.raises(KeyError):
        load_mapping_lut(
            filepath=sample_mapping_file,
            delimiter='\t',
            target_column=missing_column,
        )
| 237 | + |
| 238 | + |
def test_load_mapping_lut_incorrect_delimiter(sample_mapping_file):
    """A wrong delimiter must raise KeyError since the target column is not parsed."""
    # The sample file is tab-separated (per the original note), so splitting on
    # commas is expected to leave one merged column and no 'col1' to look up.
    wrong_delimiter = ','
    with pytest.raises(KeyError):
        load_mapping_lut(
            filepath=sample_mapping_file,
            delimiter=wrong_delimiter,
            target_column='col1',
        )
| 246 | + |
| 247 | + |
@pytest.fixture
def header_only_mapping_file(tmp_path):
    """Write a TSV mapping file holding just a header row and return its path."""
    target = tmp_path / 'header_only_mapping.tsv'
    target.write_text('col1\tcol2\tcol3\n')
    # Returned as a plain string rather than a Path object.
    return str(target)
| 255 | + |
| 256 | + |
def test_load_mapping_lut_file_with_only_header(header_only_mapping_file):
    """A header-only mapping file should give an empty name set and an empty LUT."""
    outcome = load_mapping_lut(
        filepath=header_only_mapping_file,
        delimiter='\t',
        target_column='col1',
    )
    db_names, lut = outcome

    # Without any data rows there is nothing to map.
    assert db_names == set()
    assert lut == {}
| 265 | + |
| 266 | + |
@pytest.fixture
def duplicate_source_mapping_file(tmp_path):
    """Write a TSV where source ID 'A1' appears under two targets; return its path."""
    target = tmp_path / 'duplicate_source_mapping.tsv'
    # Header row, then T1 -> A1 followed by T2 -> A1 (same source, new target).
    target.write_text('target_id\tsource_A\nT1\tA1\nT2\tA1\n')
    return str(target)
| 274 | + |
| 275 | + |
def test_load_mapping_lut_duplicate_source_ids(duplicate_source_mapping_file):
    """Verify that a repeated source ID resolves to the target listed last."""
    outcome = load_mapping_lut(
        filepath=duplicate_source_mapping_file,
        delimiter='\t',
        target_column='target_id',
    )
    db_names, lut = outcome

    assert db_names == {'source_A'}
    # 'A1' is listed under T1 and then T2; the later row is expected to win.
    assert lut == {'A1': 'T2'}
0 commit comments