Skip to content

Commit 5cc0467

Browse files
test: add unit tests for the function load_mapping_lut
1 parent b50c630 commit 5cc0467

1 file changed

Lines changed: 100 additions & 0 deletions

File tree

tests/test_utils.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from ontograph.utils import (
55
_create_reverse_mapping,
66
_read_mapping_file,
7+
load_mapping_lut,
78
)
89

910

@@ -183,3 +184,102 @@ def test_create_reverse_mapping_only_target_column(df_with_only_target_column):
183184

184185
assert db_names == set()
185186
assert reverse_map == {}
187+
188+
189+
# ---- Unit tests for load_mapping_lut
190+
def test_load_mapping_lut_success(sample_mapping_file):
    """Check that a well-formed mapping file yields the expected LUT."""
    db_names, lut = load_mapping_lut(
        filepath=sample_mapping_file,
        delimiter='\t',
        target_column='col1',
    )

    # sample_mapping_file (defined elsewhere) is expected to contain:
    #   col1\tcol2\tcol3\n1\t2\t3\na\tb\tc
    # With 'col1' as the target column, the other two columns are reported
    # as database names and each of their values points back to the 'col1'
    # value found on the same row.
    assert db_names == {'col2', 'col3'}
    assert lut == {'2': '1', '3': '1', 'b': 'a', 'c': 'a'}
209+
210+
211+
def test_load_mapping_lut_file_not_found(tmp_path):
    """Test that FileNotFoundError is raised for a non-existent file.

    The path is built inside pytest's per-test ``tmp_path`` directory, in
    which this test creates nothing, so the file is missing regardless of the
    working directory the test suite is launched from.
    """
    missing_file = tmp_path / 'non_existent_file.tsv'

    with pytest.raises(FileNotFoundError):
        load_mapping_lut(
            # Passed as str, matching what the file-based fixtures return.
            filepath=str(missing_file),
            delimiter='\t',
            target_column='col1',
        )
219+
220+
221+
def test_load_mapping_lut_empty_file(empty_mapping_file):
    """An empty mapping file must raise pandas' EmptyDataError."""
    lut_kwargs = {
        'filepath': empty_mapping_file,
        'delimiter': '\t',
        'target_column': 'col1',
    }

    with pytest.raises(pd.errors.EmptyDataError):
        load_mapping_lut(**lut_kwargs)
227+
228+
229+
def test_load_mapping_lut_non_existent_target_column(sample_mapping_file):
    """Asking for a target column missing from the file must raise KeyError."""
    unknown_column = 'non_existent_col'

    with pytest.raises(KeyError):
        load_mapping_lut(
            filepath=sample_mapping_file,
            delimiter='\t',
            target_column=unknown_column,
        )
237+
238+
239+
def test_load_mapping_lut_incorrect_delimiter(sample_mapping_file):
    """A wrong delimiter leaves the columns unparsed, so KeyError is expected."""
    # Reading the tab-separated fixture with ',' is expected to collapse
    # every row into a single column, so 'col1' is not a usable column name.
    wrong_delimiter = ','

    with pytest.raises(KeyError):
        load_mapping_lut(
            filepath=sample_mapping_file,
            delimiter=wrong_delimiter,
            target_column='col1',
        )
246+
247+
248+
@pytest.fixture
def header_only_mapping_file(tmp_path):
    """Write a TSV holding just the header line and return its path as str."""
    header_only_path = tmp_path / 'header_only_mapping.tsv'
    # Three tab-separated column names followed by a newline; no data rows.
    header_only_path.write_text('col1\tcol2\tcol3\n')
    return str(header_only_path)
255+
256+
257+
def test_load_mapping_lut_file_with_only_header(header_only_mapping_file):
    """A header-only mapping file should give an empty name set and empty map."""
    result = load_mapping_lut(
        filepath=header_only_mapping_file,
        delimiter='\t',
        target_column='col1',
    )
    db_names, lut = result

    # The file has no data rows, so both outputs are expected to be empty.
    assert db_names == set()
    assert lut == {}
265+
266+
267+
@pytest.fixture
def duplicate_source_mapping_file(tmp_path):
    """Write a TSV where one source ID appears under two targets; return its path."""
    duplicate_path = tmp_path / 'duplicate_source_mapping.tsv'
    # Source ID 'A1' is listed for both 'T1' and 'T2'.
    duplicate_path.write_text('target_id\tsource_A\nT1\tA1\nT2\tA1\n')
    return str(duplicate_path)
274+
275+
276+
def test_load_mapping_lut_duplicate_source_ids(duplicate_source_mapping_file):
    """When a source ID repeats, the mapping from its last row should be kept."""
    lut_kwargs = {
        'filepath': duplicate_source_mapping_file,
        'delimiter': '\t',
        'target_column': 'target_id',
    }
    db_names, lut = load_mapping_lut(**lut_kwargs)

    assert db_names == {'source_A'}
    # 'A1' appears on the T1 row and then the T2 row; the later one is expected.
    assert lut == {'A1': 'T2'}

0 commit comments

Comments
 (0)