11import os
2+ from pathlib import Path
23from unittest .mock import patch
34import pytest
45from file_processing import File
56from file_processing .errors import FileProcessingFailedError
67from file_processing_test_data import get_test_files_path
78
# Shared parametrization data for the CSV tests in this module.
test_files_path = get_test_files_path()

# Comma-separated argument names, in the same order as the fields of each
# tuple in `values`.
variable_names = (
    "path, text_length, encoding, num_rows, num_cols, num_cells, empty_cells"
)

# One tuple per sample CSV: the file path followed by the metadata values the
# tests expect for that file.
values = [
    (test_files_path / '2021_Census_English.csv', 6084302, 'ISO-8859-1', 36835, 23, 847205, 253932),
    (test_files_path / 'Approved_Schools_2023_10_01.csv', 1403268, 'UTF-8-SIG', 5385, 13, 70005, 73),
]
1518
16-
1719@pytest .mark .parametrize (variable_names , values )
1820def test_csv_metadata (path , text_length , encoding , num_rows , num_cols , num_cells , empty_cells ):
1921 file_obj = File (path )
@@ -24,22 +26,58 @@ def test_csv_metadata(path, text_length, encoding, num_rows, num_cols, num_cells
2426 assert file_obj .metadata ['num_cells' ] == num_cells
2527 assert file_obj .metadata ['empty_cells' ] == empty_cells
2628
27-
@pytest.mark.parametrize(variable_names, values)
def test_save_csv_metadata(copy_file, text_length, encoding, num_rows, num_cols, num_cells, empty_cells):
    """Run the ``test_csv_metadata`` assertions against ``copy_file``.

    ``copy_file`` is a fixture defined outside this module's visible code;
    presumably it provides the path of a saved copy of the parametrized
    ``path`` -- TODO confirm against the fixture definition.
    """
    # Reuse the metadata assertions rather than duplicating them here.
    test_csv_metadata(copy_file, text_length, encoding, num_rows, num_cols, num_cells, empty_cells)
3133
32-
@pytest.mark.parametrize("valid_path", [p for p, *_ in values])
def test_csv_invalid_save_location(valid_path):
    """Saving under a folder that does not exist must raise ``FileProcessingFailedError``."""
    csv_file = File(valid_path)
    # Keep the original file name but place it under a directory that is
    # expected not to exist, so the save cannot succeed.
    invalid_path = '/non_existent_folder/' + os.path.basename(str(valid_path))
    with pytest.raises(FileProcessingFailedError):
        csv_file.processor.save(invalid_path)
4040
@pytest.mark.parametrize(variable_names, values)
def test_not_opening_file(path, text_length, encoding, num_rows, num_cols, num_cells, empty_cells):
    """Constructing ``File`` with ``open_file=False`` must not call the built-in ``open``.

    Only ``path`` is used; the other arguments are accepted because the
    shared parametrization supplies them.
    """
    with patch('builtins.open', autospec=True) as mock_open:
        File(path, open_file=False)
        mock_open.assert_not_called()
46+
@pytest.mark.parametrize(variable_names, values)
@pytest.mark.parametrize("algorithm", ["md5", "sha256"])
def test_csv_copy_with_integrity(path, tmp_path, algorithm,
                                 text_length, encoding, num_rows, num_cols, num_cells, empty_cells):
    """Copy a CSV with ``verify_integrity=True`` and check the copy matches the source.

    The test runs once per sample file and per hash algorithm. It compares
    the copy's metadata with the expected values and the copy's hash with the
    hash computed from the source before copying.
    """
    # 1. Compute the hash of the source without opening it for processing.
    file_obj = File(path, open_file=False)
    original_hash = file_obj.processor.compute_hash(algorithm)

    # 2. Copy into pytest's per-test temporary directory, keeping the file name.
    dest = tmp_path / Path(path).name
    file_obj.copy(str(dest), verify_integrity=True)

    # 3. Re-open the copy and verify its metadata equals the expected values.
    saved = File(str(dest))
    assert len(saved.metadata['text']) == text_length
    assert saved.metadata['encoding'] == encoding
    assert saved.metadata['num_rows'] == num_rows
    assert saved.metadata['num_cols'] == num_cols
    assert saved.metadata['num_cells'] == num_cells
    assert saved.metadata['empty_cells'] == empty_cells

    # 4. The copy must hash to the same value as the source.
    assert saved.processor.compute_hash(algorithm) == original_hash
70+
@pytest.mark.parametrize("path", [p for p, *_ in values])
def test_csv_copy_integrity_failure(path, tmp_path, monkeypatch):
    """A corrupted copy must fail with an 'Integrity check failed' error.

    ``shutil.copy2`` is replaced with a stub that writes different content to
    the destination. NOTE(review): this assumes ``File.copy`` performs the
    raw copy through ``shutil.copy2`` -- confirm against the implementation.
    """
    import shutil

    file_obj = File(path, open_file=False)

    def corrupt(src, dest, *, follow_symlinks=True):
        # Ignore the source and write unrelated content to the destination.
        with open(dest, 'w', encoding='utf-8') as f:
            f.write('CORRUPTED!')

    monkeypatch.setattr(shutil, 'copy2', corrupt)

    # `match` checks the message once the exception has been captured; a
    # plain assert placed after the raising call inside the `with` body
    # would never execute.
    with pytest.raises(FileProcessingFailedError, match='Integrity check failed'):
        file_obj.copy(str(tmp_path / Path(path).name), verify_integrity=True)
0 commit comments