Skip to content

Commit 8aac7d0

Browse files
authored
Merge pull request #20 from UBC-MDS/ali-mileston-2-fresh
Approved PR for file_size_util func and tests
2 parents bfbddfc + d424ab3 commit 8aac7d0

2 files changed

Lines changed: 263 additions & 4 deletions

File tree

src/tame_your_files/file_size_utilities.py

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
"""
22
File size utilities for analyzing disk usage.
33
"""
4+
import heapq
45
from dataclasses import dataclass
56
from pathlib import Path
67

7-
88
@dataclass(frozen=True)
99
class FileInfo:
1010
"""
def largest_files(root: Path, n: int = 10) -> list[FileInfo]:
    """
    Return the ``n`` largest regular files found under ``root``.

    The tree below ``root`` is walked recursively. Each regular file is
    recorded with its resolved path and its size in bytes.

    Parameters
    ----------
    root : Path
        Directory whose contents are scanned recursively.
    n : int, default 10
        Maximum number of files to return. Zero or a negative value
        yields an empty list.

    Returns
    -------
    list[FileInfo]
        At most ``n`` entries ordered by size, largest first. Files of
        equal size are ordered by their path string, so both the selection
        and the ordering are independent of traversal order.

    Notes
    -----
    - Only regular files are considered.
    - Directories are ignored.
    - Unreadable files, broken symlinks, and permission errors
      are skipped silently.
    """
    file_infos = []

    try:
        # Recursively scan all files in root and all subdirectories
        for item in root.rglob("*"):
            # The is_file() probe lives inside the per-entry guard because
            # it may propagate errors such as PermissionError; one bad
            # entry must only skip that entry, not end the whole scan.
            try:
                if not item.is_file():
                    continue
                size = item.stat().st_size
                abs_path = item.resolve()
            except OSError:  # PermissionError is a subclass of OSError
                continue
            file_infos.append(FileInfo(path=abs_path, size_bytes=size))
    except OSError:
        # The traversal itself failed; keep whatever was gathered so far.
        pass

    # Selecting with the full (size descending, path ascending) key makes
    # the choice among equally sized files at the cut-off deterministic.
    # nsmallest returns its result already sorted by that key.
    return heapq.nsmallest(
        n,
        file_infos,
        key=lambda info: (-info.size_bytes, str(info.path)),
    )
4967

5068

5169
def files_to_free_space(root: Path, target_bytes: int) -> list[FileInfo]:
    """
    Pick the largest files under ``root`` until ``target_bytes`` is reached.

    The tree below ``root`` is walked recursively. Files are then taken
    from largest to smallest until their combined size is at least
    ``target_bytes``.

    Parameters
    ----------
    root : Path
        Directory whose contents are scanned recursively.
    target_bytes : int
        Number of bytes the selected files should add up to. Zero or a
        negative value returns an empty list without scanning ``root``.

    Returns
    -------
    list[FileInfo]
        Selected files ordered by size, largest first. Files of equal
        size are ordered by their path string.

    Notes
    -----
    - If total file size is insufficient, all files are returned.
    - No files are deleted.
    - Entries that cannot be inspected are skipped silently.
    """
    if target_bytes <= 0:
        return []

    file_infos = []

    try:
        # Recursively scan all files in root and all subdirectories
        for item in root.rglob("*"):
            # The is_file() probe lives inside the per-entry guard because
            # it may propagate errors such as PermissionError; one bad
            # entry must only skip that entry, not end the whole scan.
            try:
                if not item.is_file():
                    continue
                size = item.stat().st_size
                abs_path = item.resolve()
            except OSError:  # PermissionError is a subclass of OSError
                continue
            file_infos.append(FileInfo(path=abs_path, size_bytes=size))
    except OSError:
        # The traversal itself failed; keep whatever was gathered so far.
        pass

    # Sort by size descending, then by path for deterministic ordering
    sorted_files = sorted(file_infos, key=lambda x: (-x.size_bytes, str(x.path)))

    result = []
    total_size = 0

    # Greedy selection: always take the next-largest file and stop as soon
    # as the running total reaches the target.
    for file_info in sorted_files:
        result.append(file_info)
        total_size += file_info.size_bytes
        if total_size >= target_bytes:
            break

    return result
81129

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
"""
2+
Unit tests for file_size_utilities module.
3+
4+
These tests focus on logic, data structures, and edge cases
5+
without requiring filesystem access.
6+
"""
7+
import pytest
8+
from pathlib import Path
9+
from tame_your_files.file_size_utilities import FileInfo
10+
11+
12+
class TestFileInfo:
    """Checks of the value semantics of the FileInfo dataclass."""

    def test_fileinfo_creation(self):
        """A FileInfo exposes the path and size it was built with."""
        expected_path = Path("/test/file.txt")
        expected_size = 1000

        info = FileInfo(path=expected_path, size_bytes=expected_size)

        assert info.path == expected_path
        assert info.size_bytes == expected_size

    def test_fileinfo_immutable(self):
        """Assigning to a field of a FileInfo raises FrozenInstanceError."""
        import dataclasses

        info = FileInfo(path=Path("/test/file.txt"), size_bytes=1000)

        with pytest.raises(dataclasses.FrozenInstanceError):
            info.size_bytes = 2000  # type: ignore[misc]

    def test_fileinfo_equality(self):
        """Two FileInfo objects are equal only if path and size both match."""
        first_path = Path("/test/file1.txt")
        second_path = Path("/test/file2.txt")

        baseline = FileInfo(path=first_path, size_bytes=1000)
        twin = FileInfo(path=first_path, size_bytes=1000)
        differs_in_path = FileInfo(path=second_path, size_bytes=1000)
        differs_in_size = FileInfo(path=first_path, size_bytes=2000)

        assert baseline == twin  # Same path and size
        assert baseline != differs_in_path  # Different path
        assert baseline != differs_in_size  # Different size

    def test_fileinfo_hashable(self):
        """FileInfo instances can be stored in sets and used as dict keys."""
        small = FileInfo(path=Path("/test/file1.txt"), size_bytes=1000)
        big = FileInfo(path=Path("/test/file2.txt"), size_bytes=2000)

        # Membership in a set requires a working __hash__
        unique_infos = {small, big}
        assert len(unique_infos) == 2

        # The same holds for dictionary keys
        labels = {small: "first", big: "second"}
        assert labels[small] == "first"
59+
60+
61+
class TestFileInfoSorting:
    """Checks of the (size descending, path ascending) sort key."""

    def test_sorting_by_size_descending(self):
        """Larger files come before smaller ones."""
        ordered = [
            FileInfo(path=Path("/test/small.txt"), size_bytes=100),
            FileInfo(path=Path("/test/large.txt"), size_bytes=1000),
            FileInfo(path=Path("/test/medium.txt"), size_bytes=500),
        ]
        ordered.sort(key=lambda info: (-info.size_bytes, str(info.path)))

        largest, middle, smallest = ordered
        assert largest.size_bytes == 1000
        assert middle.size_bytes == 500
        assert smallest.size_bytes == 100

    def test_sorting_deterministic_with_same_size(self):
        """Files of equal size are ordered alphabetically by path."""
        ordered = [
            FileInfo(path=Path("/test/z_file.txt"), size_bytes=100),
            FileInfo(path=Path("/test/a_file.txt"), size_bytes=100),
            FileInfo(path=Path("/test/m_file.txt"), size_bytes=100),
        ]
        ordered.sort(key=lambda info: (-info.size_bytes, str(info.path)))

        # With identical sizes only the path component of the key decides
        first, second, third = ordered
        assert first.path.name == "a_file.txt"
        assert second.path.name == "m_file.txt"
        assert third.path.name == "z_file.txt"
92+
93+
94+
class TestSelectionLogic:
    """Checks of the selection strategies, run on in-memory FileInfo lists."""

    def test_nlargest_selection(self):
        """heapq.nlargest keeps exactly the three biggest of five files."""
        import heapq

        candidates = [
            FileInfo(path=Path("/test/file1.txt"), size_bytes=100),
            FileInfo(path=Path("/test/file2.txt"), size_bytes=500),
            FileInfo(path=Path("/test/file3.txt"), size_bytes=1000),
            FileInfo(path=Path("/test/file4.txt"), size_bytes=200),
            FileInfo(path=Path("/test/file5.txt"), size_bytes=300),
        ]

        # Select top 3 largest
        top_three = heapq.nlargest(3, candidates, key=lambda info: info.size_bytes)

        assert len(top_three) == 3
        picked_sizes = [info.size_bytes for info in top_three]
        for expected in (1000, 500, 300):
            assert expected in picked_sizes
        for unexpected in (100, 200):
            assert unexpected not in picked_sizes

    def test_greedy_selection_logic(self):
        """Largest-first selection stops once the 500-byte target is met."""
        candidates = [
            FileInfo(path=Path("/test/file1.txt"), size_bytes=100),
            FileInfo(path=Path("/test/file2.txt"), size_bytes=200),
            FileInfo(path=Path("/test/file3.txt"), size_bytes=300),
            FileInfo(path=Path("/test/file4.txt"), size_bytes=400),
        ]

        # Sort by size descending
        ranked = sorted(candidates, key=lambda info: (-info.size_bytes, str(info.path)))

        # Walk the ranking, remembering where the running total first
        # reaches the 500-byte target
        goal = 500
        running_total = 0
        cutoff = len(ranked)
        for position, candidate in enumerate(ranked, start=1):
            running_total += candidate.size_bytes
            if running_total >= goal:
                cutoff = position
                break
        picked = ranked[:cutoff]

        assert len(picked) == 2
        assert picked[0].size_bytes == 400
        assert picked[1].size_bytes == 300
        assert running_total == 700
        assert running_total >= goal

    def test_greedy_selection_exceeding_total(self):
        """Every file is selected when the target exceeds the combined size."""
        candidates = [
            FileInfo(path=Path("/test/file1.txt"), size_bytes=100),
            FileInfo(path=Path("/test/file2.txt"), size_bytes=200),
        ]

        ranked = sorted(candidates, key=lambda info: (-info.size_bytes, str(info.path)))

        goal = 1000  # Larger than total (300)
        running_total = 0
        cutoff = len(ranked)
        for position, candidate in enumerate(ranked, start=1):
            running_total += candidate.size_bytes
            if running_total >= goal:
                cutoff = position
                break
        picked = ranked[:cutoff]

        # Should return all files even though target isn't met
        assert len(picked) == 2
        assert running_total == 300
171+
172+
173+
class TestEdgeCases:
    """Unit tests for edge cases and input validation."""

    def test_zero_target_returns_empty(self):
        """A target of zero bytes selects no files."""
        from tame_your_files.file_size_utilities import files_to_free_space

        # Non-positive targets return before the directory is scanned, so
        # the path does not have to exist and no filesystem access occurs.
        assert files_to_free_space(Path("/test"), 0) == []

    def test_negative_target_returns_empty(self):
        """A negative target selects no files."""
        from tame_your_files.file_size_utilities import files_to_free_space

        # Same early-return path as the zero target.
        assert files_to_free_space(Path("/test"), -100) == []

    def test_empty_file_list(self):
        """Selecting from and sorting an empty list both give an empty list."""
        import heapq

        files = []

        # Test nlargest with empty list
        largest = heapq.nlargest(10, files, key=lambda x: x.size_bytes)
        assert largest == []

        # Test sorting empty list
        sorted_files = sorted(files, key=lambda x: (-x.size_bytes, str(x.path)))
        assert sorted_files == []
211+

0 commit comments

Comments
 (0)