Skip to content

Commit faaf46d

Browse files
authored
Merge pull request #26 from eecs485staff/fix-tests
test improvements
2 parents 6c3bfa2 + e69b9d9 commit faaf46d

File tree

4 files changed

+69
-46
lines changed

4 files changed

+69
-46
lines changed

tests/test_api.py

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,19 @@
11
"""System tests for the API interface."""
2-
import pathlib
3-
import filecmp
42
import madoop
5-
6-
7-
# Directory containing unit test input files, etc.
8-
TESTDATA_DIR = pathlib.Path(__file__).parent/"testdata"
3+
from . import utils
4+
from .utils import TESTDATA_DIR
95

106

117
def test_simple(tmpdir):
128
"""Run a simple MapReduce job and verify the output."""
139
with tmpdir.as_cwd():
1410
madoop.mapreduce(
15-
input_dir=str(TESTDATA_DIR/"word_count/input"),
11+
input_dir=TESTDATA_DIR/"word_count/input",
1612
output_dir="output",
17-
map_exe=str(TESTDATA_DIR/"word_count/map.py"),
18-
reduce_exe=str(TESTDATA_DIR/"word_count/reduce.py"),
19-
)
20-
for path in (TESTDATA_DIR/"word_count/correct").glob("part-*"):
21-
assert filecmp.cmp(
22-
path,
23-
TESTDATA_DIR/"word_count/correct"/path,
24-
shallow=False,
13+
map_exe=TESTDATA_DIR/"word_count/map.py",
14+
reduce_exe=TESTDATA_DIR/"word_count/reduce.py",
2515
)
16+
utils.assert_dirs_eq(
17+
TESTDATA_DIR/"word_count/correct/output",
18+
tmpdir/"output",
19+
)

tests/test_cli.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,8 @@
11
"""System tests for the command line interface."""
2-
import pathlib
32
import subprocess
4-
import filecmp
53
import pkg_resources
6-
7-
# Directory containing unit test input files, etc.
8-
TESTDATA_DIR = pathlib.Path(__file__).parent/"testdata"
4+
from . import utils
5+
from .utils import TESTDATA_DIR
96

107

118
def test_version():
@@ -45,10 +42,10 @@ def test_simple(tmpdir):
4542
stdout=subprocess.PIPE,
4643
check=True,
4744
)
48-
correct_list = sorted((TESTDATA_DIR/"word_count/correct").glob("part-*"))
49-
actual_list = sorted(pathlib.Path(tmpdir/"output").glob("part-*"))
50-
for correct, actual in zip(correct_list, actual_list):
51-
assert filecmp.cmp(correct, actual, shallow=False)
45+
utils.assert_dirs_eq(
46+
TESTDATA_DIR/"word_count/correct/output",
47+
tmpdir/"output",
48+
)
5249

5350

5451
def test_hadoop_arguments(tmpdir):

tests/test_stages.py

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,7 @@
11
"""System tests for the map, group, and reduce stages of Michigan Hadoop."""
2-
import pathlib
3-
import filecmp
42
from madoop.mapreduce import map_stage, group_stage, reduce_stage
5-
6-
7-
# Directory containing unit test input files, etc.
8-
TESTDATA_DIR = pathlib.Path(__file__).parent/"testdata"
3+
from . import utils
4+
from .utils import TESTDATA_DIR
95

106

117
def test_map_stage(tmpdir):
@@ -16,11 +12,10 @@ def test_map_stage(tmpdir):
1612
output_dir=tmpdir,
1713
num_map=2,
1814
)
19-
correct_dir = TESTDATA_DIR/"word_count/correct/mapper-output"
20-
correct_list = sorted(correct_dir.glob("part-*"))
21-
actual_list = sorted(pathlib.Path(tmpdir/"output").glob("part-*"))
22-
for correct, actual in zip(correct_list, actual_list):
23-
assert filecmp.cmp(correct, actual, shallow=False)
15+
utils.assert_dirs_eq(
16+
TESTDATA_DIR/"word_count/correct/mapper-output",
17+
tmpdir,
18+
)
2419

2520

2621
def test_group_stage(tmpdir):
@@ -29,11 +24,10 @@ def test_group_stage(tmpdir):
2924
input_dir=TESTDATA_DIR/"word_count/correct/mapper-output",
3025
output_dir=tmpdir,
3126
)
32-
correct_dir = TESTDATA_DIR/"word_count/correct/grouper-output"
33-
correct_list = sorted(correct_dir.glob("part-*"))
34-
actual_list = sorted(pathlib.Path(tmpdir).glob("part-*"))
35-
for correct, actual in zip(correct_list, actual_list):
36-
assert filecmp.cmp(correct, actual, shallow=False)
27+
utils.assert_dirs_eq(
28+
TESTDATA_DIR/"word_count/correct/grouper-output",
29+
tmpdir,
30+
)
3731

3832

3933
def test_reduce_stage(tmpdir):
@@ -42,10 +36,9 @@ def test_reduce_stage(tmpdir):
4236
exe=TESTDATA_DIR/"word_count/reduce.py",
4337
input_dir=TESTDATA_DIR/"word_count/correct/grouper-output",
4438
output_dir=tmpdir,
45-
num_reduce=2,
39+
num_reduce=4,
40+
)
41+
utils.assert_dirs_eq(
42+
TESTDATA_DIR/"word_count/correct/reducer-output",
43+
tmpdir,
4644
)
47-
correct_dir = TESTDATA_DIR/"word_count/correct/reducer-output"
48-
correct_list = sorted(correct_dir.glob("part-*"))
49-
actual_list = sorted(pathlib.Path(tmpdir).glob("part-*"))
50-
for correct, actual in zip(correct_list, actual_list):
51-
assert filecmp.cmp(correct, actual, shallow=False)

tests/utils.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
"""Unit test utilities."""
2+
import filecmp
3+
import pathlib
4+
5+
6+
# Directory containing unit test input files, etc.
7+
TESTDATA_DIR = pathlib.Path(__file__).parent/"testdata"
8+
9+
10+
def assert_dirs_eq(dir1, dir2):
11+
"""Compare two directories of files."""
12+
assert dir1 != dir2, (
13+
"Refusing to compare a directory to itself:\n"
14+
f"dir1 = {dir1}\n"
15+
f"dir2 = {dir2}\n"
16+
)
17+
18+
# Get a list of files in each directory
19+
dir1 = pathlib.Path(dir1)
20+
dir2 = pathlib.Path(dir2)
21+
paths1 = list(dir1.iterdir())
22+
paths2 = list(dir2.iterdir())
23+
24+
# Sanity checks
25+
assert paths1, f"Empty directory: {dir1}"
26+
assert paths2, f"Empty directory: {dir2}"
27+
assert all(p.is_file() for p in paths1)
28+
assert all(p.is_file() for p in paths2)
29+
assert len(paths1) == len(paths2), (
30+
"Number of output files does not match\n"
31+
f"dir1 = {dir1}\n"
32+
f"dir2 = {dir2}\n"
33+
f"number of files in dir1 = {len(paths1)}\n"
34+
f"number of files in dir2 = {len(paths2)}\n"
35+
)
36+
37+
# Compare files pairwise
38+
for path1, path2 in zip(sorted(paths1), sorted(paths2)):
39+
assert filecmp.cmp(path1, path2, shallow=False)

0 commit comments

Comments
 (0)