11"""System tests for the map, group, and reduce stages of Michigan Hadoop."""
2- import pathlib
3- import filecmp
42from madoop .mapreduce import map_stage , group_stage , reduce_stage
5-
6-
7- # Directory containing unit test input files, etc.
8- TESTDATA_DIR = pathlib .Path (__file__ ).parent / "testdata"
3+ from . import utils
4+ from .utils import TESTDATA_DIR
95
106
117def test_map_stage (tmpdir ):
@@ -16,11 +12,10 @@ def test_map_stage(tmpdir):
1612 output_dir = tmpdir ,
1713 num_map = 2 ,
1814 )
19- correct_dir = TESTDATA_DIR / "word_count/correct/mapper-output"
20- correct_list = sorted (correct_dir .glob ("part-*" ))
21- actual_list = sorted (pathlib .Path (tmpdir / "output" ).glob ("part-*" ))
22- for correct , actual in zip (correct_list , actual_list ):
23- assert filecmp .cmp (correct , actual , shallow = False )
15+ utils .assert_dirs_eq (
16+ TESTDATA_DIR / "word_count/correct/mapper-output" ,
17+ tmpdir ,
18+ )
2419
2520
2621def test_group_stage (tmpdir ):
@@ -29,11 +24,10 @@ def test_group_stage(tmpdir):
2924 input_dir = TESTDATA_DIR / "word_count/correct/mapper-output" ,
3025 output_dir = tmpdir ,
3126 )
32- correct_dir = TESTDATA_DIR / "word_count/correct/grouper-output"
33- correct_list = sorted (correct_dir .glob ("part-*" ))
34- actual_list = sorted (pathlib .Path (tmpdir ).glob ("part-*" ))
35- for correct , actual in zip (correct_list , actual_list ):
36- assert filecmp .cmp (correct , actual , shallow = False )
27+ utils .assert_dirs_eq (
28+ TESTDATA_DIR / "word_count/correct/grouper-output" ,
29+ tmpdir ,
30+ )
3731
3832
3933def test_reduce_stage (tmpdir ):
@@ -42,10 +36,9 @@ def test_reduce_stage(tmpdir):
4236 exe = TESTDATA_DIR / "word_count/reduce.py" ,
4337 input_dir = TESTDATA_DIR / "word_count/correct/grouper-output" ,
4438 output_dir = tmpdir ,
45- num_reduce = 2 ,
39+ num_reduce = 4 ,
40+ )
41+ utils .assert_dirs_eq (
42+ TESTDATA_DIR / "word_count/correct/reducer-output" ,
43+ tmpdir ,
4644 )
47- correct_dir = TESTDATA_DIR / "word_count/correct/reducer-output"
48- correct_list = sorted (correct_dir .glob ("part-*" ))
49- actual_list = sorted (pathlib .Path (tmpdir ).glob ("part-*" ))
50- for correct , actual in zip (correct_list , actual_list ):
51- assert filecmp .cmp (correct , actual , shallow = False )
0 commit comments