Skip to content

Commit 627d1c6

Browse files
committed
Merge remote-tracking branch 'origin/develop'
2 parents 3270caf + e7f6205 commit 627d1c6

File tree

12 files changed

+29
-17
lines changed

12 files changed

+29
-17
lines changed

madoop/mapreduce.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def prepare_input_files(input_dir, output_dir):
123123
st_size = inpath.stat().st_size
124124
total_size += st_size
125125
n_splits = math.ceil(st_size / MAX_INPUT_SPLIT_SIZE)
126-
assert n_splits > 0
126+
n_splits = 1 if not n_splits else n_splits # Handle empty input file
127127
LOGGER.debug(
128128
"input %s size=%sB partitions=%s", inpath, st_size, n_splits
129129
)
@@ -278,13 +278,6 @@ def group_stage(input_dir, output_dir):
278278
last_two(inpath), outparent.name, ",".join(outnames),
279279
)
280280

281-
# Remove empty output files. We won't always use the maximum number of
282-
# reducers because some MapReduce programs have fewer intermediate keys.
283-
for path in sorted(output_dir.iterdir()):
284-
if path.stat().st_size == 0:
285-
LOGGER.debug("empty partition: rm %s", last_two(path))
286-
path.unlink()
287-
288281
# Sort output files
289282
for path in sorted(output_dir.iterdir()):
290283
sort_file(path)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
description="A light weight MapReduce framework for education.",
1515
long_description=LONG_DESCRIPTION,
1616
long_description_content_type="text/markdown",
17-
version="0.3.0",
17+
version="0.4.0",
1818
author="Andrew DeOrio",
1919
author_email="[email protected]",
2020
url="https://github.com/eecs485staff/madoop/",

tests/test_api.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,18 @@ def test_missing_shebang(tmpdir):
5555
map_exe=TESTDATA_DIR/"word_count/map.py",
5656
reduce_exe=TESTDATA_DIR/"word_count/reduce_invalid.py",
5757
)
58+
59+
60+
def test_empty_inputs(tmpdir):
61+
"""Empty input files should not raise an error."""
62+
with tmpdir.as_cwd():
63+
madoop.mapreduce(
64+
input_dir=TESTDATA_DIR/"word_count/input_empty",
65+
output_dir="output",
66+
map_exe=TESTDATA_DIR/"word_count/map.py",
67+
reduce_exe=TESTDATA_DIR/"word_count/reduce.py",
68+
)
69+
utils.assert_dirs_eq(
70+
TESTDATA_DIR/"word_count/correct/output",
71+
tmpdir/"output",
72+
)

tests/testdata/word_count/correct/grouper-output/part-00001

Whitespace-only changes.
Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +0,0 @@
1-
Bye 1
2-
Hadoop 2
3-
World 2
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
1-
Hello 2
1+
Bye 1
2+
Hadoop 2
3+
World 2
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Hello 2
Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +0,0 @@
1-
Bye 1
2-
Hadoop 2
3-
World 2
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
1-
Hello 2
1+
Bye 1
2+
Hadoop 2
3+
World 2
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Hello 2

0 commit comments

Comments
 (0)