Skip to content

Commit 101178a

Browse files
committed
Merge remote-tracking branch 'origin/develop'
2 parents 1fb3c2c + 30ec287 commit 101178a

File tree

3 files changed: 28 additions and 14 deletions.

madoop/__main__.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,10 @@
66
import argparse
77
import importlib.metadata
88
import logging
9+
import os
910
import pathlib
1011
import shutil
12+
import stat
1113
import sys
1214
import textwrap
1315
from .mapreduce import mapreduce
@@ -100,6 +102,13 @@ def __call__(self, parser, *args, **kwargs):
100102
if dst.exists():
101103
parser.error(f"directory already exists: {dst}")
102104
shutil.copytree(src, dst)
105+
106+
# Set executable bit
107+
st = os.stat(dst/"map.py")
108+
os.chmod(dst/"map.py", st.st_mode | stat.S_IEXEC)
109+
st = os.stat(dst/"reduce.py")
110+
os.chmod(dst/"reduce.py", st.st_mode | stat.S_IEXEC)
111+
103112
print(textwrap.dedent(f"""\
104113
Created {dst}, try:
105114

madoop/mapreduce.py

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -43,8 +43,9 @@ def mapreduce(
4343
# Executable scripts must have valid shebangs
4444
is_executable(map_exe)
4545
is_executable(reduce_exe)
46+
# The partitioner executable expects to receive num_reducers as an arg
4647
if partitioner:
47-
is_executable(partitioner)
48+
is_executable(partitioner, str(num_reducers))
4849

4950
# Create a tmp directory which will be automatically cleaned up
5051
with tempfile.TemporaryDirectory(prefix="madoop-") as tmpdir:
@@ -157,7 +158,7 @@ def normalize_input_paths(input_path):
157158
return input_paths
158159

159160

160-
def is_executable(exe):
161+
def is_executable(exe, *args):
161162
"""Verify exe is executable and raise exception if it is not.
162163
163164
Execute exe with an empty string input and verify that it returns zero. We
@@ -168,7 +169,7 @@ def is_executable(exe):
168169
exe = pathlib.Path(exe).resolve()
169170
try:
170171
subprocess.run(
171-
str(exe),
172+
[str(exe), *args],
172173
shell=False,
173174
input="".encode(),
174175
stdout=subprocess.PIPE,
@@ -198,9 +199,13 @@ def part_filename(num):
198199

199200
def map_single_chunk(exe, input_path, output_path, chunk):
200201
"""Execute mapper on a single chunk."""
202+
LOGGER.debug(
203+
"%s < %s > %s",
204+
exe.name, last_two(input_path), last_two(output_path),
205+
)
201206
with output_path.open("w") as outfile:
202207
try:
203-
subprocess.run(
208+
ret = subprocess.run(
204209
str(exe),
205210
shell=False,
206211
check=True,
@@ -217,6 +222,8 @@ def map_single_chunk(exe, input_path, output_path, chunk):
217222
) from err
218223
except OSError as err:
219224
raise MadoopError(f"Command returned non-zero: {err}") from err
225+
if ret.stderr:
226+
LOGGER.warning("stderr: %s", ret.stderr.decode().rstrip())
220227

221228

222229
def map_stage(exe, input_dir, output_dir):
@@ -229,10 +236,6 @@ def map_stage(exe, input_dir, output_dir):
229236
for input_path in normalize_input_paths(input_dir):
230237
for chunk in split_file(input_path, MAX_INPUT_SPLIT_SIZE):
231238
output_path = output_dir/part_filename(part_num)
232-
LOGGER.debug(
233-
"%s < %s > %s",
234-
exe.name, last_two(input_path), last_two(output_path),
235-
)
236239
futures.append(pool.submit(
237240
map_single_chunk,
238241
exe,
@@ -427,9 +430,13 @@ def group_stage(input_dir, output_dir, num_reducers, partitioner):
427430

428431
def reduce_single_file(exe, input_path, output_path):
429432
"""Execute reducer on a single file."""
433+
LOGGER.debug(
434+
"%s < %s > %s",
435+
exe.name, last_two(input_path), last_two(output_path),
436+
)
430437
with input_path.open() as infile, output_path.open("w") as outfile:
431438
try:
432-
subprocess.run(
439+
ret = subprocess.run(
433440
str(exe),
434441
shell=False,
435442
check=True,
@@ -446,6 +453,8 @@ def reduce_single_file(exe, input_path, output_path):
446453
) from err
447454
except OSError as err:
448455
raise MadoopError(f"Command returned non-zero: {err}") from err
456+
if ret.stderr:
457+
LOGGER.warning("stderr: %s", ret.stderr.decode().rstrip())
449458

450459

451460
def reduce_stage(exe, input_dir, output_dir):
@@ -457,10 +466,6 @@ def reduce_stage(exe, input_dir, output_dir):
457466
) as pool:
458467
for i, input_path in enumerate(sorted(input_dir.iterdir())):
459468
output_path = output_dir/part_filename(i)
460-
LOGGER.debug(
461-
"%s < %s > %s",
462-
exe.name, last_two(input_path), last_two(output_path),
463-
)
464469
futures.append(pool.submit(
465470
reduce_single_file,
466471
exe,

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "madoop"
7-
version = "1.3.1"
7+
version = "1.3.2"
88
description="A light weight MapReduce framework for education."
99
license = {file = "LICENSE"}
1010
authors = [

0 commit comments

Comments
 (0)