-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreducer.py
More file actions
98 lines (78 loc) · 2.64 KB
/
reducer.py
File metadata and controls
98 lines (78 loc) · 2.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python3
"""
Word Count Reducer Module
Improved version with better error handling and performance
"""
import sys
import logging
from typing import List, Tuple
from utils import safe_parse_line, handle_broken_pipe, setup_logging
# Configure logging through the project's setup_logging() helper (imported
# from utils; the handlers/levels it installs are not visible in this file),
# then create the module-level logger used by the functions below.
setup_logging()
logger = logging.getLogger(__name__)
def print_word_count(word: str, count: int) -> None:
    """Write one result record to stdout as ``word<TAB>count``."""
    record = f'{word}\t{count}'
    print(record)
def reduce_word_counts() -> List[Tuple[str, int]]:
    """
    Reduce word counts from mapper output

    Reads lines from stdin, parses each one with
    ``safe_parse_line(line, expected_parts=2)`` into a ``(word, count)``
    pair, and sums the counts of *consecutive* lines that carry the same
    word. Equal words must therefore arrive adjacent to each other
    (presumably the input is sorted by word upstream -- confirm against
    the pipeline); a word that reappears later yields a separate entry.

    Lines for which ``safe_parse_line`` returns a falsy value are skipped,
    as are lines whose count is not an integer (those are logged as a
    warning).

    Returns:
        List of (word, count) tuples sorted by count descending. Entries
        with equal counts keep the order in which they were read, because
        ``list.sort`` is stable. An empty list is returned when there is
        no usable input or when an unexpected error occurs (the error is
        logged with its traceback).
    """
    current_word = None
    current_count = 0
    word_counts = []
    try:
        # Read input from stdin
        for line in sys.stdin:
            parsed = safe_parse_line(line, expected_parts=2)
            if not parsed:
                continue
            word, count_str = parsed
            try:
                count = int(count_str)
            except ValueError:
                logger.warning(f"Invalid count value: {count_str}")
                continue

            # Same word as the running group: keep accumulating.
            if current_word == word:
                current_count += count
                continue

            # The word changed, so flush the finished group. Compare with
            # None explicitly: None is the "nothing read yet" sentinel,
            # and a truthiness test would also discard a group whose word
            # is the empty string.
            if current_word is not None:
                word_counts.append((current_word, current_count))
            current_word = word
            current_count = count

        # Handle the last word (same None-sentinel check as above).
        if current_word is not None:
            word_counts.append((current_word, current_count))

        # Sort by count in descending order
        word_counts.sort(key=lambda x: x[1], reverse=True)
        return word_counts
    except Exception as e:
        # Best-effort contract: report the failure (with traceback) and
        # hand back an empty result instead of raising.
        logger.exception(f"Error in reducer: {e}")
        return []
def main():
    """Main reducer function

    Calls reduce_word_counts() and prints every (word, count) pair with
    print_word_count(). When there is nothing to print, a warning is
    logged and the function returns without writing any output.

    Exit behavior:
        * KeyboardInterrupt: logged, then ``sys.exit(0)``.
        * BrokenPipeError: re-raised unchanged so the caller can deal
          with it (the script entry point passes it to
          handle_broken_pipe()).
        * Any other Exception: logged as an error, then ``sys.exit(1)``.
    """
    try:
        # Process word counts
        word_counts = reduce_word_counts()
        if not word_counts:
            logger.warning("No word counts to process")
            return

        # Output results
        for word, count in word_counts:
            print_word_count(word, count)
        logger.info(f"Processed {len(word_counts)} unique words")
    except KeyboardInterrupt:
        logger.info("Reducer interrupted by user")
        sys.exit(0)
    except BrokenPipeError:
        # BrokenPipeError is a subclass of Exception, so without this
        # clause the generic handler below would swallow it and the
        # dedicated broken-pipe handling at the entry point would never
        # run. This clause must stay above ``except Exception``.
        raise
    except Exception as e:
        logger.error(f"Error in reducer: {e}")
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: run the reducer, and pass any BrokenPipeError
    # that propagates out of main() to the handle_broken_pipe() helper
    # imported from utils.
    try:
        main()
    except BrokenPipeError:
        handle_broken_pipe()