-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtask1_reducer1.py
executable file
·39 lines (30 loc) · 1013 Bytes
/
task1_reducer1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/python3
import sys
from itertools import groupby
def read_map_output(file):
    """Yield ``[key, value]`` pairs parsed from tab-separated text lines.

    Each line is stripped of surrounding whitespace and split on the first
    tab only, so any further tabs remain part of the value.

    Blank (or whitespace-only) lines are skipped. A line without a tab, or
    whose value is empty, yields the whole stripped line as the key and an
    empty string as the value -- the same "no tab means empty value"
    convention Hadoop Streaming uses -- so every yielded item has exactly
    two elements and callers can always unpack it safely.

    Args:
        file: An iterable of text lines (for example ``sys.stdin``).

    Yields:
        list: ``[key, value]`` for every non-blank line.
    """
    for line in file:
        record = line.strip()
        if not record:
            # Nothing to parse; yielding [""] would break 2-tuple unpacking.
            continue
        # partition() always returns three parts, so a missing separator
        # gives an empty value instead of a short list.
        key, _, value = record.partition("\t")
        yield [key, value]
def reduce1():
    """Count the number of photos per city from mapper output on stdin.

    Records are read with ``read_map_output(sys.stdin)``; the first field of
    each record is taken as the city name and every record counts as one
    photo. Only *consecutive* records with the same city are tallied
    together, so the input must already be grouped (sorted) by city;
    otherwise a city is printed once per run of adjacent records.

    Input format:  city <tab> photo_id
    Output format: city <tab> number of photos

    Groups whose city name is the empty string are not printed.
    """
    records = read_map_output(sys.stdin)
    # Index the key rather than unpacking so that a record lacking a value
    # field is still counted instead of raising ValueError.
    for city, group in groupby(records, key=lambda record: record[0]):
        if city == "":
            # An empty key carries no city to report.
            continue
        photo_count = sum(1 for _ in group)
        print("{}\t{}".format(city, photo_count))
# Run the reducer only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    reduce1()