-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathdistributionChecker.py
74 lines (62 loc) · 2.69 KB
/
distributionChecker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import psycopg2
import csv
def main():
    """Export the row count per ``knownfortitles`` value of ``actors`` to CSV.

    Connects to the ``mettas`` PostgreSQL database, runs a ``GROUP BY`` count
    over ``actors.knownfortitles``, orders the groups by descending count and
    writes them to ``/data/mettas/data/actors_distribution.csv`` under the
    header ``make,count``.

    Nothing is returned and nothing is raised: database errors and any other
    exception are reported on stdout, and the cursor/connection are closed in
    every case.
    """
    output_path = '/data/mettas/data/actors_distribution.csv'

    # Pre-bind both handles so the cleanup in ``finally`` stays valid even
    # when ``connect()`` or ``cursor()`` is the call that fails; otherwise the
    # cleanup itself would raise and hide the error reported just before it.
    conn = None
    cur = None
    try:
        # A host beginning with "/" is interpreted by libpq as the directory
        # of a Unix-domain socket rather than a network host name.
        conn = psycopg2.connect(
            host="/tmp/",
            database="mettas",
            user="mettas",
            port="1997"
        )
        cur = conn.cursor()

        # One (knownfortitles, count) row per distinct knownfortitles value.
        query = "SELECT knownfortitles, COUNT(*) FROM actors GROUP BY knownfortitles;"
        cur.execute(query)
        results = cur.fetchall()

        # Largest groups first.
        sorted_results = sorted(results, key=lambda x: x[1], reverse=True)

        with open(output_path, 'w') as csvfile:
            writer = csv.writer(csvfile)
            # The key column is labelled 'make' because dist_comparison()
            # in this file joins its input files on a column of that name.
            writer.writerow(['make', 'count'])
            writer.writerows(sorted_results)

        print("Results have been written to {0}".format(output_path))
    except psycopg2.Error as db_err:
        print("Database error occurred: {0}".format(db_err))
    except Exception as e:
        print("An error occurred: {0}".format(e))
    finally:
        # Close only what was actually opened.
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()
def dist_comparison(file_a, file_b,
                    output_file_path='/data/mettas/Join-Game/spotify_comparison.csv'):
    """Inner-join two CSV files on their ``make`` column and write the result.

    Every row of ``file_b`` whose stripped ``make`` value also occurs in
    ``file_a`` is written once per matching ``file_a`` row. Each output row
    holds all ``file_a`` columns followed by all ``file_b`` columns, and the
    header line is the two input headers concatenated. Rows without a match
    on either side are dropped.

    Args:
        file_a: Path of the CSV that is loaded into memory and indexed by
            ``make``.
        file_b: Path of the CSV that is streamed and probed against that
            index.
        output_file_path: Path of the CSV to create. Defaults to the location
            that used to be hard-coded.

    Raises:
        KeyError: If a data row is read from a file that has no ``make``
            column.
        IOError/OSError: If a file cannot be opened.
    """
    import sys  # local import: only needed to pick the right file mode

    def _open_csv(path, mode):
        """Open *path* the way the csv module expects on this interpreter."""
        if sys.version_info[0] >= 3:
            # Python 3: csv works on text files and wants newline=''.
            return open(path, mode, newline='')
        # Python 2: csv works on binary files.
        return open(path, mode + 'b')

    # Index file_a by 'make'. The header is captured while the file is still
    # open; reading it lazily after the file is closed fails on empty input.
    with _open_csv(file_a, 'r') as csv1:
        reader1 = csv.DictReader(csv1)
        fieldnames1 = reader1.fieldnames or []
        rows1 = {}
        for row in reader1:
            make = row['make'].strip()
            rows1.setdefault(make, []).append(row)

    # Open the second input before the output so that a missing input does
    # not leave behind a freshly truncated output file.
    with _open_csv(file_b, 'r') as csv2, \
            _open_csv(output_file_path, 'w') as write_file:
        reader2 = csv.DictReader(csv2)
        fieldnames2 = reader2.fieldnames or []
        writer = csv.writer(write_file)

        # Combined header: file_a columns, then file_b columns.
        writer.writerow(fieldnames1 + fieldnames2)

        for row2 in reader2:
            make = row2['make'].strip()
            # rows1.get(..., ()) yields nothing for unmatched keys.
            for row1 in rows1.get(make, ()):
                combined_row = ([row1[field] for field in fieldnames1] +
                                [row2[field] for field in fieldnames2])
                writer.writerow(combined_row)
if __name__ == "__main__":
    # Script entry point: only the actors distribution export runs.
    # The CSV join is kept here as a disabled, ready-to-use example:
    # dist_comparison('/data/mettas/data/sp_artist_count.csv', '/data/mettas/data/sp_tracks_count.csv')
    main()