-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathetl_job.py
146 lines (130 loc) · 5.16 KB
/
etl_job.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import json
import os
import sys

import pandas as pd
import requests
def clear_last_line():
    """Erase the progress text most recently written to the console.

    Reads the module-level ``last_printed_line`` on Windows to know how many
    characters have to be blanked out.
    """
    if not sys.platform.startswith('win'):
        # Unix-like terminals: ANSI "erase from cursor to end of line".
        sys.stdout.write("\033[K")
        return

    # Windows: return to column 0, overwrite the old text with spaces, then
    # return to column 0 again so the next write starts at the line start.
    blanks = " " * len(last_printed_line)
    print("\r" + blanks + "\r", end="")
def print_progress(completed, total, update_frequency=1):
    """Redraw a single-line progress indicator on the console.

    Args:
        completed: Number of items processed so far.
        total: Total number of items. ``0`` is reported as 0.00% instead of
            raising ``ZeroDivisionError``.
        update_frequency: Redraw only when ``completed`` is a multiple of
            this value. Values below 1 are treated as 1 (redraw every call).

    Side effects:
        Writes to stdout and stores the text that was drawn in the
        module-level ``last_printed_line``.
    """
    global last_printed_line

    if update_frequency < 1:
        update_frequency = 1
    if completed % update_frequency != 0:
        return

    percentage = completed / total * 100 if total else 0.0
    progress_line = f"Progress: {percentage:.2f}% ({completed} out of {total})"

    # clear_last_line() reads last_printed_line on Windows; make sure the
    # name exists even when this is the very first call.
    globals().setdefault("last_printed_line", "")

    # Erase the PREVIOUS line before drawing the new one. Clearing after the
    # print wipes the text that was just written on Windows, where
    # clear_last_line() overwrites the line with spaces.
    print('\r', end="")
    clear_last_line()
    # Flush so the line shows up immediately despite the missing newline.
    print(progress_line, end="", flush=True)
    last_printed_line = progress_line
def _is_missing(value):
    """Return True when a CSV cell holds no usable value.

    pandas loads empty cells as float NaN, which is truthy and never equal to
    the string 'nan', so missing cells need an explicit ``pd.isna`` check. The
    literal strings '', 'nan' and 'NaN' are treated as missing as well.
    """
    if value is None:
        return True
    if isinstance(value, str):
        return value in ('', 'nan', 'NaN')
    return bool(pd.isna(value))


def _text_or_blank(value):
    """Return *value* unchanged, or '' when the cell is missing."""
    return '' if _is_missing(value) else value


def _build_hospital_bundle(row):
    """Map one CSV row to the payload dicts stored in ``hospitals_final``.

    Args:
        row: A pandas Series for one line of the hospital directory CSV.

    Returns:
        A dict with the keys 'hospital', 'hospitalInfo', 'bloodBank',
        'availability' and 'amenities'.
    """
    hospital_id = (
        'HIO' + str(row['State_ID']) + str(row['District_ID']) + str(row['Sr_No'])
    )
    city = _text_or_blank(row['District'])
    state = _text_or_blank(row['State'])
    address = _text_or_blank(row['Location'])

    coordinates = row.get('Location_Coordinates')
    geolocation = None if _is_missing(coordinates) else str(coordinates)

    # The care type is only reported when the row also has a category.
    # NOTE(review): this keeps the rule of the original inline expression; an
    # earlier unused variable suggested "category, else care type" instead.
    # Confirm which of the two the API expects.
    category = row.get('Hospital_Category')
    has_category = not _is_missing(category) and bool(category)
    hospital_type = (
        _text_or_blank(row.get('Hospital_Care_Type')) if has_category else ''
    )

    hospital = {
        'name': _text_or_blank(row['Hospital_Name']),
        'city': city,
        # Without a serial number the composed id is meaningless.
        'id': '' if _is_missing(row['Sr_No']) else hospital_id,
        'geolocation': '' if geolocation is None else geolocation,
        'type': hospital_type,
        'opening_time': None,
        'closing_time': None,
        'address': address,
        'state': state,
    }
    hospital_info = {
        'hospital_id': hospital_id,
        'address': address,
        'city_name': city,
        'state_name': state,
        'geolocation': geolocation,
    }
    blood_bank = {
        'hospital_id': hospital_id,
    }
    # The CSV carries no capacity figures, so every counter starts at zero.
    availability = {
        'hospital_id': hospital_id,
        'bed': 0,
        'total_bed': 0,
        'icu': 0,
        'total_icu': 0,
        'ccu': 0,
        'total_ccu': 0,
        'ventilator': 0,
        'total_ventilator': 0,
        'oxygen_cylinders': 0,
        'total_oxygen_cylinders': 0,
    }
    amenities = {
        'hospital_id': hospital_id,
        "x_ray": False,
        "mri": False,
        "ecg": False,
        "ultra_sound": False,
        "blood_test": False,
    }
    return {
        'hospital': hospital,
        'hospitalInfo': hospital_info,
        'bloodBank': blood_bank,
        'availability': availability,
        'amenities': amenities,
    }


# Load the raw hospital directory and convert every row into its payloads.
df = pd.read_csv('hospital_directory.csv', low_memory=False)
# Not used below; kept for interactive inspection of the data set.
unique_specialities = df['Specialties'].unique()

hospitals_final = [_build_hospital_bundle(record) for _, record in df.iterrows()]
# --- API configuration ------------------------------------------------------
base_url = "http://localhost:8080/api/v1/hospital"
headers = {
    "Content-Type": "application/json",
    # The bearer token comes from the HOSPITAL_API_TOKEN environment variable
    # so that no credential is committed to the repository.
    "Authorization": "Bearer " + os.environ.get("HOSPITAL_API_TOKEN", ""),
}

# --- Progress / bookkeeping state -------------------------------------------
total_calls = len(hospitals_final)
completed_calls = 0
last_printed_line = ""  # read and updated by print_progress()
failed = []             # indices of hospitals that could not be processed
allarr = []             # 'hospital' payloads collected for the JSON export

# Collect every hospital payload.
for index, bundle in enumerate(hospitals_final):
    try:
        completed_calls += 1
        print_progress(completed_calls, total_calls)
        payload = bundle["hospital"]
        allarr.append(payload)
        # The API upload is currently disabled; only the JSON export below
        # runs. To re-enable it, POST each payload here:
        #   response = requests.post(base_url + '/add',
        #                            json=payload, headers=headers)
        #   response = requests.post(base_url + '/update?command=hospital_info',
        #                            json=bundle["hospitalInfo"], headers=headers)
        # and treat any status code other than 200 as a failure.
    except Exception as e:
        # Best effort: remember the failing index and continue with the rest.
        print("Error: " + str(e))
        failed.append(index)

# Write the collected payloads. index=False is not passed: it is not supported
# together with the default 'columns' orient of DataFrame.to_json.
hospitals_frame = pd.DataFrame(
    {'hospitals': allarr}, columns=['hospitals'], dtype=object
)
hospitals_frame.to_json('hospitals.json')

# The progress line has no trailing newline; end it before the summary.
print()
print("Failed: " + str(failed))
failed_frame = pd.DataFrame({'failed': failed})
failed_frame.to_json('failed.json')