-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathutils.py
More file actions
94 lines (70 loc) · 3.1 KB
/
utils.py
File metadata and controls
94 lines (70 loc) · 3.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import csv
import yaml
import pickle
from typing import List, Tuple, Dict
with open('config.yaml', 'r') as file:
config = yaml.safe_load(file)
LAND_AREA_FILE = config['general']['land_area_file']
WORLD_CITIES_FILE = config['general']['world_cities_file']
TOTAL_LAND_AREA = 129965453 # km^2
COUNTRY_PROBABILITIES_FILE = config['general']['country_probabilities_file']
def read_csv(file_path: str) -> List[List[str]]:
with open(file_path, "r", encoding="utf-8") as file:
reader = csv.reader(file)
return list(reader)
def get_unique_column_values(file_path: str, column_index: int) -> List[str]:
data = read_csv(file_path)
return sorted(set(row[column_index] for row in data[1:] if len(row) > column_index))
def _get_csv_random_from_landarea() -> None:
data = read_csv(LAND_AREA_FILE)
# Extract header and find the index for 2021 data
header = data[0]
year_2021_index = header.index("2021")
# Extract country names and 2021 land areas, skipping header
countries_and_areas = [(row[0], float(row[year_2021_index])) for row in data[1:] if row[year_2021_index]]
# Normalize probabilities
probabilities = [area / TOTAL_LAND_AREA for _, area in countries_and_areas]
# Create a list of tuples with country names and their probabilities
country_probabilities = [(country, prob) for country, prob in zip([country for country, _ in countries_and_areas], probabilities)]
# Sort the list by probability in descending order
country_probabilities.sort(key=lambda x: x[1], reverse=True)
# Write the probabilities to a CSV file
with open(COUNTRY_PROBABILITIES_FILE, "w", newline='', encoding='utf-8') as csvfile:
writer = csv.writer(csvfile)
writer.writerows(country_probabilities)
return country_probabilities
def _make_city_dictionary() -> Dict[str, List[str]]:
with open(WORLD_CITIES_FILE, "r", encoding="utf-8") as file:
reader = csv.reader(file)
data = list(reader)
city_dict = {}
for row in data[1:]:
country = row[4]
city = row[0]
if country not in city_dict:
city_dict[country] = []
city_dict[country].append(city)
return city_dict
def _make_city_dictionary_with_coords() -> Dict[str, Tuple[float, float]]:
with open(WORLD_CITIES_FILE, "r", encoding="utf-8") as file:
reader = csv.reader(file)
data = list(reader)
city_dict = {}
for row in data[1:]:
city = row[0]
lat = float(row[2])
lon = float(row[3])
city_dict[city] = (lat, lon)
return city_dict
def _get_countries_from_landarea() -> List[str]:
return get_unique_column_values(LAND_AREA_FILE, 0)
def _get_countries_from_worldcities() -> List[str]:
return get_unique_column_values(WORLD_CITIES_FILE, 4)
def _compare_countries_list(list1: List[str], list2: List[str]) -> List[str]:
return sorted(set(list1) - set(list2))
def _export_city_dictionary_pickle() -> None:
with open("city_dict.pkl", "wb") as file:
pickle.dump(_make_city_dictionary(), file)
def _export_city_dictionary_with_coords_pickle() -> None:
with open("city_dict_with_coords.pkl", "wb") as file:
pickle.dump(_make_city_dictionary_with_coords(), file)