-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy path02-search-panorama.py
More file actions
165 lines (126 loc) · 4.54 KB
/
Copy path02-search-panorama.py
File metadata and controls
165 lines (126 loc) · 4.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import os
import time
import sqlite3
import streetview
import concurrent.futures
# SQLite database file shared by every step of this script.
DB_PATH = "gsv.db"

# Maximum number of unsearched coordinates pulled from the database per batch.
SEARCH_BATCH_SIZE = 100_000

# When True, a coordinate whose search returned no panorama is still flagged
# as searched. Set it to False if you suspect empty results may be caused by
# network trouble: such coordinates then stay unsearched and are retried in a
# later batch.
COUNT_NONE_FOUND_AS_SEARCHED = True

# Size of the thread pool used to run searches concurrently.
WORKERS = 72
########################################
# MARK: Database setup
########################################
def setup_database(db_path=None):
    """Create the ``sample_coords`` and ``search_panoramas`` tables if missing.

    Both statements use ``CREATE TABLE IF NOT EXISTS``, so calling this on an
    already initialised database leaves existing tables untouched.

    Args:
        db_path: SQLite file to initialise. When omitted, the module-level
            ``DB_PATH`` is used, so existing callers are unaffected.
    """
    from contextlib import closing

    print("Setting up database")
    target = DB_PATH if db_path is None else db_path

    # closing() releases the handle even if a statement fails; the inner
    # `conn` context commits on success and rolls back on error.
    with closing(sqlite3.connect(target)) as conn, conn:
        # `searched` defaults to 0 (not the FALSE keyword) so the default
        # matches the `searched = 0` / `searched = 1` values used by the
        # queries in this file on every SQLite version.
        conn.execute(
            """CREATE TABLE IF NOT EXISTS sample_coords
            (id INTEGER PRIMARY KEY AUTOINCREMENT, lat real, lon real, label text, searched boolean default 0)
            """
        )
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS search_panoramas (
                pano_id TEXT PRIMARY KEY,
                lat REAL,
                lon REAL,
                date TEXT,
                copyright TEXT,
                heading REAL,
                pitch REAL,
                roll REAL
            )
            """
        )
########################################
# MARK: Get unsearched coords
########################################
def get_unsearched_coords(
    batch_size: int, *, db_path=None
) -> dict[int, tuple[float, float]]:
    """Return up to ``batch_size`` randomly chosen unsearched coordinates.

    Args:
        batch_size: Upper bound on the number of rows fetched (SQL ``LIMIT``).
        db_path: SQLite file to read. When omitted, the module-level
            ``DB_PATH`` is used, so existing callers are unaffected.

    Returns:
        A mapping of ``sample_coords.id`` to its ``(lat, lon)`` pair for rows
        whose ``searched`` column is 0.
    """
    from contextlib import closing

    target = DB_PATH if db_path is None else db_path

    # closing() guarantees the connection is released even if the query fails.
    with closing(sqlite3.connect(target)) as conn:
        # Only the columns that end up in the result are selected.
        rows = conn.execute(
            "SELECT id, lat, lon FROM sample_coords "
            "WHERE searched = 0 ORDER BY RANDOM() LIMIT ?",
            [batch_size],
        ).fetchall()

    print(f"Found {len(rows)} unsearched coords")
    return {coord_id: (lat, lon) for coord_id, lat, lon in rows}
########################################
# MARK: Search panorama
########################################
def search_and_insert(coord_id, lat, lon, *, db_path=None):
    """Search panoramas around one coordinate and store the results.

    Every panorama returned by ``streetview.search_panoramas`` is inserted
    into ``search_panoramas`` (duplicates are ignored via ``INSERT OR
    IGNORE``) and the coordinate is flagged as searched in ``sample_coords``.

    Nothing is written when the search result is not a list, or when it is
    empty and ``COUNT_NONE_FOUND_AS_SEARCHED`` is false; in both cases the
    coordinate stays unsearched.

    Args:
        coord_id: ``sample_coords.id`` of the coordinate being searched.
        lat: Latitude passed to the search.
        lon: Longitude passed to the search.
        db_path: SQLite file to write. When omitted, the module-level
            ``DB_PATH`` is used, so existing callers are unaffected.
    """
    from contextlib import closing

    print(f"Searching for coords {coord_id} with lat {lat:.2f} and lon {lon:.2f}")
    panorama_results = streetview.search_panoramas(lat, lon)

    # Anything other than a list is treated as a failed search.
    if not isinstance(panorama_results, list):
        print(f"Error searching for point {coord_id}, ({lat}, {lon})")
        return

    if not panorama_results:
        print(f"No panorama found for point {coord_id}, ({lat}, {lon})")
        if not COUNT_NONE_FOUND_AS_SEARCHED:
            return

    # The fifth column (copyright) is stored as NULL.
    rows = [
        (
            result.pano_id,
            result.lat,
            result.lon,
            result.date,
            None,
            result.heading,
            result.pitch,
            result.roll,
        )
        for result in panorama_results
    ]

    target = DB_PATH if db_path is None else db_path
    # closing() releases the connection even if a statement raises; the inner
    # `conn` context commits the inserts and the flag update together, or
    # rolls both back on error.
    with closing(sqlite3.connect(target)) as conn, conn:
        conn.executemany(
            "INSERT OR IGNORE INTO search_panoramas VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
            rows,
        )
        conn.execute("UPDATE sample_coords SET searched = 1 WHERE id = ?", [coord_id])

    print(f"Found {len(panorama_results)} panoramas for coord {coord_id}")
def run_batch_in_parallel():
    """Search one batch of unsearched coordinates on a thread pool.

    Fetches up to ``SEARCH_BATCH_SIZE`` coordinates, submits one
    ``search_and_insert`` task per coordinate to a pool of ``WORKERS``
    threads, and redraws a progress report after every completed task.

    Raises:
        SystemExit: With status 0 when no unsearched coordinates remain.
    """
    coords = get_unsearched_coords(SEARCH_BATCH_SIZE)
    if not coords:
        print("No unsearched coords found, exiting")
        # Raise SystemExit directly: the `exit` helper is installed by the
        # `site` module and is not guaranteed to exist.
        raise SystemExit(0)

    coords_count = len(coords)
    progress = 0
    # A monotonic clock keeps the durations immune to wall-clock adjustments.
    begin_time = time.monotonic()
    last_progress_time = begin_time

    with concurrent.futures.ThreadPoolExecutor(max_workers=WORKERS) as executor:
        futures = {
            executor.submit(search_and_insert, coord_id, lat, lon): coord_id
            for coord_id, (lat, lon) in coords.items()
        }
        for future in concurrent.futures.as_completed(futures):
            coord_id = futures[future]
            try:
                future.result()
            except Exception as e:
                # A failed task is reported and skipped so the batch goes on.
                print(f"Error searching for coord {coord_id}")
                print(e)
            progress += 1

            now = time.monotonic()
            last_duration = now - last_progress_time
            total_duration = now - begin_time
            last_progress_time = now

            # "Last speed" is the rate of the most recent completion alone
            # (one coord over the time since the previous one). Guard against
            # a zero interval, which coarse clocks can produce.
            last_speed = 1 / last_duration if last_duration > 0 else float("inf")
            total_speed = (
                progress / total_duration if total_duration > 0 else float("inf")
            )

            # clear the console
            os.system("cls" if os.name == "nt" else "clear")
            print(f"Search Coord Progress: {progress:d}/{coords_count:d}")
            print(f"Last Speed: {last_speed:f} coords/sec")
            print(f"Total Speed: {total_speed:f} coords/sec")
if __name__ == "__main__":
    # Make sure the tables exist, then keep processing batches. The loop has
    # no exit condition of its own: run_batch_in_parallel() terminates the
    # process once no unsearched coordinates are left.
    setup_database()
    while True:
        run_batch_in_parallel()