|
2 | 2 | This is the Python interface
|
3 | 3 | """
|
4 | 4 | import logging
|
5 |
| -from geopy import geocoders |
6 | 5 | from geopy.extra.rate_limiter import RateLimiter
|
7 | 6 | from sqlite_utils import Database
|
8 | 7 |
|
@@ -39,31 +38,41 @@ def geocode_table(
|
39 | 38 | table = db[table_name]
|
40 | 39 |
|
41 | 40 | if latitude_column not in table.columns_dict:
|
| 41 | + log.info(f"Adding latitude column: {latitude_column}") |
42 | 42 | table.add_column(latitude_column, float)
|
43 | 43 |
|
44 | 44 | if longitude_column not in table.columns_dict:
|
| 45 | + log.info(f"Adding longitude column: {longitude_column}") |
45 | 46 | table.add_column(longitude_column, float)
|
46 | 47 |
|
47 |
| - if force: |
48 |
| - rows = table.rows |
49 |
| - else: |
50 |
| - rows = table.rows_where( |
51 |
| - f"{latitude_column} IS NULL OR {longitude_column} IS NULL" |
52 |
| - ) |
| 48 | + if "geocoder" not in table.columns_dict: |
| 49 | + log.info("Adding geocoder column") |
| 50 | + table.add_column("geocoder", str) |
| 51 | + |
| 52 | + rows, todo = select_ungeocoded( |
| 53 | + db, |
| 54 | + table, |
| 55 | + latitude_column=latitude_column, |
| 56 | + longitude_column=longitude_column, |
| 57 | + force=force, |
| 58 | + ) |
53 | 59 |
|
54 |
| - if delay: |
55 |
| - geocode = RateLimiter(geocoder.geocode, min_delay_seconds=delay) |
56 |
| - else: |
57 |
| - geocode = geocoder.geocode |
| 60 | + # always use a rate limiter, even with no delay |
| 61 | + geocode = RateLimiter(geocoder.geocode, min_delay_seconds=delay) |
58 | 62 |
|
59 | 63 | count = 0
|
| 64 | + log.info(f"Geocoding {todo} rows from {table.name}") |
60 | 65 | for row in rows:
|
61 | 66 | result = geocode_row(geocode, query_template, row)
|
62 | 67 | if result:
|
63 | 68 | pks = [row[pk] for pk in table.pks]
|
64 | 69 | table.update(
|
65 | 70 | pks,
|
66 |
| - {latitude_column: result.latitude, longitude_column: result.longitude}, |
| 71 | + { |
| 72 | + latitude_column: result.latitude, |
| 73 | + longitude_column: result.longitude, |
| 74 | + "geocoder": geocoder.__class__.__name__, |
| 75 | + }, |
67 | 76 | )
|
68 | 77 | count += 1
|
69 | 78 |
|
@@ -95,6 +104,7 @@ def geocode_list(
|
95 | 104 | if result:
|
96 | 105 | row[longitude_column] = result.longitude
|
97 | 106 | row[latitude_column] = result.latitude
|
| 107 | + row["geocoder"] = get_geocoder_class(geocode) |
98 | 108 |
|
99 | 109 | yield row, bool(result)
|
100 | 110 |
|
@@ -126,3 +136,13 @@ def select_ungeocoded(
|
126 | 136 | rows = table.rows_where(f"{latitude_column} IS NULL OR {longitude_column} IS NULL")
|
127 | 137 |
|
128 | 138 | return rows, count
|
| 139 | + |
| 140 | + |
def get_geocoder_class(geocode):
    """Return the class name of the geocoder that owns ``geocode``.

    ``geocode`` is normally either a bound ``geocoder.geocode`` method or a
    wrapper around one (such as ``RateLimiter``) that keeps the wrapped
    callable on a ``func`` attribute.  Wrappers are peeled off one layer at
    a time until a bound method is reached, so nested wrappers and other
    ``func``-carrying wrappers (e.g. ``functools.partial``) resolve to the
    same geocoder class as the bare method would.

    Args:
        geocode: A bound method, or a wrapper exposing the wrapped callable
            as ``.func``.

    Returns:
        The ``__name__`` of the class of the object the innermost bound
        method is attached to.

    Raises:
        AttributeError: If no bound method is found, e.g. ``geocode`` is a
            plain function with neither ``__self__`` nor ``func``.
    """
    target = geocode

    # A bound method exposes its owner on ``__self__``; wrapper objects do
    # not, but keep the callable they wrap on ``func``.
    while not hasattr(target, "__self__") and hasattr(target, "func"):
        target = target.func

    return target.__self__.__class__.__name__
0 commit comments