-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.toml
More file actions
52 lines (44 loc) · 1.87 KB
/
config.toml
File metadata and controls
52 lines (44 loc) · 1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# NOTE(review): presumably the SQLite database the import reads its address
# records from — confirm against the code that consumes sqlite_path.
# NOTE(review): this is an absolute, machine-specific Windows path; anyone
# running from a different checkout location will need to edit it.
[source]
sqlite_path = "C:/Users/kk/Code/toronto-addresses-import/addresses.db"
[run_defaults]
# Small downtown rectangle for first test run.
# The numbers read as [min_lat, min_lon, max_lat, max_lon], i.e.
# south, west, north, east.
# NOTE(review): confirm the consumer expects this order.
bbox = [43.645, -79.42, 43.665, -79.39]
# Overpass API endpoint. Per the notes on osm.source, a live query is posted
# here when osm.source = "overpass".
overpass_url = "https://overpass-api.de/api/interpreter"
[osm]
# Where stage 2 reads OSM data from. Two values are described:
#   "local"    — reads the cached extract refreshed via
#                `python -m t2.osm_refresh`.
#   "overpass" — posts a live query to overpass_url (set in [run_defaults]).
source = "local"
# Geofabrik PBF covering Toronto. Ontario is the smallest Geofabrik region
# that includes the city; ~600MB on first download, diff-friendly.
pbf_url = "https://download.geofabrik.de/north-america/canada/ontario-latest.osm.pbf"
# City-of-Toronto-ish clip applied after tag filtering. Anything outside this
# rectangle is discarded from the local extract to keep it small.
# The numbers follow the same latitude/longitude ordering as
# run_defaults.bbox (min_lat, min_lon, max_lat, max_lon).
# NOTE(review): confirm the consumer expects this order.
toronto_bbox = [43.58, -79.64, 43.86, -79.11]
# Where the PBF, filtered JSON, meta sidecar, lock and log live.
# NOTE(review): relative path — presumably resolved against the working
# directory or project root; confirm which.
extract_dir = "data/osm"
[conflation]
# Search radius for finding a matching OSM address (same housenumber + street).
# NOTE(review): the _m suffix suggests the unit is meters — confirm.
match_radius_m = 100
# Matches within this distance auto-approve; beyond it they become MATCH_FAR and go to review.
# With the values here, a match found farther than 15 but still inside the
# 100 search radius is the case that ends up as MATCH_FAR.
match_near_m = 15
# NOTE(review): presumably one on/off switch per review check — confirm
# against the code that reads this table.
# city_duplicate, missing_sample and nearby_street_mismatch each have a
# matching [check_params.<name>] table with tunables; match_far and
# suffix_range have no such table visible here.
[checks]
match_far = true
suffix_range = true
city_duplicate = true
missing_sample = true
nearby_street_mismatch = true
# Per-check tunables. Each sub-table name matches a key in [checks].
[check_params.city_duplicate]
# NOTE(review): presumably the distance (meters, going by the _m suffix)
# within which two addresses are treated as duplicates — confirm.
radius_m = 3.0
[check_params.missing_sample]
# NOTE(review): the name suggests every 50th MISSING candidate is sampled
# for review — confirm against the check's implementation.
every_nth = 50
[check_params.nearby_street_mismatch]
# Distance from a MISSING candidate within which an OSM address with the
# same housenumber but a different normalized street name will be flagged
# as a likely street-name spelling variant (e.g. "Deane Field" vs
# "Deanefield"). Tight by design — a wider radius would surface unrelated
# same-number addresses on parallel streets.
radius_m = 20.0
[upload]
# NOTE(review): presumably the maximum number of items sent per upload
# batch — confirm what unit a "batch" counts.
batch_size = 300
# NOTE(review): the name suggests a rate limit of one changeset per minute —
# confirm how the uploader enforces it.
changesets_per_minute = 1
# Template for the changeset comment. {run_name} and {batch_id} are
# placeholders in the string.
# NOTE(review): presumably substituted by the uploader at run time — confirm
# the formatting mechanism before adding other braces to this text.
changeset_comment_template = "Toronto Open Data address import, run={run_name}, batch={batch_id}"