# scot_1851_config.yaml
# Census input, cleaning, lookup and string-comparison settings
# (country "scot", year 1851).
# NOTE(review): the nesting in this section was reconstructed from the key
# names after the indentation was lost; confirm it against the code that
# reads this file.
census:
  country: "scot"
  year: 1851
  uid_field: "RecID"
  field_to_geocode: "Address"
  boundaries_field: ["merged_id", "conrd_town"]
  subset_field: "subset_id"
  census_file: "../data/input/census/1851_sct_geocode.txt"

  # Options forwarded to pandas read_csv (presumably when reading census_file).
  read_csv_params:
    sep: "\t"  # passed to 'sep' parameter of pandas read_csv
    encoding: "latin-1"  # passed to 'encoding' parameter of pandas read_csv
    # Must be int 0, 1, 2, or 3 (3 is csv.QUOTE_NONE).
    # Passed to 'quoting' parameter of pandas read_csv.
    quoting: 3
    na_values: [".", " ", " - "]  # passed to 'na_values' parameter of pandas read_csv
    # nrows: 1000000
    usecols: ["RecID", "Address", "ParID", "subset_id"]

  # Address cleaning. "Address_alt" in write_processed_csv_params_slim matches
  # field_to_clean + cleaned_field_suffix.
  field_to_clean: "Address"
  # Regex replacement file.
  standardisation_file: "../configuration/standardisation_files/icem_street_standardisation.json"
  min_len: 5
  cleaned_field_suffix: "_alt"
  unique_field_to_geocode_name: "address_uid"

  # Lookup tables supplying the columns named in boundaries_field
  # ("merged_id" and "conrd_town").
  # Presumably each is joined to the census on
  # lkup_census_field == lkup_uid_field -- TODO confirm.
  lkups:
    scot_parishes:
      lkup_file: "../data/input/scot/scot_parish_boundary/scotboundarylinking.xlsx"
      lkup_uid_field: "ParID"
      lkup_census_field: "ParID"
      # Presumably reader keyword arguments for the Excel workbook.
      lkup_params:
        sheet_name: "1851_icem"
        usecols: ["ParID", "merged_id"]
    scot_conrdtowns:
      lkup_file: "../data/input/scot/conrd_town_recid/conrd_town_recid_1851.txt"
      lkup_uid_field: "recid"
      lkup_census_field: "RecID"
      # Presumably reader keyword arguments for the comma-separated text file.
      lkup_params:
        sep: ","
        quoting: 3
        usecols: ["recid", "conrd_town"]

  # Output settings for the processed census data (tab-separated, UTF-8,
  # index disabled).
  write_processed_csv_params:
    sep: "\t"
    encoding: "utf-8"
    index: false

  # Same output settings, restricted to the listed columns.
  write_processed_csv_params_slim:
    columns: ["address_uid", "Address_alt", "merged_id", "subset_id", "conrd_town"]
    sep: "\t"
    encoding: "utf-8"
    index: false

  # Comparison names mapped to short labels.
  # Presumably the labels are output column names -- TODO confirm.
  comparers:
    rapidfuzzy_wratio: "rapidfuzzy_wratio_s"
    rapidfuzzy_partial_ratio_alignment: "align"

  # NOTE(review): these look like cut-offs for the two comparers above
  # (similarity score and alignment length) -- confirm units with the consumer.
  sim_comp_thresh: 0.9
  align_thresh: 7
  final_score_field: "fs"
# Boundary datasets (shapefiles), one entry per boundary type.
# NOTE(review): the nesting in this section was reconstructed from the key
# names after the indentation was lost; confirm it against the code that
# reads this file.
boundaries:
  boundary_1:
    geom_name: "scotparish"
    gis_file: "../data/input/scot/scot_parish_boundary/CivilParish_pre1891/CivilParish_pre1891.shp"
    gis_uid_field: "JOIN_NAME_"
    # Presumably reader keyword arguments for gis_file -- TODO confirm.
    gis_read_params:
      engine: "pyogrio"
      columns: ["JOIN_NAME_"]
      # max_features: 1000
    # Declared once only: this key previously appeared twice in boundary_1 with
    # identical values, and duplicate mapping keys are invalid YAML.
    gis_write_params:
      sep: "\t"
      encoding: "utf-8"
      index: false
    # Lookup from the shapefile id ("JOIN_NAME_") to "merged_id", which is one
    # of the columns listed in census.boundaries_field.
    lkup_file: "../data/input/scot/scot_parish_boundary/scotboundarylinking.xlsx"
    lkup_field_uid: "JOIN_NAME_"
    lkup_field_censuslink: "merged_id"
    lkup_read_params:
      sheet_name: "1851_gis"
      na_values: "."
      usecols: ["JOIN_NAME_", "merged_id"]

  boundary_2:
    geom_name: "scotconrdtown"
    gis_file: "../data/input/scot/Scotland_ConRD_Town_1851_1901/Scotland_ConRD_Town_1851_1901.shp"
    gis_uid_field: "ConRD_town"
    # Presumably reader keyword arguments for gis_file -- TODO confirm.
    gis_read_params:
      engine: "pyogrio"
      columns: ["ConRD_town"]
      # max_features: 1000
    gis_write_params:
      sep: "\t"
      encoding: "utf-8"
      index: false