-
Notifications
You must be signed in to change notification settings - Fork 8k
/
Copy pathconfig.yml
162 lines (136 loc) · 2.27 KB
/
config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# Package Overview
# Name of the package this configuration belongs to.
package_name: regression_model
# Data Files
# File names of the training and test datasets (CSV). Only the file names are
# given here; the directory they are read from is not part of this file.
training_data_file: train.csv
test_data_file: test.csv
# Variables
# The variable we are attempting to predict (sale price)
target: SalePrice
# Name of the pipeline and the file-name stem used when it is saved.
# NOTE(review): the trailing "_v" suggests the consumer appends a version
# string to the save-file name - confirm against the code that writes it.
pipeline_name: regression_model
pipeline_save_file: regression_model_output_v
# Raw column names that begin with a digit will cause syntax errors, so each
# one is mapped to a replacement name (raw column name: new name).
# The entries must be indented under the key; at column 0 they would parse as
# unrelated top-level keys and leave variables_to_rename empty.
variables_to_rename:
  1stFlrSF: FirstFlrSF
  2ndFlrSF: SecondFlrSF
  # NOTE(review): "ThreeSsnPortch" looks like a typo for "ThreeSsnPorch". It is
  # left unchanged because code outside this file may rely on this spelling.
  3SsnPorch: ThreeSsnPortch
# Columns listed as features.
# FirstFlrSF and SecondFlrSF are the renamed forms of 1stFlrSF and 2ndFlrSF
# (see variables_to_rename).
features:
  - MSSubClass
  - MSZoning
  - LotFrontage
  - LotShape
  - LandContour
  - LotConfig
  - Neighborhood
  - OverallQual
  - OverallCond
  - YearRemodAdd
  - RoofStyle
  - Exterior1st
  - ExterQual
  - Foundation
  - BsmtQual
  - BsmtExposure
  - BsmtFinType1
  - HeatingQC
  - CentralAir
  - FirstFlrSF  # renamed
  - SecondFlrSF  # renamed
  - GrLivArea
  - BsmtFullBath
  - HalfBath
  - KitchenQual
  - TotRmsAbvGrd
  - Functional
  - Fireplaces
  - FireplaceQu
  - GarageFinish
  - GarageCars
  - GarageArea
  - PavedDrive
  - WoodDeckSF
  - ScreenPorch
  - SaleCondition
  # YrSold is listed only so that the temporal variable can be calculated.
  - YrSold
# set train/test split
# NOTE(review): presumably the fraction of rows held out for testing
# (0.1 = 10%) - confirm against the code that performs the split.
test_size: 0.1
# to set the random seed
random_state: 0
# NOTE(review): presumably the regularization strength of the regression
# model - confirm against the pipeline definition.
alpha: 0.001
# Variables that contain NA values in the training set, split into groups.
# NOTE(review): the "_frequent" / "_missing" suffixes presumably name the
# imputation strategy applied to each categorical group (most frequent
# category vs. an explicit "Missing" label) - confirm in the pipeline.
categorical_vars_with_na_frequent:
  - BsmtQual
  - BsmtExposure
  - BsmtFinType1
  - GarageFinish
categorical_vars_with_na_missing:
  - FireplaceQu
# Numerical variables with NA values.
numerical_vars_with_na:
  - LotFrontage
# Temporal (year) variables and the reference variable that accompanies them.
# NOTE(review): presumably each temporal variable is expressed relative to
# ref_var (e.g. years elapsed before the sale) - confirm in the pipeline.
temporal_vars:
  - YearRemodAdd
ref_var: YrSold
# Numerical variables that receive a log transform.
numericals_log_vars:
  - LotFrontage
  - FirstFlrSF
  - GrLivArea
# Variables to binarize.
binarize_vars:
  - ScreenPorch
# Variables whose categories are mapped to values, grouped by variable family.
# NOTE(review): each *_vars list presumably pairs with the *_mappings table of
# the same prefix (qual, exposure, finish, garage) - confirm in the pipeline.
qual_vars:
  - ExterQual
  - BsmtQual
  - HeatingQC
  - KitchenQual
  - FireplaceQu
exposure_vars:
  - BsmtExposure
finish_vars:
  - BsmtFinType1
garage_vars:
  - GarageFinish
# Variables listed as categorical.
categorical_vars:
  - MSSubClass
  - MSZoning
  - LotShape
  - LandContour
  - LotConfig
  - Neighborhood
  - RoofStyle
  - Exterior1st
  - Foundation
  - CentralAir
  - Functional
  - PavedDrive
  - SaleCondition
# variable mappings
# Category label -> integer code for this mapping table; both "Missing" and
# "NA" map to 0.
# The entries must be indented under the key; at column 0 they would become
# top-level keys that collide with the other mapping tables (Gd, Missing, NA).
qual_mappings:
  Po: 1
  Fa: 2
  TA: 3
  Gd: 4
  Ex: 5
  Missing: 0
  NA: 0
# Category label -> integer code for this mapping table.
# The entries must be indented under the key so they are not parsed as
# top-level keys.
exposure_mappings:
  # Quoted on purpose: a plain No is read as boolean false by YAML 1.1
  # loaders, which would stop the key matching the category string "No".
  'No': 1
  Mn: 2
  Av: 3
  Gd: 4
# Category label -> integer code for this mapping table; both "Missing" and
# "NA" map to 0.
# The entries must be indented under the key; at column 0 they would become
# top-level keys that collide with the other mapping tables (Missing, NA, Unf).
finish_mappings:
  Missing: 0
  NA: 0
  Unf: 1
  LwQ: 2
  Rec: 3
  BLQ: 4
  ALQ: 5
  GLQ: 6
# Category label -> integer code for this mapping table; both "Missing" and
# "NA" map to 0.
# The entries must be indented under the key; at column 0 they would become
# top-level keys that collide with the other mapping tables (Missing, NA, Unf).
garage_mappings:
  Missing: 0
  NA: 0
  Unf: 1
  RFn: 2
  Fin: 3