1818
def to_snake_case(s: str) -> str:
    """Convert a string to snake_case.

    A leading ``/`` or ``*`` is spelled out so that keys which differ only
    by that symbol stay distinct:

    - "/ (food cost)" → "slash_food_cost"
    - "* (food cost)" → "star_food_cost"

    Every other character that is not a word character, whitespace or a
    hyphen is dropped; each run of whitespace/hyphens becomes a single
    underscore; the result is lower-cased and stripped of leading and
    trailing underscores.

    Args:
        s: The string (typically a JSON key) to convert.

    Returns:
        The snake_case form of ``s``; may be empty if ``s`` contains only
        punctuation.
    """
    # Spell out a leading symbol *before* punctuation is stripped; otherwise
    # "/ (food cost)" and "* (food cost)" would both collapse to "food_cost".
    symbol_names = {"/": "slash", "*": "star"}
    s = s.lstrip()
    if s[:1] in symbol_names:
        s = f"{symbol_names[s[0]]} {s[1:]}"

    # Remove special chars except spaces and hyphens
    s = re.sub(r"[^\w\s-]", "", s)
    # Convert spaces and hyphens to underscores
    s = re.sub(r"[\s-]+", "_", s)
    # Lowercase, then remove leading/trailing underscores
    return s.lower().strip("_")
3737
3838
@@ -44,64 +44,114 @@ def normalize_boolean_value(value: Any) -> bool:
4444 return True
4545 if value == "X" :
4646 return True
47+ # BLUE DUCK??
48+ if value == 1 :
49+ return True
4750 if value is None :
4851 return False
4952 return bool (value )
5053
5154
def normalize_float_value(value) -> float:
    """Normalize a numeric field value to a float.

    Args:
        value: Raw value read from the JSON data.

    Returns:
        ``0.0`` when ``value`` is ``None``; ``float(value)`` when ``value``
        is an ``int`` or ``float`` (booleans excluded). Any other value is
        returned unchanged rather than raising, so unexpected data is passed
        through instead of aborting the run.
    """
    if value is None:
        return 0.0
    # bool is a subclass of int; leave it alone so True/False are not
    # silently turned into 1.0/0.0.
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return float(value)
    return value
59+
60+
def normalize_master_json(data: List[Dict]) -> List[Dict]:
    """Normalize bird card data from master.json.

    Every key is renamed with ``to_snake_case``. The value stored under the
    new key depends on the *original* key:

    - keys in ``boolean_fields`` go through ``normalize_boolean_value``;
    - keys in ``float_fields`` go through ``normalize_float_value``;
    - a ``"Color"`` of ``None`` becomes ``"white"``;
    - everything else is copied unchanged.

    If two original keys map to the same snake_case name, the later one in
    the item overwrites the earlier one.

    Args:
        data: List of card dictionaries as loaded from master.json.

    Returns:
        A new list of new dictionaries; the input items are not modified.
    """
    normalized = []

    # Define boolean fields (fields that can have "X" values)
    boolean_fields = {
        "Forest",
        "Grassland",
        "Wetland",
        "Bonus card",
        "Predator",
        "Flocking",
        "North America",
        "Central America",
        "South America",
        "Europe",
        "Asia",
        "Africa",
        "Oceania",
        "Fan Art Pack?",
        "Anatomist",
        "Cartographer",
        "Historian",
        "Photographer",
        "Backyard Birder",
        "Bird Bander",
        "Bird Counter",
        "Bird Feeder",
        "Diet Specialist",
        "Enclosure Builder",
        "Endangered Species Protector",
        "Falconer",
        "Fishery Manager",
        "Food Web Expert",
        "Forester",
        "Large Bird Specialist",
        "Nest Box Builder",
        "Omnivore Expert",
        "Passerine Specialist",
        "Platform Builder",
        "Prairie Manager",
        "Rodentologist",
        "Small Clutch Specialist",
        "Viticulturalist",
        "Wetland Scientist",
        "Wildlife Gardener",
        "/ (food cost)",
        "* (food cost)",
        "Swift Start",
        "Automa ban",
    }
    # Fields whose missing (None) values are replaced by 0.0
    float_fields = {
        "Invertebrate",
        "Seed",
        "Fish",
        "Fruit",
        "Rodent",
        "Nectar",
        "Wild (food)",
        "Total food cost",
    }

    for item in data:
        normalized_item = {}

        for key, value in item.items():
            new_key = to_snake_case(key)

            # Dispatch on the original key, not the snake_case one
            if key in boolean_fields:
                normalized_item[new_key] = normalize_boolean_value(value)
            elif key in float_fields:
                normalized_item[new_key] = normalize_float_value(value)
            elif key == "Color" and value is None:
                # A missing color is recorded as "white"
                normalized_item[new_key] = "white"
            else:
                # Any other value (including nested structures) is kept as-is
                normalized_item[new_key] = value

        normalized.append(normalized_item)

    return normalized
93143
94144
95145def normalize_bonus_json (data : List [Dict ]) -> List [Dict ]:
96146 """Normalize bonus card data from bonus.json."""
97147 normalized = []
98-
148+
99149 for item in data :
100150 normalized_item = {}
101-
151+
102152 for key , value in item .items ():
103153 new_key = to_snake_case (key )
104-
154+
105155 # Handle special field name mappings
106156 if key == "Bonus card" :
107157 new_key = "bonus_card"
@@ -110,32 +160,32 @@ def normalize_bonus_json(data: List[Dict]) -> List[Dict]:
110160 elif key == "%" :
111161 new_key = "percentage"
112162 # Convert "-" to null
113- if value == "-" or value == "variable" :
163+ if value == "-" or value == "variable" :
114164 value = None
115165
116166 elif key == "VP Average" :
117167 new_key = "vp_average"
118168 elif key == "Explanatory text" :
119169 new_key = "explanatory_text"
120-
170+
121171 # Handle boolean field
122172 if key == "Automa" :
123173 normalized_item [new_key ] = normalize_boolean_value (value )
124174 else :
125175 normalized_item [new_key ] = value
126-
176+
127177 normalized .append (normalized_item )
128-
178+
129179 return normalized
130180
131181
132182def normalize_goals_json (data : List [Dict ]) -> List [Dict ]:
133183 """Normalize goals data from goals.json."""
134184 normalized = []
135-
185+
136186 for item in data :
137187 normalized_item = {}
138-
188+
139189 for key , value in item .items ():
140190 # Handle numeric score keys
141191
@@ -151,15 +201,13 @@ def normalize_goals_json(data: List[Dict]) -> List[Dict]:
151201 else :
152202 new_key = key # Keep id as-is
153203 normalized_item [new_key ] = value
154-
155-
204+
156205 normalized .append (normalized_item )
157-
206+
158207 return normalized
159208
160209
def normalize_general_json(data: Dict) -> Dict:
    """Return the general.json data unchanged.

    No normalization is applied; the same object that was passed in is
    returned.

    Args:
        data: Dictionary loaded from general.json.

    Returns:
        ``data`` itself, unmodified.
    """
    return data
164212
165213
@@ -169,45 +217,44 @@ def main():
169217 script_dir = Path (__file__ ).parent
170218 data_dir = script_dir
171219 output_dir = script_dir / "data_normalized"
172-
220+
173221 # Create output directory if it doesn't exist
174222 output_dir .mkdir (exist_ok = True )
175-
223+
176224 # Define file processors
177225 processors = {
178226 "master.json" : normalize_master_json ,
179227 "bonus.json" : normalize_bonus_json ,
180228 "goals.json" : normalize_goals_json ,
181229 "general.json" : normalize_general_json ,
182230 }
183-
231+
184232 # Process each file
185233 for filename , processor in processors .items ():
186234 input_path = data_dir / filename
187-
235+
188236 if not input_path .exists ():
189237 print (f"Warning: { filename } not found, skipping..." )
190238 continue
191-
239+
192240 print (f"Processing { filename } ..." )
193-
241+
194242 # Read input file
195- with open (input_path , 'r' , encoding = ' utf-8' ) as f :
243+ with open (input_path , "r" , encoding = " utf-8" ) as f :
196244 data = json .load (f )
197-
245+
198246 # Normalize data
199247 normalized_data = processor (data )
200-
248+
201249 # Write output file
202250 output_path = output_dir / filename
203- with open (output_path , 'w' , encoding = ' utf-8' ) as f :
251+ with open (output_path , "w" , encoding = " utf-8" ) as f :
204252 json .dump (normalized_data , f , indent = 2 , ensure_ascii = False )
205-
253+
206254 print (f" → Written to { output_path } " )
207-
255+
208256 print (f"\n Normalization complete! Output files written to { output_dir } " )
209257
210258
# Run the normalization only when executed as a script, not on import.
if __name__ == "__main__":
    main()
213-
0 commit comments