Skip to content

Commit 198891b

Browse files
authored
Add reproduced notebook, clean model code, and trained model
1 parent 38d9cbb commit 198891b

File tree

3 files changed

+4220
-0
lines changed

3 files changed

+4220
-0
lines changed
Lines changed: 366 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,366 @@
1+
{
2+
"nbformat": 4,
3+
"nbformat_minor": 0,
4+
"metadata": {
5+
"colab": {
6+
"provenance": []
7+
},
8+
"kernelspec": {
9+
"name": "python3",
10+
"display_name": "Python 3"
11+
},
12+
"language_info": {
13+
"name": "python"
14+
}
15+
},
16+
"cells": [
17+
{
18+
"cell_type": "markdown",
19+
"source": [
20+
"# Clean Model Code"
21+
],
22+
"metadata": {
23+
"id": "oJzW6amLdYTb"
24+
}
25+
},
26+
{
27+
"cell_type": "code",
28+
"execution_count": 1,
29+
"metadata": {
30+
"id": "jp6OuLM997oV"
31+
},
32+
"outputs": [],
33+
"source": [
34+
"# Libraries\n",
35+
"import pandas as pd\n",
36+
"import numpy as np\n",
37+
"import joblib\n",
38+
"\n",
39+
"from sklearn.compose import ColumnTransformer\n",
40+
"from sklearn.preprocessing import OneHotEncoder\n",
41+
"from sklearn.pipeline import Pipeline\n",
42+
"from sklearn.ensemble import GradientBoostingRegressor\n"
43+
]
44+
},
45+
{
46+
"cell_type": "code",
47+
"source": [
48+
"# 1. Load Data\n",
49+
"def load_data(base_path=\"/content/Data/\"):\n",
"    \"\"\"Read the EV and ICE consumption CSV files found in base_path.\n",
"\n",
"    Args:\n",
"        base_path: Directory holding the five CSV files. Defaults to\n",
"            \"/content/Data/\", the location this notebook reads from.\n",
"\n",
"    Returns:\n",
"        Tuple of DataFrames in the order\n",
"        (ev, diesel, petrol91, petrol95, petrol98).\n",
"    \"\"\"\n",
"    import os\n",
"\n",
"    file_names = (\n",
"        \"Pure electric consumption.csv\",\n",
"        \"Diesel consumption.csv\",\n",
"        \"petrol91RON consumption.csv\",\n",
"        \"petrol95RON consumption.csv\",\n",
"        \"petrol98RON consumption.csv\",\n",
"    )\n",
"    # os.path.join works whether or not base_path ends with a separator.\n",
"    return tuple(pd.read_csv(os.path.join(base_path, name)) for name in file_names)"
60+
],
61+
"metadata": {
62+
"id": "WYYebg76_Gah"
63+
},
64+
"execution_count": 2,
65+
"outputs": []
66+
},
67+
{
68+
"cell_type": "code",
69+
"source": [
70+
"# 2. Preprocess Data\n",
71+
"def prepare_data(n_samples=4000, random_state=42, emission_factor=0.18):\n",
"    \"\"\"Build the feature matrix X and target y for the CO2-savings model.\n",
"\n",
"    Every EV row is paired with every ICE row (cross join), a random subset\n",
"    of the pairs is kept, and the target is the ICE baseline minus the EV\n",
"    emissions.\n",
"\n",
"    Args:\n",
"        n_samples: Maximum number of EV/ICE pairs to keep. When fewer pairs\n",
"            exist, all of them are used instead of raising.\n",
"        random_state: Seed used when sampling the pairs.\n",
"        emission_factor: Grid emission factor in kg CO2 per kWh.\n",
"\n",
"    Returns:\n",
"        (X, y) where X has the columns Make_EV, Make_ICE, BodyStyle_EV,\n",
"        BodyStyle_ICE, FuelType_ICE, YearDiff, ICE_CO2_Baseline and y is\n",
"        CO2_saving = ICE_CO2_Baseline - EV_gCO2_per_km.\n",
"\n",
"    Raises:\n",
"        KeyError: If a required column is absent (or was dropped because it\n",
"            contained missing values).\n",
"    \"\"\"\n",
"    ev, diesel, petrol91, petrol95, petrol98 = load_data()\n",
"\n",
"    # Drop every column that contains a missing value (same rule for all sets).\n",
"    ev = ev.dropna(axis=1)\n",
"    # Insertion order fixes the row order of the combined ICE frame.\n",
"    ice_sources = {\n",
"        \"Petrol91\": petrol91.dropna(axis=1),\n",
"        \"Petrol95\": petrol95.dropna(axis=1),\n",
"        \"Petrol98\": petrol98.dropna(axis=1),\n",
"        \"Diesel\": diesel.dropna(axis=1),\n",
"    }\n",
"\n",
"    shared_cols = [\"Make\", \"BodyStyle\", \"ModelReleaseYear\"]\n",
"\n",
"    def require(frame, cols, label):\n",
"        missing = [col for col in cols if col not in frame.columns]\n",
"        if missing:\n",
"            raise KeyError(f\"{label} data is missing required columns: {missing}\")\n",
"\n",
"    # EV side: keep only the columns the model needs, with explicit names.\n",
"    require(ev, shared_cols + [\"EnergyConsumptionWhkm\"], \"EV\")\n",
"    ev_side = ev[shared_cols].add_suffix(\"_EV\").assign(\n",
"        # Wh/km -> kWh/km -> kg CO2/km -> g CO2/km\n",
"        EV_gCO2_per_km=ev[\"EnergyConsumptionWhkm\"] / 1000 * emission_factor * 1000\n",
"    )\n",
"\n",
"    # ICE side: one baseline factor for petrol grades, another for diesel.\n",
"    # NOTE(review): the factors presumably convert L/100km to g CO2/km - confirm.\n",
"    ice_parts = []\n",
"    for fuel, frame in ice_sources.items():\n",
"        require(frame, shared_cols + [\"FuelConsumptionCombined\"], fuel)\n",
"        factor = 23.2 if fuel.lower().startswith(\"petrol\") else 26.5\n",
"        ice_parts.append(\n",
"            frame[shared_cols].add_suffix(\"_ICE\").assign(\n",
"                FuelType_ICE=fuel,\n",
"                ICE_CO2_Baseline=frame[\"FuelConsumptionCombined\"] * factor,\n",
"            )\n",
"        )\n",
"    ice_side = pd.concat(ice_parts, ignore_index=True)\n",
"\n",
"    # Cartesian join of the slimmed frames, then a reproducible subsample.\n",
"    pairs = ev_side.merge(ice_side, how=\"cross\")\n",
"    pairs = pairs.sample(n=min(n_samples, len(pairs)), random_state=random_state)\n",
"\n",
"    # Create final features\n",
"    pairs = pairs.assign(\n",
"        YearDiff=pairs[\"ModelReleaseYear_EV\"] - pairs[\"ModelReleaseYear_ICE\"],\n",
"        CO2_saving=pairs[\"ICE_CO2_Baseline\"] - pairs[\"EV_gCO2_per_km\"],\n",
"    )\n",
"\n",
"    X = pairs[[\n",
"        \"Make_EV\", \"Make_ICE\",\n",
"        \"BodyStyle_EV\", \"BodyStyle_ICE\",\n",
"        \"FuelType_ICE\", \"YearDiff\", \"ICE_CO2_Baseline\"\n",
"    ]]\n",
"\n",
"    y = pairs[\"CO2_saving\"]\n",
"\n",
"    return X, y\n"
122+
],
123+
"metadata": {
124+
"id": "tlFGBXnC_Jes"
125+
},
126+
"execution_count": 3,
127+
"outputs": []
128+
},
129+
{
130+
"cell_type": "code",
131+
"source": [
132+
"# 3. Build Preprocessing Pipeline\n",
133+
"def build_preprocessor():\n",
"    \"\"\"Return a ColumnTransformer that one-hot encodes the categorical features.\n",
"\n",
"    Columns not listed as categorical are passed through unchanged, and\n",
"    categories unseen during fit are ignored at transform time.\n",
"    \"\"\"\n",
"    one_hot = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False)\n",
"    encoded_columns = [\n",
"        \"Make_EV\", \"Make_ICE\",\n",
"        \"BodyStyle_EV\", \"BodyStyle_ICE\",\n",
"        \"FuelType_ICE\"\n",
"    ]\n",
"    return ColumnTransformer(\n",
"        transformers=[(\"cat\", one_hot, encoded_columns)],\n",
"        remainder=\"passthrough\",\n",
"    )"
147+
],
148+
"metadata": {
149+
"id": "48xrTDYZ_Oov"
150+
},
151+
"execution_count": 4,
152+
"outputs": []
153+
},
154+
{
155+
"cell_type": "code",
156+
"source": [
157+
"# 4. Train and Save the Model\n",
158+
"def train_model():\n",
"    \"\"\"Fit the preprocessing + gradient-boosting pipeline and persist it.\n",
"\n",
"    Returns:\n",
"        The fitted Pipeline, which is also written to co2_savings_model.pkl.\n",
"    \"\"\"\n",
"    print(\"Loading and preparing data...\")\n",
"    features, target = prepare_data()\n",
"\n",
"    print(\"Building model pipeline...\")\n",
"    steps = [\n",
"        (\"preprocessor\", build_preprocessor()),\n",
"        (\"model\", GradientBoostingRegressor(random_state=42)),\n",
"    ]\n",
"    pipe = Pipeline(steps)\n",
"\n",
"    print(\"Training model...\")\n",
"    pipe.fit(features, target)\n",
"\n",
"    print(\"Saving model to co2_savings_model.pkl...\")\n",
"    joblib.dump(pipe, \"co2_savings_model.pkl\")\n",
"\n",
"    print(\"Training complete!\")\n",
"    return pipe\n",
181+
"\n"
182+
],
183+
"metadata": {
184+
"id": "zoeudXzD_Rpo"
185+
},
186+
"execution_count": 5,
187+
"outputs": []
188+
},
189+
{
190+
"cell_type": "code",
191+
"source": [
192+
"# 5. Prediction Function (Used by FastAPI)\n",
193+
"def load_model(model_path=\"co2_savings_model.pkl\"):\n",
"    \"\"\"Load the trained pipeline from disk.\n",
"\n",
"    Args:\n",
"        model_path: Path of the joblib file to load. Defaults to the file\n",
"            name this notebook saves the model under.\n",
"\n",
"    Returns:\n",
"        The object stored in the file (the fitted pipeline).\n",
"    \"\"\"\n",
"    # SECURITY: joblib files are pickles; loading one executes code embedded\n",
"    # in it. Only load model files that come from a trusted source.\n",
"    return joblib.load(model_path)\n",
195+
"\n",
196+
"\n",
197+
"def predict_savings(input_dict, model=None):\n",
"    \"\"\"Predict the CO2 saving for a single EV/ICE pairing.\n",
"\n",
"    Args:\n",
"        input_dict: Mapping holding every model feature, for example:\n",
"            {\n",
"                \"Make_EV\": \"Tesla\",\n",
"                \"Make_ICE\": \"Toyota\",\n",
"                \"BodyStyle_EV\": \"SUV\",\n",
"                \"BodyStyle_ICE\": \"SUV\",\n",
"                \"FuelType_ICE\": \"Petrol95\",\n",
"                \"YearDiff\": 5,\n",
"                \"ICE_CO2_Baseline\": 220.4\n",
"            }\n",
"        model: Optional already-loaded pipeline. When omitted, the model is\n",
"            read from disk with load_model() on every call, so long-running\n",
"            services should load it once and pass it in.\n",
"\n",
"    Returns:\n",
"        {\"Predicted_CO2_Savings\": <float>}\n",
"\n",
"    Raises:\n",
"        ValueError: If input_dict lacks one of the required features.\n",
"    \"\"\"\n",
"    feature_order = [\n",
"        \"Make_EV\", \"Make_ICE\",\n",
"        \"BodyStyle_EV\", \"BodyStyle_ICE\",\n",
"        \"FuelType_ICE\", \"YearDiff\", \"ICE_CO2_Baseline\"\n",
"    ]\n",
"    missing = [name for name in feature_order if name not in input_dict]\n",
"    if missing:\n",
"        raise ValueError(f\"input_dict is missing required features: {missing}\")\n",
"\n",
"    if model is None:\n",
"        model = load_model()\n",
"\n",
"    # Select the columns explicitly so the frame always matches the training\n",
"    # column order, whatever the key order (or extra keys) of input_dict.\n",
"    input_df = pd.DataFrame([input_dict])[feature_order]\n",
"\n",
"    prediction = model.predict(input_df)[0]\n",
"\n",
"    return {\"Predicted_CO2_Savings\": float(prediction)}\n",
217+
"\n",
218+
"\n",
219+
"# Train and save the model when this cell runs (in a notebook __name__ is always \"__main__\")\n",
220+
"\n",
221+
"if __name__ == \"__main__\":\n",
222+
" train_model()"
223+
],
224+
"metadata": {
225+
"colab": {
226+
"base_uri": "https://localhost:8080/"
227+
},
228+
"id": "C5HAlwff_YWz",
229+
"outputId": "3f70208b-71ef-49ca-a45b-cce6ba2d52b0"
230+
},
231+
"execution_count": 6,
232+
"outputs": [
233+
{
234+
"output_type": "stream",
235+
"name": "stdout",
236+
"text": [
237+
"Loading and preparing data...\n",
238+
"Building model pipeline...\n",
239+
"Training model...\n",
240+
"Saving model to co2_savings_model.pkl...\n",
241+
"Training complete!\n"
242+
]
243+
}
244+
]
245+
},
246+
{
247+
"cell_type": "code",
248+
"source": [
249+
"# Example prediction: smoke-test predict_savings with a sample input\n",
250+
"sample_input = {\n",
251+
" \"Make_EV\": \"Tesla\",\n",
252+
" \"Make_ICE\": \"Toyota\",\n",
253+
" \"BodyStyle_EV\": \"SUV\",\n",
254+
" \"BodyStyle_ICE\": \"SUV\",\n",
255+
" \"FuelType_ICE\": \"Petrol95\",\n",
256+
" \"YearDiff\": 5,\n",
257+
" \"ICE_CO2_Baseline\": 220.4\n",
258+
"}\n",
259+
"\n",
260+
"predict_savings(sample_input)\n"
261+
],
262+
"metadata": {
263+
"colab": {
264+
"base_uri": "https://localhost:8080/"
265+
},
266+
"id": "0cdFnCMNEuLW",
267+
"outputId": "dde3e048-ccd7-43f8-ebe3-021f4ff7b5e7"
268+
},
269+
"execution_count": 7,
270+
"outputs": [
271+
{
272+
"output_type": "execute_result",
273+
"data": {
274+
"text/plain": [
275+
"{'Predicted_CO2_Savings': 191.07520862997606}"
276+
]
277+
},
278+
"metadata": {},
279+
"execution_count": 7
280+
}
281+
]
282+
},
283+
{
284+
"cell_type": "code",
285+
"source": [
286+
"# Offer the trained model as a browser download when running in Google Colab.\n",
"# Outside Colab the google.colab package is unavailable, so the cell is a no-op\n",
"# and the notebook still survives Restart & Run All.\n",
"try:\n",
"    from google.colab import files\n",
"except ImportError:\n",
"    files = None\n",
"\n",
"if files is not None:\n",
"    files.download(\"co2_savings_model.pkl\")\n"
288+
],
289+
"metadata": {
290+
"colab": {
291+
"base_uri": "https://localhost:8080/",
292+
"height": 17
293+
},
294+
"id": "hx7R4DxbNZlH",
295+
"outputId": "e5f4c6db-7526-4c30-aa98-6f5c4613a751"
296+
},
297+
"execution_count": 8,
298+
"outputs": [
299+
{
300+
"output_type": "display_data",
301+
"data": {
302+
"text/plain": [
303+
"<IPython.core.display.Javascript object>"
304+
],
305+
"application/javascript": [
306+
"\n",
307+
" async function download(id, filename, size) {\n",
308+
" if (!google.colab.kernel.accessAllowed) {\n",
309+
" return;\n",
310+
" }\n",
311+
" const div = document.createElement('div');\n",
312+
" const label = document.createElement('label');\n",
313+
" label.textContent = `Downloading \"${filename}\": `;\n",
314+
" div.appendChild(label);\n",
315+
" const progress = document.createElement('progress');\n",
316+
" progress.max = size;\n",
317+
" div.appendChild(progress);\n",
318+
" document.body.appendChild(div);\n",
319+
"\n",
320+
" const buffers = [];\n",
321+
" let downloaded = 0;\n",
322+
"\n",
323+
" const channel = await google.colab.kernel.comms.open(id);\n",
324+
" // Send a message to notify the kernel that we're ready.\n",
325+
" channel.send({})\n",
326+
"\n",
327+
" for await (const message of channel.messages) {\n",
328+
" // Send a message to notify the kernel that we're ready.\n",
329+
" channel.send({})\n",
330+
" if (message.buffers) {\n",
331+
" for (const buffer of message.buffers) {\n",
332+
" buffers.push(buffer);\n",
333+
" downloaded += buffer.byteLength;\n",
334+
" progress.value = downloaded;\n",
335+
" }\n",
336+
" }\n",
337+
" }\n",
338+
" const blob = new Blob(buffers, {type: 'application/binary'});\n",
339+
" const a = document.createElement('a');\n",
340+
" a.href = window.URL.createObjectURL(blob);\n",
341+
" a.download = filename;\n",
342+
" div.appendChild(a);\n",
343+
" a.click();\n",
344+
" div.remove();\n",
345+
" }\n",
346+
" "
347+
]
348+
},
349+
"metadata": {}
350+
},
351+
{
352+
"output_type": "display_data",
353+
"data": {
354+
"text/plain": [
355+
"<IPython.core.display.Javascript object>"
356+
],
357+
"application/javascript": [
358+
"download(\"download_7a84e164-972a-47fa-9c52-86369d5592d7\", \"co2_savings_model.pkl\", 146676)"
359+
]
360+
},
361+
"metadata": {}
362+
}
363+
]
364+
}
365+
]
366+
}

0 commit comments

Comments
 (0)