{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Clean Model Code"
],
"metadata": {
"id": "oJzW6amLdYTb"
}
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "jp6OuLM997oV"
},
"outputs": [],
"source": [
"# Libraries\n",
"import pandas as pd\n",
"import numpy as np\n",
"import joblib\n",
"\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.ensemble import GradientBoostingRegressor\n"
]
},
{
"cell_type": "code",
"source": [
"# 1. Load Data\n",
"def load_data():\n",
"    base_path = \"/content/Data/\"\n",
"\n",
"    # Load EV and ICE datasets\n",
"    ev = pd.read_csv(base_path + \"Pure electric consumption.csv\")\n",
"    diesel = pd.read_csv(base_path + \"Diesel consumption.csv\")\n",
"    petrol91 = pd.read_csv(base_path + \"petrol91RON consumption.csv\")\n",
"    petrol95 = pd.read_csv(base_path + \"petrol95RON consumption.csv\")\n",
"    petrol98 = pd.read_csv(base_path + \"petrol98RON consumption.csv\")\n",
"\n",
"    return ev, diesel, petrol91, petrol95, petrol98"
],
"metadata": {
"id": "WYYebg76_Gah"
},
"execution_count": 2,
"outputs": []
},
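{
"cell_type": "code",
"source": [
"# Optional sanity check (not part of the original notebook): a minimal sketch, assuming the\n",
"# CSVs exist under /content/Data/ with the columns used later in this notebook\n",
"# (e.g. EnergyConsumptionWhkm, FuelConsumptionCombined, Make, BodyStyle, ModelReleaseYear).\n",
"ev_check, diesel_check, *_ = load_data()\n",
"print(ev_check.shape, diesel_check.shape)\n",
"print(ev_check.columns.tolist())\n"
],
"metadata": {},
"execution_count": null,
"outputs": []
},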
{
"cell_type": "code",
"source": [
"# 2. Preprocess Data\n",
"def prepare_data():\n",
"    ev, diesel, petrol91, petrol95, petrol98 = load_data()\n",
"\n",
"    # Drop columns that contain missing values\n",
"    dfs_cleaned = [df.dropna(axis=1) for df in [ev, diesel, petrol91, petrol95, petrol98]]\n",
"    ev, diesel, petrol91, petrol95, petrol98 = dfs_cleaned\n",
"\n",
"    # Calculate EV CO₂ emissions: Wh/km -> kWh/km, times grid factor (kg/kWh), times 1000 -> g/km\n",
"    emission_factor = 0.18  # kg CO₂ per kWh (assumed grid emission factor)\n",
"    ev[\"EV_gCO2_per_km\"] = (\n",
"        ev[\"EnergyConsumptionWhkm\"] / 1000 * emission_factor * 1000\n",
"    )\n",
"\n",
"    # Add ICE baselines: combined consumption (L/100 km) times ~23.2 (petrol) or ~26.5 (diesel) gives g CO₂/km\n",
"    def add_baseline(df, fuel):\n",
"        if fuel.lower().startswith(\"petrol\"):\n",
"            df[\"ICE_CO2_Baseline\"] = df[\"FuelConsumptionCombined\"] * 23.2\n",
"        else:\n",
"            df[\"ICE_CO2_Baseline\"] = df[\"FuelConsumptionCombined\"] * 26.5\n",
"        df[\"FuelType\"] = fuel\n",
"        return df\n",
"\n",
"    petrol91 = add_baseline(petrol91, \"Petrol91\")\n",
"    petrol95 = add_baseline(petrol95, \"Petrol95\")\n",
"    petrol98 = add_baseline(petrol98, \"Petrol98\")\n",
"    diesel = add_baseline(diesel, \"Diesel\")\n",
"\n",
"    # Combine ICE datasets\n",
"    ice_all = pd.concat([petrol91, petrol95, petrol98, diesel], ignore_index=True)\n",
"\n",
"    # Cartesian join: pair every EV with every ICE vehicle, then sample a smaller set for training\n",
"    df = (\n",
"        ev.assign(key=1)\n",
"        .merge(ice_all.assign(key=1), on=\"key\", suffixes=(\"_EV\", \"_ICE\"))\n",
"        .drop(\"key\", axis=1)\n",
"        .sample(n=4000, random_state=42)\n",
"    )\n",
"\n",
"    # Create final features\n",
"    df[\"YearDiff\"] = df[\"ModelReleaseYear_EV\"] - df[\"ModelReleaseYear_ICE\"]\n",
"    df[\"CO2_saving\"] = df[\"ICE_CO2_Baseline\"] - df[\"EV_gCO2_per_km\"]\n",
"\n",
"    X = df[[\n",
"        \"Make_EV\", \"Make_ICE\",\n",
"        \"BodyStyle_EV\", \"BodyStyle_ICE\",\n",
"        \"FuelType_ICE\", \"YearDiff\", \"ICE_CO2_Baseline\"\n",
"    ]]\n",
"\n",
"    y = df[\"CO2_saving\"]\n",
"\n",
"    return X, y\n"
],
"metadata": {
"id": "tlFGBXnC_Jes"
},
"execution_count": 3,
"outputs": []
},
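{
"cell_type": "code",
"source": [
"# Optional sanity check (not in the original notebook): a minimal sketch, assuming the same\n",
"# data-loading assumptions as prepare_data(), to confirm the engineered features and the\n",
"# CO2_saving target look sensible before training.\n",
"X_check, y_check = prepare_data()\n",
"print(X_check.dtypes)\n",
"print(y_check.describe())\n"
],
"metadata": {},
"execution_count": null,
"outputs": []
},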
{
"cell_type": "code",
"source": [
"# 3. Build Preprocessing Pipeline\n",
"def build_preprocessor():\n",
"    categorical_cols = [\n",
"        \"Make_EV\", \"Make_ICE\",\n",
"        \"BodyStyle_EV\", \"BodyStyle_ICE\",\n",
"        \"FuelType_ICE\"\n",
"    ]\n",
"\n",
"    # One-hot encode categoricals; categories unseen at prediction time encode to all zeros.\n",
"    # Note: sparse_output requires scikit-learn >= 1.2 (older versions use sparse=False).\n",
"    preprocessor = ColumnTransformer(\n",
"        transformers=[\n",
"            (\"cat\", OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False), categorical_cols)\n",
"        ],\n",
"        remainder=\"passthrough\",\n",
"    )\n",
"    return preprocessor"
],
"metadata": {
"id": "48xrTDYZ_Oov"
},
"execution_count": 4,
"outputs": []
},
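{
"cell_type": "code",
"source": [
"# Optional illustration (not in the original notebook): a minimal sketch, assuming the CSVs are\n",
"# available, showing the column names the fitted preprocessor produces (one-hot columns for each\n",
"# category plus the passed-through numeric features YearDiff and ICE_CO2_Baseline).\n",
"X_demo, _ = prepare_data()\n",
"fitted = build_preprocessor().fit(X_demo)\n",
"print(list(fitted.get_feature_names_out())[:10])\n"
],
"metadata": {},
"execution_count": null,
"outputs": []
},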
{
"cell_type": "code",
"source": [
"# 4. Train and Save the Model\n",
"def train_model():\n",
"\n",
"    print(\"Loading and preparing data...\")\n",
"    X, y = prepare_data()\n",
"\n",
"    print(\"Building model pipeline...\")\n",
"    preprocessor = build_preprocessor()\n",
"\n",
"    model = GradientBoostingRegressor(random_state=42)\n",
"\n",
"    pipeline = Pipeline([\n",
"        (\"preprocessor\", preprocessor),\n",
"        (\"model\", model)\n",
"    ])\n",
"\n",
"    print(\"Training model...\")\n",
"    pipeline.fit(X, y)\n",
"\n",
"    print(\"Saving model to co2_savings_model.pkl...\")\n",
"    joblib.dump(pipeline, \"co2_savings_model.pkl\")\n",
"\n",
"    print(\"Training complete!\")\n",
"    return pipeline\n",
"\n"
],
"metadata": {
"id": "zoeudXzD_Rpo"
},
"execution_count": 5,
"outputs": []
},
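{
"cell_type": "code",
"source": [
"# Optional evaluation sketch (not in the original notebook): train_model() fits on all sampled\n",
"# rows with no held-out data, so this is a minimal way to gauge fit quality, assuming the same\n",
"# data-loading assumptions as prepare_data().\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import mean_absolute_error, r2_score\n",
"\n",
"X_eval, y_eval = prepare_data()\n",
"X_tr, X_te, y_tr, y_te = train_test_split(X_eval, y_eval, test_size=0.2, random_state=42)\n",
"\n",
"eval_pipeline = Pipeline([\n",
"    (\"preprocessor\", build_preprocessor()),\n",
"    (\"model\", GradientBoostingRegressor(random_state=42))\n",
"])\n",
"eval_pipeline.fit(X_tr, y_tr)\n",
"\n",
"pred = eval_pipeline.predict(X_te)\n",
"print(\"MAE:\", mean_absolute_error(y_te, pred))\n",
"print(\"R^2:\", r2_score(y_te, pred))\n"
],
"metadata": {},
"execution_count": null,
"outputs": []
},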
{
"cell_type": "code",
"source": [
"# 5. Prediction Function (Used by FastAPI)\n",
"def load_model():\n",
"    return joblib.load(\"co2_savings_model.pkl\")\n",
"\n",
"\n",
"def predict_savings(input_dict):\n",
"    \"\"\"\n",
"    input_dict example:\n",
"    {\n",
"        \"Make_EV\": \"Tesla\",\n",
"        \"Make_ICE\": \"Toyota\",\n",
"        \"BodyStyle_EV\": \"SUV\",\n",
"        \"BodyStyle_ICE\": \"SUV\",\n",
"        \"FuelType_ICE\": \"Petrol95\",\n",
"        \"YearDiff\": 5,\n",
"        \"ICE_CO2_Baseline\": 220.4\n",
"    }\n",
"    \"\"\"\n",
"    # Note: this reloads the pickled pipeline on every call; a long-lived service would load it once at startup.\n",
"    model = load_model()\n",
"\n",
"    input_df = pd.DataFrame([input_dict])\n",
"\n",
"    prediction = model.predict(input_df)[0]\n",
"\n",
"    return {\"Predicted_CO2_Savings\": float(prediction)}\n",
"\n",
"\n",
"# Execute Training If Run Directly\n",
"\n",
"if __name__ == \"__main__\":\n",
"    train_model()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "C5HAlwff_YWz",
"outputId": "3f70208b-71ef-49ca-a45b-cce6ba2d52b0"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Loading and preparing data...\n",
"Building model pipeline...\n",
"Training model...\n",
"Saving model to co2_savings_model.pkl...\n",
"Training complete!\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Sample prediction\n",
"sample_input = {\n",
"    \"Make_EV\": \"Tesla\",\n",
"    \"Make_ICE\": \"Toyota\",\n",
"    \"BodyStyle_EV\": \"SUV\",\n",
"    \"BodyStyle_ICE\": \"SUV\",\n",
"    \"FuelType_ICE\": \"Petrol95\",\n",
"    \"YearDiff\": 5,\n",
"    \"ICE_CO2_Baseline\": 220.4\n",
"}\n",
"\n",
"predict_savings(sample_input)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "0cdFnCMNEuLW",
"outputId": "dde3e048-ccd7-43f8-ebe3-021f4ff7b5e7"
},
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'Predicted_CO2_Savings': 191.07520862997606}"
]
},
"metadata": {},
"execution_count": 7
}
]
},
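{
"cell_type": "code",
"source": [
"# Optional serving sketch (not in the original notebook): the section above notes that\n",
"# predict_savings is used by FastAPI, so this is a minimal, hypothetical endpoint wrapping it.\n",
"# The app name, route, and request model are assumptions, not the project's actual API.\n",
"from fastapi import FastAPI\n",
"from pydantic import BaseModel\n",
"\n",
"app = FastAPI()\n",
"\n",
"class SavingsRequest(BaseModel):\n",
"    Make_EV: str\n",
"    Make_ICE: str\n",
"    BodyStyle_EV: str\n",
"    BodyStyle_ICE: str\n",
"    FuelType_ICE: str\n",
"    YearDiff: int\n",
"    ICE_CO2_Baseline: float\n",
"\n",
"@app.post(\"/predict\")\n",
"def predict(request: SavingsRequest):\n",
"    # Delegates to the predict_savings() helper defined above\n",
"    return predict_savings(request.dict())\n"
],
"metadata": {},
"execution_count": null,
"outputs": []
},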
{
"cell_type": "code",
"source": [
"from google.colab import files\n",
"files.download(\"co2_savings_model.pkl\")\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"id": "hx7R4DxbNZlH",
"outputId": "e5f4c6db-7526-4c30-aa98-6f5c4613a751"
},
"execution_count": 8,
"outputs": []
}
]
}