Chameleon-company
diff --git a/‎Use_Cases/Environmental Impact Analysis/Ruvinya-Ekanayake/Clean_Model_Code.ipynb‎
Lines changed: 366 additions & 0 deletions b/‎Use_Cases/Environmental Impact Analysis/Ruvinya-Ekanayake/Clean_Model_Code.ipynb‎
Lines changed: 366 additions & 0 deletions
@@ -0,0 +1,366 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Clean Model Code"
+      ],
+      "metadata": {
+        "id": "oJzW6amLdYTb"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 1,
+      "metadata": {
+        "id": "jp6OuLM997oV"
+      },
+      "outputs": [],
+      "source": [
+        "# Libraries\n",
+        "from pathlib import Path\n",
+        "\n",
+        "import pandas as pd\n",
+        "import numpy as np\n",
+        "import joblib\n",
+        "\n",
+        "from sklearn.compose import ColumnTransformer\n",
+        "from sklearn.preprocessing import OneHotEncoder\n",
+        "from sklearn.pipeline import Pipeline\n",
+        "from sklearn.ensemble import GradientBoostingRegressor\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# 1. Load Data\n",
+        "def load_data(base_path=\"/content/Data/\"):\n",
+        "    \"\"\"Read the EV and ICE consumption CSV files into DataFrames.\n",
+        "\n",
+        "    Args:\n",
+        "        base_path: Directory holding the five CSV files. The default is the\n",
+        "            path that used to be hard-coded, so ``load_data()`` is unchanged.\n",
+        "            A trailing slash is optional.\n",
+        "\n",
+        "    Returns:\n",
+        "        tuple: ``(ev, diesel, petrol91, petrol95, petrol98)`` DataFrames.\n",
+        "\n",
+        "    Raises:\n",
+        "        FileNotFoundError: If any of the expected files is absent.\n",
+        "    \"\"\"\n",
+        "    data_dir = Path(base_path)\n",
+        "\n",
+        "    # Order matters: callers unpack the result positionally.\n",
+        "    filenames = (\n",
+        "        \"Pure electric consumption.csv\",\n",
+        "        \"Diesel consumption.csv\",\n",
+        "        \"petrol91RON consumption.csv\",\n",
+        "        \"petrol95RON consumption.csv\",\n",
+        "        \"petrol98RON consumption.csv\",\n",
+        "    )\n",
+        "\n",
+        "    # Report every missing file at once instead of failing on the first read.\n",
+        "    missing = [name for name in filenames if not (data_dir / name).is_file()]\n",
+        "    if missing:\n",
+        "        raise FileNotFoundError(f\"Missing data files in {data_dir}: {missing}\")\n",
+        "\n",
+        "    return tuple(pd.read_csv(data_dir / name) for name in filenames)"
+      ],
+      "metadata": {
+        "id": "WYYebg76_Gah"
+      },
+      "execution_count": 2,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# 2. Preprocess Data\n",
+        "def prepare_data(sample_size=4000, random_state=42):\n",
+        "    \"\"\"Build the feature matrix and target for the CO2-savings model.\n",
+        "\n",
+        "    Every EV row is paired with every ICE row, a random subset of the pairs\n",
+        "    is kept, and the target is the ICE baseline minus the EV emissions.\n",
+        "\n",
+        "    Args:\n",
+        "        sample_size: Maximum number of EV/ICE pairs to keep. It is capped at\n",
+        "            the number of available pairs, so a small dataset does not make\n",
+        "            ``DataFrame.sample`` raise.\n",
+        "        random_state: Seed used when sampling the pairs.\n",
+        "\n",
+        "    Returns:\n",
+        "        tuple: ``(X, y)`` where ``X`` holds the seven model features and ``y``\n",
+        "        is the ``CO2_saving`` column.\n",
+        "    \"\"\"\n",
+        "    emission_factor = 0.18  # kg CO2 per kWh of electricity\n",
+        "    # Multipliers applied to FuelConsumptionCombined to get the ICE baseline.\n",
+        "    # NOTE(review): presumably g CO2/km per L/100 km - confirm against the dataset.\n",
+        "    petrol_factor, diesel_factor = 23.2, 26.5\n",
+        "\n",
+        "    # Columns containing any missing value are dropped from each table.\n",
+        "    ev, diesel, petrol91, petrol95, petrol98 = [\n",
+        "        frame.dropna(axis=1) for frame in load_data()\n",
+        "    ]\n",
+        "\n",
+        "    # Same arithmetic as before (divide by 1000, apply factor, multiply by 1000)\n",
+        "    # so the computed values stay bit-identical.\n",
+        "    ev = ev.assign(\n",
+        "        EV_gCO2_per_km=ev[\"EnergyConsumptionWhkm\"] / 1000 * emission_factor * 1000\n",
+        "    )\n",
+        "\n",
+        "    def add_baseline(df, fuel):\n",
+        "        \"\"\"Return df with its CO2 baseline and a FuelType label added.\"\"\"\n",
+        "        factor = petrol_factor if fuel.lower().startswith(\"petrol\") else diesel_factor\n",
+        "        return df.assign(\n",
+        "            ICE_CO2_Baseline=df[\"FuelConsumptionCombined\"] * factor,\n",
+        "            FuelType=fuel,\n",
+        "        )\n",
+        "\n",
+        "    ice_all = pd.concat(\n",
+        "        [\n",
+        "            add_baseline(petrol91, \"Petrol91\"),\n",
+        "            add_baseline(petrol95, \"Petrol95\"),\n",
+        "            add_baseline(petrol98, \"Petrol98\"),\n",
+        "            add_baseline(diesel, \"Diesel\"),\n",
+        "        ],\n",
+        "        ignore_index=True,\n",
+        "    )\n",
+        "\n",
+        "    # Keep only the columns the model needs and rename them explicitly.\n",
+        "    # Merge suffixes are applied only to overlapping names, so relying on them\n",
+        "    # made FuelType_ICE depend on the EV table also having a FuelType column.\n",
+        "    ev_side = ev[[\"Make\", \"BodyStyle\", \"ModelReleaseYear\", \"EV_gCO2_per_km\"]].rename(\n",
+        "        columns={\n",
+        "            \"Make\": \"Make_EV\",\n",
+        "            \"BodyStyle\": \"BodyStyle_EV\",\n",
+        "            \"ModelReleaseYear\": \"ModelReleaseYear_EV\",\n",
+        "        }\n",
+        "    )\n",
+        "    ice_side = ice_all[\n",
+        "        [\"Make\", \"BodyStyle\", \"ModelReleaseYear\", \"FuelType\", \"ICE_CO2_Baseline\"]\n",
+        "    ].rename(\n",
+        "        columns={\n",
+        "            \"Make\": \"Make_ICE\",\n",
+        "            \"BodyStyle\": \"BodyStyle_ICE\",\n",
+        "            \"ModelReleaseYear\": \"ModelReleaseYear_ICE\",\n",
+        "            \"FuelType\": \"FuelType_ICE\",\n",
+        "        }\n",
+        "    )\n",
+        "\n",
+        "    # Cartesian join of the narrow tables, then a reproducible sample of pairs.\n",
+        "    pairs = ev_side.merge(ice_side, how=\"cross\")\n",
+        "    pairs = pairs.sample(n=min(sample_size, len(pairs)), random_state=random_state)\n",
+        "\n",
+        "    # Create final features\n",
+        "    pairs = pairs.assign(\n",
+        "        YearDiff=pairs[\"ModelReleaseYear_EV\"] - pairs[\"ModelReleaseYear_ICE\"],\n",
+        "        CO2_saving=pairs[\"ICE_CO2_Baseline\"] - pairs[\"EV_gCO2_per_km\"],\n",
+        "    )\n",
+        "\n",
+        "    feature_columns = [\n",
+        "        \"Make_EV\", \"Make_ICE\",\n",
+        "        \"BodyStyle_EV\", \"BodyStyle_ICE\",\n",
+        "        \"FuelType_ICE\", \"YearDiff\", \"ICE_CO2_Baseline\"\n",
+        "    ]\n",
+        "\n",
+        "    return pairs[feature_columns], pairs[\"CO2_saving\"]\n"
+      ],
+      "metadata": {
+        "id": "tlFGBXnC_Jes"
+      },
+      "execution_count": 3,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# 3. Build Preprocessing Pipeline\n",
+        "def build_preprocessor():\n",
+        "    \"\"\"Return the ColumnTransformer that prepares the model features.\n",
+        "\n",
+        "    The five categorical columns are one-hot encoded into a dense array;\n",
+        "    categories not seen during fitting are ignored. All remaining columns\n",
+        "    are passed through unchanged.\n",
+        "    \"\"\"\n",
+        "    encoder = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False)\n",
+        "    one_hot_features = [\n",
+        "        \"Make_EV\",\n",
+        "        \"Make_ICE\",\n",
+        "        \"BodyStyle_EV\",\n",
+        "        \"BodyStyle_ICE\",\n",
+        "        \"FuelType_ICE\",\n",
+        "    ]\n",
+        "    return ColumnTransformer(\n",
+        "        transformers=[(\"cat\", encoder, one_hot_features)],\n",
+        "        remainder=\"passthrough\",\n",
+        "    )"
+      ],
+      "metadata": {
+        "id": "48xrTDYZ_Oov"
+      },
+      "execution_count": 4,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# 4. Train and Save the Model\n",
+        "def train_model(model_path=\"co2_savings_model.pkl\"):\n",
+        "    \"\"\"Fit the preprocessing + gradient-boosting pipeline and save it.\n",
+        "\n",
+        "    Args:\n",
+        "        model_path: Where the fitted pipeline is written with ``joblib.dump``.\n",
+        "            Defaults to the filename that used to be hard-coded.\n",
+        "\n",
+        "    Returns:\n",
+        "        The fitted ``Pipeline``.\n",
+        "\n",
+        "    Note:\n",
+        "        The pipeline is fitted on every row returned by ``prepare_data``;\n",
+        "        no hold-out evaluation is performed here.\n",
+        "    \"\"\"\n",
+        "    print(\"Loading and preparing data...\")\n",
+        "    X, y = prepare_data()\n",
+        "\n",
+        "    print(\"Building model pipeline...\")\n",
+        "    pipeline = Pipeline([\n",
+        "        (\"preprocessor\", build_preprocessor()),\n",
+        "        # Fixed seed keeps repeated training runs reproducible.\n",
+        "        (\"model\", GradientBoostingRegressor(random_state=42)),\n",
+        "    ])\n",
+        "\n",
+        "    print(\"Training model...\")\n",
+        "    pipeline.fit(X, y)\n",
+        "\n",
+        "    print(f\"Saving model to {model_path}...\")\n",
+        "    joblib.dump(pipeline, model_path)\n",
+        "\n",
+        "    print(\"Training complete!\")\n",
+        "    return pipeline\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "zoeudXzD_Rpo"
+      },
+      "execution_count": 5,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# 5. Prediction Function (Used by FastAPI)\n",
+        "def load_model(model_path=\"co2_savings_model.pkl\"):\n",
+        "    \"\"\"Load a fitted pipeline from disk.\n",
+        "\n",
+        "    Args:\n",
+        "        model_path: File to read. Defaults to the filename written by\n",
+        "            ``train_model``.\n",
+        "\n",
+        "    Returns:\n",
+        "        The object stored in the file.\n",
+        "    \"\"\"\n",
+        "    # SECURITY: joblib.load unpickles the file, which can execute arbitrary\n",
+        "    # code. Only load model files that come from a trusted source.\n",
+        "    return joblib.load(model_path)\n",
+        "\n",
+        "\n",
+        "def predict_savings(input_dict, model=None):\n",
+        "    \"\"\"\n",
+        "    Predict the CO2 saving for one EV/ICE comparison.\n",
+        "\n",
+        "    input_dict example:\n",
+        "    {\n",
+        "      \"Make_EV\": \"Tesla\",\n",
+        "      \"Make_ICE\": \"Toyota\",\n",
+        "      \"BodyStyle_EV\": \"SUV\",\n",
+        "      \"BodyStyle_ICE\": \"SUV\",\n",
+        "      \"FuelType_ICE\": \"Petrol95\",\n",
+        "      \"YearDiff\": 5,\n",
+        "      \"ICE_CO2_Baseline\": 220.4\n",
+        "    }\n",
+        "\n",
+        "    Args:\n",
+        "        input_dict: Mapping with one value per model feature (see example).\n",
+        "            Keys other than the seven features are ignored.\n",
+        "        model: Optional fitted pipeline exposing ``predict``. When omitted the\n",
+        "            pipeline is read from disk with ``load_model()`` on every call, so\n",
+        "            a long-running service should load it once and pass it in.\n",
+        "\n",
+        "    Returns:\n",
+        "        dict: ``{\"Predicted_CO2_Savings\": <float>}``.\n",
+        "\n",
+        "    Raises:\n",
+        "        ValueError: If ``input_dict`` lacks any required feature.\n",
+        "    \"\"\"\n",
+        "    # Same columns, in the same order, as the training feature matrix.\n",
+        "    feature_columns = [\n",
+        "        \"Make_EV\", \"Make_ICE\",\n",
+        "        \"BodyStyle_EV\", \"BodyStyle_ICE\",\n",
+        "        \"FuelType_ICE\", \"YearDiff\", \"ICE_CO2_Baseline\"\n",
+        "    ]\n",
+        "\n",
+        "    missing = [name for name in feature_columns if name not in input_dict]\n",
+        "    if missing:\n",
+        "        raise ValueError(f\"input_dict is missing required features: {missing}\")\n",
+        "\n",
+        "    if model is None:\n",
+        "        model = load_model()\n",
+        "\n",
+        "    # Selecting by name fixes the column order and drops unexpected keys.\n",
+        "    input_df = pd.DataFrame([input_dict])[feature_columns]\n",
+        "\n",
+        "    prediction = model.predict(input_df)[0]\n",
+        "\n",
+        "    return {\"Predicted_CO2_Savings\": float(prediction)}\n",
+        "\n",
+        "\n",
+        "# Train and save the model when this code runs as the main program.\n",
+        "# Executing this cell in the notebook satisfies the guard, so it triggers a\n",
+        "# full training run (the stored cell output shows the training log).\n",
+        "\n",
+        "if __name__ == \"__main__\":\n",
+        "    train_model()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "C5HAlwff_YWz",
+        "outputId": "3f70208b-71ef-49ca-a45b-cce6ba2d52b0"
+      },
+      "execution_count": 6,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Loading and preparing data...\n",
+            "Building model pipeline...\n",
+            "Training model...\n",
+            "Saving model to co2_savings_model.pkl...\n",
+            "Training complete!\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Example: score one EV/ICE comparison with the saved model.\n",
+        "# The returned dict is the last expression, so it is shown as the cell output.\n",
+        "sample_input = dict(\n",
+        "    Make_EV=\"Tesla\",\n",
+        "    Make_ICE=\"Toyota\",\n",
+        "    BodyStyle_EV=\"SUV\",\n",
+        "    BodyStyle_ICE=\"SUV\",\n",
+        "    FuelType_ICE=\"Petrol95\",\n",
+        "    YearDiff=5,\n",
+        "    ICE_CO2_Baseline=220.4,\n",
+        ")\n",
+        "\n",
+        "predict_savings(sample_input)\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "0cdFnCMNEuLW",
+        "outputId": "dde3e048-ccd7-43f8-ebe3-021f4ff7b5e7"
+      },
+      "execution_count": 7,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "{'Predicted_CO2_Savings': 191.07520862997606}"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 7
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Offer the saved model as a browser download when running in Google Colab.\n",
+        "# The google.colab package is only importable there, so skip the download\n",
+        "# elsewhere instead of failing the whole notebook run.\n",
+        "try:\n",
+        "    from google.colab import files\n",
+        "except ImportError:\n",
+        "    print(\"Not running in Google Colab; skipping download of co2_savings_model.pkl.\")\n",
+        "else:\n",
+        "    files.download(\"co2_savings_model.pkl\")\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 17
+        },
+        "id": "hx7R4DxbNZlH",
+        "outputId": "e5f4c6db-7526-4c30-aa98-6f5c4613a751"
+      },
+      "execution_count": 8,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.Javascript object>"
+            ],
+            "application/javascript": [
+              "\n",
+              "    async function download(id, filename, size) {\n",
+              "      if (!google.colab.kernel.accessAllowed) {\n",
+              "        return;\n",
+              "      }\n",
+              "      const div = document.createElement('div');\n",
+              "      const label = document.createElement('label');\n",
+              "      label.textContent = `Downloading \"${filename}\": `;\n",
+              "      div.appendChild(label);\n",
+              "      const progress = document.createElement('progress');\n",
+              "      progress.max = size;\n",
+              "      div.appendChild(progress);\n",
+              "      document.body.appendChild(div);\n",
+              "\n",
+              "      const buffers = [];\n",
+              "      let downloaded = 0;\n",
+              "\n",
+              "      const channel = await google.colab.kernel.comms.open(id);\n",
+              "      // Send a message to notify the kernel that we're ready.\n",
+              "      channel.send({})\n",
+              "\n",
+              "      for await (const message of channel.messages) {\n",
+              "        // Send a message to notify the kernel that we're ready.\n",
+              "        channel.send({})\n",
+              "        if (message.buffers) {\n",
+              "          for (const buffer of message.buffers) {\n",
+              "            buffers.push(buffer);\n",
+              "            downloaded += buffer.byteLength;\n",
+              "            progress.value = downloaded;\n",
+              "          }\n",
+              "        }\n",
+              "      }\n",
+              "      const blob = new Blob(buffers, {type: 'application/binary'});\n",
+              "      const a = document.createElement('a');\n",
+              "      a.href = window.URL.createObjectURL(blob);\n",
+              "      a.download = filename;\n",
+              "      div.appendChild(a);\n",
+              "      a.click();\n",
+              "      div.remove();\n",
+              "    }\n",
+              "  "
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.Javascript object>"
+            ],
+            "application/javascript": [
+              "download(\"download_7a84e164-972a-47fa-9c52-86369d5592d7\", \"co2_savings_model.pkl\", 146676)"
+            ]
+          },
+          "metadata": {}
+        }
+      ]
+    }
+  ]
+}