diff --git a/AttritionForecast_Analysis_and_Prediction_S.ipynb b/AttritionForecast_Analysis_and_Prediction_S.ipynb
new file mode 100644
index 0000000..114f06c
--- /dev/null
+++ b/AttritionForecast_Analysis_and_Prediction_S.ipynb
@@ -0,0 +1,2566 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {
+ "id": "uJK0CMj1paWT"
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.metrics import confusion_matrix, accuracy_score, classification_report\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.pipeline import make_pipeline\n",
+ "from sklearn.preprocessing import LabelEncoder\n",
+ "from sklearn.preprocessing import StandardScaler\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Location of the IBM HR attrition CSV, relative to the working directory.\n",
+ "# Colab's default working directory is /content, so an uploaded file is still found there,\n",
+ "# and the notebook also runs elsewhere when the CSV sits next to it.\n",
+ "DATA_PATH = 'WA_Fn-UseC_-HR-Employee-Attrition.csv'\n",
+ "df = pd.read_csv(DATA_PATH)\n"
+ ],
+ "metadata": {
+ "id": "Hx96ztmAqA_G"
+ },
+ "execution_count": 51,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 582
+ },
+ "id": "hs2Fk056qEhB",
+ "outputId": "0d2295a5-87fa-478b-9d68-3a46da2c10f6"
+ },
+ "execution_count": 52,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Age Attrition BusinessTravel DailyRate Department \\\n",
+ "0 41 Yes Travel_Rarely 1102 Sales \n",
+ "1 49 No Travel_Frequently 279 Research & Development \n",
+ "2 37 Yes Travel_Rarely 1373 Research & Development \n",
+ "3 33 No Travel_Frequently 1392 Research & Development \n",
+ "4 27 No Travel_Rarely 591 Research & Development \n",
+ "... ... ... ... ... ... \n",
+ "1465 36 No Travel_Frequently 884 Research & Development \n",
+ "1466 39 No Travel_Rarely 613 Research & Development \n",
+ "1467 27 No Travel_Rarely 155 Research & Development \n",
+ "1468 49 No Travel_Frequently 1023 Sales \n",
+ "1469 34 No Travel_Rarely 628 Research & Development \n",
+ "\n",
+ " DistanceFromHome Education EducationField EmployeeCount \\\n",
+ "0 1 2 Life Sciences 1 \n",
+ "1 8 1 Life Sciences 1 \n",
+ "2 2 2 Other 1 \n",
+ "3 3 4 Life Sciences 1 \n",
+ "4 2 1 Medical 1 \n",
+ "... ... ... ... ... \n",
+ "1465 23 2 Medical 1 \n",
+ "1466 6 1 Medical 1 \n",
+ "1467 4 3 Life Sciences 1 \n",
+ "1468 2 3 Medical 1 \n",
+ "1469 8 3 Medical 1 \n",
+ "\n",
+ " EmployeeNumber ... RelationshipSatisfaction StandardHours \\\n",
+ "0 1 ... 1 80 \n",
+ "1 2 ... 4 80 \n",
+ "2 4 ... 2 80 \n",
+ "3 5 ... 3 80 \n",
+ "4 7 ... 4 80 \n",
+ "... ... ... ... ... \n",
+ "1465 2061 ... 3 80 \n",
+ "1466 2062 ... 1 80 \n",
+ "1467 2064 ... 2 80 \n",
+ "1468 2065 ... 4 80 \n",
+ "1469 2068 ... 1 80 \n",
+ "\n",
+ " StockOptionLevel TotalWorkingYears TrainingTimesLastYear \\\n",
+ "0 0 8 0 \n",
+ "1 1 10 3 \n",
+ "2 0 7 3 \n",
+ "3 0 8 3 \n",
+ "4 1 6 3 \n",
+ "... ... ... ... \n",
+ "1465 1 17 3 \n",
+ "1466 1 9 5 \n",
+ "1467 1 6 0 \n",
+ "1468 0 17 3 \n",
+ "1469 0 6 3 \n",
+ "\n",
+ " WorkLifeBalance YearsAtCompany YearsInCurrentRole \\\n",
+ "0 1 6 4 \n",
+ "1 3 10 7 \n",
+ "2 3 0 0 \n",
+ "3 3 8 7 \n",
+ "4 3 2 2 \n",
+ "... ... ... ... \n",
+ "1465 3 5 2 \n",
+ "1466 3 7 7 \n",
+ "1467 3 6 2 \n",
+ "1468 2 9 6 \n",
+ "1469 4 4 3 \n",
+ "\n",
+ " YearsSinceLastPromotion YearsWithCurrManager \n",
+ "0 0 5 \n",
+ "1 1 7 \n",
+ "2 0 0 \n",
+ "3 3 0 \n",
+ "4 2 2 \n",
+ "... ... ... \n",
+ "1465 0 3 \n",
+ "1466 1 7 \n",
+ "1467 0 3 \n",
+ "1468 0 8 \n",
+ "1469 1 2 \n",
+ "\n",
+ "[1470 rows x 35 columns]"
+ ],
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Age | \n",
+ " Attrition | \n",
+ " BusinessTravel | \n",
+ " DailyRate | \n",
+ " Department | \n",
+ " DistanceFromHome | \n",
+ " Education | \n",
+ " EducationField | \n",
+ " EmployeeCount | \n",
+ " EmployeeNumber | \n",
+ " ... | \n",
+ " RelationshipSatisfaction | \n",
+ " StandardHours | \n",
+ " StockOptionLevel | \n",
+ " TotalWorkingYears | \n",
+ " TrainingTimesLastYear | \n",
+ " WorkLifeBalance | \n",
+ " YearsAtCompany | \n",
+ " YearsInCurrentRole | \n",
+ " YearsSinceLastPromotion | \n",
+ " YearsWithCurrManager | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 41 | \n",
+ " Yes | \n",
+ " Travel_Rarely | \n",
+ " 1102 | \n",
+ " Sales | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " Life Sciences | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " 80 | \n",
+ " 0 | \n",
+ " 8 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 49 | \n",
+ " No | \n",
+ " Travel_Frequently | \n",
+ " 279 | \n",
+ " Research & Development | \n",
+ " 8 | \n",
+ " 1 | \n",
+ " Life Sciences | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " ... | \n",
+ " 4 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " 10 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 10 | \n",
+ " 7 | \n",
+ " 1 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 37 | \n",
+ " Yes | \n",
+ " Travel_Rarely | \n",
+ " 1373 | \n",
+ " Research & Development | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " Other | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " 80 | \n",
+ " 0 | \n",
+ " 7 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 33 | \n",
+ " No | \n",
+ " Travel_Frequently | \n",
+ " 1392 | \n",
+ " Research & Development | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " Life Sciences | \n",
+ " 1 | \n",
+ " 5 | \n",
+ " ... | \n",
+ " 3 | \n",
+ " 80 | \n",
+ " 0 | \n",
+ " 8 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 8 | \n",
+ " 7 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 27 | \n",
+ " No | \n",
+ " Travel_Rarely | \n",
+ " 591 | \n",
+ " Research & Development | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " Medical | \n",
+ " 1 | \n",
+ " 7 | \n",
+ " ... | \n",
+ " 4 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1465 | \n",
+ " 36 | \n",
+ " No | \n",
+ " Travel_Frequently | \n",
+ " 884 | \n",
+ " Research & Development | \n",
+ " 23 | \n",
+ " 2 | \n",
+ " Medical | \n",
+ " 1 | \n",
+ " 2061 | \n",
+ " ... | \n",
+ " 3 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " 17 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 5 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 1466 | \n",
+ " 39 | \n",
+ " No | \n",
+ " Travel_Rarely | \n",
+ " 613 | \n",
+ " Research & Development | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " Medical | \n",
+ " 1 | \n",
+ " 2062 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " 9 | \n",
+ " 5 | \n",
+ " 3 | \n",
+ " 7 | \n",
+ " 7 | \n",
+ " 1 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " 1467 | \n",
+ " 27 | \n",
+ " No | \n",
+ " Travel_Rarely | \n",
+ " 155 | \n",
+ " Research & Development | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " Life Sciences | \n",
+ " 1 | \n",
+ " 2064 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " 6 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 1468 | \n",
+ " 49 | \n",
+ " No | \n",
+ " Travel_Frequently | \n",
+ " 1023 | \n",
+ " Sales | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " Medical | \n",
+ " 1 | \n",
+ " 2065 | \n",
+ " ... | \n",
+ " 4 | \n",
+ " 80 | \n",
+ " 0 | \n",
+ " 17 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 9 | \n",
+ " 6 | \n",
+ " 0 | \n",
+ " 8 | \n",
+ "
\n",
+ " \n",
+ " 1469 | \n",
+ " 34 | \n",
+ " No | \n",
+ " Travel_Rarely | \n",
+ " 628 | \n",
+ " Research & Development | \n",
+ " 8 | \n",
+ " 3 | \n",
+ " Medical | \n",
+ " 1 | \n",
+ " 2068 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " 80 | \n",
+ " 0 | \n",
+ " 6 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1470 rows × 35 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df"
+ }
+ },
+ "metadata": {},
+ "execution_count": 52
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Show the first five rows of the dataset\n",
+ "print(df.head(n=5))\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "rFS_iManqFvJ",
+ "outputId": "4c9601db-19ef-4c72-ba3f-534c2b821ca2"
+ },
+ "execution_count": 53,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " Age Attrition BusinessTravel DailyRate Department \\\n",
+ "0 41 Yes Travel_Rarely 1102 Sales \n",
+ "1 49 No Travel_Frequently 279 Research & Development \n",
+ "2 37 Yes Travel_Rarely 1373 Research & Development \n",
+ "3 33 No Travel_Frequently 1392 Research & Development \n",
+ "4 27 No Travel_Rarely 591 Research & Development \n",
+ "\n",
+ " DistanceFromHome Education EducationField EmployeeCount EmployeeNumber \\\n",
+ "0 1 2 Life Sciences 1 1 \n",
+ "1 8 1 Life Sciences 1 2 \n",
+ "2 2 2 Other 1 4 \n",
+ "3 3 4 Life Sciences 1 5 \n",
+ "4 2 1 Medical 1 7 \n",
+ "\n",
+ " ... RelationshipSatisfaction StandardHours StockOptionLevel \\\n",
+ "0 ... 1 80 0 \n",
+ "1 ... 4 80 1 \n",
+ "2 ... 2 80 0 \n",
+ "3 ... 3 80 0 \n",
+ "4 ... 4 80 1 \n",
+ "\n",
+ " TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany \\\n",
+ "0 8 0 1 6 \n",
+ "1 10 3 3 10 \n",
+ "2 7 3 3 0 \n",
+ "3 8 3 3 8 \n",
+ "4 6 3 3 2 \n",
+ "\n",
+ " YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager \n",
+ "0 4 0 5 \n",
+ "1 7 1 7 \n",
+ "2 0 0 0 \n",
+ "3 7 3 0 \n",
+ "4 2 2 2 \n",
+ "\n",
+ "[5 rows x 35 columns]\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Get a concise summary of the DataFrame (column names, non-null counts, dtypes).\n",
+ "# df.info() prints the report itself and returns None, so it is not wrapped in print()\n",
+ "# (wrapping it appended a stray \"None\" line to the output).\n",
+ "df.info()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "v7HM4SqVqYmO",
+ "outputId": "10bd1f21-30b6-4460-e5ba-832ac5554a7c"
+ },
+ "execution_count": 54,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "RangeIndex: 1470 entries, 0 to 1469\n",
+ "Data columns (total 35 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 Age 1470 non-null int64 \n",
+ " 1 Attrition 1470 non-null object\n",
+ " 2 BusinessTravel 1470 non-null object\n",
+ " 3 DailyRate 1470 non-null int64 \n",
+ " 4 Department 1470 non-null object\n",
+ " 5 DistanceFromHome 1470 non-null int64 \n",
+ " 6 Education 1470 non-null int64 \n",
+ " 7 EducationField 1470 non-null object\n",
+ " 8 EmployeeCount 1470 non-null int64 \n",
+ " 9 EmployeeNumber 1470 non-null int64 \n",
+ " 10 EnvironmentSatisfaction 1470 non-null int64 \n",
+ " 11 Gender 1470 non-null object\n",
+ " 12 HourlyRate 1470 non-null int64 \n",
+ " 13 JobInvolvement 1470 non-null int64 \n",
+ " 14 JobLevel 1470 non-null int64 \n",
+ " 15 JobRole 1470 non-null object\n",
+ " 16 JobSatisfaction 1470 non-null int64 \n",
+ " 17 MaritalStatus 1470 non-null object\n",
+ " 18 MonthlyIncome 1470 non-null int64 \n",
+ " 19 MonthlyRate 1470 non-null int64 \n",
+ " 20 NumCompaniesWorked 1470 non-null int64 \n",
+ " 21 Over18 1470 non-null object\n",
+ " 22 OverTime 1470 non-null object\n",
+ " 23 PercentSalaryHike 1470 non-null int64 \n",
+ " 24 PerformanceRating 1470 non-null int64 \n",
+ " 25 RelationshipSatisfaction 1470 non-null int64 \n",
+ " 26 StandardHours 1470 non-null int64 \n",
+ " 27 StockOptionLevel 1470 non-null int64 \n",
+ " 28 TotalWorkingYears 1470 non-null int64 \n",
+ " 29 TrainingTimesLastYear 1470 non-null int64 \n",
+ " 30 WorkLifeBalance 1470 non-null int64 \n",
+ " 31 YearsAtCompany 1470 non-null int64 \n",
+ " 32 YearsInCurrentRole 1470 non-null int64 \n",
+ " 33 YearsSinceLastPromotion 1470 non-null int64 \n",
+ " 34 YearsWithCurrManager 1470 non-null int64 \n",
+ "dtypes: int64(26), object(9)\n",
+ "memory usage: 402.1+ KB\n",
+ "None\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns\n",
+ "numeric_summary = df.describe()\n",
+ "print(numeric_summary)\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Tboi56PUqlsQ",
+ "outputId": "46700d18-89ad-4652-d6fc-b59f162626ee"
+ },
+ "execution_count": 55,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " Age DailyRate DistanceFromHome Education EmployeeCount \\\n",
+ "count 1470.000000 1470.000000 1470.000000 1470.000000 1470.0 \n",
+ "mean 36.923810 802.485714 9.192517 2.912925 1.0 \n",
+ "std 9.135373 403.509100 8.106864 1.024165 0.0 \n",
+ "min 18.000000 102.000000 1.000000 1.000000 1.0 \n",
+ "25% 30.000000 465.000000 2.000000 2.000000 1.0 \n",
+ "50% 36.000000 802.000000 7.000000 3.000000 1.0 \n",
+ "75% 43.000000 1157.000000 14.000000 4.000000 1.0 \n",
+ "max 60.000000 1499.000000 29.000000 5.000000 1.0 \n",
+ "\n",
+ " EmployeeNumber EnvironmentSatisfaction HourlyRate JobInvolvement \\\n",
+ "count 1470.000000 1470.000000 1470.000000 1470.000000 \n",
+ "mean 1024.865306 2.721769 65.891156 2.729932 \n",
+ "std 602.024335 1.093082 20.329428 0.711561 \n",
+ "min 1.000000 1.000000 30.000000 1.000000 \n",
+ "25% 491.250000 2.000000 48.000000 2.000000 \n",
+ "50% 1020.500000 3.000000 66.000000 3.000000 \n",
+ "75% 1555.750000 4.000000 83.750000 3.000000 \n",
+ "max 2068.000000 4.000000 100.000000 4.000000 \n",
+ "\n",
+ " JobLevel ... RelationshipSatisfaction StandardHours \\\n",
+ "count 1470.000000 ... 1470.000000 1470.0 \n",
+ "mean 2.063946 ... 2.712245 80.0 \n",
+ "std 1.106940 ... 1.081209 0.0 \n",
+ "min 1.000000 ... 1.000000 80.0 \n",
+ "25% 1.000000 ... 2.000000 80.0 \n",
+ "50% 2.000000 ... 3.000000 80.0 \n",
+ "75% 3.000000 ... 4.000000 80.0 \n",
+ "max 5.000000 ... 4.000000 80.0 \n",
+ "\n",
+ " StockOptionLevel TotalWorkingYears TrainingTimesLastYear \\\n",
+ "count 1470.000000 1470.000000 1470.000000 \n",
+ "mean 0.793878 11.279592 2.799320 \n",
+ "std 0.852077 7.780782 1.289271 \n",
+ "min 0.000000 0.000000 0.000000 \n",
+ "25% 0.000000 6.000000 2.000000 \n",
+ "50% 1.000000 10.000000 3.000000 \n",
+ "75% 1.000000 15.000000 3.000000 \n",
+ "max 3.000000 40.000000 6.000000 \n",
+ "\n",
+ " WorkLifeBalance YearsAtCompany YearsInCurrentRole \\\n",
+ "count 1470.000000 1470.000000 1470.000000 \n",
+ "mean 2.761224 7.008163 4.229252 \n",
+ "std 0.706476 6.126525 3.623137 \n",
+ "min 1.000000 0.000000 0.000000 \n",
+ "25% 2.000000 3.000000 2.000000 \n",
+ "50% 3.000000 5.000000 3.000000 \n",
+ "75% 3.000000 9.000000 7.000000 \n",
+ "max 4.000000 40.000000 18.000000 \n",
+ "\n",
+ " YearsSinceLastPromotion YearsWithCurrManager \n",
+ "count 1470.000000 1470.000000 \n",
+ "mean 2.187755 4.123129 \n",
+ "std 3.222430 3.568136 \n",
+ "min 0.000000 0.000000 \n",
+ "25% 0.000000 2.000000 \n",
+ "50% 1.000000 3.000000 \n",
+ "75% 3.000000 7.000000 \n",
+ "max 15.000000 17.000000 \n",
+ "\n",
+ "[8 rows x 26 columns]\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Count the missing entries in each column\n",
+ "print(df.isna().sum())\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Y_5UzgeCqo94",
+ "outputId": "771d9205-4a5d-499c-d0d0-6fd7c41d9518"
+ },
+ "execution_count": 56,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Age 0\n",
+ "Attrition 0\n",
+ "BusinessTravel 0\n",
+ "DailyRate 0\n",
+ "Department 0\n",
+ "DistanceFromHome 0\n",
+ "Education 0\n",
+ "EducationField 0\n",
+ "EmployeeCount 0\n",
+ "EmployeeNumber 0\n",
+ "EnvironmentSatisfaction 0\n",
+ "Gender 0\n",
+ "HourlyRate 0\n",
+ "JobInvolvement 0\n",
+ "JobLevel 0\n",
+ "JobRole 0\n",
+ "JobSatisfaction 0\n",
+ "MaritalStatus 0\n",
+ "MonthlyIncome 0\n",
+ "MonthlyRate 0\n",
+ "NumCompaniesWorked 0\n",
+ "Over18 0\n",
+ "OverTime 0\n",
+ "PercentSalaryHike 0\n",
+ "PerformanceRating 0\n",
+ "RelationshipSatisfaction 0\n",
+ "StandardHours 0\n",
+ "StockOptionLevel 0\n",
+ "TotalWorkingYears 0\n",
+ "TrainingTimesLastYear 0\n",
+ "WorkLifeBalance 0\n",
+ "YearsAtCompany 0\n",
+ "YearsInCurrentRole 0\n",
+ "YearsSinceLastPromotion 0\n",
+ "YearsWithCurrManager 0\n",
+ "dtype: int64\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Number of rows that exactly repeat an earlier row\n",
+ "duplicate_row_count = df.duplicated().sum()\n",
+ "print(duplicate_row_count)\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "RkIeqiEzqv_4",
+ "outputId": "39de62c9-4b31-4cff-eb91-4b93629a0309"
+ },
+ "execution_count": 57,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "0\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Collect the names of all object-dtype columns; these are the categorical fields\n",
+ "object_columns_only = df.select_dtypes(include='object')\n",
+ "categorical_columns = object_columns_only.columns\n",
+ "print(categorical_columns)\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "oIQIYAVBq9LC",
+ "outputId": "2759fa27-ddd1-45e6-c7a8-abbbba710598"
+ },
+ "execution_count": 58,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Index(['Attrition', 'BusinessTravel', 'Department', 'EducationField', 'Gender',\n",
+ " 'JobRole', 'MaritalStatus', 'Over18', 'OverTime'],\n",
+ " dtype='object')\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Label-encode every categorical column of df in place: each distinct label is\n",
+ "# replaced by an integer code 0..k-1 assigned in sorted label order\n",
+ "# (e.g. Attrition: No -> 0, Yes -> 1).\n",
+ "#\n",
+ "# One encoder is kept per column in `label_encoders`, so codes can be mapped back with\n",
+ "# label_encoders[column].inverse_transform(...). The previous single shared encoder was\n",
+ "# refitted on every column and only remembered the classes of the last one.\n",
+ "#\n",
+ "# The one-hot frame that used to be built here was never used afterwards (and it also\n",
+ "# one-hot encoded the target), so it has been removed.\n",
+ "# NOTE(review): integer codes impose an arbitrary order on nominal features such as\n",
+ "# JobRole; consider one-hot encoding the features for linear models.\n",
+ "label_encoders = {}\n",
+ "for column in categorical_columns:\n",
+ "    label_encoders[column] = LabelEncoder()\n",
+ "    df[column] = label_encoders[column].fit_transform(df[column])\n"
+ ],
+ "metadata": {
+ "id": "QWmv7na9rE5H"
+ },
+ "execution_count": 59,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 443
+ },
+ "id": "ZqTVp1sBrgFn",
+ "outputId": "1bd132c5-5ef1-4fd6-94ea-676c8dcd529e"
+ },
+ "execution_count": 60,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Age Attrition BusinessTravel DailyRate Department DistanceFromHome \\\n",
+ "0 41 1 2 1102 2 1 \n",
+ "1 49 0 1 279 1 8 \n",
+ "2 37 1 2 1373 1 2 \n",
+ "3 33 0 1 1392 1 3 \n",
+ "4 27 0 2 591 1 2 \n",
+ "... ... ... ... ... ... ... \n",
+ "1465 36 0 1 884 1 23 \n",
+ "1466 39 0 2 613 1 6 \n",
+ "1467 27 0 2 155 1 4 \n",
+ "1468 49 0 1 1023 2 2 \n",
+ "1469 34 0 2 628 1 8 \n",
+ "\n",
+ " Education EducationField EmployeeCount EmployeeNumber ... \\\n",
+ "0 2 1 1 1 ... \n",
+ "1 1 1 1 2 ... \n",
+ "2 2 4 1 4 ... \n",
+ "3 4 1 1 5 ... \n",
+ "4 1 3 1 7 ... \n",
+ "... ... ... ... ... ... \n",
+ "1465 2 3 1 2061 ... \n",
+ "1466 1 3 1 2062 ... \n",
+ "1467 3 1 1 2064 ... \n",
+ "1468 3 3 1 2065 ... \n",
+ "1469 3 3 1 2068 ... \n",
+ "\n",
+ " RelationshipSatisfaction StandardHours StockOptionLevel \\\n",
+ "0 1 80 0 \n",
+ "1 4 80 1 \n",
+ "2 2 80 0 \n",
+ "3 3 80 0 \n",
+ "4 4 80 1 \n",
+ "... ... ... ... \n",
+ "1465 3 80 1 \n",
+ "1466 1 80 1 \n",
+ "1467 2 80 1 \n",
+ "1468 4 80 0 \n",
+ "1469 1 80 0 \n",
+ "\n",
+ " TotalWorkingYears TrainingTimesLastYear WorkLifeBalance \\\n",
+ "0 8 0 1 \n",
+ "1 10 3 3 \n",
+ "2 7 3 3 \n",
+ "3 8 3 3 \n",
+ "4 6 3 3 \n",
+ "... ... ... ... \n",
+ "1465 17 3 3 \n",
+ "1466 9 5 3 \n",
+ "1467 6 0 3 \n",
+ "1468 17 3 2 \n",
+ "1469 6 3 4 \n",
+ "\n",
+ " YearsAtCompany YearsInCurrentRole YearsSinceLastPromotion \\\n",
+ "0 6 4 0 \n",
+ "1 10 7 1 \n",
+ "2 0 0 0 \n",
+ "3 8 7 3 \n",
+ "4 2 2 2 \n",
+ "... ... ... ... \n",
+ "1465 5 2 0 \n",
+ "1466 7 7 1 \n",
+ "1467 6 2 0 \n",
+ "1468 9 6 0 \n",
+ "1469 4 3 1 \n",
+ "\n",
+ " YearsWithCurrManager \n",
+ "0 5 \n",
+ "1 7 \n",
+ "2 0 \n",
+ "3 0 \n",
+ "4 2 \n",
+ "... ... \n",
+ "1465 3 \n",
+ "1466 7 \n",
+ "1467 3 \n",
+ "1468 8 \n",
+ "1469 2 \n",
+ "\n",
+ "[1470 rows x 35 columns]"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Age | \n",
+ " Attrition | \n",
+ " BusinessTravel | \n",
+ " DailyRate | \n",
+ " Department | \n",
+ " DistanceFromHome | \n",
+ " Education | \n",
+ " EducationField | \n",
+ " EmployeeCount | \n",
+ " EmployeeNumber | \n",
+ " ... | \n",
+ " RelationshipSatisfaction | \n",
+ " StandardHours | \n",
+ " StockOptionLevel | \n",
+ " TotalWorkingYears | \n",
+ " TrainingTimesLastYear | \n",
+ " WorkLifeBalance | \n",
+ " YearsAtCompany | \n",
+ " YearsInCurrentRole | \n",
+ " YearsSinceLastPromotion | \n",
+ " YearsWithCurrManager | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 41 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1102 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " 80 | \n",
+ " 0 | \n",
+ " 8 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 49 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 279 | \n",
+ " 1 | \n",
+ " 8 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " ... | \n",
+ " 4 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " 10 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 10 | \n",
+ " 7 | \n",
+ " 1 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 37 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1373 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " 80 | \n",
+ " 0 | \n",
+ " 7 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 33 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1392 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 5 | \n",
+ " ... | \n",
+ " 3 | \n",
+ " 80 | \n",
+ " 0 | \n",
+ " 8 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 8 | \n",
+ " 7 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 27 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 591 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 7 | \n",
+ " ... | \n",
+ " 4 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1465 | \n",
+ " 36 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 884 | \n",
+ " 1 | \n",
+ " 23 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 2061 | \n",
+ " ... | \n",
+ " 3 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " 17 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 5 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 1466 | \n",
+ " 39 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 613 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 2062 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " 9 | \n",
+ " 5 | \n",
+ " 3 | \n",
+ " 7 | \n",
+ " 7 | \n",
+ " 1 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " 1467 | \n",
+ " 27 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 155 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2064 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " 6 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 1468 | \n",
+ " 49 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1023 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 2065 | \n",
+ " ... | \n",
+ " 4 | \n",
+ " 80 | \n",
+ " 0 | \n",
+ " 17 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 9 | \n",
+ " 6 | \n",
+ " 0 | \n",
+ " 8 | \n",
+ "
\n",
+ " \n",
+ " 1469 | \n",
+ " 34 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 628 | \n",
+ " 1 | \n",
+ " 8 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 2068 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " 80 | \n",
+ " 0 | \n",
+ " 6 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1470 rows × 35 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df"
+ }
+ },
+ "metadata": {},
+ "execution_count": 60
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Separate features (X) and target variable (y).\n",
+ "# `target` was previously never defined in the notebook, so this cell raised a NameError\n",
+ "# on a fresh kernel; it is now set explicitly to the attrition label column.\n",
+ "# NOTE(review): EmployeeCount and StandardHours have zero variance in df.describe()\n",
+ "# and EmployeeNumber looks like a row identifier; consider dropping them from X.\n",
+ "target = 'Attrition'\n",
+ "X = df.drop(columns=[target])\n",
+ "y = df[target]\n"
+ ],
+ "metadata": {
+ "id": "dLSaqkZ0rvJy"
+ },
+ "execution_count": 61,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "X"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 443
+ },
+ "id": "o6T5dvKAsbzp",
+ "outputId": "54523b5a-2605-497a-b182-795eceaadb00"
+ },
+ "execution_count": 62,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Age BusinessTravel DailyRate Department DistanceFromHome Education \\\n",
+ "0 41 2 1102 2 1 2 \n",
+ "1 49 1 279 1 8 1 \n",
+ "2 37 2 1373 1 2 2 \n",
+ "3 33 1 1392 1 3 4 \n",
+ "4 27 2 591 1 2 1 \n",
+ "... ... ... ... ... ... ... \n",
+ "1465 36 1 884 1 23 2 \n",
+ "1466 39 2 613 1 6 1 \n",
+ "1467 27 2 155 1 4 3 \n",
+ "1468 49 1 1023 2 2 3 \n",
+ "1469 34 2 628 1 8 3 \n",
+ "\n",
+ " EducationField EmployeeCount EmployeeNumber EnvironmentSatisfaction \\\n",
+ "0 1 1 1 2 \n",
+ "1 1 1 2 3 \n",
+ "2 4 1 4 4 \n",
+ "3 1 1 5 4 \n",
+ "4 3 1 7 1 \n",
+ "... ... ... ... ... \n",
+ "1465 3 1 2061 3 \n",
+ "1466 3 1 2062 4 \n",
+ "1467 1 1 2064 2 \n",
+ "1468 3 1 2065 4 \n",
+ "1469 3 1 2068 2 \n",
+ "\n",
+ " ... RelationshipSatisfaction StandardHours StockOptionLevel \\\n",
+ "0 ... 1 80 0 \n",
+ "1 ... 4 80 1 \n",
+ "2 ... 2 80 0 \n",
+ "3 ... 3 80 0 \n",
+ "4 ... 4 80 1 \n",
+ "... ... ... ... ... \n",
+ "1465 ... 3 80 1 \n",
+ "1466 ... 1 80 1 \n",
+ "1467 ... 2 80 1 \n",
+ "1468 ... 4 80 0 \n",
+ "1469 ... 1 80 0 \n",
+ "\n",
+ " TotalWorkingYears TrainingTimesLastYear WorkLifeBalance \\\n",
+ "0 8 0 1 \n",
+ "1 10 3 3 \n",
+ "2 7 3 3 \n",
+ "3 8 3 3 \n",
+ "4 6 3 3 \n",
+ "... ... ... ... \n",
+ "1465 17 3 3 \n",
+ "1466 9 5 3 \n",
+ "1467 6 0 3 \n",
+ "1468 17 3 2 \n",
+ "1469 6 3 4 \n",
+ "\n",
+ " YearsAtCompany YearsInCurrentRole YearsSinceLastPromotion \\\n",
+ "0 6 4 0 \n",
+ "1 10 7 1 \n",
+ "2 0 0 0 \n",
+ "3 8 7 3 \n",
+ "4 2 2 2 \n",
+ "... ... ... ... \n",
+ "1465 5 2 0 \n",
+ "1466 7 7 1 \n",
+ "1467 6 2 0 \n",
+ "1468 9 6 0 \n",
+ "1469 4 3 1 \n",
+ "\n",
+ " YearsWithCurrManager \n",
+ "0 5 \n",
+ "1 7 \n",
+ "2 0 \n",
+ "3 0 \n",
+ "4 2 \n",
+ "... ... \n",
+ "1465 3 \n",
+ "1466 7 \n",
+ "1467 3 \n",
+ "1468 8 \n",
+ "1469 2 \n",
+ "\n",
+ "[1470 rows x 34 columns]"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Age | \n",
+ " BusinessTravel | \n",
+ " DailyRate | \n",
+ " Department | \n",
+ " DistanceFromHome | \n",
+ " Education | \n",
+ " EducationField | \n",
+ " EmployeeCount | \n",
+ " EmployeeNumber | \n",
+ " EnvironmentSatisfaction | \n",
+ " ... | \n",
+ " RelationshipSatisfaction | \n",
+ " StandardHours | \n",
+ " StockOptionLevel | \n",
+ " TotalWorkingYears | \n",
+ " TrainingTimesLastYear | \n",
+ " WorkLifeBalance | \n",
+ " YearsAtCompany | \n",
+ " YearsInCurrentRole | \n",
+ " YearsSinceLastPromotion | \n",
+ " YearsWithCurrManager | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 41 | \n",
+ " 2 | \n",
+ " 1102 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " 80 | \n",
+ " 0 | \n",
+ " 8 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 49 | \n",
+ " 1 | \n",
+ " 279 | \n",
+ " 1 | \n",
+ " 8 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " ... | \n",
+ " 4 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " 10 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 10 | \n",
+ " 7 | \n",
+ " 1 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 37 | \n",
+ " 2 | \n",
+ " 1373 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " 80 | \n",
+ " 0 | \n",
+ " 7 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 33 | \n",
+ " 1 | \n",
+ " 1392 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 5 | \n",
+ " 4 | \n",
+ " ... | \n",
+ " 3 | \n",
+ " 80 | \n",
+ " 0 | \n",
+ " 8 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 8 | \n",
+ " 7 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 27 | \n",
+ " 2 | \n",
+ " 591 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 7 | \n",
+ " 1 | \n",
+ " ... | \n",
+ " 4 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1465 | \n",
+ " 36 | \n",
+ " 1 | \n",
+ " 884 | \n",
+ " 1 | \n",
+ " 23 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 2061 | \n",
+ " 3 | \n",
+ " ... | \n",
+ " 3 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " 17 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 5 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 1466 | \n",
+ " 39 | \n",
+ " 2 | \n",
+ " 613 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 2062 | \n",
+ " 4 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " 9 | \n",
+ " 5 | \n",
+ " 3 | \n",
+ " 7 | \n",
+ " 7 | \n",
+ " 1 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " 1467 | \n",
+ " 27 | \n",
+ " 2 | \n",
+ " 155 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2064 | \n",
+ " 2 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " 6 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 1468 | \n",
+ " 49 | \n",
+ " 1 | \n",
+ " 1023 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 2065 | \n",
+ " 4 | \n",
+ " ... | \n",
+ " 4 | \n",
+ " 80 | \n",
+ " 0 | \n",
+ " 17 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 9 | \n",
+ " 6 | \n",
+ " 0 | \n",
+ " 8 | \n",
+ "
\n",
+ " \n",
+ " 1469 | \n",
+ " 34 | \n",
+ " 2 | \n",
+ " 628 | \n",
+ " 1 | \n",
+ " 8 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 2068 | \n",
+ " 2 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " 80 | \n",
+ " 0 | \n",
+ " 6 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1470 rows × 34 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "X"
+ }
+ },
+ "metadata": {},
+ "execution_count": 62
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Split the data into training (80%) and test (20%) sets.\n",
+ "# stratify=y keeps the proportion of the two target classes the same in both splits;\n",
+ "# the target is imbalanced, so an unstratified split can leave the test set with an\n",
+ "# unrepresentative share of the minority class.\n",
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ "    X, y, test_size=0.2, random_state=42, stratify=y\n",
+ ")"
+ ],
+ "metadata": {
+ "id": "uX2VwSc5tHFQ"
+ },
+ "execution_count": 63,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def evaluate_classifier(name, model, X_train, y_train, X_test, y_test):\n",
+ "    \"\"\"Fit `model` on the training split and report its performance on the test split.\n",
+ "\n",
+ "    Prints the confusion matrix, accuracy and classification report, each under a\n",
+ "    heading prefixed with `name`, and returns the predictions made for `X_test`.\n",
+ "    \"\"\"\n",
+ "    model.fit(X_train, y_train)\n",
+ "    y_pred = model.predict(X_test)\n",
+ "    print(f\"{name} Confusion Matrix:\")\n",
+ "    print(confusion_matrix(y_test, y_pred))\n",
+ "    print(f\"{name} Accuracy:\")\n",
+ "    print(accuracy_score(y_test, y_pred))\n",
+ "    print(f\"{name} Classification Report:\")\n",
+ "    print(classification_report(y_test, y_pred))\n",
+ "    return y_pred\n",
+ "\n",
+ "\n",
+ "# Logistic Regression.\n",
+ "# Features are standardised inside a pipeline: fitted on the raw, unscaled columns the\n",
+ "# lbfgs solver reached max_iter=1000 without converging (ConvergenceWarning). The scaler\n",
+ "# is fitted on the training split only, so no test-set statistics leak into training.\n",
+ "log_reg = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))\n",
+ "y_pred_log_reg = evaluate_classifier(\n",
+ "    \"Logistic Regression\", log_reg, X_train, y_train, X_test, y_test\n",
+ ")\n",
+ "\n",
+ "# Random Forest Classifier.\n",
+ "# random_state is fixed so that re-running the notebook reproduces the same forest\n",
+ "# and therefore the same metrics.\n",
+ "# NOTE(review): recall on the minority class is very low for both models; consider\n",
+ "# class_weight='balanced' or another imbalance strategy.\n",
+ "rf_clf = RandomForestClassifier(random_state=42)\n",
+ "y_pred_rf_clf = evaluate_classifier(\n",
+ "    \"Random Forest\", rf_clf, X_train, y_train, X_test, y_test\n",
+ ")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "atK0_s3Otbv3",
+ "outputId": "a855434b-357e-477c-c1b4-0c2696522135"
+ },
+ "execution_count": 64,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
+ "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
+ "\n",
+ "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
+ " https://scikit-learn.org/stable/modules/preprocessing.html\n",
+ "Please also refer to the documentation for alternative solver options:\n",
+ " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
+ " n_iter_i = _check_optimize_result(\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Logistic Regression Confusion Matrix:\n",
+ "[[249 6]\n",
+ " [ 36 3]]\n",
+ "Logistic Regression Accuracy:\n",
+ "0.8571428571428571\n",
+ "Logistic Regression Classification Report:\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.87 0.98 0.92 255\n",
+ " 1 0.33 0.08 0.12 39\n",
+ "\n",
+ " accuracy 0.86 294\n",
+ " macro avg 0.60 0.53 0.52 294\n",
+ "weighted avg 0.80 0.86 0.82 294\n",
+ "\n",
+ "Random Forest Confusion Matrix:\n",
+ "[[253 2]\n",
+ " [ 35 4]]\n",
+ "Random Forest Accuracy:\n",
+ "0.8741496598639455\n",
+ "Random Forest Classification Report:\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.88 0.99 0.93 255\n",
+ " 1 0.67 0.10 0.18 39\n",
+ "\n",
+ " accuracy 0.87 294\n",
+ " macro avg 0.77 0.55 0.55 294\n",
+ "weighted avg 0.85 0.87 0.83 294\n",
+ "\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [],
+ "metadata": {
+ "id": "fiRgIW3Kt4sC"
+ },
+ "execution_count": 64,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file