diff --git a/scikitLearn/python/IncomePrediction.ipynb b/scikitLearn/python/IncomePrediction.ipynb index 12300de..b0944e2 100644 --- a/scikitLearn/python/IncomePrediction.ipynb +++ b/scikitLearn/python/IncomePrediction.ipynb @@ -20,6 +20,76 @@ "output_type": "stream", "text": [ "Requirement already up-to-date: pandas in ./.local/lib/python3.6/site-packages (1.0.3)\n", + "Requirement already satisfied, skipping upgrade: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas) (2019.3)\n", + "Requirement already satisfied, skipping upgrade: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from pandas) (1.18.1)\n", + "Requirement already satisfied, skipping upgrade: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas) (2.8.1)\n", + "Requirement already satisfied, skipping upgrade: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.6.1->pandas) (1.11.0)\n", + "Requirement already up-to-date: scikit-learn in ./.local/lib/python3.6/site-packages (0.22.2.post1)\n", + "Requirement already satisfied, skipping upgrade: scipy>=0.17.0 in /usr/local/lib/python3.6/dist-packages (from scikit-learn) (1.4.1)\n", + "Requirement already satisfied, skipping upgrade: numpy>=1.11.0 in /usr/local/lib/python3.6/dist-packages (from scikit-learn) (1.18.1)\n", + "Requirement already satisfied, skipping upgrade: joblib>=0.11 in ./.local/lib/python3.6/site-packages (from scikit-learn) (0.14.1)\n", + "Requirement already up-to-date: alibi in ./.local/lib/python3.6/site-packages (0.4.0)\n", + "Requirement already satisfied, skipping upgrade: scipy in /usr/local/lib/python3.6/dist-packages (from alibi) (1.4.1)\n", + "Requirement already satisfied, skipping upgrade: prettyprinter in ./.local/lib/python3.6/site-packages (from alibi) (0.18.0)\n", + "Requirement already satisfied, skipping upgrade: spacy in ./.local/lib/python3.6/site-packages (from alibi) (2.2.4)\n", + "Requirement already satisfied, skipping upgrade: Pillow in ./.local/lib/python3.6/site-packages (from alibi) (7.0.0)\n", + "Requirement already satisfied, skipping upgrade: scikit-learn in ./.local/lib/python3.6/site-packages (from alibi) (0.22.2.post1)\n", + "Requirement already satisfied, skipping upgrade: scikit-image in ./.local/lib/python3.6/site-packages (from alibi) (0.16.2)\n", + "Requirement already satisfied, skipping upgrade: shap in ./.local/lib/python3.6/site-packages (from alibi) (0.35.0)\n", + "Requirement already satisfied, skipping upgrade: numpy in /usr/local/lib/python3.6/dist-packages (from alibi) (1.18.1)\n", + "Requirement already satisfied, skipping upgrade: tensorflow<2.0 in /usr/local/lib/python3.6/dist-packages (from alibi) (1.15.2)\n", + "Requirement already satisfied, skipping upgrade: attrs in /usr/local/lib/python3.6/dist-packages (from alibi) (19.3.0)\n", + "Requirement already satisfied, skipping upgrade: beautifulsoup4 in ./.local/lib/python3.6/site-packages (from alibi) (4.8.2)\n", + "Requirement already satisfied, skipping upgrade: requests in /usr/local/lib/python3.6/dist-packages (from alibi) (2.22.0)\n", + "Requirement already satisfied, skipping upgrade: pandas in ./.local/lib/python3.6/site-packages (from alibi) (1.0.3)\n", + "Requirement already satisfied, skipping upgrade: colorful>=0.4.0 in ./.local/lib/python3.6/site-packages (from prettyprinter->alibi) (0.5.4)\n", + "Requirement already satisfied, skipping upgrade: Pygments>=2.2.0 in /usr/local/lib/python3.6/dist-packages (from prettyprinter->alibi) (2.5.2)\n", + "Requirement already satisfied, skipping upgrade: wasabi<1.1.0,>=0.4.0 in ./.local/lib/python3.6/site-packages (from spacy->alibi) (0.6.0)\n", + "Requirement already satisfied, skipping upgrade: thinc==7.4.0 in ./.local/lib/python3.6/site-packages (from spacy->alibi) (7.4.0)\n", + "Requirement already satisfied, skipping upgrade: murmurhash<1.1.0,>=0.28.0 in ./.local/lib/python3.6/site-packages (from spacy->alibi) (1.0.2)\n", + "Requirement already satisfied, skipping upgrade: plac<1.2.0,>=0.9.6 in ./.local/lib/python3.6/site-packages (from spacy->alibi) (1.1.3)\n", + "Requirement already satisfied, skipping upgrade: preshed<3.1.0,>=3.0.2 in ./.local/lib/python3.6/site-packages (from spacy->alibi) (3.0.2)\n", + "Requirement already satisfied, skipping upgrade: catalogue<1.1.0,>=0.0.7 in ./.local/lib/python3.6/site-packages (from spacy->alibi) (1.0.0)\n", + "Requirement already satisfied, skipping upgrade: tqdm<5.0.0,>=4.38.0 in ./.local/lib/python3.6/site-packages (from spacy->alibi) (4.43.0)\n", + "Requirement already satisfied, skipping upgrade: cymem<2.1.0,>=2.0.2 in ./.local/lib/python3.6/site-packages (from spacy->alibi) (2.0.3)\n", + "Requirement already satisfied, skipping upgrade: blis<0.5.0,>=0.4.0 in ./.local/lib/python3.6/site-packages (from spacy->alibi) (0.4.1)\n", + "Requirement already satisfied, skipping upgrade: setuptools in /usr/local/lib/python3.6/dist-packages (from spacy->alibi) (45.1.0)\n", + "Requirement already satisfied, skipping upgrade: srsly<1.1.0,>=1.0.2 in ./.local/lib/python3.6/site-packages (from spacy->alibi) (1.0.2)\n", + "Requirement already satisfied, skipping upgrade: joblib>=0.11 in ./.local/lib/python3.6/site-packages (from scikit-learn->alibi) (0.14.1)\n", + "Requirement already satisfied, skipping upgrade: PyWavelets>=0.4.0 in ./.local/lib/python3.6/site-packages (from scikit-image->alibi) (1.1.1)\n", + "Requirement already satisfied, skipping upgrade: networkx>=2.0 in ./.local/lib/python3.6/site-packages (from scikit-image->alibi) (2.4)\n", + "Requirement already satisfied, skipping upgrade: imageio>=2.3.0 in ./.local/lib/python3.6/site-packages (from scikit-image->alibi) (2.8.0)\n", + "Requirement already satisfied, skipping upgrade: matplotlib!=3.0.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from scikit-image->alibi) (3.1.2)\n", + "Requirement already satisfied, skipping upgrade: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow<2.0->alibi) (1.11.2)\n", + "Requirement already satisfied, skipping upgrade: google-pasta>=0.1.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow<2.0->alibi) (0.1.8)\n", + "Requirement already satisfied, skipping upgrade: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow<2.0->alibi) (1.26.0)\n", + "Requirement already satisfied, skipping upgrade: gast==0.2.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow<2.0->alibi) (0.2.2)\n", + "Requirement already satisfied, skipping upgrade: tensorboard<1.16.0,>=1.15.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow<2.0->alibi) (1.15.0)\n", + "Requirement already satisfied, skipping upgrade: six>=1.10.0 in /usr/lib/python3/dist-packages (from tensorflow<2.0->alibi) (1.11.0)\n", + "Requirement already satisfied, skipping upgrade: tensorflow-estimator==1.15.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow<2.0->alibi) (1.15.1)\n", + "Requirement already satisfied, skipping upgrade: astor>=0.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow<2.0->alibi) (0.8.1)\n", + "Requirement already satisfied, skipping upgrade: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from tensorflow<2.0->alibi) (1.1.0)\n", + "Requirement already satisfied, skipping upgrade: wheel>=0.26; python_version >= \"3\" in /usr/lib/python3/dist-packages (from tensorflow<2.0->alibi) (0.30.0)\n", + "Requirement already satisfied, skipping upgrade: keras-applications>=1.0.8 in /usr/local/lib/python3.6/dist-packages (from tensorflow<2.0->alibi) (1.0.8)\n", + "Requirement already satisfied, skipping upgrade: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow<2.0->alibi) (1.1.0)\n", + "Requirement already satisfied, skipping upgrade: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow<2.0->alibi) (0.9.0)\n", + "Requirement already satisfied, skipping upgrade: protobuf>=3.6.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow<2.0->alibi) (3.11.2)\n", + "Requirement already satisfied, skipping upgrade: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow<2.0->alibi) (3.1.0)\n", + "Requirement already satisfied, skipping upgrade: soupsieve>=1.2 in ./.local/lib/python3.6/site-packages (from beautifulsoup4->alibi) (2.0)\n", + "Requirement already satisfied, skipping upgrade: idna<2.9,>=2.5 in /usr/lib/python3/dist-packages (from requests->alibi) (2.6)\n", + "Requirement already satisfied, skipping upgrade: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in ./.local/lib/python3.6/site-packages (from requests->alibi) (1.24.3)\n", + "Requirement already satisfied, skipping upgrade: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->alibi) (2019.11.28)\n", + "Requirement already satisfied, skipping upgrade: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->alibi) (3.0.4)\n", + "Requirement already satisfied, skipping upgrade: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->alibi) (2019.3)\n", + "Requirement already satisfied, skipping upgrade: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas->alibi) (2.8.1)\n", + "Requirement already satisfied, skipping upgrade: importlib-metadata>=0.20; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from catalogue<1.1.0,>=0.0.7->spacy->alibi) (1.4.0)\n", + "Requirement already satisfied, skipping upgrade: decorator>=4.3.0 in /usr/local/lib/python3.6/dist-packages (from networkx>=2.0->scikit-image->alibi) (4.4.1)\n", + "Requirement already satisfied, skipping upgrade: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib!=3.0.0,>=2.0.0->scikit-image->alibi) (1.1.0)\n", + "Requirement already satisfied, skipping upgrade: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib!=3.0.0,>=2.0.0->scikit-image->alibi) (2.4.6)\n", + "Requirement already satisfied, skipping upgrade: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib!=3.0.0,>=2.0.0->scikit-image->alibi) (0.10.0)\n", + "Requirement already satisfied, skipping upgrade: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.16.0,>=1.15.0->tensorflow<2.0->alibi) (3.1.1)\n", + "Requirement already satisfied, skipping upgrade: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.16.0,>=1.15.0->tensorflow<2.0->alibi) (0.16.1)\r\n", + "Requirement already satisfied, skipping upgrade: h5py in /usr/local/lib/python3.6/dist-packages (from keras-applications>=1.0.8->tensorflow<2.0->alibi) (2.10.0)\r\n", "Requirement already satisfied, skipping upgrade: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from pandas) (1.18.1)\n", "Requirement already satisfied, skipping upgrade: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas) (2019.3)\n", "Requirement already satisfied, skipping upgrade: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas) (2.8.1)\n", @@ -120,6 +190,8 @@ "from typing import Tuple, Union\n", "import requests\n", "from requests import RequestException\n", + "from io import BytesIO, StringIO\n", + "from joblib import dump" "from io import BytesIO, StringIO" ] }, @@ -646,6 +718,36 @@ "This is due to the imbalanced dataset (roughly 25:75 high:low earner proportion), so during the sampling stage feature ranges corresponding to low-earners will be oversampled. This is a feature because it can point out an imbalanced dataset, but it can also be fixed by producing balanced datasets to enable anchors to be found for either class." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Exporting model" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "['income.joblib']" + ], + "text/plain": [ + "['income.joblib']" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dump(clf, 'income.joblib')" + ] + }, { "cell_type": "code", "execution_count": null,