From cef1bf90a0b33bc33463be31f75229cb7255e221 Mon Sep 17 00:00:00 2001 From: baturayo Date: Thu, 26 Aug 2021 12:59:31 +0200 Subject: [PATCH 1/8] add make install --- Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 986eb5f..b6d83e2 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,8 @@ DATASET := data/transformed/creditcard.csv ############################################################### # COMMANDS # ############################################################### +install: ## install dependencies + pip install -e ".[test, serve]" clean: ## clean artifacts @echo ">>> cleaning files" @@ -32,7 +34,7 @@ serve: ## serve trained model with a REST API using dploy-kickstart @echo ">>> serving the trained model" kickstart serve -e ml_skeleton_py/model/predict.py -l . -run-pipeline: clean generate-dataset train serve ## clean artifacts -> generate dataset -> train -> serve +run-pipeline: install clean generate-dataset train serve ## clean artifacts -> generate dataset -> train -> serve lint: ## flake8 linting and black code style @echo ">>> black files" From a6f5187bc0a6749b772f202df8feaac8c1603767 Mon Sep 17 00:00:00 2001 From: baturayo Date: Thu, 26 Aug 2021 12:59:49 +0200 Subject: [PATCH 2/8] upgrade versions --- setup.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/setup.py b/setup.py index cf9f20a..3e6dcc2 100644 --- a/setup.py +++ b/setup.py @@ -2,14 +2,13 @@ test_deps = [ - "pytest>=5.3.5", - "pytest-flask>=1.0.0", - "pip>=20.0.0", - "tox>=3.14.0", - "flake8>=3.7.9", - "flake8-annotations>=1.1.3", - "pytest-cov>=2.8.1", - "black>=19.10b0" + "pytest>=6.2.3", + "pytest-flask>=1.2.0", + "pip>=21.0.1", + "flake8>=3.9.2", + "flake8-annotations>=2.6.2", + "pytest-cov>=2.12.1", + "black>=21.7b0" ] serve_deps = [ @@ -26,7 +25,7 @@ author_email="info@dataroots.io", description="Description of my ml-skeleton package", packages=find_packages(), - install_requires=["pandas>=1.1.0", 
"scikit-learn>=0.23.2"], + install_requires=["pandas>=1.3.2", "scikit-learn>=0.24.2"], tests_require=test_deps, extras_require=extras, ) From ff5510e33b64fb5c86260fe51d3228fe6c7d9573 Mon Sep 17 00:00:00 2001 From: baturayo Date: Thu, 26 Aug 2021 13:00:14 +0200 Subject: [PATCH 3/8] black refac' --- ml_skeleton_py/model/train.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/ml_skeleton_py/model/train.py b/ml_skeleton_py/model/train.py index a34a484..748fd7b 100644 --- a/ml_skeleton_py/model/train.py +++ b/ml_skeleton_py/model/train.py @@ -63,10 +63,7 @@ def train(dataset_loc: str, model_dir: str, model_name: str = "lr") -> None: auc_roc = round(training_score.mean(), 2) logger.info(f"Classifier: {pipeline.__class__.__name__}") - logger.info( - "Has a training score " - + f"of {auc_roc} roc_auc" - ) + logger.info("Has a training score " + f"of {auc_roc} roc_auc") check_performance(auc_roc) # Serialize and dump trained pipeline to disk pred_result = { @@ -83,9 +80,11 @@ def train(dataset_loc: str, model_dir: str, model_name: str = "lr") -> None: def check_performance(auc_roc: float) -> None: if auc_roc < s.EXPECTED_MIN_AUC: - raise Exception("The auc roc is less than the expected, " - "please check your data manipulation or " - "training parameters!") + raise Exception( + "The auc roc is less than the expected, " + "please check your data manipulation or " + "training parameters!" 
+ ) else: # Performance is more than the expected pass From d2f3eacefb448e953d32f5904e786d991a4c5813 Mon Sep 17 00:00:00 2001 From: baturayo Date: Thu, 26 Aug 2021 13:00:48 +0200 Subject: [PATCH 4/8] add py3.9 to the testing --- .github/workflows/test-and-train.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-and-train.yml b/.github/workflows/test-and-train.yml index c464caa..8730497 100644 --- a/.github/workflows/test-and-train.yml +++ b/.github/workflows/test-and-train.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8] + python-version: [3.7, 3.8, 3.9] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} @@ -22,8 +22,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade pip - python -m pip install -e ".[test]" + make install - name: Lint with flake8 & black run: | make lint @@ -50,8 +49,7 @@ jobs: python-version: 3.7 - name: Install dependencies run: | - python -m pip install --upgrade pip - python -m pip install -e . 
+ make install - name: Train the model run: | make train From a80e653b827c01e69b696eb86705c406ee7e9c04 Mon Sep 17 00:00:00 2001 From: baturayo Date: Thu, 26 Aug 2021 13:01:18 +0200 Subject: [PATCH 5/8] black refac' --- ml_skeleton_py/etl/generate_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml_skeleton_py/etl/generate_dataset.py b/ml_skeleton_py/etl/generate_dataset.py index 6bb4f1c..754f116 100644 --- a/ml_skeleton_py/etl/generate_dataset.py +++ b/ml_skeleton_py/etl/generate_dataset.py @@ -68,7 +68,7 @@ def remove_outliers(df: pd.DataFrame, **kwargs: int) -> pd.DataFrame: df_outlier_removed = df_outlier_removed[ df_outlier_removed.is_outlier != -1 - ] # -1 represents outliers + ] # -1 represents outliers # Report number of removed rows n_filtered_rows = df_outlier_removed.shape[0] From 682b6ef496b8c4096c96c289c2ac19ac1e7e7aeb Mon Sep 17 00:00:00 2001 From: baturayo Date: Thu, 26 Aug 2021 13:05:15 +0200 Subject: [PATCH 6/8] update make help --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b6d83e2..f8ac903 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ serve: ## serve trained model with a REST API using dploy-kickstart @echo ">>> serving the trained model" kickstart serve -e ml_skeleton_py/model/predict.py -l . 
-run-pipeline: install clean generate-dataset train serve ## clean artifacts -> generate dataset -> train -> serve +run-pipeline: install clean generate-dataset train serve ## install dependencies -> clean artifacts -> generate dataset -> train -> serve lint: ## flake8 linting and black code style @echo ">>> black files" From 45c24c746bcc6ae957268e407bfb7bb7b2f1c378 Mon Sep 17 00:00:00 2001 From: baturayo Date: Thu, 26 Aug 2021 13:09:40 +0200 Subject: [PATCH 7/8] add python 3.9 badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7c41346..ecccbbc 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ [![maintained by dataroots](https://img.shields.io/badge/maintained%20by-dataroots-%2300b189)](https://dataroots.io) -[![PythonVersion](https://img.shields.io/badge/python-3.7%20%7C%203.8-blue)](https://img.shields.io/badge/python-3.7%20%7C%203.8-blue) +[![PythonVersion](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9-blue)](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9-blue) [![tests](https://github.com/datarootsio/ml-skeleton-py/workflows/tests/badge.svg?branch=master)](https://github.com/datarootsio/ml-skeleton-py/actions) [![Codecov](https://codecov.io/github/datarootsio/ml-skeleton-py/badge.svg?branch=master&service=github)](https://github.com/datarootsio/ml-skeleton-py/actions) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) From 1aee73e06db493dde8807a3b0532d14541b3b9a1 Mon Sep 17 00:00:00 2001 From: baturayo Date: Thu, 26 Aug 2021 13:15:08 +0200 Subject: [PATCH 8/8] update dependency installation guide --- HOWTO.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/HOWTO.md b/HOWTO.md index 537a380..6993a2b 100644 --- a/HOWTO.md +++ b/HOWTO.md @@ -14,12 +14,12 @@ While the project is heavily opinionated, opinions are welcomed to be discussed: ``` 2. 
Install dependencies using [pip](https://pip.pypa.io/en/stable/installing/). The following command -will install the dependencies from `setup.py`. Note that installing dependencies with `-e` +will install the dependencies from `setup.py`. Under the hood, it runs `pip install -e ".[test, serve]"`. Note that installing dependencies with `-e` editable mode is needed to properly run unit tests. `[test, serve]` is optional. `test` refers to unit test dependencies and `serve` refers to deployment dependencies. ```bash - pip install -e ".[test, serve]" + make install ``` ## Running the project @@ -49,7 +49,7 @@ Note the dependency: `generate-dataset` > `train` > `serve`. ## Docker -Currently you can find the following docker files: +Currently, you can find the following docker files: 1. `jupyter.Dockerfile` builds an image for running notebooks. 2. `test.Dockerfile` builds an image to run all tests in (`make test-docker`). 3. `serve.Dockerfile` build an image to serve the trained model via a REST api.