diff --git a/.circleci/config.yml b/.circleci/config.yml index 037645ab2..393468a98 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -54,7 +54,7 @@ jobs: - run: name: Deploy to Railway App (You must set RAILWAY_TOKEN env var) command: | - cd section-07-ci-and-publishing/house-prices-api && railway up --detach + cd section-07-ci-and-publishing/house-prices-api && railway up --detach -s MLDevOps-Beginner -e production section_07_test_and_upload_regression_model: <<: *defaults @@ -92,7 +92,7 @@ jobs: - run: name: Build and run Dockerfile (see https://docs.railway.app/deploy/dockerfiles) command: | - cd section-08-deploying-with-containers && railway up --detach + cd section-08-deploying-with-containers && railway up --detach -s MLDevOps-Beginner -e production test_regression_model_py37: docker: diff --git a/README.md b/README.md index 7fbf80b75..bb7935a94 100644 --- a/README.md +++ b/README.md @@ -2,3 +2,5 @@ Accompanying repo for the online course Deployment of Machine Learning Models. For the documentation, visit the [course on Udemy](https://www.udemy.com/deployment-of-machine-learning-models/?couponCode=TIDREPO). 
+ +update: only for testing CI/CD pipeline \ No newline at end of file diff --git a/section-05-production-model-package/regression_model/trained_models/train_pipeline.py b/section-05-production-model-package/regression_model/trained_models/train_pipeline.py new file mode 100644 index 000000000..edcdd7c15 --- /dev/null +++ b/section-05-production-model-package/regression_model/trained_models/train_pipeline.py @@ -0,0 +1,30 @@ +import numpy as np +from config.core import config +from pipeline import price_pipe +from processing.data_manager import load_dataset, save_pipeline +from sklearn.model_selection import train_test_split + +def run_training() -> None: + """Train the model""" + + # read training data + data = load_dataset(file_name=config.app_config.training_data_file) + + # divide train and test + X_train, X_test, y_train, y_test = train_test_split( + data[config.model_config.features], # predictors + data[config.model_config.target], + test_size=config.model_config.test_size, + # we are setting the random seed here for reproducibility + random_state=config.model_config.random_state, + ) + y_train= np.log(y_train) + + # fit model + price_pipe.fit(X_train, y_train) + + # persist trained model + save_pipeline(pipeline_to_persist=price_pipe) + +if __name__ == "__main__": + run_training() \ No newline at end of file diff --git a/section-05-production-model-package/requirements/requirements.txt b/section-05-production-model-package/requirements/requirements.txt index 0fbffd3a6..607a23ff0 100644 --- a/section-05-production-model-package/requirements/requirements.txt +++ b/section-05-production-model-package/requirements/requirements.txt @@ -1,7 +1,7 @@ # We use compatible release functionality (see PEP 440 here: https://www.python.org/dev/peps/pep-0440/#compatible-release) # to specify acceptable version ranges of our project dependencies.
This gives us the flexibility to keep up with small # updates/fixes, whilst ensuring we don't install a major update which could introduce backwards incompatible changes. -numpy>=1.21.0,<2.0.0 +numpy>=1.21.0,<1.24.9 #2.0.0 pandas>=1.3.5,<2.0.0 pydantic>=1.8.1,<2.0.0 scikit-learn>=1.1.3,<2.0.0 diff --git a/section-07-ci-and-publishing/model-package/regression_model/config/core.py b/section-07-ci-and-publishing/model-package/regression_model/config/core.py index f5f354b19..f321864f9 100644 --- a/section-07-ci-and-publishing/model-package/regression_model/config/core.py +++ b/section-07-ci-and-publishing/model-package/regression_model/config/core.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Dict, List, Sequence +from typing import Dict, List, Optional, Sequence from pydantic import BaseModel from strictyaml import YAML, load @@ -69,7 +69,7 @@ def find_config_file() -> Path: raise Exception(f"Config not found at {CONFIG_FILE_PATH!r}") -def fetch_config_from_yaml(cfg_path: Path = None) -> YAML: +def fetch_config_from_yaml(cfg_path: Optional[Path] = None) -> YAML: """Parse YAML containing the package configuration.""" if not cfg_path: