Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions Dockerfile-jenkins
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
FROM jenkins/jenkins:2.347-jdk11
FROM jenkins/jenkins:2.472-jdk21

USER jenkins
RUN jenkins-plugin-cli --plugins blueocean:1.25.5 build-timestamp:1.0.3 timestamper:1.17 pollscm:1.3.1 github-api:1.303-400.v35c2d8258028
RUN jenkins-plugin-cli --plugins pipeline-stage-view:2.34 pipeline-graph-view:313.v1322ce83d680 build-timestamp:1.0.3 timestamper:1.17 pollscm:1.5 github-api:1.321-475.vf7ef62885c83 github-branch-source:1793.v1831e9c68d77


USER root
ENV FLUENTD_HOST "fluentd"
Expand All @@ -14,8 +15,9 @@ ENV SECRET_KEY ${SECRET_KEY}
COPY ./requirements.txt requirements.txt

RUN apt update && \
apt install -y python3 python3-pip
RUN pip3 install -r requirements.txt
apt install -y python3.11 python3-pip
RUN pip3 install --upgrade pip --break-system-packages && \
pip install -r requirements.txt --break-system-packages && echo "hello world"

COPY jenkins/1-configureJenkins.groovy /usr/share/jenkins/ref/init.groovy.d/1-configureJenkins.groovy
COPY jenkins/2-addAccessKeys.groovy /usr/share/jenkins/ref/init.groovy.d/2-addAccessKeys.groovy
4 changes: 2 additions & 2 deletions Dockerfile-mlflow
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
FROM python:3.9-slim-buster
WORKDIR /usr/src/app

RUN pip install awscli==1.24.7 boto3==1.23.7 mlflow==1.23.1 protobuf==3.20.1
RUN pip install awscli==1.24.7 boto3==1.23.7 mlflow==1.30.0 protobuf==3.20.1
EXPOSE 5000
ENV MLFLOW_S3_ENDPOINT_URL ${MLFLOW_S3_ENDPOINT_URL}
ENV AWS_ACCESS_KEY_ID ${AWS_ACCESS_KEY_ID}
ENV AWS_SECRET_ACCESS_KEY ${AWS_SECRET_ACCESS_KEY}
ENTRYPOINT mlflow server -h 0.0.0.0 -p 5000 --default-artifact-root s3://cd4ml-ml-flow-bucket/ --backend-store-uri /mnt/mlflow
ENTRYPOINT mlflow server -h 0.0.0.0 -p 5000 --default-artifact-root s3://cd4ml-ml-flow-bucket/ --backend-store-uri /mnt/mlflow
2 changes: 1 addition & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ pipeline {
stages {
stage('Install dependencies') {
steps {
sh 'pip3 install -r requirements.txt'
sh 'pip3 install -r requirements.txt --break-system-packages'
}
}
stage('Run tests') {
Expand Down
4 changes: 2 additions & 2 deletions cd4ml/filenames.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ def _get_model_file_templates(model_results_dir):
def _get_problem_file_templates(raw_problem_data_dir):
file_names_problem = {
'groceries': {
'raw_grocery_data': '%s/groceries.csv' % raw_problem_data_dir,
'grocery_data_shuffled': '%s/groceries_shuffled.csv' % raw_problem_data_dir
'raw_grocery_data': '%s/store47-2016.csv' % raw_problem_data_dir,
'grocery_data_shuffled': '%s/store47-2016_shuffled.csv' % raw_problem_data_dir
},
'houses': {
'raw_house_data': '%s/house_sales.csv' % raw_problem_data_dir,
Expand Down
20 changes: 15 additions & 5 deletions cd4ml/problems/groceries/download_data/download_data.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
from cd4ml.filenames import get_problem_files
from cd4ml.utils.utils import download_to_file_from_url, shuffle_csv_file
import zipfile
from pathlib import Path
import logging

download_params = {'key': 'store47-2016.csv',
'gcs_bucket': 'continuous-intelligence',
'base_url': 'https://storage.googleapis.com'}
logger = logging.getLogger(__name__)

download_params = {'key': 'store47-2016',
'gcs_bucket': 'raw/master',
'base_url': 'https://github.com/luizmachado/CDMLDataset'}


def get_grocery_url_and_files(problem_name):
Expand All @@ -13,12 +18,17 @@ def get_grocery_url_and_files(problem_name):
base_url = download_params['base_url']

filename = file_names['raw_grocery_data']
url = "%s/%s/%s" % (base_url, gcs_bucket, key)
url = "%s/%s/%s.zip" % (base_url, gcs_bucket, key)
filename_shuffled = file_names['grocery_data_shuffled']
return url, filename, filename_shuffled


def download(problem_name, use_cache=True):
    """Download, unpack and shuffle the raw grocery data set.

    The archive at the URL returned by ``get_grocery_url_and_files`` is
    saved as ``<raw csv path>.zip``, extracted into the directory that
    holds the raw CSV path, and the raw CSV is then shuffled into the
    shuffled-file path.

    :param problem_name: problem name passed to ``get_grocery_url_and_files``
    :param use_cache: forwarded unchanged to ``download_to_file_from_url``
    """
    url, filename, filename_shuffled = get_grocery_url_and_files(problem_name)

    # The URL points at a zip archive, so it is stored under its own name
    # rather than being written to the CSV path.
    zipname = f"{filename}.zip"
    download_to_file_from_url(url, zipname, use_cache=use_cache)

    target_dir = Path(filename).parent
    logger.info("Unzipping: %s @ %s", zipname, target_dir)
    with zipfile.ZipFile(zipname, 'r') as zip_ref:
        zip_ref.extractall(target_dir)

    # NOTE(review): presumably the archive contains a member named like the
    # raw CSV file, so `filename` exists after extraction - confirm.
    shuffle_csv_file(filename, filename_shuffled)
6 changes: 4 additions & 2 deletions cd4ml/problems/houses/download_data/download_data.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from cd4ml.filenames import get_problem_files
from cd4ml.utils.utils import download_to_file_from_url

baseUri = "https://github.com/luizmachado/CD4ML/tree/master/dataset/"

download_params = {
'url': "https://github.com/dave31415/house_price/raw/master/data/house_data_100000.csv",
'url_lookup': "https://github.com/dave31415/house_price/raw/master/data/zip_lookup.csv"
'url': "https://github.com/luizmachado/CD4ML/raw/master/dataset/house_data_100000.csv",
'url_lookup': "https://github.com/luizmachado/CD4ML/raw/master/dataset/zip_lookup.csv"
}


Expand Down
3 changes: 1 addition & 2 deletions cd4ml/problems/iris/download_data/download_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
from cd4ml.utils.utils import download_to_file_from_url

download_params = {
'url': "https://gist.githubusercontent.com/curran/a08a1080b88344b0c8a7/"
"raw/0e7a9b0a5d22642a06d3d5b9bcbad9890c8ee534/iris.csv"
'url': "https://github.com/luizmachado/CD4ML/raw/master/dataset/iris.csv"
}


Expand Down
3 changes: 3 additions & 0 deletions dataset/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# continuous-delivery-for-machine-learning-data

The data files were extracted from the ThoughtWorks project at https://github.com/ThoughtWorksInc/CD4ML-Scenarios
Loading