Iss26 #44

Open
wants to merge 2 commits into master

30 changes: 28 additions & 2 deletions SageMaker-ModelMonitoring.ipynb
@@ -32,7 +32,6 @@
"execution_count": 1,
"source": [
"%%time\n",
"# cell 01\n",
"\n",
"# Handful of configuration\n",
"\n",
@@ -98,6 +97,7 @@
"execution_count": 2,
"source": [
"# cell 02\n",
"\n",
"# Upload some test files\n",
"boto3.Session().resource('s3').Bucket(bucket).Object(\"test_upload/test.txt\").upload_file('test_data/upload-test-file.txt')\n",
"print(\"Success! You are all set to proceed.\")"
@@ -129,6 +129,7 @@
"execution_count": 3,
"source": [
"# cell 03\n",
"\n",
"model_file = open(\"model/xgb-churn-prediction-model.tar.gz\", 'rb')\n",
"s3_key = os.path.join(prefix, 'xgb-churn-prediction-model.tar.gz')\n",
"boto3.Session().resource('s3').Bucket(bucket).Object(s3_key).upload_fileobj(model_file)"
@@ -149,6 +150,7 @@
"execution_count": 4,
"source": [
"# cell 04\n",
"\n",
"from time import gmtime, strftime\n",
"from sagemaker.model import Model\n",
"from sagemaker.image_uris import retrieve\n",
@@ -174,6 +176,7 @@
"execution_count": 5,
"source": [
"# cell 05\n",
"\n",
"from sagemaker.model_monitor import DataCaptureConfig\n",
"\n",
"endpoint_name = 'DEMO-xgb-churn-pred-model-monitor-' + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n",
@@ -222,6 +225,7 @@
"execution_count": 6,
"source": [
"# cell 06\n",
"\n",
"from sagemaker.predictor import Predictor\n",
"import sagemaker\n",
"import time\n",
@@ -269,6 +273,7 @@
"execution_count": 7,
"source": [
"# cell 07\n",
"\n",
"s3_client = boto3.Session().client('s3')\n",
"current_endpoint_capture_prefix = '{}/{}'.format(data_capture_prefix, endpoint_name)\n",
"result = s3_client.list_objects(Bucket=bucket, Prefix=current_endpoint_capture_prefix)\n",
@@ -303,6 +308,7 @@
"execution_count": 8,
"source": [
"# cell 08\n",
"\n",
"def get_obj_body(obj_key):\n",
" return s3_client.get_object(Bucket=bucket, Key=obj_key).get('Body').read().decode(\"utf-8\")\n",
"\n",
@@ -335,6 +341,7 @@
"execution_count": 9,
"source": [
"# cell 09\n",
"\n",
"import json\n",
"print(json.dumps(json.loads(capture_file.split('\\n')[0]), indent=2))"
],
@@ -417,6 +424,7 @@
"execution_count": 10,
"source": [
"# cell 10\n",
"\n",
"# copy over the training dataset to Amazon S3 (if you already have it in Amazon S3, you could reuse it)\n",
"baseline_prefix = prefix + '/baselining'\n",
"baseline_data_prefix = baseline_prefix + '/data'\n",
@@ -444,6 +452,7 @@
"execution_count": 11,
"source": [
"# cell 11\n",
"\n",
"training_data_file = open(\"test_data/training-dataset-with-header.csv\", 'rb')\n",
"s3_key = os.path.join(baseline_prefix, 'data', 'training-dataset-with-header.csv')\n",
"boto3.Session().resource('s3').Bucket(bucket).Object(s3_key).upload_fileobj(training_data_file)"
@@ -470,6 +479,7 @@
"execution_count": 12,
"source": [
"# cell 12\n",
"\n",
"from sagemaker.model_monitor import DefaultModelMonitor\n",
"from sagemaker.model_monitor.dataset_format import DatasetFormat\n",
"\n",
@@ -3961,6 +3971,7 @@
"execution_count": 13,
"source": [
"# cell 13\n",
"\n",
"s3_client = boto3.Session().client('s3')\n",
"result = s3_client.list_objects(Bucket=bucket, Prefix=baseline_results_prefix)\n",
"report_files = [report_file.get(\"Key\") for report_file in result.get('Contents')]\n",
@@ -3985,6 +3996,7 @@
"execution_count": 14,
"source": [
"# cell 14\n",
"\n",
"import pandas as pd\n",
"\n",
"baseline_job = my_default_monitor.latest_baselining_job\n",
@@ -4303,6 +4315,7 @@
"execution_count": 15,
"source": [
"# cell 15\n",
"\n",
"constraints_df = pd.json_normalize(baseline_job.suggested_constraints().body_dict[\"features\"])\n",
"constraints_df.head(10)"
],
@@ -4451,6 +4464,7 @@
"execution_count": 16,
"source": [
"# cell 16\n",
"\n",
"# First, copy over some test scripts to the S3 bucket so that they can be used for pre and post processing\n",
"boto3.Session().resource('s3').Bucket(bucket).Object(code_prefix+\"/preprocessor.py\").upload_file('preprocessor.py')\n",
"boto3.Session().resource('s3').Bucket(bucket).Object(code_prefix+\"/postprocessor.py\").upload_file('postprocessor.py')"
@@ -4470,6 +4484,7 @@
"execution_count": 17,
"source": [
"# cell 17\n",
"\n",
"from sagemaker.model_monitor import CronExpressionGenerator\n",
"from time import gmtime, strftime\n",
"\n",
@@ -4503,6 +4518,7 @@
"execution_count": 18,
"source": [
"# cell 18\n",
"\n",
"from threading import Thread\n",
"from time import sleep\n",
"import time\n",
@@ -4546,6 +4562,7 @@
"execution_count": 19,
"source": [
"# cell 19\n",
"\n",
"desc_schedule_result = my_default_monitor.describe_schedule()\n",
"print('Schedule status: {}'.format(desc_schedule_result['MonitoringScheduleStatus']))"
],
@@ -4575,6 +4592,7 @@
"execution_count": 20,
"source": [
"# cell 20\n",
"\n",
"mon_executions = my_default_monitor.list_executions()\n",
"print(\"We created a hourly schedule above and it will kick off executions ON the hour (plus 0 - 20 min buffer.\\nWe will have to wait till we hit the hour...\")\n",
"\n",
@@ -4722,6 +4740,7 @@
"execution_count": 21,
"source": [
"# cell 21\n",
"\n",
"latest_execution = mon_executions[-1] # latest execution's index is -1, second to last is -2 and so on..\n",
"time.sleep(60)\n",
"latest_execution.wait(logs=False)\n",
@@ -4750,6 +4769,7 @@
"execution_count": 22,
"source": [
"# cell 22\n",
"\n",
"report_uri=latest_execution.output.destination\n",
"print('Report Uri: {}'.format(report_uri))"
],
@@ -4776,6 +4796,7 @@
"execution_count": 23,
"source": [
"# cell 23\n",
"\n",
"from urllib.parse import urlparse\n",
"s3uri = urlparse(report_uri)\n",
"report_bucket = s3uri.netloc\n",
@@ -4824,6 +4845,7 @@
"execution_count": 24,
"source": [
"# cell 24\n",
"\n",
"violations = my_default_monitor.latest_monitoring_constraint_violations()\n",
"pd.set_option('display.max_colwidth', None)\n",
"constraints_df = pd.json_normalize(violations.body_dict[\"violations\"])\n",
@@ -4967,6 +4989,7 @@
"execution_count": null,
"source": [
"# cell 25\n",
"\n",
"#my_default_monitor.stop_monitoring_schedule()\n",
"#my_default_monitor.start_monitoring_schedule()"
],
@@ -4989,6 +5012,7 @@
"execution_count": null,
"source": [
"# cell 26\n",
"\n",
"my_default_monitor.delete_monitoring_schedule()\n",
"time.sleep(60) # actually wait for the deletion"
],
@@ -5000,6 +5024,7 @@
"execution_count": null,
"source": [
"# cell 27\n",
"\n",
"predictor.delete_endpoint()"
],
"outputs": [],
@@ -5012,6 +5037,7 @@
"execution_count": null,
"source": [
"# cell 28\n",
"\n",
"predictor.delete_model()"
],
"outputs": [],
@@ -5042,4 +5068,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
22 changes: 17 additions & 5 deletions TaxiFare_Predict_Liner_Learner.ipynb
@@ -40,6 +40,7 @@
"outputs": [],
"source": [
"# cell 01\n",
"\n",
"!conda update pandas -y"
]
},
@@ -50,6 +51,7 @@
"outputs": [],
"source": [
"# cell 02\n",
"\n",
"import os\n",
"import boto3\n",
"import re\n",
@@ -64,6 +66,7 @@
"outputs": [],
"source": [
"# cell 03\n",
"\n",
"role = sagemaker.get_execution_role()\n",
"sess = sagemaker.Session()\n",
"region = boto3.Session().region_name\n",
@@ -128,6 +131,7 @@
],
"source": [
"# cell 04\n",
"\n",
"import boto3\n",
"FILE_TRAIN = \"nyc-taxi.csv\"\n",
"# s3 = boto3.client(\"s3\")\n",
@@ -179,6 +183,7 @@
],
"source": [
"# cell 05\n",
"\n",
"df.info()"
]
},
@@ -781,6 +786,7 @@
],
"source": [
"# cell 06\n",
"\n",
"# Frequency tables for each categorical feature\n",
"for column in df.select_dtypes(include=['object']).columns:\n",
" display(pd.crosstab(index=df[column], columns='% observations', normalize='columns'))\n",
@@ -835,6 +841,7 @@
],
"source": [
"# cell 07\n",
"\n",
"df = df.drop(['payment_type', 'store_and_fwd_flag'], axis=1)\n",
"df.info()"
]
@@ -875,6 +882,7 @@
],
"source": [
"# cell 08\n",
"\n",
"df['dropoff_datetime']= pd.to_datetime(df['dropoff_datetime'])\n",
"df['pickup_datetime']= pd.to_datetime(df['pickup_datetime'])\n",
"df['journey_time'] = (df['dropoff_datetime'] - df['pickup_datetime'])\n",
@@ -925,6 +933,7 @@
],
"source": [
"# cell 09\n",
"\n",
"df = df.drop(['dropoff_datetime', 'pickup_datetime'], axis=1)\n",
"df.info()"
]
@@ -973,6 +982,7 @@
],
"source": [
"# cell 10\n",
"\n",
"df = pd.get_dummies(df, dtype=float)\n",
"df.info()"
]
@@ -991,6 +1001,7 @@
"outputs": [],
"source": [
"# cell 11\n",
"\n",
"import numpy as np\n",
"\n",
"train_data, validation_data, test_data = np.split(df.sample(frac=1, random_state=1729), [int(0.7 * len(df)), int(0.9 * len(df))])\n",
@@ -1006,6 +1017,7 @@
"outputs": [],
"source": [
"# cell 12\n",
"\n",
"boto3.Session().resource('s3').Bucket(data_bucket).Object(os.path.join(data_prefix, 'train/train.csv')).upload_file('train.csv')\n",
"boto3.Session().resource('s3').Bucket(data_bucket).Object(os.path.join(data_prefix, 'validation/validation.csv')).upload_file('validation.csv')\n",
"boto3.Session().resource('s3').Bucket(data_bucket).Object(os.path.join(data_prefix, 'test/test.csv')).upload_file('test.csv')"
@@ -1038,6 +1050,7 @@
],
"source": [
"# cell 13\n",
"\n",
"# creating the inputs for the fit() function with the training and validation location\n",
"s3_train_data = f\"s3://{data_bucket}/{data_prefix}/train\"\n",
"print(f\"training files will be taken from: {s3_train_data}\")\n",
@@ -1096,6 +1109,7 @@
],
"source": [
"# cell 14\n",
"\n",
"# getting the linear learner image according to the region\n",
"from sagemaker.image_uris import retrieve\n",
"\n",
@@ -1119,7 +1133,6 @@
}
],
"source": [
"# cell 15\n",
"%%time\n",
"import boto3\n",
"import sagemaker\n",
@@ -3435,7 +3448,6 @@
}
],
"source": [
"# cell 16\n",
"%%time\n",
"linear.fit(inputs={\"train\": train_data, \"validation\": validation_data}, job_name=job_name)"
]
@@ -3466,7 +3478,6 @@
}
],
"source": [
"# cell 17\n",
"%%time\n",
"# creating the endpoint out of the trained model\n",
"linear_predictor = linear.deploy(initial_instance_count=1, instance_type=\"ml.c4.xlarge\")\n",
@@ -3496,6 +3507,7 @@
"outputs": [],
"source": [
"# cell 18\n",
"\n",
"# configure the predictor to accept to serialize csv input and parse the reposne as json\n",
"from sagemaker.serializers import CSVSerializer\n",
"from sagemaker.deserializers import JSONDeserializer\n",
@@ -3533,7 +3545,6 @@
}
],
"source": [
"# cell 19\n",
"%%time\n",
"import json\n",
"from itertools import islice\n",
@@ -3571,7 +3582,8 @@
"metadata": {},
"outputs": [],
"source": [
"# cell 20\n"
"# cell 20\n",
"\n"
]
}
],