Skip to content

Commit ae96356

Browse files
Merge pull request #8 from EdmondIguazio/demo-changes-new
Changes by review
2 parents 26825c8 + 9dce085 commit ae96356

File tree

7 files changed

+55
-187
lines changed

7 files changed

+55
-187
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Banking LLM monitoring and feedback loop demo
22

3-
This demo showcases how to train, deploy, and monitor LLM using an approach described as [LLM as a judge](https://www.confident-ai.com/blog/why-llm-as-a-judge-is-the-best-llm-evaluation-method).
3+
This demo showcases how to train, deploy, and monitor LLM using an approach described as [LLM as a judge](https://www.mlrun.org/blog/llm-as-a-judge-practical-example-with-open-source-mlrun/).
44

55
This demo illustrates training an open-source model to answer banking-related questions only. It does this by analyzing the responses that were generated by the model traffic, and retraining the model according to the performance. The model performance analysis is done by a separate LLM that judges the results. Once the dataset is large enough, you can then retrain the model and measure the performance again.
66

images/feedback_loop.png

-35.4 KB
Loading

images/model_endpoint.png

92.9 KB
Loading

llm-monitoring-main.ipynb

Lines changed: 53 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,32 @@
7575
},
7676
"outputs": [],
7777
"source": [
78-
"#%pip install -U -r requirements.txt"
78+
"%pip install -U mlrun openai transformers datasets trl peft bitsandbytes sentencepiece "
7979
]
8080
},
8181
{
8282
"cell_type": "code",
83-
"execution_count": 1,
83+
"execution_count": null,
84+
"id": "a618cb8f-f34d-4ae2-8fc8-2da00c56e601",
85+
"metadata": {},
86+
"outputs": [],
87+
"source": [
88+
"%pip install deepeval==1.1.9"
89+
]
90+
},
91+
{
92+
"cell_type": "code",
93+
"execution_count": null,
94+
"id": "7b2735a2-2f36-4a4b-9474-5ded80304274",
95+
"metadata": {},
96+
"outputs": [],
97+
"source": [
98+
"%pip install \"protobuf<3.20\""
99+
]
100+
},
101+
{
102+
"cell_type": "code",
103+
"execution_count": null,
84104
"id": "c1c99e64-b5a2-45c8-83f3-eda2e0d79cb2",
85105
"metadata": {
86106
"tags": []
@@ -90,16 +110,13 @@
90110
"import os\n",
91111
"import random\n",
92112
"import time\n",
93-
"import dotenv \n",
113+
"import dotenv\n",
94114
"import pandas as pd\n",
95115
"from tqdm.notebook import tqdm\n",
96116
"from datasets import load_dataset\n",
97117
"\n",
98118
"import mlrun\n",
99119
"from mlrun.features import Feature # To log the model with inputs and outputs information\n",
100-
"import mlrun.common.schemas.alert as alert_constants # To configure an alert\n",
101-
"from mlrun.model_monitoring.helpers import get_result_instance_fqn # To configure an alert\n",
102-
"\n",
103120
"from src.llm_as_a_judge import OpenAIJudge\n",
104121
"pd.set_option(\"display.max_colwidth\", None)"
105122
]
@@ -157,7 +174,7 @@
157174
"project = mlrun.get_or_create_project(\n",
158175
" name=\"llm-monitoring\",\n",
159176
" parameters={\n",
160-
" \"default_image\": \"gcr.io/iguazio/llm-serving:1.7.2\",\n",
177+
" \"default_image\": \"edmondg/llm-serving:1.8.0-rc11\",\n",
161178
" \"node_selector\": {\"alpha.eksctl.io/nodegroup-name\": \"added-a10x4\"},\n",
162179
" },\n",
163180
" context=\"./src\",\n",
@@ -902,7 +919,13 @@
902919
"id": "cd171097-960e-4971-8b2e-d2c371823fbd",
903920
"metadata": {},
904921
"source": [
905-
"First log it:"
922+
"Note: The [gemma-2b](https://huggingface.co/google/gemma-2b) model by Google is publicly accessible, but if you want to use it then you\n",
923+
"have to first read and accept its terms and conditions. Alternatively, look for a different model and change the\n",
924+
"code of this demo.\n",
925+
"\n",
926+
"Second Note: The model serving implementation is done using `V2ModelServer`. This is a naive solution and will be replaced soon.\n",
927+
"\n",
928+
"Let's log it first:"
906929
]
907930
},
908931
{
@@ -929,7 +952,7 @@
929952
"base_model = \"google-gemma-2b\"\n",
930953
"project.log_model(\n",
931954
" base_model,\n",
932-
" model_file=\"src/model-iris.pkl\",\n",
955+
" model_file=\"src/no-op.pkl\",\n",
933956
" inputs=[Feature(value_type=\"str\", name=\"question\")],\n",
934957
" outputs=[Feature(value_type=\"str\", name=\"answer\")],\n",
935958
")"
@@ -1032,148 +1055,12 @@
10321055
"deployment = serving_function.deploy()"
10331056
]
10341057
},
1035-
{
1036-
"cell_type": "markdown",
1037-
"id": "84d6db53-6514-4af6-b6c8-8eecc5043f48",
1038-
"metadata": {},
1039-
"source": [
1040-
"### 3.4. Configure an Alert"
1041-
]
1042-
},
1043-
{
1044-
"cell_type": "markdown",
1045-
"id": "c30b25bf-028d-40b3-aa7b-275ad190ac80",
1046-
"metadata": {},
1047-
"source": [
1048-
"Define an alert to be triggered on degradation of model performance."
1049-
]
1050-
},
1051-
{
1052-
"cell_type": "code",
1053-
"execution_count": 26,
1054-
"id": "fe9c4369-16c7-42b4-9057-6e623be63a09",
1055-
"metadata": {
1056-
"tags": []
1057-
},
1058-
"outputs": [],
1059-
"source": [
1060-
"app_name = \"llm-as-a-judge\"\n",
1061-
"result_name = \"restrict-to-banking\"\n",
1062-
"message = \"Model perf detected\"\n",
1063-
"alert_config_name = \"restrict-to-banking\"\n",
1064-
"dummy_url = \"dummy-webhook.default-tenant.app.llm-dev.iguazio-cd1.com\""
1065-
]
1066-
},
1067-
{
1068-
"cell_type": "code",
1069-
"execution_count": 27,
1070-
"id": "27ee93a4-b296-42a6-9f2d-d9ed549670c9",
1071-
"metadata": {
1072-
"tags": []
1073-
},
1074-
"outputs": [],
1075-
"source": [
1076-
"# Get Endpoint ID:\n",
1077-
"endpoints = mlrun.get_run_db().list_model_endpoints(project=project.name, model=\"\")\n",
1078-
"ep_id = endpoints[0].metadata.uid"
1079-
]
1080-
},
1081-
{
1082-
"cell_type": "code",
1083-
"execution_count": 28,
1084-
"id": "6144ddc2-5552-4670-ba15-c21b19b4164f",
1085-
"metadata": {
1086-
"tags": []
1087-
},
1088-
"outputs": [],
1089-
"source": [
1090-
"prj_alert_obj = get_result_instance_fqn(\n",
1091-
" ep_id, app_name=app_name, result_name=result_name\n",
1092-
")\n",
1093-
"\n",
1094-
"webhook_notification = mlrun.common.schemas.Notification(\n",
1095-
" name=\"webhook\",\n",
1096-
" kind=\"webhook\",\n",
1097-
" params={\"url\": dummy_url},\n",
1098-
" when=[\"completed\", \"error\"],\n",
1099-
" severity=\"debug\",\n",
1100-
" message=\"Model perf detected\",\n",
1101-
" condition=\"\",\n",
1102-
")"
1103-
]
1104-
},
1105-
{
1106-
"cell_type": "code",
1107-
"execution_count": 29,
1108-
"id": "ea519ff5-0d4c-4f39-bd00-57c77b54fff4",
1109-
"metadata": {},
1110-
"outputs": [],
1111-
"source": [
1112-
"import mlrun.common.schemas.alert as alert_objects"
1113-
]
1114-
},
1115-
{
1116-
"cell_type": "code",
1117-
"execution_count": 30,
1118-
"id": "eecfcf75-d01f-49c7-92da-32b22c87f206",
1119-
"metadata": {},
1120-
"outputs": [],
1121-
"source": [
1122-
"alert_config = mlrun.alerts.alert.AlertConfig(\n",
1123-
" project=project.name,\n",
1124-
" name=alert_config_name,\n",
1125-
" summary=alert_config_name,\n",
1126-
" severity=alert_constants.AlertSeverity.HIGH,\n",
1127-
" entities=alert_constants.EventEntities(\n",
1128-
" kind=alert_constants.EventEntityKind.MODEL_ENDPOINT_RESULT,\n",
1129-
" project=project.name,\n",
1130-
" ids=[prj_alert_obj],\n",
1131-
" ),\n",
1132-
" trigger=alert_constants.AlertTrigger(\n",
1133-
" events=[alert_objects.EventKind.MODEL_PERFORMANCE_DETECTED, alert_objects.EventKind.MODEL_PERFORMANCE_SUSPECTED]\n",
1134-
" ),\n",
1135-
" criteria=alert_constants.AlertCriteria(count=1, period=\"10m\"),\n",
1136-
" notifications=[\n",
1137-
" alert_constants.AlertNotification(notification=webhook_notification)\n",
1138-
" ],\n",
1139-
" reset_policy=mlrun.common.schemas.alert.ResetPolicy.MANUAL,\n",
1140-
")"
1141-
]
1142-
},
1143-
{
1144-
"cell_type": "code",
1145-
"execution_count": 31,
1146-
"id": "e18d85fb-f146-4923-9372-49a890dd25e8",
1147-
"metadata": {},
1148-
"outputs": [
1149-
{
1150-
"name": "stdout",
1151-
"output_type": "stream",
1152-
"text": [
1153-
"> 2025-02-04 10:01:54,214 [warning] Alerts are disabled, alert will still be stored but will not be triggered\n"
1154-
]
1155-
},
1156-
{
1157-
"data": {
1158-
"text/plain": [
1159-
"<mlrun.alerts.alert.AlertConfig at 0x7f93f879e250>"
1160-
]
1161-
},
1162-
"execution_count": 31,
1163-
"metadata": {},
1164-
"output_type": "execute_result"
1165-
}
1166-
],
1167-
"source": [
1168-
"project.store_alert_config(alert_config)"
1169-
]
1170-
},
11711058
{
11721059
"cell_type": "markdown",
11731060
"id": "e11348e6-e53a-4e5e-a680-7c18f4298316",
11741061
"metadata": {},
11751062
"source": [
1176-
"### 3.5. Check the Performance of the Base Model\n",
1063+
"### 3.4. Check the Performance of the Base Model\n",
11771064
"\n",
11781065
"To evaluate the base model, ask it a number of questions and give it some requests. \n",
11791066
"\n",
@@ -1268,6 +1155,22 @@
12681155
"![](./images/grafana_before.png)"
12691156
]
12701157
},
1158+
{
1159+
"cell_type": "markdown",
1160+
"id": "5788aa8c-9f22-48e4-8896-602ad273b3ce",
1161+
"metadata": {},
1162+
"source": [
1163+
"You can also check out the model endpoint screen under projects:"
1164+
]
1165+
},
1166+
{
1167+
"cell_type": "markdown",
1168+
"id": "2865c395-9b61-4301-8844-9597225856aa",
1169+
"metadata": {},
1170+
"source": [
1171+
"![](./images/model_endpoint.png)"
1172+
]
1173+
},
12711174
{
12721175
"cell_type": "markdown",
12731176
"id": "635f8310-4efb-4ade-a54a-646b5af9b690",
@@ -1281,7 +1184,7 @@
12811184
"id": "80851fb2-9911-4976-8cd4-298c7a6b6938",
12821185
"metadata": {},
12831186
"source": [
1284-
"### 3.6 Evaluate the model using DeepEval"
1187+
"### 3.5 Evaluate the model using DeepEval"
12851188
]
12861189
},
12871190
{
@@ -2543,21 +2446,13 @@
25432446
" watch=False,\n",
25442447
")"
25452448
]
2546-
},
2547-
{
2548-
"cell_type": "code",
2549-
"execution_count": null,
2550-
"id": "dea99e0a-6fd0-4c4c-92c1-f6d551ea5e0a",
2551-
"metadata": {},
2552-
"outputs": [],
2553-
"source": []
25542449
}
25552450
],
25562451
"metadata": {
25572452
"kernelspec": {
2558-
"display_name": "test-mlrun-172",
2453+
"display_name": "mlrun-base",
25592454
"language": "python",
2560-
"name": "conda-env-.conda-test-mlrun-172-py"
2455+
"name": "conda-env-mlrun-base-py"
25612456
},
25622457
"language_info": {
25632458
"codemirror_mode": {
@@ -2569,7 +2464,7 @@
25692464
"name": "python",
25702465
"nbconvert_exporter": "python",
25712466
"pygments_lexer": "ipython3",
2572-
"version": "3.9.21"
2467+
"version": "3.9.18"
25732468
}
25742469
},
25752470
"nbformat": 4,

src/metric_sample.py

Lines changed: 0 additions & 15 deletions
This file was deleted.

src/model-iris.pkl

-171 KB
Binary file not shown.

src/project_setup.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,6 @@ def setup(
4040
# Unpack parameters:
4141
source = project.get_param(key="source")
4242
default_image = project.get_param(key="default_image")
43-
node_selector = project.get_param(key="node_selector", default=None)
44-
# gpus = project.get_param(key="gpus", default=0)
45-
node_name = project.get_param(key="node_name", default=None)
4643

4744
# Set the project git source:
4845
if source:
@@ -55,7 +52,7 @@ def setup(
5552
_build_image(project=project)
5653
else:
5754
project.set_default_image(default_image)
58-
55+
5956
# Set functions
6057
_set_function(
6158
project=project,
@@ -75,15 +72,6 @@ def setup(
7572
node_selector=node_selector,
7673
node_name=node_name,
7774
)
78-
_set_function(
79-
project=project,
80-
func="metric_sample.py",
81-
name="metric-sample",
82-
kind="job",
83-
image="mlrun/mlrun",
84-
node_selector=node_selector,
85-
node_name=node_name,
86-
)
8775
_set_function(
8876
project=project,
8977
func="generate_ds.py",

0 commit comments

Comments
 (0)