|
75 | 75 | }, |
76 | 76 | "outputs": [], |
77 | 77 | "source": [ |
78 | | - "#%pip install -U -r requirements.txt" |
| 78 | + "%pip install -U mlrun openai transformers datasets trl peft bitsandbytes sentencepiece " |
79 | 79 | ] |
80 | 80 | }, |
81 | 81 | { |
82 | 82 | "cell_type": "code", |
83 | | - "execution_count": 1, |
| 83 | + "execution_count": null, |
| 84 | + "id": "a618cb8f-f34d-4ae2-8fc8-2da00c56e601", |
| 85 | + "metadata": {}, |
| 86 | + "outputs": [], |
| 87 | + "source": [ |
| 88 | + "%pip install deepeval==1.1.9" |
| 89 | + ] |
| 90 | + }, |
| 91 | + { |
| 92 | + "cell_type": "code", |
| 93 | + "execution_count": null, |
| 94 | + "id": "7b2735a2-2f36-4a4b-9474-5ded80304274", |
| 95 | + "metadata": {}, |
| 96 | + "outputs": [], |
| 97 | + "source": [ |
| 98 | + "%pip install \"protobuf<3.20\"" |
| 99 | + ] |
| 100 | + }, |
| 101 | + { |
| 102 | + "cell_type": "code", |
| 103 | + "execution_count": null, |
84 | 104 | "id": "c1c99e64-b5a2-45c8-83f3-eda2e0d79cb2", |
85 | 105 | "metadata": { |
86 | 106 | "tags": [] |
|
90 | 110 | "import os\n", |
91 | 111 | "import random\n", |
92 | 112 | "import time\n", |
93 | | - "import dotenv \n", |
| 113 | + "import dotenv\n", |
94 | 114 | "import pandas as pd\n", |
95 | 115 | "from tqdm.notebook import tqdm\n", |
96 | 116 | "from datasets import load_dataset\n", |
97 | 117 | "\n", |
98 | 118 | "import mlrun\n", |
99 | 119 | "from mlrun.features import Feature # To log the model with inputs and outputs information\n", |
100 | | - "import mlrun.common.schemas.alert as alert_constants # To configure an alert\n", |
101 | | - "from mlrun.model_monitoring.helpers import get_result_instance_fqn # To configure an alert\n", |
102 | | - "\n", |
103 | 120 | "from src.llm_as_a_judge import OpenAIJudge\n", |
104 | 121 | "pd.set_option(\"display.max_colwidth\", None)" |
105 | 122 | ] |
|
157 | 174 | "project = mlrun.get_or_create_project(\n", |
158 | 175 | " name=\"llm-monitoring\",\n", |
159 | 176 | " parameters={\n", |
160 | | - " \"default_image\": \"gcr.io/iguazio/llm-serving:1.7.2\",\n", |
| 177 | + " \"default_image\": \"edmondg/llm-serving:1.8.0-rc11\",\n", |
161 | 178 | " \"node_selector\": {\"alpha.eksctl.io/nodegroup-name\": \"added-a10x4\"},\n", |
162 | 179 | " },\n", |
163 | 180 | " context=\"./src\",\n", |
|
902 | 919 | "id": "cd171097-960e-4971-8b2e-d2c371823fbd", |
903 | 920 | "metadata": {}, |
904 | 921 | "source": [ |
905 | | - "First log it:" |
| 922 | + "Note: The [gemma-2b](https://huggingface.co/google/gemma-2b) model by Google is publicly accessible, but if you want to use it then you\n", |
| 923 | + "have to first read and accept its terms and conditions. Alternatively, look for a different model and change the\n", |
| 924 | + "code of this demo.\n", |
| 925 | + "\n", |
| 926 | + "Second Note: The model serving implementation is done using `V2ModelServer`. This is a naive solution and will be replaced soon.\n", |
| 927 | + "\n", |
| 928 | + "Let's log it first:" |
906 | 929 | ] |
907 | 930 | }, |
908 | 931 | { |
|
929 | 952 | "base_model = \"google-gemma-2b\"\n", |
930 | 953 | "project.log_model(\n", |
931 | 954 | " base_model,\n", |
932 | | - " model_file=\"src/model-iris.pkl\",\n", |
| 955 | + " model_file=\"src/no-op.pkl\",\n", |
933 | 956 | " inputs=[Feature(value_type=\"str\", name=\"question\")],\n", |
934 | 957 | " outputs=[Feature(value_type=\"str\", name=\"answer\")],\n", |
935 | 958 | ")" |
|
1032 | 1055 | "deployment = serving_function.deploy()" |
1033 | 1056 | ] |
1034 | 1057 | }, |
1035 | | - { |
1036 | | - "cell_type": "markdown", |
1037 | | - "id": "84d6db53-6514-4af6-b6c8-8eecc5043f48", |
1038 | | - "metadata": {}, |
1039 | | - "source": [ |
1040 | | - "### 3.4. Configure an Alert" |
1041 | | - ] |
1042 | | - }, |
1043 | | - { |
1044 | | - "cell_type": "markdown", |
1045 | | - "id": "c30b25bf-028d-40b3-aa7b-275ad190ac80", |
1046 | | - "metadata": {}, |
1047 | | - "source": [ |
1048 | | - "Define an alert to be triggered on degradation of model performance." |
1049 | | - ] |
1050 | | - }, |
1051 | | - { |
1052 | | - "cell_type": "code", |
1053 | | - "execution_count": 26, |
1054 | | - "id": "fe9c4369-16c7-42b4-9057-6e623be63a09", |
1055 | | - "metadata": { |
1056 | | - "tags": [] |
1057 | | - }, |
1058 | | - "outputs": [], |
1059 | | - "source": [ |
1060 | | - "app_name = \"llm-as-a-judge\"\n", |
1061 | | - "result_name = \"restrict-to-banking\"\n", |
1062 | | - "message = \"Model perf detected\"\n", |
1063 | | - "alert_config_name = \"restrict-to-banking\"\n", |
1064 | | - "dummy_url = \"dummy-webhook.default-tenant.app.llm-dev.iguazio-cd1.com\"" |
1065 | | - ] |
1066 | | - }, |
1067 | | - { |
1068 | | - "cell_type": "code", |
1069 | | - "execution_count": 27, |
1070 | | - "id": "27ee93a4-b296-42a6-9f2d-d9ed549670c9", |
1071 | | - "metadata": { |
1072 | | - "tags": [] |
1073 | | - }, |
1074 | | - "outputs": [], |
1075 | | - "source": [ |
1076 | | - "# Get Endpoint ID:\n", |
1077 | | - "endpoints = mlrun.get_run_db().list_model_endpoints(project=project.name, model=\"\")\n", |
1078 | | - "ep_id = endpoints[0].metadata.uid" |
1079 | | - ] |
1080 | | - }, |
1081 | | - { |
1082 | | - "cell_type": "code", |
1083 | | - "execution_count": 28, |
1084 | | - "id": "6144ddc2-5552-4670-ba15-c21b19b4164f", |
1085 | | - "metadata": { |
1086 | | - "tags": [] |
1087 | | - }, |
1088 | | - "outputs": [], |
1089 | | - "source": [ |
1090 | | - "prj_alert_obj = get_result_instance_fqn(\n", |
1091 | | - " ep_id, app_name=app_name, result_name=result_name\n", |
1092 | | - ")\n", |
1093 | | - "\n", |
1094 | | - "webhook_notification = mlrun.common.schemas.Notification(\n", |
1095 | | - " name=\"webhook\",\n", |
1096 | | - " kind=\"webhook\",\n", |
1097 | | - " params={\"url\": dummy_url},\n", |
1098 | | - " when=[\"completed\", \"error\"],\n", |
1099 | | - " severity=\"debug\",\n", |
1100 | | - " message=\"Model perf detected\",\n", |
1101 | | - " condition=\"\",\n", |
1102 | | - ")" |
1103 | | - ] |
1104 | | - }, |
1105 | | - { |
1106 | | - "cell_type": "code", |
1107 | | - "execution_count": 29, |
1108 | | - "id": "ea519ff5-0d4c-4f39-bd00-57c77b54fff4", |
1109 | | - "metadata": {}, |
1110 | | - "outputs": [], |
1111 | | - "source": [ |
1112 | | - "import mlrun.common.schemas.alert as alert_objects" |
1113 | | - ] |
1114 | | - }, |
1115 | | - { |
1116 | | - "cell_type": "code", |
1117 | | - "execution_count": 30, |
1118 | | - "id": "eecfcf75-d01f-49c7-92da-32b22c87f206", |
1119 | | - "metadata": {}, |
1120 | | - "outputs": [], |
1121 | | - "source": [ |
1122 | | - "alert_config = mlrun.alerts.alert.AlertConfig(\n", |
1123 | | - " project=project.name,\n", |
1124 | | - " name=alert_config_name,\n", |
1125 | | - " summary=alert_config_name,\n", |
1126 | | - " severity=alert_constants.AlertSeverity.HIGH,\n", |
1127 | | - " entities=alert_constants.EventEntities(\n", |
1128 | | - " kind=alert_constants.EventEntityKind.MODEL_ENDPOINT_RESULT,\n", |
1129 | | - " project=project.name,\n", |
1130 | | - " ids=[prj_alert_obj],\n", |
1131 | | - " ),\n", |
1132 | | - " trigger=alert_constants.AlertTrigger(\n", |
1133 | | - " events=[alert_objects.EventKind.MODEL_PERFORMANCE_DETECTED, alert_objects.EventKind.MODEL_PERFORMANCE_SUSPECTED]\n", |
1134 | | - " ),\n", |
1135 | | - " criteria=alert_constants.AlertCriteria(count=1, period=\"10m\"),\n", |
1136 | | - " notifications=[\n", |
1137 | | - " alert_constants.AlertNotification(notification=webhook_notification)\n", |
1138 | | - " ],\n", |
1139 | | - " reset_policy=mlrun.common.schemas.alert.ResetPolicy.MANUAL,\n", |
1140 | | - ")" |
1141 | | - ] |
1142 | | - }, |
1143 | | - { |
1144 | | - "cell_type": "code", |
1145 | | - "execution_count": 31, |
1146 | | - "id": "e18d85fb-f146-4923-9372-49a890dd25e8", |
1147 | | - "metadata": {}, |
1148 | | - "outputs": [ |
1149 | | - { |
1150 | | - "name": "stdout", |
1151 | | - "output_type": "stream", |
1152 | | - "text": [ |
1153 | | - "> 2025-02-04 10:01:54,214 [warning] Alerts are disabled, alert will still be stored but will not be triggered\n" |
1154 | | - ] |
1155 | | - }, |
1156 | | - { |
1157 | | - "data": { |
1158 | | - "text/plain": [ |
1159 | | - "<mlrun.alerts.alert.AlertConfig at 0x7f93f879e250>" |
1160 | | - ] |
1161 | | - }, |
1162 | | - "execution_count": 31, |
1163 | | - "metadata": {}, |
1164 | | - "output_type": "execute_result" |
1165 | | - } |
1166 | | - ], |
1167 | | - "source": [ |
1168 | | - "project.store_alert_config(alert_config)" |
1169 | | - ] |
1170 | | - }, |
1171 | 1058 | { |
1172 | 1059 | "cell_type": "markdown", |
1173 | 1060 | "id": "e11348e6-e53a-4e5e-a680-7c18f4298316", |
1174 | 1061 | "metadata": {}, |
1175 | 1062 | "source": [ |
1176 | | - "### 3.5. Check the Performance of the Base Model\n", |
| 1063 | + "### 3.4. Check the Performance of the Base Model\n", |
1177 | 1064 | "\n", |
1178 | 1065 | "To evaluate the base model, ask it a number of questions and give it some requests. \n", |
1179 | 1066 | "\n", |
|
1268 | 1155 | "" |
1269 | 1156 | ] |
1270 | 1157 | }, |
| 1158 | + { |
| 1159 | + "cell_type": "markdown", |
| 1160 | + "id": "5788aa8c-9f22-48e4-8896-602ad273b3ce", |
| 1161 | + "metadata": {}, |
| 1162 | + "source": [ |
| 1163 | + "You can also check out the model endpoint screen under projects:" |
| 1164 | + ] |
| 1165 | + }, |
| 1166 | + { |
| 1167 | + "cell_type": "markdown", |
| 1168 | + "id": "2865c395-9b61-4301-8844-9597225856aa", |
| 1169 | + "metadata": {}, |
| 1170 | + "source": [ |
| 1171 | + "" |
| 1172 | + ] |
| 1173 | + }, |
1271 | 1174 | { |
1272 | 1175 | "cell_type": "markdown", |
1273 | 1176 | "id": "635f8310-4efb-4ade-a54a-646b5af9b690", |
|
1281 | 1184 | "id": "80851fb2-9911-4976-8cd4-298c7a6b6938", |
1282 | 1185 | "metadata": {}, |
1283 | 1186 | "source": [ |
1284 | | - "### 3.6 Evaluate the model using DeepEval" |
| 1187 | + "### 3.5. Evaluate the model using DeepEval" |
1285 | 1188 | ] |
1286 | 1189 | }, |
1287 | 1190 | { |
|
2543 | 2446 | " watch=False,\n", |
2544 | 2447 | ")" |
2545 | 2448 | ] |
2546 | | - }, |
2547 | | - { |
2548 | | - "cell_type": "code", |
2549 | | - "execution_count": null, |
2550 | | - "id": "dea99e0a-6fd0-4c4c-92c1-f6d551ea5e0a", |
2551 | | - "metadata": {}, |
2552 | | - "outputs": [], |
2553 | | - "source": [] |
2554 | 2449 | } |
2555 | 2450 | ], |
2556 | 2451 | "metadata": { |
2557 | 2452 | "kernelspec": { |
2558 | | - "display_name": "test-mlrun-172", |
| 2453 | + "display_name": "mlrun-base", |
2559 | 2454 | "language": "python", |
2560 | | - "name": "conda-env-.conda-test-mlrun-172-py" |
| 2455 | + "name": "conda-env-mlrun-base-py" |
2561 | 2456 | }, |
2562 | 2457 | "language_info": { |
2563 | 2458 | "codemirror_mode": { |
|
2569 | 2464 | "name": "python", |
2570 | 2465 | "nbconvert_exporter": "python", |
2571 | 2466 | "pygments_lexer": "ipython3", |
2572 | | - "version": "3.9.21" |
| 2467 | + "version": "3.9.18" |
2573 | 2468 | } |
2574 | 2469 | }, |
2575 | 2470 | "nbformat": 4, |
|
0 commit comments