Skip to content

Commit ae96356

Browse files
Merge pull request #8 from EdmondIguazio/demo-changes-new
Changes by review
2 parents 26825c8 + 9dce085 commit ae96356

File tree

7 files changed

+55
-187
lines changed

7 files changed

+55
-187
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Banking LLM monitoring and feedback loop demo
22

3-
This demo showcases how to train, deploy, and monitor LLM using an approach described as [LLM as a judge](https://www.confident-ai.com/blog/why-llm-as-a-judge-is-the-best-llm-evaluation-method).
3+
This demo showcases how to train, deploy, and monitor LLM using an approach described as [LLM as a judge](https://www.mlrun.org/blog/llm-as-a-judge-practical-example-with-open-source-mlrun/).
44

55
This demo illustrates training an open-source model to answer banking-related questions only. It does this by analyzing the responses that were generated by the model traffic, and retraining the model according to the performance. The model performance analysis is done by a separate LLM that judges the results. Once the dataset is large enough, you can then retrain the model and measure the performance again.
66

images/feedback_loop.png

-35.4 KB
Loading

images/model_endpoint.png

92.9 KB
Loading

llm-monitoring-main.ipynb

Lines changed: 53 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,32 @@
7575
},
7676
"outputs": [],
7777
"source": [
78-
"#%pip install -U -r requirements.txt"
78+
"%pip install -U mlrun openai transformers datasets trl peft bitsandbytes sentencepiece "
7979
]
8080
},
8181
{
8282
"cell_type": "code",
83-
"execution_count": 1,
83+
"execution_count": null,
84+
"id": "a618cb8f-f34d-4ae2-8fc8-2da00c56e601",
85+
"metadata": {},
86+
"outputs": [],
87+
"source": [
88+
"%pip install deepeval==1.1.9"
89+
]
90+
},
91+
{
92+
"cell_type": "code",
93+
"execution_count": null,
94+
"id": "7b2735a2-2f36-4a4b-9474-5ded80304274",
95+
"metadata": {},
96+
"outputs": [],
97+
"source": [
98+
"%pip install \"protobuf<3.20\""
99+
]
100+
},
101+
{
102+
"cell_type": "code",
103+
"execution_count": null,
84104
"id": "c1c99e64-b5a2-45c8-83f3-eda2e0d79cb2",
85105
"metadata": {
86106
"tags": []
@@ -90,16 +110,13 @@
90110
"import os\n",
91111
"import random\n",
92112
"import time\n",
93-
"import dotenv \n",
113+
"import dotenv\n",
94114
"import pandas as pd\n",
95115
"from tqdm.notebook import tqdm\n",
96116
"from datasets import load_dataset\n",
97117
"\n",
98118
"import mlrun\n",
99119
"from mlrun.features import Feature # To log the model with inputs and outputs information\n",
100-
"import mlrun.common.schemas.alert as alert_constants # To configure an alert\n",
101-
"from mlrun.model_monitoring.helpers import get_result_instance_fqn # To configure an alert\n",
102-
"\n",
103120
"from src.llm_as_a_judge import OpenAIJudge\n",
104121
"pd.set_option(\"display.max_colwidth\", None)"
105122
]
@@ -157,7 +174,7 @@
157174
"project = mlrun.get_or_create_project(\n",
158175
" name=\"llm-monitoring\",\n",
159176
" parameters={\n",
160-
" \"default_image\": \"gcr.io/iguazio/llm-serving:1.7.2\",\n",
177+
" \"default_image\": \"edmondg/llm-serving:1.8.0-rc11\",\n",
161178
" \"node_selector\": {\"alpha.eksctl.io/nodegroup-name\": \"added-a10x4\"},\n",
162179
" },\n",
163180
" context=\"./src\",\n",
@@ -902,7 +919,13 @@
902919
"id": "cd171097-960e-4971-8b2e-d2c371823fbd",
903920
"metadata": {},
904921
"source": [
905-
"First log it:"
922+
"Note: The [gemma-2b](https://huggingface.co/google/gemma-2b) model by Google is publicly accessible, but if you want to use it then you\n",
923+
"have to first read and accept its terms and conditions. Alternatively, look for a different model and change the\n",
924+
"code of this demo.\n",
925+
"\n",
926+
"Second Note: The model serving implementation is done using `V2ModelServer`. This is a naive solution and will be replaced soon.\n",
927+
"\n",
928+
"Let's log it first:"
906929
]
907930
},
908931
{
@@ -929,7 +952,7 @@
929952
"base_model = \"google-gemma-2b\"\n",
930953
"project.log_model(\n",
931954
" base_model,\n",
932-
" model_file=\"src/model-iris.pkl\",\n",
955+
" model_file=\"src/no-op.pkl\",\n",
933956
" inputs=[Feature(value_type=\"str\", name=\"question\")],\n",
934957
" outputs=[Feature(value_type=\"str\", name=\"answer\")],\n",
935958
")"
@@ -1032,148 +1055,12 @@
10321055
"deployment = serving_function.deploy()"
10331056
]
10341057
},
1035-
{
1036-
"cell_type": "markdown",
1037-
"id": "84d6db53-6514-4af6-b6c8-8eecc5043f48",
1038-
"metadata": {},
1039-
"source": [
1040-
"### 3.4. Configure an Alert"
1041-
]
1042-
},
1043-
{
1044-
"cell_type": "markdown",
1045-
"id": "c30b25bf-028d-40b3-aa7b-275ad190ac80",
1046-
"metadata": {},
1047-
"source": [
1048-
"Define an alert to be triggered on degradation of model performance."
1049-
]
1050-
},
1051-
{
1052-
"cell_type": "code",
1053-
"execution_count": 26,
1054-
"id": "fe9c4369-16c7-42b4-9057-6e623be63a09",
1055-
"metadata": {
1056-
"tags": []
1057-
},
1058-
"outputs": [],
1059-
"source": [
1060-
"app_name = \"llm-as-a-judge\"\n",
1061-
"result_name = \"restrict-to-banking\"\n",
1062-
"message = \"Model perf detected\"\n",
1063-
"alert_config_name = \"restrict-to-banking\"\n",
1064-
"dummy_url = \"dummy-webhook.default-tenant.app.llm-dev.iguazio-cd1.com\""
1065-
]
1066-
},
1067-
{
1068-
"cell_type": "code",
1069-
"execution_count": 27,
1070-
"id": "27ee93a4-b296-42a6-9f2d-d9ed549670c9",
1071-
"metadata": {
1072-
"tags": []
1073-
},
1074-
"outputs": [],
1075-
"source": [
1076-
"# Get Endpoint ID:\n",
1077-
"endpoints = mlrun.get_run_db().list_model_endpoints(project=project.name, model=\"\")\n",
1078-
"ep_id = endpoints[0].metadata.uid"
1079-
]
1080-
},
1081-
{
1082-
"cell_type": "code",
1083-
"execution_count": 28,
1084-
"id": "6144ddc2-5552-4670-ba15-c21b19b4164f",
1085-
"metadata": {
1086-
"tags": []
1087-
},
1088-
"outputs": [],
1089-
"source": [
1090-
"prj_alert_obj = get_result_instance_fqn(\n",
1091-
" ep_id, app_name=app_name, result_name=result_name\n",
1092-
")\n",
1093-
"\n",
1094-
"webhook_notification = mlrun.common.schemas.Notification(\n",
1095-
" name=\"webhook\",\n",
1096-
" kind=\"webhook\",\n",
1097-
" params={\"url\": dummy_url},\n",
1098-
" when=[\"completed\", \"error\"],\n",
1099-
" severity=\"debug\",\n",
1100-
" message=\"Model perf detected\",\n",
1101-
" condition=\"\",\n",
1102-
")"
1103-
]
1104-
},
1105-
{
1106-
"cell_type": "code",
1107-
"execution_count": 29,
1108-
"id": "ea519ff5-0d4c-4f39-bd00-57c77b54fff4",
1109-
"metadata": {},
1110-
"outputs": [],
1111-
"source": [
1112-
"import mlrun.common.schemas.alert as alert_objects"
1113-
]
1114-
},
1115-
{
1116-
"cell_type": "code",
1117-
"execution_count": 30,
1118-
"id": "eecfcf75-d01f-49c7-92da-32b22c87f206",
1119-
"metadata": {},
1120-
"outputs": [],
1121-
"source": [
1122-
"alert_config = mlrun.alerts.alert.AlertConfig(\n",
1123-
" project=project.name,\n",
1124-
" name=alert_config_name,\n",
1125-
" summary=alert_config_name,\n",
1126-
" severity=alert_constants.AlertSeverity.HIGH,\n",
1127-
" entities=alert_constants.EventEntities(\n",
1128-
" kind=alert_constants.EventEntityKind.MODEL_ENDPOINT_RESULT,\n",
1129-
" project=project.name,\n",
1130-
" ids=[prj_alert_obj],\n",
1131-
" ),\n",
1132-
" trigger=alert_constants.AlertTrigger(\n",
1133-
" events=[alert_objects.EventKind.MODEL_PERFORMANCE_DETECTED, alert_objects.EventKind.MODEL_PERFORMANCE_SUSPECTED]\n",
1134-
" ),\n",
1135-
" criteria=alert_constants.AlertCriteria(count=1, period=\"10m\"),\n",
1136-
" notifications=[\n",
1137-
" alert_constants.AlertNotification(notification=webhook_notification)\n",
1138-
" ],\n",
1139-
" reset_policy=mlrun.common.schemas.alert.ResetPolicy.MANUAL,\n",
1140-
")"
1141-
]
1142-
},
1143-
{
1144-
"cell_type": "code",
1145-
"execution_count": 31,
1146-
"id": "e18d85fb-f146-4923-9372-49a890dd25e8",
1147-
"metadata": {},
1148-
"outputs": [
1149-
{
1150-
"name": "stdout",
1151-
"output_type": "stream",
1152-
"text": [
1153-
"> 2025-02-04 10:01:54,214 [warning] Alerts are disabled, alert will still be stored but will not be triggered\n"
1154-
]
1155-
},
1156-
{
1157-
"data": {
1158-
"text/plain": [
1159-
"<mlrun.alerts.alert.AlertConfig at 0x7f93f879e250>"
1160-
]
1161-
},
1162-
"execution_count": 31,
1163-
"metadata": {},
1164-
"output_type": "execute_result"
1165-
}
1166-
],
1167-
"source": [
1168-
"project.store_alert_config(alert_config)"
1169-
]
1170-
},
11711058
{
11721059
"cell_type": "markdown",
11731060
"id": "e11348e6-e53a-4e5e-a680-7c18f4298316",
11741061
"metadata": {},
11751062
"source": [
1176-
"### 3.5. Check the Performance of the Base Model\n",
1063+
"### 3.4. Check the Performance of the Base Model\n",
11771064
"\n",
11781065
"To evaluate the base model, ask it a number of questions and give it some requests. \n",
11791066
"\n",
@@ -1268,6 +1155,22 @@
12681155
"![](./images/grafana_before.png)"
12691156
]
12701157
},
1158+
{
1159+
"cell_type": "markdown",
1160+
"id": "5788aa8c-9f22-48e4-8896-602ad273b3ce",
1161+
"metadata": {},
1162+
"source": [
1163+
"You can also check out the model endpoint screen under projects:"
1164+
]
1165+
},
1166+
{
1167+
"cell_type": "markdown",
1168+
"id": "2865c395-9b61-4301-8844-9597225856aa",
1169+
"metadata": {},
1170+
"source": [
1171+
"![](./images/model_endpoint.png)"
1172+
]
1173+
},
12711174
{
12721175
"cell_type": "markdown",
12731176
"id": "635f8310-4efb-4ade-a54a-646b5af9b690",
@@ -1281,7 +1184,7 @@
12811184
"id": "80851fb2-9911-4976-8cd4-298c7a6b6938",
12821185
"metadata": {},
12831186
"source": [
1284-
"### 3.6 Evaluate the model using DeepEval"
1187+
"### 3.5 Evaluate the model using DeepEval"
12851188
]
12861189
},
12871190
{
@@ -2543,21 +2446,13 @@
25432446
" watch=False,\n",
25442447
")"
25452448
]
2546-
},
2547-
{
2548-
"cell_type": "code",
2549-
"execution_count": null,
2550-
"id": "dea99e0a-6fd0-4c4c-92c1-f6d551ea5e0a",
2551-
"metadata": {},
2552-
"outputs": [],
2553-
"source": []
25542449
}
25552450
],
25562451
"metadata": {
25572452
"kernelspec": {
2558-
"display_name": "test-mlrun-172",
2453+
"display_name": "mlrun-base",
25592454
"language": "python",
2560-
"name": "conda-env-.conda-test-mlrun-172-py"
2455+
"name": "conda-env-mlrun-base-py"
25612456
},
25622457
"language_info": {
25632458
"codemirror_mode": {
@@ -2569,7 +2464,7 @@
25692464
"name": "python",
25702465
"nbconvert_exporter": "python",
25712466
"pygments_lexer": "ipython3",
2572-
"version": "3.9.21"
2467+
"version": "3.9.18"
25732468
}
25742469
},
25752470
"nbformat": 4,

src/metric_sample.py

Lines changed: 0 additions & 15 deletions
This file was deleted.

src/model-iris.pkl

-171 KB
Binary file not shown.

src/project_setup.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,6 @@ def setup(
4040
# Unpack parameters:
4141
source = project.get_param(key="source")
4242
default_image = project.get_param(key="default_image")
43-
node_selector = project.get_param(key="node_selector", default=None)
44-
# gpus = project.get_param(key="gpus", default=0)
45-
node_name = project.get_param(key="node_name", default=None)
4643

4744
# Set the project git source:
4845
if source:
@@ -55,7 +52,7 @@ def setup(
5552
_build_image(project=project)
5653
else:
5754
project.set_default_image(default_image)
58-
55+
5956
# Set functions
6057
_set_function(
6158
project=project,
@@ -75,15 +72,6 @@ def setup(
7572
node_selector=node_selector,
7673
node_name=node_name,
7774
)
78-
_set_function(
79-
project=project,
80-
func="metric_sample.py",
81-
name="metric-sample",
82-
kind="job",
83-
image="mlrun/mlrun",
84-
node_selector=node_selector,
85-
node_name=node_name,
86-
)
8775
_set_function(
8876
project=project,
8977
func="generate_ds.py",

0 commit comments

Comments
 (0)