mlrun
diff --git a/‎01_churn_ml_model.ipynb‎
Lines changed: 432 additions & 25 deletions b/‎01_churn_ml_model.ipynb‎
Lines changed: 432 additions & 25 deletions
diff --git a/‎02_guardrail_deployment.ipynb‎
Lines changed: 1 addition & 1 deletion b/‎02_guardrail_deployment.ipynb‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎03_application_deployment.ipynb‎
Lines changed: 107 additions & 88 deletions b/‎03_application_deployment.ipynb‎
Lines changed: 107 additions & 88 deletions
diff --git a/‎README.md‎
Lines changed: 2 additions & 0 deletions b/‎README.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎requirements.txt‎
Lines changed: 3 additions & 2 deletions b/‎requirements.txt‎
Lines changed: 3 additions & 2 deletions
@@ -17,7 +17,7 @@
    "source": [
     "# Guardrail deployment\n",
     "\n",
-    "The second part of the demo is to deploy guardrails to be used later in the application pipeline to filter user inputs. This notebook will also deploy an LLM as a Judge monitoring application to monitor our generative input guardrail for banking topic adherence.\n",
+    "The second part of the demo is to deploy guardrails to be used later in the application pipeline to filter user inputs. This notebook will also deploy an LLM as a Judge monitoring application to monitor our generative input guardrail for banking topic adherence. The user can choose whether to use an OpenAI remote model or a HuggingFace local model.\n",
     "\n",
     "In this notebook, you will:\n",
     "- Deploy multiple guardrail functions using HuggingFace or OpenAI models, including banking-topic and toxicity filters.\n",
 
@@ -27,9 +27,7 @@
    "id": "78f8c28c-0fe4-40a5-857d-6f7d9ebb0832",
    "metadata": {},
    "source": [
-    "import os\n",
     "import mlrun\n",
-    "from langchain_community.callbacks.uptrain_callback import handler\n",
     "from langchain_community.document_loaders import UnstructuredMarkdownLoader\n",
     "from langchain_openai import OpenAIEmbeddings\n",
     "from dotenv import load_dotenv\n",
@@ -105,7 +103,9 @@
    "source": [
     "warnings.filterwarnings(\"ignore\", category=DeprecationWarning, module=\"pkg_resources\")\n",
     "\n",
-    "if not os.environ.get(\"OPENAI_API_KEY\"):\n",
+    "openai_available = os.environ.get(\"OPENAI_API_KEY\")\n",
+    "\n",
+    "if not openai_available:\n",
     "    embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-mpnet-base-v2\")\n",
     "else:\n",
     "    embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n",
@@ -212,14 +212,13 @@
     "from mlrun.serving import ModelRunnerStep\n",
     "\n",
     "graph = agent_graph.set_topology(\"flow\", engine=\"async\", exist_ok=True)\n",
-    "# Step to process the data\n",
+    "# Step to process the input; it makes invocation simpler by requiring fewer arguments\n",
     "graph.add_step(\n",
     "    name=\"enrich_request\",\n",
     "    handler= \"enrich_request\",\n",
-    "    # full_event=True,\n",
     ")\n",
     "\n",
-    "# Topic and toxicity guardrail router\n",
+    "# Topic and toxicity guardrail router (from notebook 2)\n",
     "guardrails_router = graph.add_step(\n",
     "    \"*ParallelRunMerger\",\n",
     "    name=\"input-guardrails\",\n",
@@ -239,21 +238,22 @@
     "    method=\"POST\",\n",
     "    url=toxicity_guardrail.get_url(),\n",
     ")\n",
+    "\n",
+    "# Filter step: route the request to accept or reject\n",
     "graph.add_step(\n",
     "    name=\"guardrail-filter\",\n",
     "    class_name=\"GuardrailsChoice\",\n",
     "    mapping={\"True\": \"accept\", \"False\": \"reject\"},\n",
     "    after=\"input-guardrails\",\n",
     ")\n",
-    "graph.add_step(name=\"accept\", handler=\"accept\", after=\"guardrail-filter\")\n",
     "\n",
+    "graph.add_step(name=\"accept\", handler=\"accept\", after=\"guardrail-filter\")\n",
     "\n",
     "# Add model runner step to run the sentiment and churn analysis\n",
     "model_runner_step = ModelRunnerStep(\n",
     "    name=\"input-analysis\",\n",
     "    result_path=\"input_analysis_output\",\n",
     "    )\n",
-    "\n",
     "model_runner_step.add_model(\n",
     "    model_class=\"SentimentAnalysisModelServer\",\n",
     "    endpoint_name=\"sentiment_analysis_output\",\n",
@@ -270,16 +270,16 @@
     "    churn_mappings={\"high\": 0.50, \"medium\": 0.20, \"low\": 0},\n",
     "    result_path=\"churn_model_output\",)\n",
     "\n",
-    "graph.add_step(model_runner_step, after=[\"accept\"])\n",
+    "graph.add_step(model_runner_step, after=[\"accept\"], full_event= True,)\n",
     "\n",
     "\n",
     "graph.add_step(\n",
     "    name=\"build-context\",\n",
     "    class_name=\"BuildContext\",\n",
     "    context_mappings = {\n",
-    "      \"name\": \"name\",\n",
-    "      \"sentiment\": \"input_analysis_output.sentiment_analysis_output.response[0]\",\n",
-    "      \"churn\": \"input_analysis_output.churn_model_output.response[0]\",\n",
+    "        \"name\": \"sentiment_analysis_output.name\",  # name is nested inside sentiment_analysis_output\n",
+    "        \"sentiment\": \"sentiment_analysis_output.response[0]\",  # direct path, no input_analysis_output wrapper\n",
+    "        \"churn\": \"churn_model_output.response[0]\",  # direct path\n",
     "    },\n",
     "    output_key=\"formatted_prompt\",\n",
     "    prompt=\"\"\"\n",
@@ -296,16 +296,17 @@
     "    Use the sentiment to craft your response.\n",
     "    \"\"\",\n",
     "    after=\"input-analysis\",\n",
+    "    full_event= True,\n",
     ")\n",
     "# Add the BankingAgent LLM using HF or OpenAI (if OPENAI credentials)\n",
     "MRS_banking_agent = ModelRunnerStep(name=\"banking-agent\")\n",
     "\n",
-    "if not os.environ.get(\"OPENAI_API_KEY\"):\n",
+    "if not openai_available:\n",
     "    MRS_banking_agent.add_model(\n",
     "        model_class=\"BankingAgentHuggingFace\",\n",
     "        endpoint_name=\"BankingAgentHuggingFace\",\n",
     "        execution_mechanism=\"naive\",\n",
-    "        model_name=os.environ.get(\"HF_MODEL_NAME\", \"mistralai/Mistral-7B-Instruct-v0.2\"),\n",
+    "        model_name=os.environ.get(\"HF_MODEL_NAME\", \"Qwen/Qwen2.5-1.5B-Instruct\"),\n",
     "        prompt_input_key=\"formatted_prompt\",\n",
     "        messages_input_key=\"inputs\",\n",
     "        max_new_tokens=256,\n",
@@ -347,11 +348,20 @@
    "outputs": [],
    "execution_count": null
   },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "### Running the LLM locally is very resource-demanding and some systems can't do it, so we use the mock server only with OpenAI",
+   "id": "294bec722b745a15"
+  },
   {
    "cell_type": "code",
    "id": "a7b33baa-d143-49a9-8007-6bade49b6813",
    "metadata": {},
-   "source": "mock = agent_graph.to_mock_server()",
+   "source": [
+    "if openai_available:\n",
+    "    mock = agent_graph.to_mock_server()"
+   ],
    "outputs": [],
    "execution_count": null
   },
@@ -390,15 +400,16 @@
    "id": "a8f936c3-2776-4b4c-b18e-184a94fc4c8d",
    "metadata": {},
    "source": [
-    "resp = mock.test(\n",
-    "    path=\"/\",\n",
-    "    body={\n",
-    "        \"name\": \"John\",\n",
-    "        \"inputs\": [_format_question(\"What is a mortgage, from the bank?\")],\n",
-    "        \"user_id\": LOW_PROPENSITY_CHURN_USER_ID,\n",
-    "    },\n",
-    ")\n",
-    "print(resp[\"outputs\"][0])"
+    "if openai_available:\n",
+    "    resp = mock.test(\n",
+    "        path=\"/\",\n",
+    "        body={\n",
+    "            \"name\": \"John\",\n",
+    "            \"inputs\": [_format_question(\"What is a mortgage, from the bank?\")],\n",
+    "            \"user_id\": LOW_PROPENSITY_CHURN_USER_ID,\n",
+    "        },\n",
+    "    )\n",
+    "    print(resp[\"outputs\"][0])"
    ],
    "outputs": [],
    "execution_count": null
@@ -416,15 +427,16 @@
    "id": "6406eeea-1849-4aab-b6f0-a7e1d1f18138",
    "metadata": {},
    "source": [
-    "resp = mock.test(\n",
-    "    path=\"/\",\n",
-    "    body={\n",
-    "        \"name\": \"John\",\n",
-    "        \"inputs\": [_format_question(\"i hate you\")],\n",
-    "        \"user_id\": LOW_PROPENSITY_CHURN_USER_ID,\n",
-    "    },\n",
-    ")\n",
-    "print(resp[\"outputs\"][0])"
+    "if openai_available:\n",
+    "    resp = mock.test(\n",
+    "        path=\"/\",\n",
+    "        body={\n",
+    "            \"name\": \"John\",\n",
+    "            \"inputs\": [_format_question(\"i hate you\")],\n",
+    "            \"user_id\": LOW_PROPENSITY_CHURN_USER_ID,\n",
+    "        },\n",
+    "    )\n",
+    "    print(resp[\"outputs\"][0])"
    ],
    "outputs": [],
    "execution_count": null
@@ -450,15 +462,16 @@
    "id": "93faf1a7-48d4-46c5-8571-03b90d2ce7b9",
    "metadata": {},
    "source": [
-    "resp = mock.test(\n",
-    "    path=\"/\",\n",
-    "    body={\n",
-    "        \"name\": \"John\",\n",
-    "        \"inputs\": [_format_question(\"how to apply for checking account?\")],\n",
-    "        \"user_id\": LOW_PROPENSITY_CHURN_USER_ID,\n",
-    "    },\n",
-    ")\n",
-    "print(resp[\"outputs\"][0])"
+    "if openai_available:\n",
+    "    resp = mock.test(\n",
+    "        path=\"/\",\n",
+    "        body={\n",
+    "            \"name\": \"John\",\n",
+    "            \"inputs\": [_format_question(\"how to apply for checking account?\")],\n",
+    "            \"user_id\": LOW_PROPENSITY_CHURN_USER_ID,\n",
+    "        },\n",
+    "    )\n",
+    "    print(resp[\"outputs\"][0])"
    ],
    "outputs": [],
    "execution_count": null
@@ -476,19 +489,20 @@
    "id": "91d0ebe4-9c8c-4463-857e-1cd61ea42a1e",
    "metadata": {},
    "source": [
-    "resp = mock.test(\n",
-    "    path=\"/\",\n",
-    "    body={\n",
-    "        \"name\": \"John\",\n",
-    "        \"inputs\": [\n",
-    "            _format_question(\n",
-    "                \"how to apply for checking account? I keep trying but I'm really frustrated\"\n",
-    "            )\n",
-    "        ],\n",
-    "        \"user_id\": LOW_PROPENSITY_CHURN_USER_ID,\n",
-    "    },\n",
-    ")\n",
-    "print(resp[\"outputs\"][0])"
+    "if openai_available:\n",
+    "    resp = mock.test(\n",
+    "        path=\"/\",\n",
+    "        body={\n",
+    "            \"name\": \"John\",\n",
+    "            \"inputs\": [\n",
+    "                _format_question(\n",
+    "                    \"how to apply for checking account? I keep trying but I'm really frustrated\"\n",
+    "                )\n",
+    "            ],\n",
+    "            \"user_id\": LOW_PROPENSITY_CHURN_USER_ID,\n",
+    "        },\n",
+    "    )\n",
+    "    print(resp[\"outputs\"][0])"
    ],
    "outputs": [],
    "execution_count": null
@@ -514,22 +528,23 @@
    "id": "79d62a7c-02d7-41cc-8b23-a8a418284db3",
    "metadata": {},
    "source": [
-    "resp = mock.test(\n",
-    "    path=\"/\",\n",
-    "    body={\n",
-    "        \"name\": \"Alice\",\n",
-    "        \"inputs\": [\n",
-    "            {\"role\": \"user\", \"content\": \"Hi—how do I open a checking account?\"},\n",
-    "            {\n",
-    "                \"role\": \"assistant\",\n",
-    "                \"content\": \"To open a checking account, you need two forms of ID and a minimum deposit of $25.\",\n",
-    "            },\n",
-    "            {\"role\": \"user\", \"content\": \"Is it possible to get cashback rewards?\"},\n",
-    "        ],\n",
-    "        \"user_id\": HIGH_PROPENSITY_CHURN_USER_ID,  # <-- High churn propensity user\n",
-    "    },\n",
-    ")\n",
-    "print(resp[\"outputs\"][0])"
+    "if openai_available:\n",
+    "    resp = mock.test(\n",
+    "        path=\"/\",\n",
+    "        body={\n",
+    "            \"name\": \"Alice\",\n",
+    "            \"inputs\": [\n",
+    "                {\"role\": \"user\", \"content\": \"Hi—how do I open a checking account?\"},\n",
+    "                {\n",
+    "                    \"role\": \"assistant\",\n",
+    "                    \"content\": \"To open a checking account, you need two forms of ID and a minimum deposit of $25.\",\n",
+    "                },\n",
+    "                {\"role\": \"user\", \"content\": \"Is it possible to get cashback rewards?\"},\n",
+    "            ],\n",
+    "            \"user_id\": HIGH_PROPENSITY_CHURN_USER_ID,  # <-- High churn propensity user\n",
+    "        },\n",
+    "    )\n",
+    "    print(resp[\"outputs\"][0])"
    ],
    "outputs": [],
    "execution_count": null
@@ -549,7 +564,8 @@
    "id": "24825160-fb6a-4852-b357-accd6106c033",
    "metadata": {},
    "source": [
-    "resp"
+    "if openai_available:\n",
+    "    resp"
    ],
    "outputs": [],
    "execution_count": null
@@ -584,22 +600,7 @@
     "    path=\"/\",\n",
     "    body={\n",
     "        \"name\": \"Alice\",\n",
-    "        \"inputs\": [\n",
-    "            {\"role\": \"user\", \"content\": \"Hi—how do I open a checking account?\"},\n",
-    "            {\n",
-    "                \"role\": \"assistant\",\n",
-    "                \"content\": \"To open a checking account, you need two forms of ID and a minimum deposit of $25.\",\n",
-    "            },\n",
-    "            {\"role\": \"user\", \"content\": \"Is it possible to get cashback rewards?\"},\n",
-    "        ],\n",
-    "        \"questions\": [\n",
-    "            {\"role\": \"user\", \"content\": \"Hi—how do I open a checking account?\"},\n",
-    "            {\n",
-    "                \"role\": \"assistant\",\n",
-    "                \"content\": \"To open a checking account, you need two forms of ID and a minimum deposit of $25.\",\n",
-    "            },\n",
-    "            {\"role\": \"user\", \"content\": \"Is it possible to get cashback rewards?\"},\n",
-    "        ],\n",
+    "        \"inputs\": [{\"role\": \"user\", \"content\": \"Hi, how do I open a checking account?\"}],\n",
     "        \"user_id\": HIGH_PROPENSITY_CHURN_USER_ID,  # <-- High churn propensity user\n",
     "    },\n",
     ")\n",
@@ -608,6 +609,24 @@
    "outputs": [],
    "execution_count": null
   },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": [
+    "resp = agent_graph.invoke(\n",
+    "    path=\"/\",\n",
+    "    body={\n",
+    "        \"name\": \"Alice\",\n",
+    "        \"inputs\": [{\"role\": \"user\", \"content\": \"what is a mortgage?\"}],\n",
+    "        \"user_id\": HIGH_PROPENSITY_CHURN_USER_ID,  # <-- High churn propensity user\n",
+    "    },\n",
+    ")\n",
+    "print(resp)"
+   ],
+   "id": "b77bcfda7be45b56"
+  },
   {
    "cell_type": "markdown",
    "id": "53049435-bde9-4d5c-9313-f2af716cb1ee",
 
@@ -2,6 +2,8 @@
 
 This demo showcases a modular, production-grade banking customer service chatbot. It combines traditional machine learning (churn propensity) and large language models (LLMs) in a single, observable inference pipeline. The system features conditional routing based on guardrails (banking topic and toxicity filtering), and dynamically adapts model behavior using conversation history, sentiment, and churn risk.
 
+In this demo, the user can choose between a remote LLM (we used OpenAI) and a local LLM (we used Qwen).
+Note: for demonstration purposes, we used a model that does not require a GPU. However, this model produces significantly less accurate results and is slower.
 ## Overview
 
 The banking agent demo architecture is customizable, with observability for project, tabular, and generative models. MLRun is used to orchestrate the entire workflow, from data processing to model deployment and serving.
 
@@ -5,7 +5,7 @@ evaluate==0.4.5
 fastapi==0.116.1
 graphviz==0.20.0
 langchain==0.3.27
-langchain-community==0.3.21
+langchain-community==0.3.27
 langchain-core==0.3.81
 langchain-milvus==0.2.1
 langchain-openai==0.3.32
@@ -23,4 +23,5 @@ duckduckgo_search==8.1.1
 pymilvus
 milvus-lite
 scikit-learn==1.5.2
-streamlit
+streamlit
+sentence-transformers