Skip to content

Commit 9a210dd

Browse files
authored
Merge pull request #27 from mlrun/1.11.x
reviewed by edmond here: #26
2 parents 488de8e + b8a6303 commit 9a210dd

File tree

9 files changed

+723
-198
lines changed

9 files changed

+723
-198
lines changed

01_churn_ml_model.ipynb

Lines changed: 432 additions & 25 deletions
Large diffs are not rendered by default.

02_guardrail_deployment.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
"source": [
1818
"# Guardrail deployment\n",
1919
"\n",
20-
"The second part of the demo is to deploy guardrails to be used later in the application pipeline to filter user inputs. This notebook will also deploy an LLM as a Judge monitoring application to monitor our generative input guardrail for banking topic adherence.\n",
20+
"The second part of the demo is to deploy guardrails to be used later in the application pipeline to filter user inputs. This notebook will also deploy an LLM as a Judge monitoring application to monitor our generative input guardrail for banking topic adherence. The user can choose whether to use an OpenAI remote model or a HuggingFace local model.\n",
2121
"\n",
2222
"In this notebook, you will:\n",
2323
"- Deploy multiple guardrail functions using HuggingFace or OpenAI models, including banking-topic and toxicity filters.\n",

03_application_deployment.ipynb

Lines changed: 107 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,7 @@
2727
"id": "78f8c28c-0fe4-40a5-857d-6f7d9ebb0832",
2828
"metadata": {},
2929
"source": [
30-
"import os\n",
3130
"import mlrun\n",
32-
"from langchain_community.callbacks.uptrain_callback import handler\n",
3331
"from langchain_community.document_loaders import UnstructuredMarkdownLoader\n",
3432
"from langchain_openai import OpenAIEmbeddings\n",
3533
"from dotenv import load_dotenv\n",
@@ -105,7 +103,9 @@
105103
"source": [
106104
"warnings.filterwarnings(\"ignore\", category=DeprecationWarning, module=\"pkg_resources\")\n",
107105
"\n",
108-
"if not os.environ.get(\"OPENAI_API_KEY\"):\n",
106+
"openai_available = os.environ.get(\"OPENAI_API_KEY\")\n",
107+
"\n",
108+
"if not openai_available:\n",
109109
" embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-mpnet-base-v2\")\n",
110110
"else:\n",
111111
" embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n",
@@ -212,14 +212,13 @@
212212
"from mlrun.serving import ModelRunnerStep\n",
213213
"\n",
214214
"graph = agent_graph.set_topology(\"flow\", engine=\"async\", exist_ok=True)\n",
215-
"# Step to process the data\n",
215+
"# Step to process the input; it exists to make invocation simpler, with fewer arguments\n",
216216
"graph.add_step(\n",
217217
" name=\"enrich_request\",\n",
218218
" handler= \"enrich_request\",\n",
219-
" # full_event=True,\n",
220219
")\n",
221220
"\n",
222-
"# Topic and toxicity guardrail router\n",
221+
"# Topic and toxicity guardrail router (from notebook 2)\n",
223222
"guardrails_router = graph.add_step(\n",
224223
" \"*ParallelRunMerger\",\n",
225224
" name=\"input-guardrails\",\n",
@@ -239,21 +238,22 @@
239238
" method=\"POST\",\n",
240239
" url=toxicity_guardrail.get_url(),\n",
241240
")\n",
241+
"\n",
242+
"# Filtering accept and reject\n",
242243
"graph.add_step(\n",
243244
" name=\"guardrail-filter\",\n",
244245
" class_name=\"GuardrailsChoice\",\n",
245246
" mapping={\"True\": \"accept\", \"False\": \"reject\"},\n",
246247
" after=\"input-guardrails\",\n",
247248
")\n",
248-
"graph.add_step(name=\"accept\", handler=\"accept\", after=\"guardrail-filter\")\n",
249249
"\n",
250+
"graph.add_step(name=\"accept\", handler=\"accept\", after=\"guardrail-filter\")\n",
250251
"\n",
251252
"# Add model runner step to run the sentiment and churn analysis\n",
252253
"model_runner_step = ModelRunnerStep(\n",
253254
" name=\"input-analysis\",\n",
254255
" result_path=\"input_analysis_output\",\n",
255256
" )\n",
256-
"\n",
257257
"model_runner_step.add_model(\n",
258258
" model_class=\"SentimentAnalysisModelServer\",\n",
259259
" endpoint_name=\"sentiment_analysis_output\",\n",
@@ -270,16 +270,16 @@
270270
" churn_mappings={\"high\": 0.50, \"medium\": 0.20, \"low\": 0},\n",
271271
" result_path=\"churn_model_output\",)\n",
272272
"\n",
273-
"graph.add_step(model_runner_step, after=[\"accept\"])\n",
273+
"graph.add_step(model_runner_step, after=[\"accept\"], full_event= True,)\n",
274274
"\n",
275275
"\n",
276276
"graph.add_step(\n",
277277
" name=\"build-context\",\n",
278278
" class_name=\"BuildContext\",\n",
279279
" context_mappings = {\n",
280-
" \"name\": \"name\",\n",
281-
" \"sentiment\": \"input_analysis_output.sentiment_analysis_output.response[0]\",\n",
282-
" \"churn\": \"input_analysis_output.churn_model_output.response[0]\",\n",
280+
" \"name\": \"sentiment_analysis_output.name\", # name is nested inside sentiment_analysis_output\n",
281+
" \"sentiment\": \"sentiment_analysis_output.response[0]\", # direct path, no input_analysis_output wrapper\n",
282+
" \"churn\": \"churn_model_output.response[0]\", # direct path\n",
283283
" },\n",
284284
" output_key=\"formatted_prompt\",\n",
285285
" prompt=\"\"\"\n",
@@ -296,16 +296,17 @@
296296
" Use the sentiment to craft your response.\n",
297297
" \"\"\",\n",
298298
" after=\"input-analysis\",\n",
299+
" full_event= True,\n",
299300
")\n",
300301
"# Add the BankingAgent LLM using HF or OpenAI (if OPENAI credentials)\n",
301302
"MRS_banking_agent = ModelRunnerStep(name=\"banking-agent\")\n",
302303
"\n",
303-
"if not os.environ.get(\"OPENAI_API_KEY\"):\n",
304+
"if not openai_available:\n",
304305
" MRS_banking_agent.add_model(\n",
305306
" model_class=\"BankingAgentHuggingFace\",\n",
306307
" endpoint_name=\"BankingAgentHuggingFace\",\n",
307308
" execution_mechanism=\"naive\",\n",
308-
" model_name=os.environ.get(\"HF_MODEL_NAME\", \"mistralai/Mistral-7B-Instruct-v0.2\"),\n",
309+
" model_name=os.environ.get(\"HF_MODEL_NAME\", \"Qwen/Qwen2.5-1.5B-Instruct\"),\n",
309310
" prompt_input_key=\"formatted_prompt\",\n",
310311
" messages_input_key=\"inputs\",\n",
311312
" max_new_tokens=256,\n",
@@ -347,11 +348,20 @@
347348
"outputs": [],
348349
"execution_count": null
349350
},
351+
{
352+
"metadata": {},
353+
"cell_type": "markdown",
354+
"source": "### Running the LLM locally is very resource-demanding and some systems can't handle it, so we use the mock server only with OpenAI",
355+
"id": "294bec722b745a15"
356+
},
350357
{
351358
"cell_type": "code",
352359
"id": "a7b33baa-d143-49a9-8007-6bade49b6813",
353360
"metadata": {},
354-
"source": "mock = agent_graph.to_mock_server()",
361+
"source": [
362+
"if openai_available:\n",
363+
" mock = agent_graph.to_mock_server()"
364+
],
355365
"outputs": [],
356366
"execution_count": null
357367
},
@@ -390,15 +400,16 @@
390400
"id": "a8f936c3-2776-4b4c-b18e-184a94fc4c8d",
391401
"metadata": {},
392402
"source": [
393-
"resp = mock.test(\n",
394-
" path=\"/\",\n",
395-
" body={\n",
396-
" \"name\": \"John\",\n",
397-
" \"inputs\": [_format_question(\"What is a mortgage, from the bank?\")],\n",
398-
" \"user_id\": LOW_PROPENSITY_CHURN_USER_ID,\n",
399-
" },\n",
400-
")\n",
401-
"print(resp[\"outputs\"][0])"
403+
"if openai_available:\n",
404+
" resp = mock.test(\n",
405+
" path=\"/\",\n",
406+
" body={\n",
407+
" \"name\": \"John\",\n",
408+
" \"inputs\": [_format_question(\"What is a mortgage, from the bank?\")],\n",
409+
" \"user_id\": LOW_PROPENSITY_CHURN_USER_ID,\n",
410+
" },\n",
411+
" )\n",
412+
" print(resp[\"outputs\"][0])"
402413
],
403414
"outputs": [],
404415
"execution_count": null
@@ -416,15 +427,16 @@
416427
"id": "6406eeea-1849-4aab-b6f0-a7e1d1f18138",
417428
"metadata": {},
418429
"source": [
419-
"resp = mock.test(\n",
420-
" path=\"/\",\n",
421-
" body={\n",
422-
" \"name\": \"John\",\n",
423-
" \"inputs\": [_format_question(\"i hate you\")],\n",
424-
" \"user_id\": LOW_PROPENSITY_CHURN_USER_ID,\n",
425-
" },\n",
426-
")\n",
427-
"print(resp[\"outputs\"][0])"
430+
"if openai_available:\n",
431+
" resp = mock.test(\n",
432+
" path=\"/\",\n",
433+
" body={\n",
434+
" \"name\": \"John\",\n",
435+
" \"inputs\": [_format_question(\"i hate you\")],\n",
436+
" \"user_id\": LOW_PROPENSITY_CHURN_USER_ID,\n",
437+
" },\n",
438+
" )\n",
439+
" print(resp[\"outputs\"][0])"
428440
],
429441
"outputs": [],
430442
"execution_count": null
@@ -450,15 +462,16 @@
450462
"id": "93faf1a7-48d4-46c5-8571-03b90d2ce7b9",
451463
"metadata": {},
452464
"source": [
453-
"resp = mock.test(\n",
454-
" path=\"/\",\n",
455-
" body={\n",
456-
" \"name\": \"John\",\n",
457-
" \"inputs\": [_format_question(\"how to apply for checking account?\")],\n",
458-
" \"user_id\": LOW_PROPENSITY_CHURN_USER_ID,\n",
459-
" },\n",
460-
")\n",
461-
"print(resp[\"outputs\"][0])"
465+
"if openai_available:\n",
466+
" resp = mock.test(\n",
467+
" path=\"/\",\n",
468+
" body={\n",
469+
" \"name\": \"John\",\n",
470+
" \"inputs\": [_format_question(\"how to apply for checking account?\")],\n",
471+
" \"user_id\": LOW_PROPENSITY_CHURN_USER_ID,\n",
472+
" },\n",
473+
" )\n",
474+
" print(resp[\"outputs\"][0])"
462475
],
463476
"outputs": [],
464477
"execution_count": null
@@ -476,19 +489,20 @@
476489
"id": "91d0ebe4-9c8c-4463-857e-1cd61ea42a1e",
477490
"metadata": {},
478491
"source": [
479-
"resp = mock.test(\n",
480-
" path=\"/\",\n",
481-
" body={\n",
482-
" \"name\": \"John\",\n",
483-
" \"inputs\": [\n",
484-
" _format_question(\n",
485-
" \"how to apply for checking account? I keep trying but I'm really frustrated\"\n",
486-
" )\n",
487-
" ],\n",
488-
" \"user_id\": LOW_PROPENSITY_CHURN_USER_ID,\n",
489-
" },\n",
490-
")\n",
491-
"print(resp[\"outputs\"][0])"
492+
"if openai_available:\n",
493+
" resp = mock.test(\n",
494+
" path=\"/\",\n",
495+
" body={\n",
496+
" \"name\": \"John\",\n",
497+
" \"inputs\": [\n",
498+
" _format_question(\n",
499+
" \"how to apply for checking account? I keep trying but I'm really frustrated\"\n",
500+
" )\n",
501+
" ],\n",
502+
" \"user_id\": LOW_PROPENSITY_CHURN_USER_ID,\n",
503+
" },\n",
504+
" )\n",
505+
" print(resp[\"outputs\"][0])"
492506
],
493507
"outputs": [],
494508
"execution_count": null
@@ -514,22 +528,23 @@
514528
"id": "79d62a7c-02d7-41cc-8b23-a8a418284db3",
515529
"metadata": {},
516530
"source": [
517-
"resp = mock.test(\n",
518-
" path=\"/\",\n",
519-
" body={\n",
520-
" \"name\": \"Alice\",\n",
521-
" \"inputs\": [\n",
522-
" {\"role\": \"user\", \"content\": \"Hi—how do I open a checking account?\"},\n",
523-
" {\n",
524-
" \"role\": \"assistant\",\n",
525-
" \"content\": \"To open a checking account, you need two forms of ID and a minimum deposit of $25.\",\n",
526-
" },\n",
527-
" {\"role\": \"user\", \"content\": \"Is it possible to get cashback rewards?\"},\n",
528-
" ],\n",
529-
" \"user_id\": HIGH_PROPENSITY_CHURN_USER_ID, # <-- High churn propensity user\n",
530-
" },\n",
531-
")\n",
532-
"print(resp[\"outputs\"][0])"
531+
"if openai_available:\n",
532+
" resp = mock.test(\n",
533+
" path=\"/\",\n",
534+
" body={\n",
535+
" \"name\": \"Alice\",\n",
536+
" \"inputs\": [\n",
537+
" {\"role\": \"user\", \"content\": \"Hi—how do I open a checking account?\"},\n",
538+
" {\n",
539+
" \"role\": \"assistant\",\n",
540+
" \"content\": \"To open a checking account, you need two forms of ID and a minimum deposit of $25.\",\n",
541+
" },\n",
542+
" {\"role\": \"user\", \"content\": \"Is it possible to get cashback rewards?\"},\n",
543+
" ],\n",
544+
" \"user_id\": HIGH_PROPENSITY_CHURN_USER_ID, # <-- High churn propensity user\n",
545+
" },\n",
546+
" )\n",
547+
" print(resp[\"outputs\"][0])"
533548
],
534549
"outputs": [],
535550
"execution_count": null
@@ -549,7 +564,8 @@
549564
"id": "24825160-fb6a-4852-b357-accd6106c033",
550565
"metadata": {},
551566
"source": [
552-
"resp"
567+
"if openai_available:\n",
568+
" resp"
553569
],
554570
"outputs": [],
555571
"execution_count": null
@@ -584,22 +600,7 @@
584600
" path=\"/\",\n",
585601
" body={\n",
586602
" \"name\": \"Alice\",\n",
587-
" \"inputs\": [\n",
588-
" {\"role\": \"user\", \"content\": \"Hi—how do I open a checking account?\"},\n",
589-
" {\n",
590-
" \"role\": \"assistant\",\n",
591-
" \"content\": \"To open a checking account, you need two forms of ID and a minimum deposit of $25.\",\n",
592-
" },\n",
593-
" {\"role\": \"user\", \"content\": \"Is it possible to get cashback rewards?\"},\n",
594-
" ],\n",
595-
" \"questions\": [\n",
596-
" {\"role\": \"user\", \"content\": \"Hi—how do I open a checking account?\"},\n",
597-
" {\n",
598-
" \"role\": \"assistant\",\n",
599-
" \"content\": \"To open a checking account, you need two forms of ID and a minimum deposit of $25.\",\n",
600-
" },\n",
601-
" {\"role\": \"user\", \"content\": \"Is it possible to get cashback rewards?\"},\n",
602-
" ],\n",
603+
" \"inputs\": [{\"role\": \"user\", \"content\": \"Hi, how do I open a checking account?\"}],\n",
603604
" \"user_id\": HIGH_PROPENSITY_CHURN_USER_ID, # <-- High churn propensity user\n",
604605
" },\n",
605606
")\n",
@@ -608,6 +609,24 @@
608609
"outputs": [],
609610
"execution_count": null
610611
},
612+
{
613+
"metadata": {},
614+
"cell_type": "code",
615+
"outputs": [],
616+
"execution_count": null,
617+
"source": [
618+
"resp = agent_graph.invoke(\n",
619+
" path=\"/\",\n",
620+
" body={\n",
621+
" \"name\": \"Alice\",\n",
622+
" \"inputs\": [{\"role\": \"user\", \"content\": \"what is a mortgage?\"}],\n",
623+
" \"user_id\": HIGH_PROPENSITY_CHURN_USER_ID, # <-- High churn propensity user\n",
624+
" },\n",
625+
")\n",
626+
"print(resp)"
627+
],
628+
"id": "b77bcfda7be45b56"
629+
},
611630
{
612631
"cell_type": "markdown",
613632
"id": "53049435-bde9-4d5c-9313-f2af716cb1ee",

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
This demo showcases a modular, production-grade banking customer service chatbot. It combines traditional machine learning (churn propensity) and large language models (LLMs) in a single, observable inference pipeline. The system features conditional routing based on guardrails (banking topic and toxicity filtering), and dynamically adapts model behavior using conversation history, sentiment, and churn risk.
44

5+
In this demo, the user can choose between a remote LLM (we used OpenAI) and a local LLM (we used Qwen).
6+
Note: for demonstration purposes, we used a model that does not require a GPU. However, this model produces significantly less accurate results and performs slowly.
57
## Overview
68

79
The banking agent demo architecture is customizable, with observability for project, tabular, and generative models. MLRun is used to orchestrate the entire workflow, from data processing to model deployment and serving.

requirements.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ evaluate==0.4.5
55
fastapi==0.116.1
66
graphviz==0.20.0
77
langchain==0.3.27
8-
langchain-community==0.3.21
8+
langchain-community==0.3.27
99
langchain-core==0.3.81
1010
langchain-milvus==0.2.1
1111
langchain-openai==0.3.32
@@ -23,4 +23,5 @@ duckduckgo_search==8.1.1
2323
pymilvus
2424
milvus-lite
2525
scikit-learn==1.5.2
26-
streamlit
26+
streamlit
27+
sentence-transformers

0 commit comments

Comments
 (0)