1 | 1 | { |
2 | 2 | "cells": [ |
3 | 3 | { |
4 | 4 | "cell_type": "markdown", |
5 | | - "id": "220629c8-17aa-45f6-ac81-0ca31e165412", |
| 5 | + "id": "dce042eb-d3ad-463c-ac41-4e0895e67c2a", |
6 | 6 | "metadata": {}, |
7 | 7 | "source": [ |
8 | | - "# OpenAI Module Demo" |
| 8 | + "# Using MLRun Hub Module for OpenAI Proxy App" |
| 9 | + ] |
| 10 | + }, |
| 11 | + { |
| 12 | + "cell_type": "markdown", |
| 13 | + "id": "58850fbe-ef31-4e36-9154-d8ca3d212532", |
| 14 | + "metadata": {}, |
| 15 | + "source": [ |
| 16 | + "This notebook walks through the process of importing an OpenAI proxy application from an MLRun Hub module and deploying it as part of your MLRun project. \n", |
| 17 | + "\n", |
| 18 | + "The module provides a flexible FastAPI endpoint that exposes the following OpenAI URLs: chat completions, responses, and embeddings. So you can generate text, query models, and work with vector representations.\n", |
| 19 | + "\n", |
| 20 | + "\n", |
| 21 | + "**Note** - Before running this notebook please generate an .env file with the following credentials \n", |
| 22 | + "\n", |
| 23 | + "```\n", |
| 24 | + "OPENAI_BASE_URL=\"..\"\n", |
| 25 | + "OPENAI_API_KEY=\"..\"\n", |
| 26 | + "\n", |
| 27 | + "# optional:\n", |
| 28 | + "OPENAI_DEFAULT_MODEL=\"..\" # by default uses gpt-4o-mini, it can changed by using this key\n", |
| 29 | + "```\n" |
9 | 30 | ] |
10 | 31 | }, |
11 | 32 | { |
12 | 33 | "cell_type": "code", |
13 | 34 | "execution_count": null, |
14 | | - "id": "967b4d5d-7250-40bf-8149-de11e1e3244c", |
| 35 | + "id": "9262e948-a1b3-4a9e-8b5f-cfa3310bb875", |
15 | 36 | "metadata": {}, |
16 | 37 | "outputs": [], |
17 | 38 | "source": [ |
18 | 39 | "import mlrun\n", |
19 | | - "import pandas as pd" |
| 40 | + "import os\n", |
| 41 | + "import pandas as pd\n", |
| 42 | + "from dotenv import load_dotenv\n", |
| 43 | + "load_dotenv()" |
| 44 | + ] |
| 45 | + }, |
| 46 | + { |
| 47 | + "cell_type": "markdown", |
| 48 | + "id": "3ae0cf4f-0183-42e0-9dda-7ba6a5cfcc7b", |
| 49 | + "metadata": {}, |
| 50 | + "source": [ |
| 51 | + "Load or create a project and set credentials." |
20 | 52 | ] |
21 | 53 | }, |
22 | 54 | { |
23 | 55 | "cell_type": "code", |
24 | 56 | "execution_count": null, |
25 | | - "id": "17d208f4-a00a-42ef-a849-0fa79bed10cb", |
| 57 | + "id": "ea80c1fb-014d-4db1-95d3-71e6cd362a87", |
26 | 58 | "metadata": {}, |
27 | 59 | "outputs": [], |
28 | 60 | "source": [ |
29 | | - "project = mlrun.get_or_create_project(\"fastapi-openai\",user_project=True,context=\"./src\")" |
| 61 | + "project = mlrun.get_or_create_project(\"openai-module\", user_project=True)\n", |
| 62 | + "\n", |
| 63 | + "project.set_secrets({\n", |
| 64 | + " \"OPENAI_BASE_URL\": os.environ[\"OPENAI_BASE_URL\"],\n", |
| 65 | + " \"OPENAI_API_KEY\": os.environ[\"OPENAI_API_KEY\"],\n", |
| 66 | + " \"OPENAI_DEFAULT_MODEL\": os.environ[\"OPENAI_DEFAULT_MODEL\"]\n", |
| 67 | + "})" |
| 68 | + ] |
| 69 | + }, |
| 70 | + { |
| 71 | + "cell_type": "markdown", |
| 72 | + "id": "c59fd225-f719-4881-a643-f41553b529d6", |
| 73 | + "metadata": {}, |
| 74 | + "source": [ |
| 75 | + "### Import the OpenAI proxy module from the Hub" |
30 | 76 | ] |
31 | 77 | }, |
32 | 78 | { |
33 | 79 | "cell_type": "code", |
34 | | - "execution_count": null, |
35 | | - "id": "67c93a0d-8240-48b8-808e-9cd0af418309", |
| 80 | + "execution_count": 3, |
| 81 | + "id": "5d294a0a-0500-464e-b8f7-c3c5f02bcc45", |
36 | 82 | "metadata": {}, |
37 | 83 | "outputs": [], |
38 | 84 | "source": [ |
39 | | - "app = mlrun.import_module(\"hub://openai\")" |
| 85 | + "openai_module = mlrun.import_module(\"hub://openai_proxy_app\")" |
40 | 86 | ] |
41 | 87 | }, |
42 | 88 | { |
43 | 89 | "cell_type": "code", |
44 | | - "execution_count": null, |
45 | | - "id": "93e67d6a-5f53-4bda-b0b5-4e2977088139", |
| 90 | + "execution_count": 4, |
| 91 | + "id": "104a64c4-6707-4b01-b6b8-503e023f03a3", |
| 92 | + "metadata": { |
| 93 | + "scrolled": true, |
| 94 | + "tags": [] |
| 95 | + }, |
| 96 | + "outputs": [], |
| 97 | + "source": [ |
| 98 | + "# Instantiate the module with your MLRun project and deploy it \n", |
| 99 | + "openai_obj = openai_module.OpenAIModule(project)\n", |
| 100 | + "openai_obj.openai_proxy_app.deploy()" |
| 101 | + ] |
| 102 | + }, |
| 103 | + { |
| 104 | + "cell_type": "markdown", |
| 105 | + "id": "c2720a12-9b60-42cc-b50b-96a6bfb3d7b0", |
| 106 | + "metadata": {}, |
| 107 | + "source": [ |
| 108 | + "## Examples of OpenAI app API's " |
| 109 | + ] |
| 110 | + }, |
| 111 | + { |
| 112 | + "cell_type": "markdown", |
| 113 | + "id": "47469ec3-8345-439c-933b-1fd16a994939", |
| 114 | + "metadata": {}, |
| 115 | + "source": [ |
| 116 | + "### Chat completions API\n", |
| 117 | + "This example asks for the three largest countries in Europe and their capitals and returns a standard chat completion response." |
| 118 | + ] |
| 119 | + }, |
| 120 | + { |
| 121 | + "cell_type": "code", |
| 122 | + "execution_count": 6, |
| 123 | + "id": "6beba771-a011-4952-b00c-416289b67179", |
| 124 | + "metadata": {}, |
| 125 | + "outputs": [], |
| 126 | + "source": [ |
| 127 | + "response = openai_obj.openai_proxy_app.invoke(\n", |
| 128 | + " path=\"/v1/chat/completions\",\n", |
| 129 | + " body={\n", |
| 130 | + " \"model\": \"gpt-4o-mini\",\n", |
| 131 | + " \"messages\": [{\"role\": \"user\", \"content\": \"What are the 3 largest countries in Europe and what are their capitals names\"}],\n", |
| 132 | + " },\n", |
| 133 | + " method=\"POST\",\n", |
| 134 | + ")" |
| 135 | + ] |
| 136 | + }, |
| 137 | + { |
| 138 | + "cell_type": "markdown", |
| 139 | + "id": "f2aabeff-9d68-44e5-9ea6-5ddd43e4caed", |
| 140 | + "metadata": {}, |
| 141 | + "source": [ |
| 142 | + "### Go over the OpenAI response" |
| 143 | + ] |
| 144 | + }, |
| 145 | + { |
| 146 | + "cell_type": "code", |
| 147 | + "execution_count": 7, |
| 148 | + "id": "9acfacde-63c8-4f4d-a767-7a4cd33e6ac8", |
| 149 | + "metadata": {}, |
| 150 | + "outputs": [ |
| 151 | + { |
| 152 | + "name": "stdout", |
| 153 | + "output_type": "stream", |
| 154 | + "text": [ |
| 155 | + "The three largest countries in Europe by area are:\n", |
| 156 | + "\n", |
| 157 | + "1. **Russia** (part of it is in Europe) - Capital: Moscow\n", |
| 158 | + "2. **Ukraine** - Capital: Kyiv\n", |
| 159 | + "3. **France** - Capital: Paris\n", |
| 160 | + "\n", |
| 161 | + "Note that while Russia is the largest country in the world, only a portion of its landmass is in Europe.\n" |
| 162 | + ] |
| 163 | + } |
| 164 | + ], |
| 165 | + "source": [ |
| 166 | + "data = response.json()\n", |
| 167 | + "text = data[\"choices\"][0][\"message\"][\"content\"]\n", |
| 168 | + "print(text)" |
| 169 | + ] |
| 170 | + }, |
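| | + { |
| | + "cell_type": "markdown", |
| | + "id": "b1f2c3d4-0a1b-4c2d-8e3f-5a6b7c8d9e0f", |
| | + "metadata": {}, |
| | + "source": [ |
| | + "A chat completion response also carries a `usage` block with token counts. The cell below is a minimal sketch that prints it, reusing the `data` dict parsed above; the field names follow the standard OpenAI chat completions schema." |
| | + ] |
| | + }, |
| | + { |
| | + "cell_type": "code", |
| | + "execution_count": null, |
| | + "id": "c2d3e4f5-1b2c-4d3e-9f4a-6b7c8d9e0f1a", |
| | + "metadata": {}, |
| | + "outputs": [], |
| | + "source": [ |
| | + "# Print the token counts reported alongside the completion\n", |
| | + "usage = data[\"usage\"]\n", |
| | + "print(usage[\"prompt_tokens\"], usage[\"completion_tokens\"], usage[\"total_tokens\"])" |
| | + ] |
| | + }, |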
| 171 | + { |
| 172 | + "cell_type": "markdown", |
| 173 | + "id": "a63c02d1-939a-4d74-b3a2-d0cad6ea65f5", |
| 174 | + "metadata": {}, |
| 175 | + "source": [ |
| 176 | + "### Embedding with the Deployed OpenAI Proxy\n", |
| 177 | + "This example sends a short sentence to the embeddings endpoint and extracts the returned vector from the response payload. \n", |
| 178 | + "The result is a numeric embedding you can use for similarity search, clustering, or downstream model features." |
| 179 | + ] |
| 180 | + }, |
| 181 | + { |
| 182 | + "cell_type": "code", |
| 183 | + "execution_count": 8, |
| 184 | + "id": "62251f4d-817e-4f7c-8b09-13f0c9f5085b", |
| 185 | + "metadata": { |
| 186 | + "scrolled": true, |
| 187 | + "tags": [] |
| 188 | + }, |
| 189 | + "outputs": [], |
| 190 | + "source": [ |
| 191 | + "import json\n", |
| 192 | + "\n", |
| 193 | + "response = openai_obj.openai_proxy_app.invoke(\n", |
| 194 | + " path=\"/v1/embeddings\",\n", |
| 195 | + " body={\n", |
| 196 | + " \"model\": \"text-embedding-3-small\",\n", |
| 197 | + " \"input\": \"Kubernetes whispers to its pods at night\"\n", |
| 198 | + " },\n", |
| 199 | + " method=\"POST\",\n", |
| 200 | + ")" |
| 201 | + ] |
| 202 | + }, |
| 203 | + { |
| 204 | + "cell_type": "markdown", |
| 205 | + "id": "43a06fd4-2c7b-448d-83c9-456f2d817446", |
| 206 | + "metadata": {}, |
| 207 | + "source": [ |
| 208 | + "### Go over the OpenAI response" |
| 209 | + ] |
| 210 | + }, |
| 211 | + { |
| 212 | + "cell_type": "code", |
| 213 | + "execution_count": 9, |
| 214 | + "id": "dfa478ca-f70c-48c3-a75b-9aa6f36375e9", |
| 215 | + "metadata": {}, |
| 216 | + "outputs": [], |
| 217 | + "source": [ |
| 218 | + "embedding = response.json()[\"data\"][0][\"embedding\"]\n", |
| 219 | + "\n", |
| 220 | + "#print if you want to see the embedding\n", |
| 221 | + "#print(embedding) " |
| 222 | + ] |
| 223 | + }, |
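| | + { |
| | + "cell_type": "markdown", |
| | + "id": "d3e4f5a6-2c3d-4e5f-8a9b-7c8d9e0f1a2b", |
| | + "metadata": {}, |
| | + "source": [ |
| | + "As a quick sanity check, the sketch below compares two embeddings with cosine similarity. It assumes numpy is available and requests a second embedding through the same proxy call pattern used above; the second input sentence is just an illustrative example." |
| | + ] |
| | + }, |
| | + { |
| | + "cell_type": "code", |
| | + "execution_count": null, |
| | + "id": "e4f5a6b7-3d4e-4f5a-9b0c-8d9e0f1a2b3c", |
| | + "metadata": {}, |
| | + "outputs": [], |
| | + "source": [ |
| | + "import numpy as np\n", |
| | + "\n", |
| | + "# Request an embedding for a second, related sentence (illustrative input)\n", |
| | + "response2 = openai_obj.openai_proxy_app.invoke(\n", |
| | + "    path=\"/v1/embeddings\",\n", |
| | + "    body={\"model\": \"text-embedding-3-small\", \"input\": \"Pods listen quietly in the cluster\"},\n", |
| | + "    method=\"POST\",\n", |
| | + ")\n", |
| | + "embedding2 = response2.json()[\"data\"][0][\"embedding\"]\n", |
| | + "\n", |
| | + "# Cosine similarity: dot product divided by the product of the vector norms\n", |
| | + "a, b = np.array(embedding), np.array(embedding2)\n", |
| | + "print(float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b))))" |
| | + ] |
| | + }, |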
| 224 | + { |
| 225 | + "cell_type": "markdown", |
| 226 | + "id": "9a92fca0-c579-47f4-afa5-31b0f8bb484e", |
| 227 | + "metadata": {}, |
| 228 | + "source": [ |
| 229 | + "### Request a Text Response and Extract the Output\n", |
| 230 | + "The proxy also supports the unified responses endpoint. \n", |
| 231 | + "Here we send a compact request for a short joke and then extract the generated text from the structured output. " |
| 232 | + ] |
| 233 | + }, |
| 234 | + { |
| 235 | + "cell_type": "code", |
| 236 | + "execution_count": 10, |
| 237 | + "id": "f343f347-75bf-440a-bdcf-5950d80fd706", |
46 | 238 | "metadata": {}, |
47 | 239 | "outputs": [], |
48 | | - "source": "app.OpenAIModule.deploy()" |
| 240 | + "source": [ |
| 241 | + "response = openai_obj.openai_proxy_app.invoke(\n", |
| 242 | + " path=\"/v1/responses\",\n", |
| 243 | + " body={\n", |
| 244 | + " \"model\": \"gpt-4o-mini\",\n", |
| 245 | + " \"input\": \"Give me a short joke about high tech workers\",\n", |
| 246 | + " \"max_output_tokens\": 30\n", |
| 247 | + " },\n", |
| 248 | + " method=\"POST\",\n", |
| 249 | + ")" |
| 250 | + ] |
| 251 | + }, |
| 252 | + { |
| 253 | + "cell_type": "markdown", |
| 254 | + "id": "73456c7b-80e6-4ac9-a94c-8258e7efad60", |
| 255 | + "metadata": {}, |
| 256 | + "source": [ |
| 257 | + "### Go over the OpenAI response" |
| 258 | + ] |
| 259 | + }, |
| 260 | + { |
| 261 | + "cell_type": "code", |
| 262 | + "execution_count": 11, |
| 263 | + "id": "54e3ac7b-842a-4b02-bd76-3baa16941b36", |
| 264 | + "metadata": {}, |
| 265 | + "outputs": [ |
| 266 | + { |
| 267 | + "name": "stdout", |
| 268 | + "output_type": "stream", |
| 269 | + "text": [ |
| 270 | + "Why did the high-tech worker bring a ladder to work?\n", |
| 271 | + "\n", |
| 272 | + "Because they wanted to reach new heights in their career!\n" |
| 273 | + ] |
| 274 | + } |
| 275 | + ], |
| 276 | + "source": [ |
| 277 | + "data = response.json()\n", |
| 278 | + "text = data[\"output\"][0][\"content\"][0][\"text\"]\n", |
| 279 | + "print(text)" |
| 280 | + ] |
49 | 281 | } |
50 | 282 | ], |
51 | 283 | "metadata": { |
52 | 284 | "kernelspec": { |
53 | | - "display_name": "Python 3 (ipykernel)", |
| 285 | + "display_name": "mlrun-base", |
54 | 286 | "language": "python", |
55 | | - "name": "python3" |
| 287 | + "name": "conda-env-mlrun-base-py" |
56 | 288 | }, |
57 | 289 | "language_info": { |
58 | 290 | "codemirror_mode": { |
|
64 | 296 | "name": "python", |
65 | 297 | "nbconvert_exporter": "python", |
66 | 298 | "pygments_lexer": "ipython3", |
67 | | - "version": "3.11.10" |
| 299 | + "version": "3.9.22" |
68 | 300 | } |
69 | 301 | }, |
70 | 302 | "nbformat": 4, |
|