Skip to content

Commit ea91d63

Browse files
Add CI Workflow (#15)
* add workflow * add workflow * add workflow * update notebook * update runner
1 parent 093310a commit ea91d63

2 files changed

Lines changed: 170 additions & 16 deletions

File tree

.github/workflows/ci.yaml

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
name: Jupyter Notebook Runner
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
workflow_dispatch:
8+
9+
concurrency:
10+
group: ${{ github.workflow }}-${{ github.ref }}
11+
cancel-in-progress: true
12+
13+
jobs:
14+
run-notebook:
15+
runs-on: arc-runner-set-oke-org-nv-ai-bp
16+
env:
17+
NOTEBOOK_PATH: ./launchable/PDFtoPodcast.ipynb
18+
PYTHON_VERSION: 3.12
19+
steps:
20+
- name: Checkout repository
21+
uses: actions/checkout@v3
22+
23+
- name: Set up Python
24+
uses: actions/setup-python@v4
25+
with:
26+
python-version: ${{ env.PYTHON_VERSION }}
27+
cache: 'pip'
28+
cache-dependency-path: |
29+
requirements.txt
30+
**/*.ipynb
31+
32+
- name: Install uv
33+
run: |
34+
# Check if uv is installed, if not, install it
35+
if ! command -v uv &> /dev/null; then
36+
echo "Installing uv..."
37+
curl -LsSf https://astral.sh/uv/install.sh | sh
38+
fi
39+
40+
# Add ~/.local/bin to PATH to make `uv` accessible
41+
echo "PATH=$PATH:$HOME/.local/bin" >> $GITHUB_ENV
42+
43+
- name: Install dependencies
44+
run: |
45+
python -m pip install --upgrade pip
46+
pip install papermill jupyter
47+
# Install Docker and Docker Compose in a single step
48+
curl -fsSL https://get.docker.com -o get-docker.sh
49+
sudo sh get-docker.sh
50+
sudo apt-get update
51+
sudo apt-get install -y docker-compose-plugin docker-compose build-essential
52+
53+
- name: Run Jupyter Notebook
54+
env:
55+
${{ secrets }}
56+
run: |
57+
# Verify required environment variables
58+
required_vars=("ELEVENLABS_API_KEY" "NVIDIA_API_KEY")
59+
for var in "${required_vars[@]}"; do
60+
if [ -z "${!var}" ]; then
61+
echo "Error: $var is not set"
62+
exit 1
63+
fi
64+
done
65+
66+
OUTPUT_NOTEBOOK="result.ipynb"
67+
echo "Executing notebook: $NOTEBOOK_PATH"
68+
papermill "$NOTEBOOK_PATH" "$OUTPUT_NOTEBOOK" --log-output --log-level DEBUG
69+
70+
- name: Convert result to html format
71+
if: always()
72+
run: |
73+
OUTPUT_NOTEBOOK="result.ipynb"
74+
jupyter nbconvert --to html "$OUTPUT_NOTEBOOK"
75+
76+
- name: Upload the result notebook as artifact
77+
if: always()
78+
uses: actions/upload-artifact@v4
79+
with:
80+
name: result-notebook
81+
path: "result.html"
82+
retention-days: 30
83+

launchable/PDFtoPodcast.ipynb

Lines changed: 87 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,11 @@
112112
"Required variables:\n",
113113
"- `ELEVENLABS_API_KEY`: Your ElevenLabs API key\n",
114114
"- `NVIDIA_API_KEY`: Your NVIDIA API key\n",
115-
"- `MAX_CONCURRENT_REQUESTS`: Number of concurrent requests allowed (recommended: 1 for local development)\n",
115+
"- `MAX_CONCURRENT_REQUESTS`: Number of concurrent requests allowed (recommended: 1 for local development)\n",
116116
"\n",
117117
"> **Note**: While production environments use the NVIDIA Eleven Labs API key which supports concurrent requests, for local development we recommend setting `MAX_CONCURRENT_REQUESTS=1` to avoid rate limiting issues. You can obtain a free testing API key from [ElevenLabs](https://elevenlabs.io).\n",
118118
"\n",
119-
"Run the code cell below to create your `.env` file. Make sure to replace the placeholder values with your actual API keys."
119+
"Run the code cell below to create your `.env` file. Make sure to set the `ELEVENLABS_API_KEY` and `NVIDIA_API_KEY` environment variables first. "
120120
]
121121
},
122122
{
@@ -126,7 +126,7 @@
126126
"metadata": {},
127127
"outputs": [],
128128
"source": [
129-
"%%bash\n",
129+
"%%bash \n",
130130
"\n",
131131
"cd pdf-to-podcast/\n",
132132
"\n",
@@ -138,15 +138,13 @@
138138
"\n",
139139
"# Create new .env file\n",
140140
"cat > .env << EOL\n",
141-
"ELEVENLABS_API_KEY=<ENTER-KEY>\n",
142-
"NVIDIA_API_KEY=<ENTER_KEY>\n",
141+
"ELEVENLABS_API_KEY=$ELEVENLABS_API_KEY\n",
142+
"NVIDIA_API_KEY=$NVIDIA_API_KEY\n",
143143
"MAX_CONCURRENT_REQUESTS=1\n",
144144
"EOL\n",
145145
"\n",
146146
"echo \"Created .env file. Please edit it with your actual API keys.\"\n",
147-
"echo -e \"\\nCurrent .env contents:\"\n",
148-
"echo \"----------------------------------------\"\n",
149-
"cat .env"
147+
"echo \"----------------------------------------\""
150148
]
151149
},
152150
{
@@ -171,7 +169,6 @@
171169
"%%bash\n",
172170
"\n",
173171
"cd pdf-to-podcast/\n",
174-
"\n",
175172
"make uv"
176173
]
177174
},
@@ -188,7 +185,51 @@
188185
"```\n",
189186
"cd pdf-to-podcast/\n",
190187
"make all-services\n",
191-
"```"
188+
"```\n",
189+
"Or run the command below to start the services in detached mode"
190+
]
191+
},
192+
{
193+
"cell_type": "code",
194+
"execution_count": null,
195+
"id": "44fb2da5",
196+
"metadata": {},
197+
"outputs": [],
198+
"source": [
199+
"%%bash\n",
200+
"\n",
201+
"cd pdf-to-podcast/\n",
202+
"make all-services DETACH=1\n",
203+
"\n",
204+
"wait_for_services() {\n",
205+
" local max_wait_time=$1 # Max wait time in seconds\n",
206+
" local check_interval=$2 # Interval to check in seconds\n",
207+
"\n",
208+
" local elapsed_time=0\n",
209+
"\n",
210+
" while [ $elapsed_time -lt $max_wait_time ]; do\n",
211+
" # Check if any containers are up\n",
212+
" if docker-compose ps | grep -q \"Up\"; then\n",
213+
" echo \"Services are up and running!\"\n",
214+
" return 0 # Return success if services are up\n",
215+
" fi\n",
216+
" # Wait for the next check interval\n",
217+
" sleep $check_interval\n",
218+
" elapsed_time=$((elapsed_time + check_interval))\n",
219+
" echo \"Waiting for services to come up... $((elapsed_time / 60)) min elapsed\"\n",
220+
" done\n",
221+
"\n",
222+
" # If max wait time is reached and services are not up, print a timeout message\n",
223+
" echo \"Timeout reached. Services did not come up in $((max_wait_time / 60)) minutes.\"\n",
224+
" return 1 # Return failure if services did not come up in time\n",
225+
"}\n",
226+
"\n",
227+
"# Maximum wait time in seconds (15 minutes)\n",
228+
"MAX_WAIT_TIME=900 # 15 minutes\n",
229+
"CHECK_INTERVAL=10 # Check every 10 seconds\n",
230+
"\n",
231+
"# Call the function\n",
232+
"wait_for_services $MAX_WAIT_TIME $CHECK_INTERVAL"
192233
]
193234
},
194235
{
@@ -198,7 +239,8 @@
198239
"metadata": {},
199240
"outputs": [],
200241
"source": [
201-
"!docker ps --format \"table {{.ID}}\\t{{.Names}}\\t{{.Status}}\""
242+
"%%bash\n",
243+
"docker ps --format \"table {{.ID}}\\t{{.Names}}\\t{{.Status}}\""
202244
]
203245
},
204246
{
@@ -237,7 +279,27 @@
237279
"metadata": {},
238280
"outputs": [],
239281
"source": [
240-
"!curl localhost:8002/health"
282+
"import requests\n",
283+
"\n",
284+
"def make_request(url, params=None, headers=None):\n",
285+
"\n",
286+
" try:\n",
287+
" # Send GET request with optional parameters and headers\n",
288+
" response = requests.get(url, params=params, headers=headers)\n",
289+
" \n",
290+
" # Check if the request was successful (status code 200)\n",
291+
" if response.status_code != 200:\n",
292+
" raise Exception(f\"Request failed with status code {response.status_code}. Response: {response.text}\")\n",
293+
" \n",
294+
" # Return the response object if successful\n",
295+
" return response\n",
296+
" \n",
297+
" except requests.exceptions.RequestException as e:\n",
298+
" print(f\"Error occurred during the request: {e}\")\n",
299+
" raise Exception(f\"Request failed: {e}\")\n",
300+
"\n",
301+
"response = make_request(\"http://localhost:8002/health\")\n",
302+
"print(response.text)"
241303
]
242304
},
243305
{
@@ -475,7 +537,13 @@
475537
"metadata": {},
476538
"outputs": [],
477539
"source": [
478-
"!curl \"localhost:8002/output/{job_id}?userId=test-userid\" --output temp_audio.mp3\n",
540+
"response = make_request(f\"http://localhost:8002/output/{job_id}?userId=test-userid\")\n",
541+
"if response.content:\n",
542+
" with open(\"temp_audio.mp3\", 'wb') as f:\n",
543+
" f.write(response.content)\n",
544+
" print(\"MP3 file saved successfully as temp_audio.mp3\")\n",
545+
"else:\n",
546+
" print(\"No content received in the response.\")\n",
479547
"Audio(\"temp_audio.mp3\")"
480548
]
481549
},
@@ -508,7 +576,8 @@
508576
"metadata": {},
509577
"outputs": [],
510578
"source": [
511-
"!curl \"localhost:8002/saved_podcast/{job_id}/transcript?userId=test-userid\""
579+
"response = make_request(f\"http://localhost:8002/saved_podcast/{job_id}/transcript?userId=test-userid\")\n",
580+
"print(response.text)"
512581
]
513582
},
514583
{
@@ -544,7 +613,8 @@
544613
},
545614
"outputs": [],
546615
"source": [
547-
"!curl \"localhost:8002/saved_podcast/{job_id}/history?userId=test-userid\""
616+
"response = make_request(f\"http://localhost:8002/saved_podcast/{job_id}/history?userId=test-userid\")\n",
617+
"print(response.text)\n"
548618
]
549619
},
550620
{
@@ -554,7 +624,8 @@
554624
"metadata": {},
555625
"outputs": [],
556626
"source": [
557-
"!curl \"localhost:8002/saved_podcast/{job_id}/metadata?userId=test-userid\""
627+
"response = make_request(f\"http://localhost:8002/saved_podcast/{job_id}/metadata?userId=test-userid\")\n",
628+
"print(response.text)\n"
558629
]
559630
},
560631
{

0 commit comments

Comments
 (0)