name: Integration Tests (Experimental)

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    paths:
      - 'llama_stack/**'
      - 'tests/integration/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/integration-tests-experimental.yml' # This workflow
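# Note: the paths filter above applies only to pull_request events; pushes to main always trigger this workflow.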

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
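# Only one run per ref is kept: a new push or PR update cancels any in-progress run for the same branch or PR.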

jobs:
  test-matrix:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false # we want to run all tests regardless of failure

    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install uv
        uses: astral-sh/setup-uv@c7f87aa956e4c323abf06d5dec078e358f6b4d04 # v6.0.0
        with:
          python-version: "3.10"
          activate-environment: true

      # TODO: some kind of pruning is required to prevent the cache from growing indefinitely
      - uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
        with:
          path: /home/runner/.cache/cachemeifyoucan
          key: http-cache-integration-tests-${{ github.sha }}
          restore-keys: |
            http-cache-integration-tests-
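      # The primary key embeds the commit SHA and restore-keys falls back to the newest previous
      # entry, so a run restores the most recent saved responses and (on a cache miss) saves an
      # updated copy under its own SHA-specific key, which is why the cache grows without pruning.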

      - name: Set Up Environment and Install Dependencies
        run: |
          uv sync --extra dev --extra test
          uv pip install git+https://github.com/derekhiggins/cachemeifyoucan.git@69cd438
          # Always test against the latest version of the client.
          # TODO: this is not necessarily a good idea; we should test against both the published
          # and the latest client to catch backwards-compatibility issues.
          uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main
          uv pip install -e .
          llama stack build --template verification --image-type venv
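          # 'llama stack build' assembles the 'verification' distribution into the active venv;
          # its template run config (llama_stack/templates/verification/run.yaml) is patched with yq below.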

          # Start the cachemeifyoucan server in the background; it is used to cache upstream API responses
          echo -e 'openai:\n url: https://api.openai.com\ntogether:\n url: https://api.together.xyz\nfireworks:\n url: https://api.fireworks.ai' > cachemeifyoucan.yaml
          nohup uv run uvicorn cachemeifyoucan:app --host 127.0.0.1 --port 9999 > cachemeifyoucan.log 2>&1 &
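          # The echo above writes a cachemeifyoucan.yaml that (as used here) maps route prefixes
          # to upstreams:
          #   openai    -> https://api.openai.com
          #   together  -> https://api.together.xyz
          #   fireworks -> https://api.fireworks.ai
          # A request to http://127.0.0.1:9999/openai/v1/... is therefore served from, or forwarded
          # to, api.openai.com, with responses cached under /home/runner/.cache/cachemeifyoucan
          # (the directory saved and restored by actions/cache above).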

          # Drop the shields from the run config for now; otherwise we hit:
          # NotFoundError: Error code: 404 - {'error': 'Model not found'}
          # TODO: remove this once we figure out the problem
          yq '(.shields = [])' ./llama_stack/templates/verification/run.yaml > ./run_t1.yaml
          yq '(.providers.inference[] | select(.provider_id == "together-openai-compat")).config.openai_compat_api_base = "http://127.0.0.1:9999/together"' \
            ./run_t1.yaml > ./run_t2.yaml
          yq '(.providers.inference[] | select(.provider_id == "fireworks-openai-compat")).config.openai_compat_api_base = "http://127.0.0.1:9999/fireworks/inference/v1"' \
            ./run_t2.yaml > ./run.yaml
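          # Net effect on run.yaml: shields becomes [] and the two openai-compat inference
          # providers point at the local cache, roughly:
          #   - provider_id: together-openai-compat
          #     config:
          #       openai_compat_api_base: http://127.0.0.1:9999/together
          #   - provider_id: fireworks-openai-compat
          #     config:
          #       openai_compat_api_base: http://127.0.0.1:9999/fireworks/inference/v1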

      - name: Start Llama Stack server in background
        env:
          # TODO: instead of adding keys here, we could teach cachemeifyoucan to add the auth header itself;
          # this would ensure no traffic is routed to 3rd parties without going via the cache
          OPENAI_API_KEY: ${{ secrets.OPENAI_KEY }}
          TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
        run: |
          source .venv/bin/activate
          export OPENAI_BASE_URL=http://127.0.0.1:9999/openai/v1
          nohup uv run llama stack run ./run.yaml --image-type venv > server.log 2>&1 &
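          # OPENAI_BASE_URL points the openai provider at the local cache proxy rather than
          # api.openai.com directly; Together and Fireworks are redirected via run.yaml above.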

      - name: Wait for Llama Stack server to be ready
        run: |
          echo "Waiting for Llama Stack server..."
          for i in {1..30}; do
            if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
              echo "Llama Stack server is up!"
              exit 0
            fi
            sleep 1
          done
          echo "Llama Stack server failed to start"
          cat server.log
          exit 1
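      # The step above polls the health endpoint once per second for up to 30 seconds and dumps
      # server.log before failing the job if the server never reports healthy.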

      - name: Run Integration Tests
        run: |
          # openai
          echo "Running OpenAI tests"
          uv run pytest -v tests/integration/inference tests/integration/agents --stack-config=http://localhost:8321 \
            -k "not(builtin_tool or safety_with_image or code_interpreter)" \
            --text-model="openai/gpt-4o"
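          # --stack-config points the test client at the server started above; the -k expression
          # deselects tests whose IDs match builtin_tool, safety_with_image or code_interpreter.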

          # Skipping Together AI for now: the free tier gets rate limited when there is no cache
          #uv run pytest -v tests/integration/inference/test_text_inference.py --stack-config=http://localhost:8321 \
          #  -k "not(builtin_tool or tool_calling)" --text-model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"

          # Fireworks (only running text inference for now: the free tier gets rate limited when there is no cache)
          echo "Running Fireworks tests"
          uv run pytest -v tests/integration/inference/test_text_inference.py --stack-config=http://localhost:8321 \
            -k "not(builtin_tool or tool_calling)" --text-model="accounts/fireworks/models/llama-v3p1-8b-instruct"

      - name: Clean up
        if: always()
        run: |
          if [ "$(find /home/runner/.cache/cachemeifyoucan -type f -newer cachemeifyoucan.yaml)" = '' ] ; then
            echo "Removing cache as nothing was cached"
            rm -rf /home/runner/.cache/cachemeifyoucan
          fi
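      # If no file under the cache directory is newer than cachemeifyoucan.yaml (written earlier in
      # this run), nothing new was cached; removing the directory stops actions/cache from saving an
      # unchanged copy under this run's key.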

      - name: Upload all logs to artifacts
        if: always()
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          name: logs-${{ github.run_id }}-${{ github.run_attempt }}
          path: |
            *.log
          retention-days: 1
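      # *.log picks up server.log and cachemeifyoucan.log written by the earlier steps.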