name: Integration Tests (Experimental)

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    paths:
      - 'llama_stack/**'
      - 'tests/integration/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/integration-tests-experimental.yml' # This workflow
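# Note: the paths filter above applies only to pull_request events; pushes to main always trigger this workflow.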

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
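# Only one run per ref is kept: a new push or PR update cancels any in-progress run for the same branch or PR.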

jobs:
  test-matrix:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false # we want to run all tests regardless of failure

    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install uv
        uses: astral-sh/setup-uv@c7f87aa956e4c323abf06d5dec078e358f6b4d04 # v6.0.0
        with:
          python-version: "3.10"
          activate-environment: true

      # TODO: some kind of pruning is required to prevent the cache from growing indefinitely
      - uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
        with:
          path: /home/runner/.cache/cachemeifyoucan
          key: http-cache-integration-tests-${{ github.sha }}
          restore-keys: |
            http-cache-integration-tests-
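      # The primary key embeds the commit SHA and restore-keys falls back to the newest previous
      # entry, so a run restores the most recent saved responses and (on a cache miss) saves an
      # updated copy under its own SHA-specific key, which is why the cache grows without pruning.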

      - name: Set Up Environment and Install Dependencies
        run: |
          uv sync --extra dev --extra test
          uv pip install git+https://github.com/derekhiggins/cachemeifyoucan.git@69cd438
          # Always test against the latest version of the client.
          # TODO: this is not necessarily a good idea; we should test against both the published
          # and the latest client to catch backwards-compatibility issues.
          uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main
          uv pip install -e .
          llama stack build --template verification --image-type venv
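          # 'llama stack build' assembles the 'verification' distribution into the active venv;
          # its template run config (llama_stack/templates/verification/run.yaml) is patched with yq below.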

          # Start the cachemeifyoucan server in the background; it is used to cache upstream API responses
          echo -e 'openai:\n url: https://api.openai.com\ntogether:\n url: https://api.together.xyz\nfireworks:\n url: https://api.fireworks.ai' > cachemeifyoucan.yaml
          nohup uv run uvicorn cachemeifyoucan:app --host 127.0.0.1 --port 9999 > cachemeifyoucan.log 2>&1 &
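          # The echo above writes a cachemeifyoucan.yaml that (as used here) maps route prefixes
          # to upstreams:
          #   openai    -> https://api.openai.com
          #   together  -> https://api.together.xyz
          #   fireworks -> https://api.fireworks.ai
          # A request to http://127.0.0.1:9999/openai/v1/... is therefore served from, or forwarded
          # to, api.openai.com, with responses cached under /home/runner/.cache/cachemeifyoucan
          # (the directory saved and restored by actions/cache above).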

          # Drop the shields from the run config for now; otherwise we hit:
          # NotFoundError: Error code: 404 - {'error': 'Model not found'}
          # TODO: remove this once we figure out the problem
          yq '(.shields = [])' ./llama_stack/templates/verification/run.yaml > ./run_t1.yaml
          yq '(.providers.inference[] | select(.provider_id == "together-openai-compat")).config.openai_compat_api_base = "http://127.0.0.1:9999/together"' \
            ./run_t1.yaml > ./run_t2.yaml
          yq '(.providers.inference[] | select(.provider_id == "fireworks-openai-compat")).config.openai_compat_api_base = "http://127.0.0.1:9999/fireworks/inference/v1"' \
            ./run_t2.yaml > ./run.yaml
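          # Net effect on run.yaml: shields becomes [] and the two openai-compat inference
          # providers point at the local cache, roughly:
          #   - provider_id: together-openai-compat
          #     config:
          #       openai_compat_api_base: http://127.0.0.1:9999/together
          #   - provider_id: fireworks-openai-compat
          #     config:
          #       openai_compat_api_base: http://127.0.0.1:9999/fireworks/inference/v1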

      - name: Start Llama Stack server in background
        env:
          # TODO: instead of adding keys here, we could teach cachemeifyoucan to add the auth header itself;
          # this would ensure no traffic is routed to 3rd parties without going via the cache
          OPENAI_API_KEY: ${{ secrets.OPENAI_KEY }}
          TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
        run: |
          source .venv/bin/activate
          export OPENAI_BASE_URL=http://127.0.0.1:9999/openai/v1
          nohup uv run llama stack run ./run.yaml --image-type venv > server.log 2>&1 &
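          # OPENAI_BASE_URL points the openai provider at the local cache proxy rather than
          # api.openai.com directly; Together and Fireworks are redirected via run.yaml above.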

      - name: Wait for Llama Stack server to be ready
        run: |
          echo "Waiting for Llama Stack server..."
          for i in {1..30}; do
            if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
              echo "Llama Stack server is up!"
              exit 0
            fi
            sleep 1
          done
          echo "Llama Stack server failed to start"
          cat server.log
          exit 1
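      # The step above polls the health endpoint once per second for up to 30 seconds and dumps
      # server.log before failing the job if the server never reports healthy.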

      - name: Run Integration Tests
        run: |
          # openai
          echo "Running OpenAI tests"
          uv run pytest -v tests/integration/inference tests/integration/agents --stack-config=http://localhost:8321 \
            -k "not(builtin_tool or safety_with_image or code_interpreter)" \
            --text-model="openai/gpt-4o"
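          # --stack-config points the test client at the server started above; the -k expression
          # deselects tests whose IDs match builtin_tool, safety_with_image or code_interpreter.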

          # Skipping Together AI for now: the free tier gets rate limited when there is no cache
          #uv run pytest -v tests/integration/inference/test_text_inference.py --stack-config=http://localhost:8321 \
          #  -k "not(builtin_tool or tool_calling)" --text-model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"

          # Fireworks (only running text inference for now: the free tier gets rate limited when there is no cache)
          echo "Running Fireworks tests"
          uv run pytest -v tests/integration/inference/test_text_inference.py --stack-config=http://localhost:8321 \
            -k "not(builtin_tool or tool_calling)" --text-model="accounts/fireworks/models/llama-v3p1-8b-instruct"

      - name: Clean up
        if: always()
        run: |
          if [ "$(find /home/runner/.cache/cachemeifyoucan -type f -newer cachemeifyoucan.yaml)" = '' ] ; then
            echo "Removing cache as nothing was cached"
            rm -rf /home/runner/.cache/cachemeifyoucan
          fi
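      # If no file under the cache directory is newer than cachemeifyoucan.yaml (written earlier in
      # this run), nothing new was cached; removing the directory stops actions/cache from saving an
      # unchanged copy under this run's key.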

      - name: Upload all logs to artifacts
        if: always()
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          name: logs-${{ github.run_id }}-${{ github.run_attempt }}
          path: |
            *.log
          retention-days: 1
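      # *.log picks up server.log and cachemeifyoucan.log written by the earlier steps.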