Skip to content

Commit 283ce83

Browse files
authored
Merge branch 'master' into rekhoff/nativeaot-net10
2 parents 503f790 + 16a2a8f commit 283ce83

411 files changed

Lines changed: 12917 additions & 45850 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/workflows/ci.yml

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -625,39 +625,6 @@ jobs:
625625
run: |
626626
cargo ci cli-docs
627627
llm_ci_check:
  name: Verify LLM benchmark is up to date
  # Switched off for now: this check kept causing trouble with merge
  # conflicts and repeated re-runs.
  if: false
  runs-on: ubuntu-latest
  permissions:
    contents: read
  steps:
    # The tool is compiled from master, not from the PR, so that hashes are
    # computed the same way as in the llm-benchmark-update workflow (which
    # also uses master's tool).
    - name: Checkout master (build tool from trusted code)
      uses: actions/checkout@v4
      with:
        fetch-depth: 1
        ref: master

    - uses: dtolnay/rust-toolchain@stable
    - uses: Swatinem/rust-cache@v2

    - name: Install llm-benchmark tool from master
      run: |
        cargo install --path tools/xtask-llm-benchmark --locked
        command -v llm_benchmark

    # With the tool installed, switch to the PR branch so its benchmark
    # files are the ones being verified.
    - name: Checkout PR branch
      uses: actions/checkout@v4
      with:
        clean: false

    - name: Run hash check (both langs)
      run: llm_benchmark ci-check
660-
661628
unity-testsuite:
662629
needs: [lints]
663630
# Skip if this is an external contribution.
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
name: Docs / Update llms files

# Runs when docs or skills content is pushed to docs/release, and can also
# be started by hand.
on:
  workflow_dispatch:
  push:
    branches: [docs/release]
    paths:
      - "docs/docs/**"
      - "skills/**"

# Write access to repository contents is requested for the whole workflow.
permissions:
  contents: write

jobs:
  # Builds the docs site from docs/release, regenerates the llms file and
  # commits it back to the branch when its content changed.
  update-llms:
    runs-on: spacetimedb-new-runner-2
    steps:
      # v4 of checkout/setup-node: v3 runs on the deprecated Node 16 action
      # runtime, and the other workflows in this repository already use v4.
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: docs/release

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '22'

      # NOTE(review): run_install: true makes this action run an install
      # before the cache step below has restored the pnpm store; confirm
      # that ordering is intended.
      - uses: pnpm/action-setup@v4
        with:
          run_install: true

      # Exposes the pnpm store location as STORE_PATH for the cache step.
      - name: Get pnpm store directory
        working-directory: sdks/typescript
        shell: bash
        run: |
          echo "STORE_PATH=$(pnpm store path --silent)" >> "$GITHUB_ENV"

      - uses: actions/cache@v4
        name: Setup pnpm cache
        with:
          path: ${{ env.STORE_PATH }}
          key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }}
          restore-keys: |
            ${{ runner.os }}-pnpm-store-

      - name: Install dependencies
        working-directory: docs
        run: pnpm install

      - name: Docusaurus build
        working-directory: docs
        run: pnpm build

      - name: Generate llms files
        working-directory: docs
        run: node scripts/generate-llms.mjs

      - name: Commit updated llms files
        working-directory: docs
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add static/llms.md
          # Nothing staged means the generated file is unchanged: finish
          # successfully without creating an empty commit.
          if git diff --staged --quiet; then
            echo "No changes"
            exit 0
          fi
          git commit -m "Update llms files from docs build"
          git push
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
name: Periodic LLM benchmarks

on:
  # Manual runs may narrow the models, languages and modes to benchmark.
  workflow_dispatch:
    inputs:
      models:
        description: 'Models to run (provider:model format, comma-separated, or "all")'
        default: 'all'
        required: false
      languages:
        description: 'Languages to benchmark (comma-separated: rust,csharp,typescript)'
        default: 'rust,csharp,typescript'
        required: false
      modes:
        description: 'Modes to run (comma-separated: guidelines,no_context,docs,...)'
        default: 'guidelines,no_context'
        required: false
  schedule:
    # Once a day at 00:00 UTC. Use '0 */6 * * *' for a run every 6 hours,
    # or '0 */4 * * *' for a run every 4 hours.
    - cron: '0 0 * * *'

permissions:
  contents: read

# All runs share one concurrency group; a newly started run cancels the
# run that is still in progress.
concurrency:
  group: llm-benchmark-periodic
  cancel-in-progress: true

jobs:
  # Builds the llm_benchmark tool from master inside the CI container and
  # runs it once per requested language.
  run-benchmarks:
    runs-on: spacetimedb-new-runner
    container:
      image: localhost:5000/spacetimedb-ci:latest
      options: >-
        --privileged
    timeout-minutes: 180

    steps:
      - name: Install spacetime CLI
        run: |
          curl -sSf https://install.spacetimedb.com | sh -s -- -y
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"

      - name: Checkout master
        uses: actions/checkout@v4
        with:
          ref: master
          fetch-depth: 1

      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2

      - name: Setup .NET SDK
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: "8.0.x"

      - name: Install WASI workload
        env:
          DOTNET_MULTILEVEL_LOOKUP: "0"
          DOTNET_CLI_HOME: ${{ runner.temp }}/dotnet-home
          DOTNET_SKIP_FIRST_TIME_EXPERIENCE: "1"
        run: |
          dotnet workload install wasi-experimental --skip-manifest-update --disable-parallel

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 22

      - name: Install pnpm
        uses: pnpm/action-setup@v4

      - name: Build llm-benchmark tool
        run: cargo install --path tools/xtask-llm-benchmark --locked

      - name: Run benchmarks
        env:
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          LLM_BENCHMARK_API_KEY: ${{ secrets.LLM_BENCHMARK_API_KEY }}
          LLM_BENCHMARK_UPLOAD_URL: ${{ secrets.LLM_BENCHMARK_UPLOAD_URL }}
          MSBUILDDISABLENODEREUSE: "1"
          DOTNET_CLI_USE_MSBUILD_SERVER: "0"
          # Scheduled runs have no inputs, so each value falls back to the
          # same default that workflow_dispatch declares.
          INPUT_LANGUAGES: ${{ inputs.languages || 'rust,csharp,typescript' }}
          INPUT_MODELS: ${{ inputs.models || 'all' }}
          INPUT_MODES: ${{ inputs.modes || 'guidelines,no_context' }}
        run: |
          LANGS="$INPUT_LANGUAGES"
          MODELS="$INPUT_MODELS"
          MODES="$INPUT_MODES"

          SUCCEEDED=0
          FAILED=0
          # The loop variable is deliberately not called LANG: that is the
          # POSIX locale variable, and when it is exported in the job
          # environment, reassigning it would pass "rust"/"csharp"/... to
          # llm_benchmark and its child processes as their locale.
          for bench_lang in $(echo "$LANGS" | tr ',' ' '); do
            # Build the argument list once; --models is only passed when a
            # specific model list was requested.
            set -- --lang "$bench_lang" --modes "$MODES"
            label="lang=$bench_lang"
            if [ "$MODELS" != "all" ]; then
              set -- "$@" --models "$MODELS"
              label="$label models=$MODELS"
            fi

            # A failing language is recorded as a warning so the remaining
            # languages still run.
            if llm_benchmark run "$@"; then
              SUCCEEDED=$((SUCCEEDED + 1))
            else
              echo "::warning::Benchmark run failed for $label"
              FAILED=$((FAILED + 1))
            fi
          done
          echo "Benchmark runs: $SUCCEEDED succeeded, $FAILED failed"
          if [ "$SUCCEEDED" -eq 0 ] && [ "$FAILED" -gt 0 ]; then
            echo "::error::All benchmark runs failed"
            exit 1
          fi
          # A language list that yields no entries (for example ",") must
          # not be reported as a successful benchmark run.
          if [ "$SUCCEEDED" -eq 0 ] && [ "$FAILED" -eq 0 ]; then
            echo "::error::No benchmark runs were attempted (languages='$LANGS')"
            exit 1
          fi

0 commit comments

Comments
 (0)