# Benchmark - Tokenizer #299

name: Benchmark - Tokenizer

# Triggers: pushes and pull requests against main that touch the tokenizer
# crate or its benchmark (test-only files are excluded), manual dispatch,
# and a weekly scheduled run.
on:
  push:
    branches:
      - main
    paths:
      - 'model_gateway/benches/tokenizer_benchmark.rs'
      - 'crates/tokenizer/**'
      # Negated patterns: changes limited to test files do not trigger a run.
      - '!crates/tokenizer/**/tests/**'
      - '!crates/tokenizer/**/*_test.rs'
      - '!crates/tokenizer/**/test_*.rs'
  pull_request:
    branches:
      - main
    paths:
      - 'model_gateway/benches/tokenizer_benchmark.rs'
      - 'crates/tokenizer/**'
      # Same test-file exclusions as the push trigger.
      - '!crates/tokenizer/**/tests/**'
      - '!crates/tokenizer/**/*_test.rs'
      - '!crates/tokenizer/**/test_*.rs'
  workflow_dispatch:
  schedule:
    # Minute 0, hour 0, every Sunday.
    - cron: '0 0 * * 0'
# One concurrency group per ref: a newly started run cancels any run of
# this workflow still in progress for the same ref.
concurrency:
  group: benchmark-tokenizer-${{ github.ref }}
  cancel-in-progress: true
# Workflow-wide environment: route rustc invocations through sccache.
env:
  RUSTC_WRAPPER: sccache
  # Quoted so the value stays the string "true" rather than a YAML boolean.
  SCCACHE_GHA_ENABLED: "true"

# Restrict the workflow token to read-only access to repository contents.
permissions:
  contents: read
jobs:
  # Builds and runs the tokenizer benchmark, then publishes the output as
  # an artifact and as a job summary.
  benchmark:
    name: Tokenizer
    # Only run when the workflow executes in lightseekorg/smg; any other
    # repository (e.g. a fork) skips the job.
    if: github.repository == 'lightseekorg/smg'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
        with:
          fetch-depth: 100

      - name: Setup Rust
        uses: ./.github/actions/setup-rust

      - name: Run benchmark
        timeout-minutes: 40
        run: |
          # The default shell for run steps is `bash -e` without pipefail,
          # so the pipeline below would otherwise take tee's exit status and
          # report success even when cargo bench fails.
          set -o pipefail
          source "$HOME/.cargo/env"
          cargo bench --bench tokenizer_benchmark 2>&1 | tee benchmark_output.txt

      # The remaining steps use always() so results and diagnostics are
      # still published when the benchmark step fails or times out.
      - name: Upload benchmark results
        if: always()
        uses: actions/upload-artifact@v7
        with:
          name: tokenizer-results-${{ github.sha }}
          path: |
            target/criterion/tokenizer*/
            benchmark_output.txt
          retention-days: 30

      - name: Show sccache stats
        if: always()
        # Best effort: a failure to print stats must not fail the job.
        run: sccache --show-stats || true

      - name: Summary
        if: always()
        run: |
          # Emit the heading plus the last 100 lines of benchmark output
          # (when the file exists) through one quoted redirect.
          {
            echo "## Tokenizer Benchmark"
            if [ -f benchmark_output.txt ]; then
              echo '```'
              tail -100 benchmark_output.txt
              echo '```'
            fi
          } >> "$GITHUB_STEP_SUMMARY"