fix #39

Workflow file for this run

.github/workflows/tpu-tests.yml at 009afe5

	# Copyright 2025 Google LLC
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# https://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	# This reusable workflow installs Python dependencies and runs the Tunix test suites on a TPU runner.
	# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

	# Workflow name shown in the GitHub Actions UI.
	name: TPU Tests

	# Reusable workflow: it is started by other workflows through workflow_call
	# and requires the caller to pass the HF_TOKEN secret.
	on:
	  workflow_call:
	    secrets:
	      HF_TOKEN:
	        required: true
	        description: 'HuggingFace token for model downloads'

	concurrency:
	  # Dedup pull requests (canceling previous runs of the same workflow for same PR), and scheduled runs but nothing else
	  # Any other event falls through to github.run_id, so its group is used by that run only.
	  # The folded scalar (>-) joins the lines below into one single-line expression.
	  group: >-
	    ${{ github.event_name == 'pull_request'
	    && format('{0}-pr-{1}', github.workflow, github.event.pull_request.number)
	    || github.event_name == 'schedule'
	    && format('{0}-schedule', github.workflow)
	    || github.run_id }}
	  cancel-in-progress: true

	# Environment variables shared by every job in this workflow.
	env:
	  HF_HOME: ~/.cache/huggingface
	  HF_HUB_ENABLE_HF_TRANSFER: "1"

	jobs:
	# The run_prod job below is disabled: every line of it is commented out.
	# Indentation inside the comments reflects the intended YAML nesting, so the
	# job can be re-enabled by removing the leading "# " from each line.
	# NOTE(review): the "Run tunix cli tests" step reads secrets.KAGGLE_USERNAME
	# and secrets.KAGGLE_KEY, but the visible workflow_call header only declares
	# HF_TOKEN - confirm how those secrets are passed before re-enabling.
	# run_prod:
	#   runs-on: [linux-x86-ct5lp-224-8tpu]
	#   environment: testing
	#   container:
	#     image: us-docker.pkg.dev/tpu-prod-env-multipod/jax-stable-stack/candidate/tpu:jax0.7.1_rev1
	#     options: --privileged
	#   env:
	#     CLOUD_TPU_ACCELERATOR: v5e-8
	#     JAX_PLATFORMS: tpu
	#   steps:
	#
	#     # Cache Hugging Face hub
	#     - name: Cache HF hub
	#       uses: actions/cache@v4
	#       with:
	#         path: ~/.cache/huggingface
	#         key: hf-${{ runner.os }}-${{ hashFiles('pyproject.toml', 'requirements.txt', 'constraints.txt') }}
	#         restore-keys: |
	#           hf-${{ runner.os }}-
	#
	#     - name: Checkout code
	#       uses: actions/checkout@v4
	#       with:
	#         fetch-depth: 0
	#
	#     - name: Install tunix dependencies
	#       run: |
	#         pip install -e .[prod]
	#         pip install pytest pytest-xdist
	#
	#     - name: Verify TPU availability
	#       run: |
	#         python -c "
	#         import jax
	#         print(f'JAX version: {jax.__version__}')
	#         print(f'JAX devices: {jax.devices()}')
	#
	#         # Check if we have TPU devices specifically
	#         devices = jax.devices()
	#         has_tpu = len(devices) > 0 and all(device.platform == 'tpu' for device in devices)
	#         print(f'TPU available: {has_tpu}')
	#
	#         if not has_tpu:
	#             print('ERROR: No TPU devices found! Expected TPU devices but got:', [device.platform for device in devices])
	#             exit(1)
	#         else:
	#             print(f'SUCCESS: Found {len(devices)} TPU device(s)')
	#         "
	#
	#     - name: Run tunix model tests
	#       run: |
	#         python -m pytest tests/models/ -v --tb=short -m "not cpu_only and not gpu_only"
	#
	#     - name: Run tunix cli tests
	#       env:
	#         HF_TOKEN: ${{ secrets.HF_TOKEN }}
	#         KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
	#         KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
	#       run: |
	#         # Config tests that passed
	#         python -m pytest tests/cli/ -v --tb=short \
	#           --ignore=tests/cli/utils/model_test.py
	#
	#     - name: Run tunix generation tests (PASSED only)
	#       run: |
	#         # tokenizer_adapter_test requires access to gated repo
	#         python -m pytest tests/generate/ -v --tb=short \
	#           --ignore=tests/generate/vllm_sampler_test.py \
	#           --ignore=tests/generate/vllm_driver_test.py \
	#           --ignore=tests/generate/tokenizer_adapter_test.py \
	#           --ignore=tests/generate/sglang_jax_sampler_test.py
	#
	#     - name: Run tunix SFT tests
	#       run: |
	#         python -m pytest tests/sft/ -v --tb=short
	#
	#     - name: Run tunix SFT integration tests
	#       env:
	#         HF_TOKEN: ${{ secrets.HF_TOKEN }}
	#       run: |
	#         ./tests/sft/sft_tpu_smoke_test.sh
	#
	#     - name: Run tunix distillation tests
	#       run: |
	#         python -m pytest tests/distillation/ -v --tb=short
	#
	#     - name: Run tunix RL tests
	#       run: |
	#         # RL common tests that passed
	#         # b/448133814: test_grpo_with_lora_model fails
	#         python -m pytest tests/rl/ -v --tb=short -k "not test_grpo_with_lora_model" --ignore=tests/rl/experimental/agentic
	#
	#     - name: GRPO Integration Test
	#       env:
	#         HF_TOKEN: ${{ secrets.HF_TOKEN }}
	#       run: |
	#
	#         # Download GSM8K dataset
	#         mkdir -p /tmp/grpo_test/rl/grpo/data
	#         python3 -c "
	#         from datasets import load_dataset
	#         import json
	#
	#         # Download and save GSM8K train split
	#         dataset = load_dataset('openai/gsm8k', 'main', split='train')
	#         train_data = [{'question': item['question'], 'answer': item['answer']} for item in dataset]
	#         with open('/tmp/grpo_test/rl/grpo/data/gsm8k_train.json', 'w') as f:
	#             json.dump(train_data, f)
	#
	#         # Download and save GSM8K test split
	#         dataset = load_dataset('openai/gsm8k', 'main', split='test')
	#         test_data = [{'question': item['question'], 'answer': item['answer']} for item in dataset]
	#         with open('/tmp/grpo_test/rl/grpo/data/gsm8k_test.json', 'w') as f:
	#             json.dump(test_data, f)
	#
	#         print('GSM8K dataset downloaded successfully')
	#         "
	#
	#         # Run GRPO demo script with minimal configuration
	#         python3 scripts/grpo_demo_llama3_qwen2.py \
	#           --root-dir=/tmp/grpo_test \
	#           --model-version=Qwen/Qwen2.5-0.5B-Instruct \
	#           --num-batches=8 \
	#           --num-test-batches=4 \
	#           --rollout-engine=vanilla
	#
	#     - name: Run tunix tests not covered by the above categories
	#       run: |
	#         # This category is to catch tests added but not covered by CI yet. Whenever you add new folders under tests/, please add a new category above and skip those tests here.
	#         python -m pytest tests/ -v --tb=short --ignore=tests/models/ --ignore=tests/cli/ --ignore=tests/generate/ --ignore=tests/sft/ --ignore=tests/distillation/ --ignore=tests/rl/ || code=$?
	#         if [ "${code:-0}" = "5" ]; then
	#           echo "No tests collected (expected)."
	#           exit 0
	#         else
	#           exit "${code:-0}"
	#         fi

	# Job run_dev (an entry of the top-level jobs: mapping): runs its steps in
	# the vllm/vllm-tpu container on the linux-x86-ct5lp-224-8tpu runner.
	run_dev:
	  runs-on: [linux-x86-ct5lp-224-8tpu]
	  environment: testing
	  container:
	    image: vllm/vllm-tpu:v0.11.1
	    options: --privileged
	  # NOTE(review): env is restored here at job level; the flattened source
	  # does not show whether it was nested under container: instead - confirm.
	  env:
	    CLOUD_TPU_ACCELERATOR: v5e-8
	    JAX_PLATFORMS: tpu
	  steps:
	    # Check out before caching: hashFiles() in the cache key only sees files
	    # that already exist in the workspace, and yields an empty string when
	    # nothing matches, which would make the key the same for every run.
	    - name: Checkout code
	      uses: actions/checkout@v4
	      with:
	        fetch-depth: 0

	    # Cache Hugging Face hub
	    - name: Cache HF hub
	      uses: actions/cache@v4
	      with:
	        path: ~/.cache/huggingface
	        key: hf-${{ runner.os }}-${{ hashFiles('pyproject.toml', 'requirements.txt', 'constraints.txt') }}
	        # Fallback prefix: reuse any cache for this OS when the exact key misses.
	        restore-keys: |
	          hf-${{ runner.os }}-

Check failure on line 197 in .github/workflows/tpu-tests.yml View workflow run for this annotation GitHub Actions / .github/workflows/tpu-tests.yml Invalid workflow file `You have an error in your yaml syntax on line 197`
	# Step of the run_dev job (an entry of its steps: list): upgrades the
	# packaging tools, then installs Tunix in editable mode plus pytest.
	- name: Setup Tunix , tpu-inference and dependencies
	  run: |
	    echo "Current directory:"
	    pwd
	    pip install --upgrade pip setuptools wheel

	    # Install Tunix
	    # Remove the listed preinstalled packages first so the versions required
	    # by the editable install below are the ones that end up installed.
	    pip uninstall torch torch-xla libtpu jax jaxlib -y
	    # Quoted so the shell cannot treat [dev] as a glob character class.
	    pip install -e ".[dev]"

	    # Install tpu-inference
	    # pip uninstall torch libtpu jax jaxlib -y
	    # pip install tpu-inference==v0.11.1 --force-reinstall
	    pip install pytest pytest-xdist

	# Disabled step (every line commented out); indentation inside the comments
	# reflects the intended YAML nesting so it can be re-enabled by removing "# ".
	# - name: Run tests
	#   env:
	#     HF_TOKEN: ${{ secrets.HF_TOKEN }}
	#   run: |
	#     pytest tests/generate/vllm_driver_test.py -v --tb=short
	#     pytest tests/generate/vllm_sampler_test.py --collect-only -q --no-header --no-summary --disable-warnings | grep '::' > test_collections.txt
	#     while read -r test; do
	#       pytest "$test" -v --tb=short
	#     done < test_collections.txt

	# Step of the run_dev job (an entry of its steps: list): installs sglang-jax
	# from source next to the repository checkout, then runs the sglang-jax
	# sampler test from the repository root.
	- name: Run install sglang-jax && test
	  env:
	    HF_TOKEN: ${{ secrets.HF_TOKEN }}
	    # Quoted: environment variables are strings, so state the string explicitly.
	    DEBUG: "true"
	  run: |
	    # One command per line: the step shell runs with -e, which does not stop
	    # on a failure in the middle of an "a && b && c" chain.
	    # Remove a clone possibly left behind by an earlier run on a reused
	    # runner, so that git clone cannot fail on an existing directory.
	    rm -rf ../sglang-jax
	    git clone https://github.com/sgl-project/sglang-jax.git ../sglang-jax
	    pip install -e ../sglang-jax/python
	    # Runs from the step's default working directory (the checkout), so it
	    # does not depend on the checkout directory being named "tunix".
	    python -m pytest tests/generate/sglang_jax_sampler_test.py -v --tb=long

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

fix #39

Workflow file

fix #39

Uh oh!

Workflow file for this run

GitHub Actions / .github/workflows/tpu-tests.yml