IMAGE hack for tinygrad#16335 (from tinygrad#16343) #52

Workflow file for this run

	name: Unit Tests
	env:
	# increment this when downloads substantially change to avoid the internet
	CACHE_VERSION: '19'
	CAPTURE_PROCESS_REPLAY: ${{ github.event_name == 'pull_request' && contains(github.event.pull_request.title, '[pr]') && '1' \|\| '0' }}
	GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	PYTHONPATH: ${{ github.workspace }}
	CHECK_OOB: 1

	on:
	push:
	branches:
	- master
	pull_request:
	workflow_dispatch:

	concurrency:
	group: test-${{ github.event_name }}-${{ github.event_name == 'pull_request' && github.event.pull_request.number \|\| github.run_id }}
	cancel-in-progress: ${{ github.event_name == 'pull_request' }}

	jobs:
	docs:
	name: Docs
	runs-on: &linux ${{ github.repository == 'tinygrad/tinygrad' && github.event_name == 'pull_request' && github.event.pull_request.author_association == 'COLLABORATOR' && 'namespace-profile-tinygrad' \|\| 'ubuntu-24.04' }}
	timeout-minutes: 10
	env:
	CHECK_OOB: 0
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	deps: docs
	pydeps: "capstone torch"
	- name: Build wheel and show size
	run: \|
	uv build --wheel
	ls -lh dist/*.whl
	- name: Use as an external package
	run: \|
	mkdir $HOME/test_external_dir
	cd $HOME/test_external_dir
	uv venv venv
	uv pip install --python venv $GITHUB_WORKSPACE mypy
	cp $GITHUB_WORKSPACE/examples/beautiful_mnist.py .
	venv/bin/python -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"
	venv/bin/mypy -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"
	BS=2 STEPS=10 MAX_BUFFER_SIZE=0 venv/bin/python beautiful_mnist.py
	- name: Test Docs
	run: \|
	parallel --link --tagstring '[{1}]' '{2}' \
	::: mkdocs abstractions3 readme quickstart export \
	::: 'mkdocs build --strict' \
	'python docs/abstractions3.py' \
	$'awk \'/```python/{flag=1;next}/```/{flag=0}flag\' README.md \| python' \
	$'awk \'/```python/{flag=1;next}/```/{flag=0}flag\' docs/quickstart.md \| python' \
	'DEV=CPU python examples/compile_efficientnet.py > recognize.c && clang -O2 recognize.c -lm -o recognize && cat test/models/efficientnet/Chicken.jpg \| ./recognize \| grep cock'
	- name: Test DEBUG
	run: DEBUG=100 python3 -c "from tinygrad import Tensor; N = 1024; a, b = Tensor.rand(N, N), Tensor.rand(N, N); c = (a.reshape(N, 1, N) * b.T.reshape(1, N, N)).sum(axis=2); print((c.numpy() - (a.numpy() @ b.numpy())).mean())"

	torchbackend:
	name: Torch Backend Tests
	runs-on: *linux
	timeout-minutes: 15
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: torch-backend-pillow-torchvision-et-pt
	deps: testing_unit
	pydeps: "pillow torchvision expecttest"
	llvm: 'true'
	- name: Install ninja
	run: \|
	sudo apt update \|\| true
	sudo apt install -y --no-install-recommends ninja-build
	- name: Test one op
	run: FORWARD_ONLY=1 TINY_BACKEND=1 python3 test/test_tiny.py TestTiny.test_plus
	- name: Test ResNet-18
	run: DEBUG=2 python3 extra/torch_backend/example.py
	- name: custom tests
	run: python3 -m pytest -n auto extra/torch_backend/test.py --durations=20
	- name: Test one op in torch tests
	run: DEBUG=2 python3 extra/torch_backend/torch_tests.py TestTinyBackendPRIVATEUSE1.test_unary_log_tiny_float32
	- name: Test Ops with TINY_BACKEND
	run: DEV=CPU:LLVM LLVMOPT=0 TINY_BACKEND=1 python3 -m pytest -n auto test/backend/test_ops.py --durations=20
	- name: Test in-place operations on views
	run: TORCH_DEBUG=1 python3 extra/torch_backend/test_inplace.py
	- name: Test multi-gpu
	run: DEV=CPU:LLVM GPUS=4 TORCH_DEBUG=1 python3 extra/torch_backend/test_multigpu.py
	- name: Test kernel fusion
	run: python3 extra/torch_backend/test_kernel_fusion.py


	torchbackendmore:
	name: Torch Backend Tests More
	runs-on: *linux
	timeout-minutes: 15
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: torch-backend-pillow-torchvision-et-pt
	deps: testing_unit
	llvm: 'true'
	- name: Install ninja
	run: \|
	sudo apt update \|\| true
	sudo apt install -y --no-install-recommends ninja-build
	- name: Test beautiful_mnist in torch with TINY_BACKEND
	run: STEPS=20 DEV=CPU TARGET_EVAL_ACC_PCT=90.0 MAX_BUFFER_SIZE=0 TINY_BACKEND=1 python3 examples/other_mnist/beautiful_mnist_torch.py
	- name: Test some torch tests (expect failure)
	run: python3 -m pytest extra/torch_backend/torch_tests.py -v --tb=no \|\| true

	bepython:
	name: Python Backend
	runs-on: *linux
	timeout-minutes: 15
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: be-minimal
	deps: testing_unit
	- name: Run backend tests
	run: SKIP_SLOW_TEST=1 DEV=PYTHON python3 -m pytest -n=auto test/backend/test_dtype.py test/backend/test_dtype_alu.py test/backend/test_ops.py test/backend/test_uops.py test/backend/test_symbolic_ops.py test/backend/test_renderer_failures.py::TestRendererFailures --durations=20
	- name: Test IMAGE support
	run: IMAGE=1 DEV=PYTHON python3 test/backend/test_ops.py TestOps.test_gemm TestOps.test_simple_conv2d
	- name: Test emulated METAL tensor cores
	env:
	DEV: 'PYTHON::METAL'
	run: \|
	DEBUG=2 python3 test/backend/test_ops.py TestOps.test_big_gemm
	python3 -m pytest -nauto test/opt/test_tensor_cores.py
	- name: Test emulated AMD tensor cores
	env:
	DEV: 'PYTHON::gfx1100'
	run: \|
	DEBUG=2 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
	DEBUG=2 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
	DEBUG=2 N=16 HALF=1 ACC_HALF=1 ATOL=1e-3 python3 ./extra/gemm/simple_matmul.py
	DEBUG=2 N=64 HALF=1 ACC_HALF=1 ATOL=1e-3 python3 ./extra/gemm/simple_matmul.py
	python3 -m pytest -nauto test/opt/test_tensor_cores.py
	- name: Test emulated AMD MFMA tensor cores
	env:
	DEV: 'PYTHON::gfx950'
	run: \|
	DEBUG=2 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
	python3 -m pytest -nauto test/opt/test_tensor_cores.py
	- name: Test emulated AMD RDNA4 tensor cores
	env:
	DEV: 'PYTHON::gfx1201'
	run: \|
	DEBUG=2 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
	DEBUG=2 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
	DEBUG=2 N=16 HALF=1 ACC_HALF=1 ATOL=1e-3 python3 ./extra/gemm/simple_matmul.py
	DEBUG=2 N=64 HALF=1 ACC_HALF=1 ATOL=1e-3 python3 ./extra/gemm/simple_matmul.py
	python3 -m pytest -nauto test/opt/test_tensor_cores.py
	- name: Test emulated CUDA tensor cores
	run: \|
	DEBUG=2 DEV=PYTHON::sm_80 python3 test/backend/test_ops.py TestOps.test_gemm_fp16
	DEBUG=2 ALLOW_TF32=1 DEV=PYTHON::sm_80 python3 test/backend/test_ops.py TestOps.test_gemm
	DEBUG=2 DEV=PYTHON::sm_75 python3 test/backend/test_ops.py TestOps.test_gemm_fp16
	ALLOW_TF32=1 DEV=PYTHON::sm_89 python3 -m pytest -nauto test/opt/test_tensor_cores.py
	- name: Test device flop counts
	run: \|
	DEBUG=2 DEV=PYTHON::METAL python3 ./test/null/test_uops_stats.py TestUOpsStatsMatmulHalf
	DEBUG=2 DEV=PYTHON::gfx1100 python3 ./test/null/test_uops_stats.py TestUOpsStatsMatmulHalf
	DEBUG=2 DEV=PYTHON::sm_80 python3 ./test/null/test_uops_stats.py TestUOpsStatsMatmulHalf

	linter:
	name: Linters
	runs-on: *linux
	timeout-minutes: 10

	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: linting-only
	python-version: '3.11'
	deps: linting
	- name: Lint bad-indentation and trailing-whitespace with pylint
	run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string=' ' --recursive=y .
	- name: Run pre-commit linting hooks
	run: SKIP=tiny,tests,example,mypy pre-commit run --all-files
	- name: Lint additional files with ruff
	run: \|
	python3 -m ruff check examples/mlperf/ --ignore E501
	python3 -m ruff check extra/thunder/tiny/ --ignore E501 --ignore F841 --ignore E722
	python3 -m ruff check extra/torch_backend/backend.py
	- name: Run mypy with lineprecision report
	run: \|
	python -m mypy --lineprecision-report .
	grep -v autogen lineprecision.txt \| awk 'NR>2 {lines+=$2; precise+=$3; imprecise+=$4; any+=$5; empty+=$6} END {t=lines-empty; printf "TOTAL: %d lines, %d precise (%.1f%%), %d imprecise (%.1f%%), %d any (%.1f%%)\n", t, precise, 100precise/t, imprecise, 100imprecise/t, any, 100*any/t}'
	cat lineprecision.txt
	- name: Run TYPED=1
	run: CHECK_OOB=0 DEV=CPU TYPED=1 python test/test_tiny.py

	nulltest:
	name: Null Tests
	runs-on: *linux
	timeout-minutes: 15

	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: unittest-13
	pydeps: "pillow ftfy regex pre-commit"
	deps: testing_unit
	llvm: 'true'
	amd: 'true'
	- name: Run NULL backend tests
	run: DEV=NULL python -m pytest -n=auto test/null/ --durations=20
	- name: Run targeted tests on NULL backend
	run: DEV=NULL python3 -m unittest test.backend.test_multitensor.TestMultiTensor.test_data_parallel_resnet_train_step
	# TODO: too slow
	# - name: Run SDXL on NULL backend
	# run: DEV=NULL DEBUG=1 python3 examples/sdxl.py --seed 0 --noshow --timing --fakeweights
	- name: Run Clip tests for SD MLPerf on NULL backend
	run: DEV=NULL python -m pytest -n=auto test/external/mlperf_stable_diffusion/external_test_models.py::TestOpenClip --durations=20
	- name: Run AMD emulated BERT training on NULL backend
	run: DEV=NULL::gfx1201 NULL_ALLOW_COPYOUT=1 CAPTURE_PROCESS_REPLAY=0 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=66 GPUS=1 BERT_LAYERS=2 MODEL=bert python3 examples/mlperf/model_train.py
	# TODO: support fake weights
	#- name: Run LLaMA 7B on 4 fake devices
	# run: DEV=NULL python3 examples/llama.py --gen 1 --size 7B --shard 4 --prompt "Hello." --count 3 --temperature 0 --timing

	unittest:
	name: Unit Tests
	runs-on: *linux
	timeout-minutes: 15

	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: unittest-13
	pydeps: "pillow ftfy regex pre-commit"
	deps: testing_unit
	llvm: 'true'
	- name: Run pre-commit test hooks
	run: SKIP=ruff,mypy,tests pre-commit run --all-files
	- name: Check Device.DEFAULT
	run: python -c "from tinygrad import Device; assert Device.DEFAULT == 'CPU', Device.DEFAULT"
	- name: Run unit tests
	run: \|
	DEV=CPU python test/null/test_device.py TestRunAsModule.test_module_runs
	DEV=CPU python -m pytest -n=auto test/unit/ --durations=20
	- name: Run GC tests
	run: python test/external/external_uop_gc.py
	- name: External Benchmark Schedule
	run: python3 test/external/external_benchmark_schedule.py
	- name: Run process replay tests
	uses: ./.github/actions/process-replay
	- name: Regen dataset on test_tiny
	run: \|
	test/external/process_replay/reset.py
	CAPTURE_PROCESS_REPLAY=1 python test/test_tiny.py TestTiny.test_plus
	python extra/optimization/extract_dataset.py
	gzip -c /tmp/sops > extra/datasets/sops.gz
	#DEBUG=1 MIN_ASTS=1 python extra/optimization/get_action_space.py
	- name: Repo line count < 25000 lines
	run: MAX_LINE_COUNT=25000 python sz.py

	spec:
	strategy:
	fail-fast: false
	matrix:
	group: [1, 2]
	name: SPEC=2 (${{ matrix.group }})
	runs-on: *linux
	timeout-minutes: 15
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: spec-unit
	deps: testing_unit
	llvm: 'true'
	- name: Test SPEC=2
	run: SPEC=2 pytest --maxfail=10 -n auto --durations=30 test/unit test/backend test/opt --ignore test/backend/test_custom_kernel.py --ignore test/unit/test_hashing.py --timeout 60 -k "not test_setitem_big" -k "not test_conv2d_ceildiv_edge_case" --splits 2 --group ${{ matrix.group }}

	fuzzing:
	name: Fuzzing
	runs-on: *linux
	timeout-minutes: 10
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: fuzzing-unit
	deps: testing_unit
	- name: Fuzz Test symbolic
	run: python test/external/fuzz_symbolic.py
	- name: Fuzz Test symbolic (symbolic divisors)
	run: python test/external/fuzz_symbolic_symbolic_div.py
	- name: Fuzz Test fast idiv
	run: python test/external/fuzz_fast_idiv.py
	- name: Fuzz Test shape ops
	run: python test/external/fuzz_shape_ops.py

	testopenclimage:
	name: CL IMAGE Tests
	runs-on: *linux
	timeout-minutes: 15
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: gpu-image
	deps: testing_unit
	opencl: 'true'
	- name: Test CL IMAGE=1 ops
	run: \|
	DEV=CL IMAGE=1 python -m pytest -n=auto test/backend/test_ops.py --durations=20
	DEV=CL IMAGE=1 python test/models/test_end2end.py TestEnd2End.test_linear_mnist
	- name: Run process replay tests
	uses: ./.github/actions/process-replay

	testgpumisc:
	name: CL Misc tests
	runs-on: *linux
	timeout-minutes: 10
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: gen-dataset
	deps: testing
	opencl: 'true'
	- name: Generate Dataset
	run: DEV=CL extra/optimization/generate_dataset.sh
	- name: Run Kernel Count Test
	run: DEV=CL python -m pytest -n=auto test/external/external_test_opt.py
	- name: Run fused optimizer tests
	run: DEV=CL FUSE_OPTIM=1 python -m pytest -n=auto test/models/test_mnist.py test/backend/test_optim.py -k "not muon"
	- name: Upload artifact
	uses: actions/upload-artifact@v7
	with:
	name: sops.gz
	path: /tmp/sops.gz

	testopenpilot:
	name: openpilot Compile Tests
	runs-on: *linux
	timeout-minutes: 15
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: openpilot-compile
	deps: testing
	opencl: 'true'
	llvm: 'true'
	- name: Test openpilot model kernel count and gate usage
	run: \|
	ALLOWED_KERNEL_COUNT=123 ALLOWED_READ_IMAGE=1468 ALLOWED_GATED_READ_IMAGE=18 FLOAT16=1 DEV=CL IMAGE=1 python examples/openpilot/compile3.py https://gitlab.com/commaai/openpilot-lfs.git/gitlab-lfs/objects/cf6376aa9a090f0da26c280ef69eabf9bbdd51d1faac9ed392919c3db69be916
	- name: Test openpilot CL compile fp32 (test correctness)
	run: \|
	DEV=CL IMAGE=1 SELFTEST=1 python examples/openpilot/compile3.py https://github.com/haraschax/filedump/raw/refs/heads/master/driving_vision_fp32.onnx
	DEV=CL IMAGE=1 SELFTEST=1 RUN_PICKLE=1 python examples/openpilot/compile3.py https://github.com/haraschax/filedump/raw/refs/heads/master/driving_vision_fp32.onnx
	- name: Test openpilot LLVM compile fp16
	run: IMAGE=1 FLOAT16=1 DEV=CPU:LLVM python examples/openpilot/compile3.py https://gitlab.com/commaai/openpilot-lfs.git/gitlab-lfs/objects/cf6376aa9a090f0da26c280ef69eabf9bbdd51d1faac9ed392919c3db69be916
	- name: Run process replay tests
	uses: ./.github/actions/process-replay

	# **** ONNX Tests ****

	testonnxcpu:
	name: ONNX (CPU) Tests
	runs-on: *linux
	timeout-minutes: 20

	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: onnxoptc
	deps: testing
	llvm: 'true'
	- name: Test ONNX (CPU)
	run: DEV=CPU python -m pytest -n=auto test/external/external_test_onnx_backend.py test/external/external_test_onnx_runner.py test/external/external_test_onnx_ops.py test/backend/test_quantize_onnx.py --durations=20
	- name: Run process replay tests
	uses: ./.github/actions/process-replay

	testoptim:
	name: Optimization Tests
	runs-on: *linux
	timeout-minutes: 20
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: optim
	deps: testing
	pydeps: "tensorflow==2.19"
	opencl: 'true'
	#- name: Test Optimization Helpers
	# run: DEBUG=1 python3 extra/optimization/test_helpers.py
	#- name: Test Action Space
	# run: DEBUG=1 DEV=CL python3 extra/optimization/get_action_space.py
	- name: Test Beam Search
	run: DEV=CL IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
	- name: Test MLPerf stuff
	run: DEV=CL python -m pytest -n=auto test/external/external_test_optim.py test/external/external_test_losses.py test/external/external_test_metrics.py test/external/external_test_datasets.py --durations=20
	- name: DEV=NULL beautiful_mnist_multigpu
	run: DEV=NULL NULL_ALLOW_COPYOUT=1 python examples/beautiful_mnist_multigpu.py
	- name: Test Bert training
	run: DEV=NULL NULL_ALLOW_COPYOUT=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=24 GPUS=4 BERT_LAYERS=2 MODEL=bert python3 examples/mlperf/model_train.py
	- name: Test llama 3 training
	run: DEV=NULL NULL_ALLOW_COPYOUT=1 SAMPLES=300 BS=8 SEQLEN=512 GRADIENT_ACC_STEPS=1 FAKEDATA=1 DEFAULT_FLOAT=bfloat16 OPTIM_DTYPE=bfloat16 LLAMA3_SIZE=1B MODEL=llama3 python3 examples/mlperf/model_train.py
	- name: Run process replay tests
	uses: ./.github/actions/process-replay

	testllm:
	name: Test LLM
	runs-on: *linux
	timeout-minutes: 15
	env:
	CHECK_OOB: 0
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: apps_llm
	- name: Test LLMs
	env:
	MAX_BUFFER_SIZE: 0
	run: \|
	parallel --link --tagstring '[{1}]' '{2}' \
	::: llama 'llama q4' qwen3.5 qwen \
	::: $'echo "What\'s a male chicken called? Answer with only one word." \| python3 -m tinygrad.llm --model llama3.2:1b \| tee /dev/stderr \| grep -i rooster' \
	$'echo "What\'s a male chicken called? Answer with only one word." \| python3 -m tinygrad.llm --model llama3.2:1b-q4 \| tee /dev/stderr \| grep -i rooster' \
	$'echo "What\'s a male chicken called? Answer with only one word." \| python3 -m tinygrad.llm --model qwen3.5:0.8b \| tee /dev/stderr \| grep -i rooster' \
	$'echo "What\'s a female chicken called? Answer with only one word." \| python3 -m tinygrad.llm --model qwen3:0.6b \| tee /dev/stderr \| grep -i hen'
	# NOTE: qwen is dumb and only knows about female chickens

	# **** Models Tests ****

	testmodels:
	name: Models (llvm+cpu+gpu)
	runs-on: *linux
	timeout-minutes: 15
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: models
	deps: testing
	opencl: 'true'
	llvm: 'true'
	- name: Test models (llvm)
	run: DEV=CPU:LLVM python -m pytest -n=auto test/models --durations=20
	- name: Test models (opencl)
	run: DEV=CL python -m pytest -n=auto test/models --durations=20
	- name: Test models (cpu)
	run: DEV=CPU python -m pytest -n=auto test/models --durations=20
	- name: Run process replay tests
	uses: ./.github/actions/process-replay

	testmetalmodels:
	name: Models (metal)
	runs-on: &macos macos-26
	timeout-minutes: 20
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: metal
	deps: testing
	- name: Test models (Metal)
	run: DEV=METAL python -m pytest -n=auto test/models --durations=20
	- name: Test LLaMA compile speed
	run: DEV=METAL python test/external/external_test_speed_llama.py

	# **** Feature Tests ****

	testdsp:
	name: Linux (DSP)
	runs-on: *linux
	timeout-minutes: 15
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: dsp-minimal
	deps: testing
	llvm: "true"
	qemu: "true"
	- name: Run tests
	run: MOCKDSP=1 DEV=DSP python -m pytest -n=auto test/test_tiny.py test/backend/test_transcendental.py::TestTranscendentalVectorized test/backend/test_quantize_onnx.py

	testlinux:
	strategy:
	fail-fast: false
	matrix:
	dev:
	- 'CPU:CLANG'
	- 'CPU:LLVM'
	- 'CPU:LVP'
	- 'CPU:X86'
	- 'CL'
	- 'WEBGPU'

	name: Linux (DEV=${{ matrix.dev }})
	runs-on: *linux
	timeout-minutes: 20
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: linux-${{ matrix.dev }}
	deps: testing_unit
	llvm: ${{ contains(matrix.dev, 'LLVM') \|\| contains(matrix.dev, 'LVP') \|\| contains(matrix.dev, 'CLANG') }}
	mesa: ${{ contains(matrix.dev, 'LVP') && 'cpu' \|\| 'false' }}
	webgpu: ${{ matrix.dev == 'WEBGPU' }}
	opencl: ${{ matrix.dev == 'CL' }}
	- name: Set env
	run: printf "DEV=${{ matrix.dev }}${{ matrix.dev == 'CPU:CLANG' && '\nCPU_COUNT=2' \|\| '' }}" >> $GITHUB_ENV
	- name: Check Device.DEFAULT and print some source
	run: \|
	python -c "from tinygrad import Device; from tinygrad.helpers import Target; assert Device.DEFAULT == Target.parse('${{ matrix.dev }}').device"
	DEBUG=4 python test/test_tiny.py TestTiny.test_plus
	- name: Run backend tests
	run: python -m pytest -n=auto test/backend --durations=20
	- name: Run process replay tests
	uses: ./.github/actions/process-replay

	testamdasm:
	name: AMD ASM IDE
	runs-on: *linux
	timeout-minutes: 20
	env:
	DEV: MOCKKFD+AMD
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: rdna3-emu
	deps: testing_unit
	amd: 'true'
	- name: Verify AMD autogen is up to date
	run: \|
	python -m tinygrad.renderer.amd.generate
	git diff --exit-code tinygrad/runtime/autogen/amd/
	- name: Install LLVM 21
	run: \|
	wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key \| sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc
	echo "deb http://apt.llvm.org/$(lsb_release -cs)/ llvm-toolchain-$(lsb_release -cs)-21 main" \| sudo tee /etc/apt/sources.list.d/llvm.list
	sudo apt-get update
	sudo apt-get install llvm-21 llvm-21-tools cloc
	- name: Install rocprof-trace-decoder
	run: sudo PYTHONPATH="." ./extra/sqtt/install_rocprof_decoder.py
	- name: Run AMD renderer tests
	run: python -m pytest -n=auto test/amd/ --durations 20
	- name: Run AMD renderer tests (AMD:LLVM)
	run: DEV=MOCKKFD+AMD:LLVM python -m pytest -n=auto test/amd/ --durations 20
	- name: Run SQTT profiling tests
	run: PROFILE=1 SQTT=1 python3 -m pytest -n=auto test/amd/test_sqtt_profiler.py
	- name: Run AMD emulated tests on NULL backend
	env:
	AMD: 0
	run: \|
	PYTHONPATH=. DEV=NULL:HIP:gfx1100 python extra/mmapeak/mmapeak.py
	PYTHONPATH=. DEV=NULL:HIP:gfx950 python3 -m pytest -n=auto test/testextra/test_tk.py test/backend/test_asm_gemm.py
	- name: Run matmul on MOCKKFD
	run: \|
	PYTHONPATH="." DEV=MOCKKFD+AMD N=256 python3 extra/gemm/amd_asm_matmul.py
	PYTHONPATH="." DEV=MOCKKFD+AMD N=256 python3 extra/gemm/amd_copy_matmul.py
	- name: Run LLVM test
	run: DEV=MOCKKFD+AMD:LLVM python test/device/test_amd_llvm.py

	testmockam:
	name: Linux (am)
	runs-on: *linux
	timeout-minutes: 15
	env:
	DEV: MOCKPCI+AMD
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: mockam
	deps: testing_unit
	amd: 'true'
	- name: Run test_tiny on MOCKAM
	run: python test/test_tiny.py
	- name: Run test_tiny on MOCKUSB
	run: GMMU=0 DEV=MOCKUSB+AMD python test/test_tiny.py
	- name: Run test_hcq on MOCKPCI
	run: python -m pytest test/device/test_hcq.py
	- name: Run disk copy tests on MOCKPCI
	run: python -m pytest test/unit/test_disk_tensor.py -k test_copy_from_disk
	- name: Run test_tiny on MOCKPCI Remote
	run: \|
	python extra/remote/serve.py 6667 &
	sleep 2
	REMOTE=127.0.0.1:6667 python test/test_tiny.py
	REMOTE=127.0.0.1:6667 python -m pytest test/unit/test_disk_tensor.py -k test_copy_from_disk; kill %1

	testamd:
	strategy:
	fail-fast: false
	matrix:
	backend: [amd, amdllvm]
	arch: [gfx1100, gfx1201, gfx950]

	name: Linux (${{ matrix.backend }} ${{ matrix.arch }})
	runs-on: *linux
	timeout-minutes: 15
	env:
	DEV: MOCKKFD+AMD:${{ matrix.backend == 'amdllvm' && 'LLVM' \|\| '' }}:${{ matrix.arch }}
	SKIP_SLOW_TEST: 1
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: ${{ matrix.backend }}-minimal
	deps: testing_unit
	amd: 'true'
	llvm: ${{ matrix.backend == 'amdllvm' && 'true' }}
	- name: Check Device.DEFAULT and print some source
	run: \|
	python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['AMD'], Device.DEFAULT"
	DEBUG=5 FORWARD_ONLY=1 python3 test/test_tiny.py TestTiny.test_plus
	- name: Run pytest (amd)
	run: python -m pytest -n=auto test/backend/test_ops.py test/backend/test_dtype.py test/backend/test_dtype_alu.py test/backend/test_linearizer.py test/backend/test_randomness.py test/backend/test_jit.py test/backend/test_graph.py test/backend/test_multitensor.py test/device/test_hcq.py test/external/external_test_am.py test/backend/test_asm_gemm.py::TestAsmGEMM --durations=20
	- name: Run disk copy tests
	run: python -m pytest test/unit/test_disk_tensor.py -k test_copy_from_disk
	- name: Run TRANSCENDENTAL math
	run: TRANSCENDENTAL=2 python -m pytest -n=auto test/backend/test_ops.py::TestOps::test_sin test/backend/test_ops.py::TestOps::test_cos test/backend/test_ops.py::TestOps::test_tan test/backend/test_ops.py::TestOps::test_exp test/backend/test_ops.py::TestOps::test_log --durations=20
	- name: Run process replay tests
	uses: ./.github/actions/process-replay

	testnvidia:
	strategy:
	fail-fast: false
	matrix:
	backend: [ptx, nv]

	name: Linux (${{ matrix.backend }})
	runs-on: *linux
	timeout-minutes: 20
	env:
	FORWARD_ONLY: 1
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: ${{ matrix.backend }}-minimal
	deps: testing_unit
	cuda: 'true'
	ocelot: 'true'
	- name: Set env
	run: printf "${{ matrix.backend == 'ptx' && 'DEV=MOCK+CUDA:PTX' \|\| matrix.backend == 'nv' && 'DEV=MOCK+NV\nSKIP_SLOW_TEST=1' }}" >> $GITHUB_ENV
	- name: Check Device.DEFAULT and print some source
	run: \|
	python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['CUDA','NV'], Device.DEFAULT"
	DEBUG=5 FORWARD_ONLY=1 python3 test/test_tiny.py TestTiny.test_plus
	- name: Run pytest (cuda)
	# skip multitensor because it's slow
	run: python -m pytest -n=auto test/backend --ignore test/backend/test_multitensor.py --durations=20
	- name: Run TestOps.test_add with PMA
	run: VIZ=-1 PMA=1 DEBUG=5 python3 test/backend/test_ops.py TestOps.test_add
	- name: Run process replay tests
	uses: ./.github/actions/process-replay

	# **** OSX Tests ****

	unittestmacos:
	name: MacOS (unit)
	runs-on: *macos
	timeout-minutes: 20
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: unittest-macos
	deps: testing_unit
	amd: 'true'
	ocelot: 'true'
	- name: Run unit tests
	run: DEV=METAL python -m pytest -n=auto test/unit/ --durations=20
	- name: Run NULL backend tests
	run: DEV=NULL python -m pytest -n=auto test/null/ --durations=20
	- name: Test tensor core ops (fake)
	run: DEV=METAL DEBUG=3 TC=2 python test/backend/test_ops.py TestOps.test_gemm
	- name: Test tensor core ops (real)
	run: DEV=METAL DEBUG=3 python test/backend/test_ops.py TestOps.test_big_gemm
	- name: Test Beam Search
	run: DEV=METAL IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
	- name: Test Device Specific
	run: DEV=METAL python3 -m pytest test/device/test_metal.py
	#- name: Fuzz Test linearizer
	# run: DEV=METAL DEPTH=4 FUZZ_N=50 FUZZ_MAX_SIZE=1000000 python test/external/fuzz_linearizer.py
	- name: Run pytest (amd)
	env:
	DEV: MOCKKFD+AMD
	FORWARD_ONLY: 1
	run: \|
	python3 -m pytest -n=auto test/device/test_hcq.py test/test_tiny.py --durations=20
	- name: Run pytest (ptx)
	env:
	DEV: "MOCK+NV:PTX"
	FORWARD_ONLY: 1
	# TODO: failing due to library loading error
	CAPTURE_PROCESS_REPLAY: 0
	run: \|
	python3 -m pytest -n=auto test/device/test_hcq.py test/test_tiny.py --durations=20
	- name: Run process replay tests
	uses: ./.github/actions/process-replay

	testmacos:
	strategy:
	fail-fast: false
	matrix:
	dev:
	- 'CPU:CLANG'
	- 'CPU:LLVM'
	- 'CPU:LVP'
	- 'METAL'
	- 'WEBGPU'

	name: MacOS (DEV=${{ matrix.dev }})
	runs-on: *macos
	timeout-minutes: 20
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: macos-${{ matrix.dev }}
	deps: testing_unit
	llvm: ${{ contains(matrix.dev, 'LLVM') \|\| contains(matrix.dev, 'LVP') }}
	mesa: ${{ contains(matrix.dev, 'LVP') && 'cpu' \|\| 'false' }}
	webgpu: ${{ matrix.dev == 'WEBGPU' }}
	- name: Set env
	run: printf "DEV=${{ matrix.dev }}${{ matrix.dev == 'CPU:CLANG' && '\nCPU_COUNT=2' \|\| '' }}" >> $GITHUB_ENV
	- name: Check Device.DEFAULT and print some source
	run: \|
	python -c "from tinygrad import Device; from tinygrad.helpers import Target; assert Device.DEFAULT == Target.parse('${{ matrix.dev }}').device"
	DEBUG=4 python test/test_tiny.py TestTiny.test_plus
	- name: Run backend tests
	run: python -m pytest -n=auto test/backend --durations=20
	- name: Run process replay tests
	uses: ./.github/actions/process-replay

	# **** Windows Tests ****

	testwindows:
	strategy:
	fail-fast: false
	matrix:
	dev:
	- 'CPU:CLANG'
	- 'CPU:LLVM'
	- 'CPU:X86'
	- 'WEBGPU'

	name: Windows (DEV=${{ matrix.dev }})
	runs-on: windows-2025
	timeout-minutes: 15
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: windows-${{ matrix.dev }}-minimal
	deps: testing_unit
	pydeps: ${{ matrix.dev == 'WEBGPU' && 'dawn-python' \|\| '' }}
	- name: Set env
	shell: bash
	run: printf "DEV=${{ matrix.dev }}${{ matrix.dev == 'CPU:CLANG' && '\nCPU_COUNT=2' \|\| '' }}" >> $GITHUB_ENV
	- name: Check Device.DEFAULT and print some source
	shell: bash
	run: \|
	python -c "from tinygrad import Device; from tinygrad.helpers import Target; assert Device.DEFAULT == Target.parse('${{ matrix.dev }}').device"
	DEBUG=4 python test/test_tiny.py TestTiny.test_plus
	- name: Run test_tiny
	shell: bash
	run: python -m pytest -n=auto test/test_tiny.py --durations=20

	# **** Compile-only Tests ****

	compiletests:
	strategy:
	fail-fast: false
	matrix:
	backend: [ir3, nak]
	name: Compile-only (${{ matrix.backend }})
	runs-on: *linux
	timeout-minutes: 15
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: compile-${{ matrix.backend }}
	deps: testing_unit
	mesa: ${{ (matrix.backend == 'ir3' \|\| matrix.backend == 'nak') && 'true' }}
	- name: Set env
	shell: bash
	run: printf "NULL_ALLOW_COPYOUT=1\n${{ matrix.backend == 'ir3' && 'DEV=NULL:IR3:a630' \|\| matrix.backend == 'nak' && 'DEV=NULL:NAK:sm_120' }}" >> $GITHUB_ENV
	- name: Run test_ops
	shell: bash
	run: \|
	python -c "from tinygrad import Device; assert Device.DEFAULT == 'NULL'"
	DEBUG=4 python3 test/backend/test_ops.py TestOps.test_add
	python -m pytest -n=auto test/backend/test_ops.py --durations=20
	- name: Run test_ops (IMAGE)
	if: matrix.backend == 'ir3'
	shell: bash
	env:
	IMAGE: 1
	DEV: "NULL:IR3:a630,IMAGE_PITCH_ALIGNMENT=64"
	run: \|
	DEBUG=4 python3 test/backend/test_ops.py TestOps.test_gemm \| grep image_load
	python -m pytest -n=auto test/backend/test_ops.py --durations=20
	qcomclcompiletests:
	name: Compile-only (QCOM CL)
	runs-on: ubuntu-24.04-arm
	timeout-minutes: 15
	steps:
	- name: Checkout Code
	uses: actions/checkout@v6
	- name: Setup Environment
	uses: ./.github/actions/setup-tinygrad
	with:
	key: compile-qcomcl
	deps: testing_unit
	tinydreno: 'true'
	- name: Set env
	shell: bash
	run: printf "DEV=NULL:QCOMCL:a630\nNULL_ALLOW_COPYOUT=1" >> $GITHUB_ENV
	- name: Run test_ops
	shell: bash
	run: \|
	python -c "from tinygrad import Device; assert Device.DEFAULT == 'NULL'"
	DEBUG=4 python3 test/backend/test_ops.py TestOps.test_add
	python -m pytest -n=auto test/backend/test_ops.py --durations=20
	- name: Run test_ops (IMAGE)
	shell: bash
	env:
	IMAGE: 1
	DEV: "NULL:QCOMCL:a630,IMAGE_PITCH_ALIGNMENT=64"
	run: \|
	DEBUG=4 python test/backend/test_ops.py TestOps.test_gemm \| grep read_imagef
	python -m pytest -n=auto test/backend/test_ops.py --durations=20

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

IMAGE hack for tinygrad#16335 (from tinygrad#16343) #52

Workflow file

IMAGE hack for tinygrad#16335 (from tinygrad#16343) #52

Uh oh!

Workflow file for this run