IMAGE hack for tinygrad#16335 (from tinygrad#16343) #52
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header: workflow-wide environment, triggers, and concurrency rules.
| name: Unit Tests | |
| env: | |
| # increment this when downloads substantially change to avoid the internet | |
| CACHE_VERSION: '19' | |
# Evaluates to '1' only for pull_request events whose title contains '[pr]'; '0' otherwise.
| CAPTURE_PROCESS_REPLAY: ${{ github.event_name == 'pull_request' && contains(github.event.pull_request.title, '[pr]') && '1' || '0' }} | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| PYTHONPATH: ${{ github.workspace }} | |
# Workflow-wide default; the docs and testllm jobs set CHECK_OOB to 0 in their own env.
| CHECK_OOB: 1 | |
# Triggers: pushes to master, every pull request, and manual dispatch.
| on: | |
| push: | |
| branches: | |
| - master | |
| pull_request: | |
| workflow_dispatch: | |
# Concurrency group is "test-<event>-<PR number>" for pull requests and
# "test-<event>-<run id>" for everything else; only pull_request runs cancel
# an in-progress run in the same group.
| concurrency: | |
| group: test-${{ github.event_name }}-${{ github.event_name == 'pull_request' && github.event.pull_request.number || github.run_id }} | |
| cancel-in-progress: ${{ github.event_name == 'pull_request' }} | |
| jobs: | |
# Docs job: builds the wheel, installs the checkout into a fresh venv as an
# external package, runs the documentation snippets, and runs one matmul with DEBUG=100.
| docs: | |
| name: Docs | |
# Defines the `linux` anchor that later jobs reference as `*linux`. It resolves to
# 'namespace-profile-tinygrad' for pull requests in tinygrad/tinygrad whose
# author_association is COLLABORATOR, and to 'ubuntu-24.04' otherwise.
| runs-on: &linux ${{ github.repository == 'tinygrad/tinygrad' && github.event_name == 'pull_request' && github.event.pull_request.author_association == 'COLLABORATOR' && 'namespace-profile-tinygrad' || 'ubuntu-24.04' }} | |
| timeout-minutes: 10 | |
# Overrides the workflow-level CHECK_OOB value for this job.
| env: | |
| CHECK_OOB: 0 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| deps: docs | |
| pydeps: "capstone torch" | |
| - name: Build wheel and show size | |
| run: | | |
| uv build --wheel | |
| ls -lh dist/*.whl | |
# Installs the workspace plus mypy into a venv under $HOME, then runs the same
# one-line Tensor snippet through both python and mypy and a short beautiful_mnist run.
| - name: Use as an external package | |
| run: | | |
| mkdir $HOME/test_external_dir | |
| cd $HOME/test_external_dir | |
| uv venv venv | |
| uv pip install --python venv $GITHUB_WORKSPACE mypy | |
| cp $GITHUB_WORKSPACE/examples/beautiful_mnist.py . | |
| venv/bin/python -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))" | |
| venv/bin/mypy -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))" | |
| BS=2 STEPS=10 MAX_BUFFER_SIZE=0 venv/bin/python beautiful_mnist.py | |
# Five labels (first ::: list) are paired with five commands (second ::: list);
# each command's output is tagged with its label via --tagstring.
| - name: Test Docs | |
| run: | | |
| parallel --link --tagstring '[{1}]' '{2}' \ | |
| ::: mkdocs abstractions3 readme quickstart export \ | |
| ::: 'mkdocs build --strict' \ | |
| 'python docs/abstractions3.py' \ | |
| $'awk \'/```python/{flag=1;next}/```/{flag=0}flag\' README.md | python' \ | |
| $'awk \'/```python/{flag=1;next}/```/{flag=0}flag\' docs/quickstart.md | python' \ | |
| 'DEV=CPU python examples/compile_efficientnet.py > recognize.c && clang -O2 recognize.c -lm -o recognize && cat test/models/efficientnet/Chicken.jpg | ./recognize | grep cock' | |
| - name: Test DEBUG | |
| run: DEBUG=100 python3 -c "from tinygrad import Tensor; N = 1024; a, b = Tensor.rand(N, N), Tensor.rand(N, N); c = (a.reshape(N, 1, N) * b.T.reshape(1, N, N)).sum(axis=2); print((c.numpy() - (a.numpy() @ b.numpy())).mean())" | |
# Torch backend job: runs the extra/torch_backend example and test scripts, plus
# test/backend/test_ops.py with TINY_BACKEND=1.
| torchbackend: | |
| name: Torch Backend Tests | |
| runs-on: *linux | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: torch-backend-pillow-torchvision-et-pt | |
| deps: testing_unit | |
| pydeps: "pillow torchvision expecttest" | |
| llvm: 'true' | |
# A failing `apt update` is tolerated (`|| true`); the install itself must succeed.
| - name: Install ninja | |
| run: | | |
| sudo apt update || true | |
| sudo apt install -y --no-install-recommends ninja-build | |
| - name: Test one op | |
| run: FORWARD_ONLY=1 TINY_BACKEND=1 python3 test/test_tiny.py TestTiny.test_plus | |
| - name: Test ResNet-18 | |
| run: DEBUG=2 python3 extra/torch_backend/example.py | |
| - name: custom tests | |
| run: python3 -m pytest -n auto extra/torch_backend/test.py --durations=20 | |
| - name: Test one op in torch tests | |
| run: DEBUG=2 python3 extra/torch_backend/torch_tests.py TestTinyBackendPRIVATEUSE1.test_unary_log_tiny_float32 | |
| - name: Test Ops with TINY_BACKEND | |
| run: DEV=CPU:LLVM LLVMOPT=0 TINY_BACKEND=1 python3 -m pytest -n auto test/backend/test_ops.py --durations=20 | |
| - name: Test in-place operations on views | |
| run: TORCH_DEBUG=1 python3 extra/torch_backend/test_inplace.py | |
| - name: Test multi-gpu | |
| run: DEV=CPU:LLVM GPUS=4 TORCH_DEBUG=1 python3 extra/torch_backend/test_multigpu.py | |
| - name: Test kernel fusion | |
| run: python3 extra/torch_backend/test_kernel_fusion.py | |
# Second torch backend job: trains beautiful_mnist_torch with TINY_BACKEND=1 and
# runs the full torch_tests.py suite in non-blocking mode.
| torchbackendmore: | |
| name: Torch Backend Tests More | |
| runs-on: *linux | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
# NOTE(review): uses the same `key` as the torchbackend job but passes no `pydeps`;
# confirm the setup action tolerates two dependency sets under one key.
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: torch-backend-pillow-torchvision-et-pt | |
| deps: testing_unit | |
| llvm: 'true' | |
# A failing `apt update` is tolerated (`|| true`); the install itself must succeed.
| - name: Install ninja | |
| run: | | |
| sudo apt update || true | |
| sudo apt install -y --no-install-recommends ninja-build | |
| - name: Test beautiful_mnist in torch with TINY_BACKEND | |
| run: STEPS=20 DEV=CPU TARGET_EVAL_ACC_PCT=90.0 MAX_BUFFER_SIZE=0 TINY_BACKEND=1 python3 examples/other_mnist/beautiful_mnist_torch.py | |
# Informational only: `|| true` keeps the step green whatever pytest returns.
| - name: Test some torch tests (expect failure) | |
| run: python3 -m pytest extra/torch_backend/torch_tests.py -v --tb=no || true | |
# Python backend job: runs backend tests with DEV=PYTHON, then the emulated
# tensor-core targets (METAL, gfx1100, gfx950, gfx1201, sm_75/sm_80/sm_89).
| bepython: | |
| name: Python Backend | |
| runs-on: *linux | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: be-minimal | |
| deps: testing_unit | |
| - name: Run backend tests | |
| run: SKIP_SLOW_TEST=1 DEV=PYTHON python3 -m pytest -n=auto test/backend/test_dtype.py test/backend/test_dtype_alu.py test/backend/test_ops.py test/backend/test_uops.py test/backend/test_symbolic_ops.py test/backend/test_renderer_failures.py::TestRendererFailures --durations=20 | |
| - name: Test IMAGE support | |
| run: IMAGE=1 DEV=PYTHON python3 test/backend/test_ops.py TestOps.test_gemm TestOps.test_simple_conv2d | |
# The next four steps select the emulated target through a step-level DEV value.
| - name: Test emulated METAL tensor cores | |
| env: | |
| DEV: 'PYTHON::METAL' | |
| run: | | |
| DEBUG=2 python3 test/backend/test_ops.py TestOps.test_big_gemm | |
| python3 -m pytest -nauto test/opt/test_tensor_cores.py | |
| - name: Test emulated AMD tensor cores | |
| env: | |
| DEV: 'PYTHON::gfx1100' | |
| run: | | |
| DEBUG=2 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py | |
| DEBUG=2 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py | |
| DEBUG=2 N=16 HALF=1 ACC_HALF=1 ATOL=1e-3 python3 ./extra/gemm/simple_matmul.py | |
| DEBUG=2 N=64 HALF=1 ACC_HALF=1 ATOL=1e-3 python3 ./extra/gemm/simple_matmul.py | |
| python3 -m pytest -nauto test/opt/test_tensor_cores.py | |
| - name: Test emulated AMD MFMA tensor cores | |
| env: | |
| DEV: 'PYTHON::gfx950' | |
| run: | | |
| DEBUG=2 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py | |
| python3 -m pytest -nauto test/opt/test_tensor_cores.py | |
| - name: Test emulated AMD RDNA4 tensor cores | |
| env: | |
| DEV: 'PYTHON::gfx1201' | |
| run: | | |
| DEBUG=2 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py | |
| DEBUG=2 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py | |
| DEBUG=2 N=16 HALF=1 ACC_HALF=1 ATOL=1e-3 python3 ./extra/gemm/simple_matmul.py | |
| DEBUG=2 N=64 HALF=1 ACC_HALF=1 ATOL=1e-3 python3 ./extra/gemm/simple_matmul.py | |
| python3 -m pytest -nauto test/opt/test_tensor_cores.py | |
# The CUDA and flop-count steps set DEV inline on each command instead of via step env.
| - name: Test emulated CUDA tensor cores | |
| run: | | |
| DEBUG=2 DEV=PYTHON::sm_80 python3 test/backend/test_ops.py TestOps.test_gemm_fp16 | |
| DEBUG=2 ALLOW_TF32=1 DEV=PYTHON::sm_80 python3 test/backend/test_ops.py TestOps.test_gemm | |
| DEBUG=2 DEV=PYTHON::sm_75 python3 test/backend/test_ops.py TestOps.test_gemm_fp16 | |
| ALLOW_TF32=1 DEV=PYTHON::sm_89 python3 -m pytest -nauto test/opt/test_tensor_cores.py | |
| - name: Test device flop counts | |
| run: | | |
| DEBUG=2 DEV=PYTHON::METAL python3 ./test/null/test_uops_stats.py TestUOpsStatsMatmulHalf | |
| DEBUG=2 DEV=PYTHON::gfx1100 python3 ./test/null/test_uops_stats.py TestUOpsStatsMatmulHalf | |
| DEBUG=2 DEV=PYTHON::sm_80 python3 ./test/null/test_uops_stats.py TestUOpsStatsMatmulHalf | |
# Linters job: pylint (W0311 and C0303 only), pre-commit hooks, ruff on a few
# extra paths, a mypy line-precision report, and test_tiny with TYPED=1.
| linter: | |
| name: Linters | |
| runs-on: *linux | |
| timeout-minutes: 10 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: linting-only | |
| python-version: '3.11' | |
| deps: linting | |
| - name: Lint bad-indentation and trailing-whitespace with pylint | |
| run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string=' ' --recursive=y . | |
# SKIP lists the hook ids that pre-commit should not run in this step.
| - name: Run pre-commit linting hooks | |
| run: SKIP=tiny,tests,example,mypy pre-commit run --all-files | |
| - name: Lint additional files with ruff | |
| run: | | |
| python3 -m ruff check examples/mlperf/ --ignore E501 | |
| python3 -m ruff check extra/thunder/tiny/ --ignore E501 --ignore F841 --ignore E722 | |
| python3 -m ruff check extra/torch_backend/backend.py | |
# Prints a TOTAL line aggregated from lineprecision.txt (rows mentioning "autogen"
# are filtered out first), then dumps the full report.
| - name: Run mypy with lineprecision report | |
| run: | | |
| python -m mypy --lineprecision-report . | |
| grep -v autogen lineprecision.txt | awk 'NR>2 {lines+=$2; precise+=$3; imprecise+=$4; any+=$5; empty+=$6} END {t=lines-empty; printf "TOTAL: %d lines, %d precise (%.1f%%), %d imprecise (%.1f%%), %d any (%.1f%%)\n", t, precise, 100*precise/t, imprecise, 100*imprecise/t, any, 100*any/t}' | |
| cat lineprecision.txt | |
| - name: Run TYPED=1 | |
| run: CHECK_OOB=0 DEV=CPU TYPED=1 python test/test_tiny.py | |
# Null tests job: runs test/null and a few heavier workloads with DEV=NULL.
| nulltest: | |
| name: Null Tests | |
| runs-on: *linux | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
# NOTE(review): shares `key: unittest-13` with the unittest job, which does not
# pass `amd: 'true'`; confirm the setup action handles that under one key.
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: unittest-13 | |
| pydeps: "pillow ftfy regex pre-commit" | |
| deps: testing_unit | |
| llvm: 'true' | |
| amd: 'true' | |
| - name: Run NULL backend tests | |
| run: DEV=NULL python -m pytest -n=auto test/null/ --durations=20 | |
| - name: Run targeted tests on NULL backend | |
| run: DEV=NULL python3 -m unittest test.backend.test_multitensor.TestMultiTensor.test_data_parallel_resnet_train_step | |
| # TODO: too slow | |
| # - name: Run SDXL on NULL backend | |
| # run: DEV=NULL DEBUG=1 python3 examples/sdxl.py --seed 0 --noshow --timing --fakeweights | |
| - name: Run Clip tests for SD MLPerf on NULL backend | |
| run: DEV=NULL python -m pytest -n=auto test/external/mlperf_stable_diffusion/external_test_models.py::TestOpenClip --durations=20 | |
# This step forces CAPTURE_PROCESS_REPLAY=0 inline, overriding the workflow-level value.
| - name: Run AMD emulated BERT training on NULL backend | |
| run: DEV=NULL::gfx1201 NULL_ALLOW_COPYOUT=1 CAPTURE_PROCESS_REPLAY=0 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=66 GPUS=1 BERT_LAYERS=2 MODEL=bert python3 examples/mlperf/model_train.py | |
| # TODO: support fake weights | |
| #- name: Run LLaMA 7B on 4 fake devices | |
| # run: DEV=NULL python3 examples/llama.py --gen 1 --size 7B --shard 4 --prompt "Hello." --count 3 --temperature 0 --timing | |
# Unit tests job: pre-commit test hooks, test/unit on DEV=CPU, GC and schedule
# benchmarks, process replay, dataset regeneration, and the repo line-count limit.
| unittest: | |
| name: Unit Tests | |
| runs-on: *linux | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: unittest-13 | |
| pydeps: "pillow ftfy regex pre-commit" | |
| deps: testing_unit | |
| llvm: 'true' | |
| - name: Run pre-commit test hooks | |
| run: SKIP=ruff,mypy,tests pre-commit run --all-files | |
# With no DEV set, the default device is asserted to be 'CPU'.
| - name: Check Device.DEFAULT | |
| run: python -c "from tinygrad import Device; assert Device.DEFAULT == 'CPU', Device.DEFAULT" | |
| - name: Run unit tests | |
| run: | | |
| DEV=CPU python test/null/test_device.py TestRunAsModule.test_module_runs | |
| DEV=CPU python -m pytest -n=auto test/unit/ --durations=20 | |
| - name: Run GC tests | |
| run: python test/external/external_uop_gc.py | |
| - name: External Benchmark Schedule | |
| run: python3 test/external/external_benchmark_schedule.py | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
# Resets process replay, captures one test_tiny case, extracts the dataset, and
# gzips /tmp/sops into extra/datasets/sops.gz.
| - name: Regen dataset on test_tiny | |
| run: | | |
| test/external/process_replay/reset.py | |
| CAPTURE_PROCESS_REPLAY=1 python test/test_tiny.py TestTiny.test_plus | |
| python extra/optimization/extract_dataset.py | |
| gzip -c /tmp/sops > extra/datasets/sops.gz | |
| #DEBUG=1 MIN_ASTS=1 python extra/optimization/get_action_space.py | |
| - name: Repo line count < 25000 lines | |
| run: MAX_LINE_COUNT=25000 python sz.py | |
# SPEC=2 job: runs test/unit, test/backend and test/opt with SPEC=2, split into
# two groups selected by the matrix (`--splits 2 --group <n>`).
| spec: | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| group: [1, 2] | |
| name: SPEC=2 (${{ matrix.group }}) | |
| runs-on: *linux | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: spec-unit | |
| deps: testing_unit | |
| llvm: 'true' | |
# Both exclusions are combined into ONE -k expression: pytest keeps only the last
# -k it receives, so two separate -k flags would silently drop the first exclusion.
| - name: Test SPEC=2 | |
| run: SPEC=2 pytest --maxfail=10 -n auto --durations=30 test/unit test/backend test/opt --ignore test/backend/test_custom_kernel.py --ignore test/unit/test_hashing.py --timeout 60 -k "not test_setitem_big and not test_conv2d_ceildiv_edge_case" --splits 2 --group ${{ matrix.group }} | |
# Fuzzing job: runs the four fuzz scripts under test/external one after another.
| fuzzing: | |
| name: Fuzzing | |
| runs-on: *linux | |
| timeout-minutes: 10 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: fuzzing-unit | |
| deps: testing_unit | |
| - name: Fuzz Test symbolic | |
| run: python test/external/fuzz_symbolic.py | |
| - name: Fuzz Test symbolic (symbolic divisors) | |
| run: python test/external/fuzz_symbolic_symbolic_div.py | |
| - name: Fuzz Test fast idiv | |
| run: python test/external/fuzz_fast_idiv.py | |
| - name: Fuzz Test shape ops | |
| run: python test/external/fuzz_shape_ops.py | |
# CL IMAGE job: runs test_ops and one end-to-end MNIST test with DEV=CL IMAGE=1,
# followed by the process replay action.
| testopenclimage: | |
| name: CL IMAGE Tests | |
| runs-on: *linux | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: gpu-image | |
| deps: testing_unit | |
| opencl: 'true' | |
| - name: Test CL IMAGE=1 ops | |
| run: | | |
| DEV=CL IMAGE=1 python -m pytest -n=auto test/backend/test_ops.py --durations=20 | |
| DEV=CL IMAGE=1 python test/models/test_end2end.py TestEnd2End.test_linear_mnist | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
# CL misc job: generates the dataset, runs the kernel-count and fused-optimizer
# tests on DEV=CL, and uploads /tmp/sops.gz as an artifact.
| testgpumisc: | |
| name: CL Misc tests | |
| runs-on: *linux | |
| timeout-minutes: 10 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: gen-dataset | |
| deps: testing | |
| opencl: 'true' | |
| - name: Generate Dataset | |
| run: DEV=CL extra/optimization/generate_dataset.sh | |
| - name: Run Kernel Count Test | |
| run: DEV=CL python -m pytest -n=auto test/external/external_test_opt.py | |
# `-k "not muon"` deselects tests whose name matches "muon".
| - name: Run fused optimizer tests | |
| run: DEV=CL FUSE_OPTIM=1 python -m pytest -n=auto test/models/test_mnist.py test/backend/test_optim.py -k "not muon" | |
# NOTE(review): presumably /tmp/sops.gz is written by generate_dataset.sh above — confirm.
| - name: Upload artifact | |
| uses: actions/upload-artifact@v7 | |
| with: | |
| name: sops.gz | |
| path: /tmp/sops.gz | |
# openpilot job: compiles openpilot models through examples/openpilot/compile3.py
# on DEV=CL (fp16 limits check, fp32 self-test) and on DEV=CPU:LLVM (fp16).
| testopenpilot: | |
| name: openpilot Compile Tests | |
| runs-on: *linux | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: openpilot-compile | |
| deps: testing | |
| opencl: 'true' | |
| llvm: 'true' | |
# The ALLOWED_* variables carry the kernel-count and image-read budgets passed to compile3.py.
| - name: Test openpilot model kernel count and gate usage | |
| run: | | |
| ALLOWED_KERNEL_COUNT=123 ALLOWED_READ_IMAGE=1468 ALLOWED_GATED_READ_IMAGE=18 FLOAT16=1 DEV=CL IMAGE=1 python examples/openpilot/compile3.py https://gitlab.com/commaai/openpilot-lfs.git/gitlab-lfs/objects/cf6376aa9a090f0da26c280ef69eabf9bbdd51d1faac9ed392919c3db69be916 | |
# Same fp32 model twice: once with SELFTEST=1 only, once with RUN_PICKLE=1 added.
| - name: Test openpilot CL compile fp32 (test correctness) | |
| run: | | |
| DEV=CL IMAGE=1 SELFTEST=1 python examples/openpilot/compile3.py https://github.com/haraschax/filedump/raw/refs/heads/master/driving_vision_fp32.onnx | |
| DEV=CL IMAGE=1 SELFTEST=1 RUN_PICKLE=1 python examples/openpilot/compile3.py https://github.com/haraschax/filedump/raw/refs/heads/master/driving_vision_fp32.onnx | |
| - name: Test openpilot LLVM compile fp16 | |
| run: IMAGE=1 FLOAT16=1 DEV=CPU:LLVM python examples/openpilot/compile3.py https://gitlab.com/commaai/openpilot-lfs.git/gitlab-lfs/objects/cf6376aa9a090f0da26c280ef69eabf9bbdd51d1faac9ed392919c3db69be916 | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
| # ****** ONNX Tests ****** | |
# ONNX job: runs the external ONNX backend/runner/ops tests and the quantized
# ONNX test on DEV=CPU, followed by the process replay action.
| testonnxcpu: | |
| name: ONNX (CPU) Tests | |
| runs-on: *linux | |
| timeout-minutes: 20 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: onnxoptc | |
| deps: testing | |
| llvm: 'true' | |
| - name: Test ONNX (CPU) | |
| run: DEV=CPU python -m pytest -n=auto test/external/external_test_onnx_backend.py test/external/external_test_onnx_runner.py test/external/external_test_onnx_ops.py test/backend/test_quantize_onnx.py --durations=20 | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
# Optimization job: beam search and MLPerf helper tests on DEV=CL, then
# multi-GPU MNIST, BERT and llama3 training runs on DEV=NULL.
| testoptim: | |
| name: Optimization Tests | |
| runs-on: *linux | |
| timeout-minutes: 20 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: optim | |
| deps: testing | |
| pydeps: "tensorflow==2.19" | |
| opencl: 'true' | |
| #- name: Test Optimization Helpers | |
| # run: DEBUG=1 python3 extra/optimization/test_helpers.py | |
| #- name: Test Action Space | |
| # run: DEBUG=1 DEV=CL python3 extra/optimization/get_action_space.py | |
| - name: Test Beam Search | |
| run: DEV=CL IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py | |
| - name: Test MLPerf stuff | |
| run: DEV=CL python -m pytest -n=auto test/external/external_test_optim.py test/external/external_test_losses.py test/external/external_test_metrics.py test/external/external_test_datasets.py --durations=20 | |
# The three training steps below all run with DEV=NULL and NULL_ALLOW_COPYOUT=1.
| - name: DEV=NULL beautiful_mnist_multigpu | |
| run: DEV=NULL NULL_ALLOW_COPYOUT=1 python examples/beautiful_mnist_multigpu.py | |
| - name: Test Bert training | |
| run: DEV=NULL NULL_ALLOW_COPYOUT=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=24 GPUS=4 BERT_LAYERS=2 MODEL=bert python3 examples/mlperf/model_train.py | |
| - name: Test llama 3 training | |
| run: DEV=NULL NULL_ALLOW_COPYOUT=1 SAMPLES=300 BS=8 SEQLEN=512 GRADIENT_ACC_STEPS=1 FAKEDATA=1 DEFAULT_FLOAT=bfloat16 OPTIM_DTYPE=bfloat16 LLAMA3_SIZE=1B MODEL=llama3 python3 examples/mlperf/model_train.py | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
# LLM job: pipes a one-word question into `python3 -m tinygrad.llm` for four
# models in parallel and greps each answer (case-insensitively) for the expected word.
| testllm: | |
| name: Test LLM | |
| runs-on: *linux | |
| timeout-minutes: 15 | |
# Overrides the workflow-level CHECK_OOB value for this job.
| env: | |
| CHECK_OOB: 0 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: apps_llm | |
# Four labels (first ::: list) are paired with four commands (second ::: list).
# `tee /dev/stderr` echoes the model output to the log before grep filters it.
| - name: Test LLMs | |
| env: | |
| MAX_BUFFER_SIZE: 0 | |
| run: | | |
| parallel --link --tagstring '[{1}]' '{2}' \ | |
| ::: llama 'llama q4' qwen3.5 qwen \ | |
| ::: $'echo "What\'s a male chicken called? Answer with only one word." | python3 -m tinygrad.llm --model llama3.2:1b | tee /dev/stderr | grep -i rooster' \ | |
| $'echo "What\'s a male chicken called? Answer with only one word." | python3 -m tinygrad.llm --model llama3.2:1b-q4 | tee /dev/stderr | grep -i rooster' \ | |
| $'echo "What\'s a male chicken called? Answer with only one word." | python3 -m tinygrad.llm --model qwen3.5:0.8b | tee /dev/stderr | grep -i rooster' \ | |
| $'echo "What\'s a female chicken called? Answer with only one word." | python3 -m tinygrad.llm --model qwen3:0.6b | tee /dev/stderr | grep -i hen' | |
| # NOTE: qwen is dumb and only knows about female chickens | |
| # ****** Models Tests ****** | |
# Models job: runs test/models three times, once each with DEV=CPU:LLVM, DEV=CL
# and DEV=CPU, followed by the process replay action.
| testmodels: | |
| name: Models (llvm+cpu+gpu) | |
| runs-on: *linux | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: models | |
| deps: testing | |
| opencl: 'true' | |
| llvm: 'true' | |
| - name: Test models (llvm) | |
| run: DEV=CPU:LLVM python -m pytest -n=auto test/models --durations=20 | |
| - name: Test models (opencl) | |
| run: DEV=CL python -m pytest -n=auto test/models --durations=20 | |
| - name: Test models (cpu) | |
| run: DEV=CPU python -m pytest -n=auto test/models --durations=20 | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
# Metal models job: runs test/models and the LLaMA compile-speed test with DEV=METAL.
| testmetalmodels: | |
| name: Models (metal) | |
# Defines the `macos` anchor (macos-26) that later macOS jobs reference as `*macos`.
| runs-on: &macos macos-26 | |
| timeout-minutes: 20 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: metal | |
| deps: testing | |
| - name: Test models (Metal) | |
| run: DEV=METAL python -m pytest -n=auto test/models --durations=20 | |
| - name: Test LLaMA compile speed | |
| run: DEV=METAL python test/external/external_test_speed_llama.py | |
| # ****** Feature Tests ****** | |
# DSP job: runs test_tiny, the vectorized transcendental tests and the quantized
# ONNX tests with MOCKDSP=1 DEV=DSP; setup requests llvm and qemu.
| testdsp: | |
| name: Linux (DSP) | |
| runs-on: *linux | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: dsp-minimal | |
| deps: testing | |
| llvm: "true" | |
| qemu: "true" | |
| - name: Run tests | |
| run: MOCKDSP=1 DEV=DSP python -m pytest -n=auto test/test_tiny.py test/backend/test_transcendental.py::TestTranscendentalVectorized test/backend/test_quantize_onnx.py | |
# Linux backend matrix: runs test/backend once per DEV value listed below;
# fail-fast is off, so one failing device does not cancel the others.
| testlinux: | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| dev: | |
| - 'CPU:CLANG' | |
| - 'CPU:LLVM' | |
| - 'CPU:LVP' | |
| - 'CPU:X86' | |
| - 'CL' | |
| - 'WEBGPU' | |
| name: Linux (DEV=${{ matrix.dev }}) | |
| runs-on: *linux | |
| timeout-minutes: 20 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
# Toolchain inputs are derived from the matrix value: llvm for LLVM/LVP/CLANG,
# mesa 'cpu' for LVP (else 'false'), webgpu for WEBGPU, opencl for CL.
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: linux-${{ matrix.dev }} | |
| deps: testing_unit | |
| llvm: ${{ contains(matrix.dev, 'LLVM') || contains(matrix.dev, 'LVP') || contains(matrix.dev, 'CLANG') }} | |
| mesa: ${{ contains(matrix.dev, 'LVP') && 'cpu' || 'false' }} | |
| webgpu: ${{ matrix.dev == 'WEBGPU' }} | |
| opencl: ${{ matrix.dev == 'CL' }} | |
# Appends DEV=<matrix value> to $GITHUB_ENV for the following steps; CPU:CLANG
# additionally gets CPU_COUNT=2 on a second line.
| - name: Set env | |
| run: printf "DEV=${{ matrix.dev }}${{ matrix.dev == 'CPU:CLANG' && '\nCPU_COUNT=2' || '' }}" >> $GITHUB_ENV | |
| - name: Check Device.DEFAULT and print some source | |
| run: | | |
| python -c "from tinygrad import Device; from tinygrad.helpers import Target; assert Device.DEFAULT == Target.parse('${{ matrix.dev }}').device" | |
| DEBUG=4 python test/test_tiny.py TestTiny.test_plus | |
| - name: Run backend tests | |
| run: python -m pytest -n=auto test/backend --durations=20 | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
# AMD ASM job: checks the AMD autogen output is current, installs LLVM 21 and the
# rocprof trace decoder, then runs the test/amd suite and related scripts.
| testamdasm: | |
| name: AMD ASM IDE | |
| runs-on: *linux | |
| timeout-minutes: 20 | |
# Job-wide default device; individual commands below override DEV inline where needed.
| env: | |
| DEV: MOCKKFD+AMD | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: rdna3-emu | |
| deps: testing_unit | |
| amd: 'true' | |
# Regenerates the AMD autogen files and fails if the result differs from what is committed.
| - name: Verify AMD autogen is up to date | |
| run: | | |
| python -m tinygrad.renderer.amd.generate | |
| git diff --exit-code tinygrad/runtime/autogen/amd/ | |
# `-y` keeps the install non-interactive, matching the other apt installs in this
# workflow; without it apt-get can stop at a confirmation prompt on a runner image
# that is not preconfigured to assume yes.
| - name: Install LLVM 21 | |
| run: | | |
| wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc | |
| echo "deb http://apt.llvm.org/$(lsb_release -cs)/ llvm-toolchain-$(lsb_release -cs)-21 main" | sudo tee /etc/apt/sources.list.d/llvm.list | |
| sudo apt-get update | |
| sudo apt-get install -y llvm-21 llvm-21-tools cloc | |
| - name: Install rocprof-trace-decoder | |
| run: sudo PYTHONPATH="." ./extra/sqtt/install_rocprof_decoder.py | |
| - name: Run AMD renderer tests | |
| run: python -m pytest -n=auto test/amd/ --durations 20 | |
| - name: Run AMD renderer tests (AMD:LLVM) | |
| run: DEV=MOCKKFD+AMD:LLVM python -m pytest -n=auto test/amd/ --durations 20 | |
| - name: Run SQTT profiling tests | |
| run: PROFILE=1 SQTT=1 python3 -m pytest -n=auto test/amd/test_sqtt_profiler.py | |
# This step sets AMD=0 and runs each command with an inline DEV=NULL:HIP:<arch>.
| - name: Run AMD emulated tests on NULL backend | |
| env: | |
| AMD: 0 | |
| run: | | |
| PYTHONPATH=. DEV=NULL:HIP:gfx1100 python extra/mmapeak/mmapeak.py | |
| PYTHONPATH=. DEV=NULL:HIP:gfx950 python3 -m pytest -n=auto test/testextra/test_tk.py test/backend/test_asm_gemm.py | |
| - name: Run matmul on MOCKKFD | |
| run: | | |
| PYTHONPATH="." DEV=MOCKKFD+AMD N=256 python3 extra/gemm/amd_asm_matmul.py | |
| PYTHONPATH="." DEV=MOCKKFD+AMD N=256 python3 extra/gemm/amd_copy_matmul.py | |
| - name: Run LLVM test | |
| run: DEV=MOCKKFD+AMD:LLVM python test/device/test_amd_llvm.py | |
# Mock AM job: runs test_tiny, test_hcq and the disk-copy tests with
# DEV=MOCKPCI+AMD, plus test_tiny on MOCKUSB and through a local remote server.
| testmockam: | |
| name: Linux (am) | |
| runs-on: *linux | |
| timeout-minutes: 15 | |
# Job-wide default device; the MOCKUSB step overrides DEV inline.
| env: | |
| DEV: MOCKPCI+AMD | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: mockam | |
| deps: testing_unit | |
| amd: 'true' | |
| - name: Run test_tiny on MOCKAM | |
| run: python test/test_tiny.py | |
| - name: Run test_tiny on MOCKUSB | |
| run: GMMU=0 DEV=MOCKUSB+AMD python test/test_tiny.py | |
| - name: Run test_hcq on MOCKPCI | |
| run: python -m pytest test/device/test_hcq.py | |
| - name: Run disk copy tests on MOCKPCI | |
| run: python -m pytest test/unit/test_disk_tensor.py -k test_copy_from_disk | |
# Starts extra/remote/serve.py on port 6667 in the background, waits two seconds,
# runs the tests with REMOTE pointing at it, then kills the background job (`kill %1`).
| - name: Run test_tiny on MOCKPCI Remote | |
| run: | | |
| python extra/remote/serve.py 6667 & | |
| sleep 2 | |
| REMOTE=127.0.0.1:6667 python test/test_tiny.py | |
| REMOTE=127.0.0.1:6667 python -m pytest test/unit/test_disk_tensor.py -k test_copy_from_disk; kill %1 | |
# AMD matrix job: two backends (amd, amdllvm) times three architectures, each
# running a fixed list of backend/device tests on the MOCKKFD device.
| testamd: | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| backend: [amd, amdllvm] | |
| arch: [gfx1100, gfx1201, gfx950] | |
| name: Linux (${{ matrix.backend }} ${{ matrix.arch }}) | |
| runs-on: *linux | |
| timeout-minutes: 15 | |
# DEV is "MOCKKFD+AMD:<renderer>:<arch>", where <renderer> is 'LLVM' for the
# amdllvm backend and empty for amd.
| env: | |
| DEV: MOCKKFD+AMD:${{ matrix.backend == 'amdllvm' && 'LLVM' || '' }}:${{ matrix.arch }} | |
| SKIP_SLOW_TEST: 1 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
# The llvm input is 'true' only for the amdllvm backend.
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: ${{ matrix.backend }}-minimal | |
| deps: testing_unit | |
| amd: 'true' | |
| llvm: ${{ matrix.backend == 'amdllvm' && 'true' }} | |
| - name: Check Device.DEFAULT and print some source | |
| run: | | |
| python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['AMD'], Device.DEFAULT" | |
| DEBUG=5 FORWARD_ONLY=1 python3 test/test_tiny.py TestTiny.test_plus | |
| - name: Run pytest (amd) | |
| run: python -m pytest -n=auto test/backend/test_ops.py test/backend/test_dtype.py test/backend/test_dtype_alu.py test/backend/test_linearizer.py test/backend/test_randomness.py test/backend/test_jit.py test/backend/test_graph.py test/backend/test_multitensor.py test/device/test_hcq.py test/external/external_test_am.py test/backend/test_asm_gemm.py::TestAsmGEMM --durations=20 | |
| - name: Run disk copy tests | |
| run: python -m pytest test/unit/test_disk_tensor.py -k test_copy_from_disk | |
# Re-runs the sin/cos/tan/exp/log op tests with TRANSCENDENTAL=2.
| - name: Run TRANSCENDENTAL math | |
| run: TRANSCENDENTAL=2 python -m pytest -n=auto test/backend/test_ops.py::TestOps::test_sin test/backend/test_ops.py::TestOps::test_cos test/backend/test_ops.py::TestOps::test_tan test/backend/test_ops.py::TestOps::test_exp test/backend/test_ops.py::TestOps::test_log --durations=20 | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
# NVIDIA matrix job: runs test/backend (minus test_multitensor) on mocked devices
# for the ptx and nv backends, with FORWARD_ONLY=1 set job-wide.
| testnvidia: | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| backend: [ptx, nv] | |
| name: Linux (${{ matrix.backend }}) | |
| runs-on: *linux | |
| timeout-minutes: 20 | |
| env: | |
| FORWARD_ONLY: 1 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: ${{ matrix.backend }}-minimal | |
| deps: testing_unit | |
| cuda: 'true' | |
| ocelot: 'true' | |
# Appends to $GITHUB_ENV: DEV=MOCK+CUDA:PTX for ptx; DEV=MOCK+NV plus
# SKIP_SLOW_TEST=1 for nv.
| - name: Set env | |
| run: printf "${{ matrix.backend == 'ptx' && 'DEV=MOCK+CUDA:PTX' || matrix.backend == 'nv' && 'DEV=MOCK+NV\nSKIP_SLOW_TEST=1' }}" >> $GITHUB_ENV | |
| - name: Check Device.DEFAULT and print some source | |
| run: | | |
| python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['CUDA','NV'], Device.DEFAULT" | |
| DEBUG=5 FORWARD_ONLY=1 python3 test/test_tiny.py TestTiny.test_plus | |
| - name: Run pytest (cuda) | |
| # skip multitensor because it's slow | |
| run: python -m pytest -n=auto test/backend --ignore test/backend/test_multitensor.py --durations=20 | |
| - name: Run TestOps.test_add with PMA | |
| run: VIZ=-1 PMA=1 DEBUG=5 python3 test/backend/test_ops.py TestOps.test_add | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
| # ****** OSX Tests ****** | |
# macOS unit job: runs test/unit on DEV=METAL, test/null on DEV=NULL, several
# Metal-specific checks, and test_hcq/test_tiny on mocked AMD and NV devices.
| unittestmacos: | |
| name: MacOS (unit) | |
| runs-on: *macos | |
| timeout-minutes: 20 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: unittest-macos | |
| deps: testing_unit | |
| amd: 'true' | |
| ocelot: 'true' | |
| - name: Run unit tests | |
| run: DEV=METAL python -m pytest -n=auto test/unit/ --durations=20 | |
| - name: Run NULL backend tests | |
| run: DEV=NULL python -m pytest -n=auto test/null/ --durations=20 | |
| - name: Test tensor core ops (fake) | |
| run: DEV=METAL DEBUG=3 TC=2 python test/backend/test_ops.py TestOps.test_gemm | |
| - name: Test tensor core ops (real) | |
| run: DEV=METAL DEBUG=3 python test/backend/test_ops.py TestOps.test_big_gemm | |
| - name: Test Beam Search | |
| run: DEV=METAL IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py | |
| - name: Test Device Specific | |
| run: DEV=METAL python3 -m pytest test/device/test_metal.py | |
| #- name: Fuzz Test linearizer | |
| # run: DEV=METAL DEPTH=4 FUZZ_N=50 FUZZ_MAX_SIZE=1000000 python test/external/fuzz_linearizer.py | |
# The two steps below pick their mocked device through step-level env.
| - name: Run pytest (amd) | |
| env: | |
| DEV: MOCKKFD+AMD | |
| FORWARD_ONLY: 1 | |
| run: | | |
| python3 -m pytest -n=auto test/device/test_hcq.py test/test_tiny.py --durations=20 | |
| - name: Run pytest (ptx) | |
| env: | |
| DEV: "MOCK+NV:PTX" | |
| FORWARD_ONLY: 1 | |
| # TODO: failing due to library loading error | |
| CAPTURE_PROCESS_REPLAY: 0 | |
| run: | | |
| python3 -m pytest -n=auto test/device/test_hcq.py test/test_tiny.py --durations=20 | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
# macOS backend matrix: runs test/backend once per DEV value listed below;
# fail-fast is off, so one failing device does not cancel the others.
| testmacos: | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| dev: | |
| - 'CPU:CLANG' | |
| - 'CPU:LLVM' | |
| - 'CPU:LVP' | |
| - 'METAL' | |
| - 'WEBGPU' | |
| name: MacOS (DEV=${{ matrix.dev }}) | |
| runs-on: *macos | |
| timeout-minutes: 20 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
# Unlike the Linux matrix, the llvm input here is requested for LLVM and LVP only
# (CLANG is not in the condition).
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: macos-${{ matrix.dev }} | |
| deps: testing_unit | |
| llvm: ${{ contains(matrix.dev, 'LLVM') || contains(matrix.dev, 'LVP') }} | |
| mesa: ${{ contains(matrix.dev, 'LVP') && 'cpu' || 'false' }} | |
| webgpu: ${{ matrix.dev == 'WEBGPU' }} | |
# Appends DEV=<matrix value> to $GITHUB_ENV for the following steps; CPU:CLANG
# additionally gets CPU_COUNT=2 on a second line.
| - name: Set env | |
| run: printf "DEV=${{ matrix.dev }}${{ matrix.dev == 'CPU:CLANG' && '\nCPU_COUNT=2' || '' }}" >> $GITHUB_ENV | |
| - name: Check Device.DEFAULT and print some source | |
| run: | | |
| python -c "from tinygrad import Device; from tinygrad.helpers import Target; assert Device.DEFAULT == Target.parse('${{ matrix.dev }}').device" | |
| DEBUG=4 python test/test_tiny.py TestTiny.test_plus | |
| - name: Run backend tests | |
| run: python -m pytest -n=auto test/backend --durations=20 | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
| # ****** Windows Tests ****** | |
| # Windows job: runs only test/test_tiny.py once per DEV value listed in the matrix below. | |
| testwindows: | |
| strategy: | |
| # fail-fast is off so a failure for one DEV entry does not cancel the other matrix entries | |
| fail-fast: false | |
| matrix: | |
| dev: | |
| - 'CPU:CLANG' | |
| - 'CPU:LLVM' | |
| - 'CPU:X86' | |
| - 'WEBGPU' | |
| name: Windows (DEV=${{ matrix.dev }}) | |
| runs-on: windows-2025 | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| # Inputs passed to the repo-local setup action: dawn-python is requested as an extra | |
| # Python dependency only for the WEBGPU entry; every other entry passes an empty string. | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: windows-${{ matrix.dev }}-minimal | |
| deps: testing_unit | |
| pydeps: ${{ matrix.dev == 'WEBGPU' && 'dawn-python' || '' }} | |
| # Every run step below sets shell: bash because the commands use POSIX shell syntax | |
| # (VAR=value command prefixes, >> $GITHUB_ENV); the default shell on Windows runners is not bash. | |
| # Appends DEV=<matrix value> to $GITHUB_ENV so the following steps run with it set; | |
| # the CPU:CLANG entry additionally gets CPU_COUNT=2 (printf turns the \n into a line break). | |
| - name: Set env | |
| shell: bash | |
| run: printf "DEV=${{ matrix.dev }}${{ matrix.dev == 'CPU:CLANG' && '\nCPU_COUNT=2' || '' }}" >> $GITHUB_ENV | |
| # Sanity check: asserts Device.DEFAULT equals the device that Target.parse derives from the | |
| # matrix value, then runs a single test with DEBUG=4. | |
| - name: Check Device.DEFAULT and print some source | |
| shell: bash | |
| run: | | |
| python -c "from tinygrad import Device; from tinygrad.helpers import Target; assert Device.DEFAULT == Target.parse('${{ matrix.dev }}').device" | |
| DEBUG=4 python test/test_tiny.py TestTiny.test_plus | |
| # -n=auto runs tests in parallel workers (pytest-xdist); --durations=20 reports the 20 slowest tests | |
| - name: Run test_tiny | |
| shell: bash | |
| run: python -m pytest -n=auto test/test_tiny.py --durations=20 | |
| # ****** Compile-only Tests ****** | |
| # Compile-only job (per its display name): runs test/backend/test_ops.py with DEV pointing at a | |
| # NULL:<backend>:<arch> target, once for each backend in the matrix. | |
| compiletests: | |
| strategy: | |
| # fail-fast is off so a failure for one backend does not cancel the other | |
| fail-fast: false | |
| matrix: | |
| backend: [ir3, nak] | |
| name: Compile-only (${{ matrix.backend }}) | |
| # YAML alias to the runner expression anchored as &linux on the docs job's runs-on | |
| runs-on: *linux | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| # Inputs passed to the repo-local setup action; the mesa expression evaluates to 'true' | |
| # for both backends currently in the matrix. | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: compile-${{ matrix.backend }} | |
| deps: testing_unit | |
| mesa: ${{ (matrix.backend == 'ir3' || matrix.backend == 'nak') && 'true' }} | |
| # Appends NULL_ALLOW_COPYOUT=1 plus a backend-specific DEV to $GITHUB_ENV for the following steps: | |
| # ir3 gets DEV=NULL:IR3:a630 and nak gets DEV=NULL:NAK:sm_120. | |
| - name: Set env | |
| shell: bash | |
| run: printf "NULL_ALLOW_COPYOUT=1\n${{ matrix.backend == 'ir3' && 'DEV=NULL:IR3:a630' || matrix.backend == 'nak' && 'DEV=NULL:NAK:sm_120' }}" >> $GITHUB_ENV | |
| # Asserts the default device is 'NULL', runs test_add once with DEBUG=4, then runs the whole | |
| # test_ops file under pytest (-n=auto: parallel workers; --durations=20: 20 slowest tests). | |
| - name: Run test_ops | |
| shell: bash | |
| run: | | |
| python -c "from tinygrad import Device; assert Device.DEFAULT == 'NULL'" | |
| DEBUG=4 python3 test/backend/test_ops.py TestOps.test_add | |
| python -m pytest -n=auto test/backend/test_ops.py --durations=20 | |
| # ir3 only: re-runs test_ops with IMAGE=1 and a step-level DEV that adds IMAGE_PITCH_ALIGNMENT=64. | |
| # The grep doubles as an assertion: it exits non-zero when the DEBUG=4 output of test_gemm | |
| # contains no "image_load", which fails the step before the full pytest run. | |
| - name: Run test_ops (IMAGE) | |
| if: matrix.backend == 'ir3' | |
| shell: bash | |
| env: | |
| IMAGE: 1 | |
| DEV: "NULL:IR3:a630,IMAGE_PITCH_ALIGNMENT=64" | |
| run: | | |
| DEBUG=4 python3 test/backend/test_ops.py TestOps.test_gemm | grep image_load | |
| python -m pytest -n=auto test/backend/test_ops.py --durations=20 | |
| # Compile-only job (per its display name) for the QCOMCL target: runs test/backend/test_ops.py | |
| # with DEV=NULL:QCOMCL:a630 on an ARM Ubuntu runner, first as-is and then again with IMAGE=1. | |
| qcomclcompiletests: | |
| name: Compile-only (QCOM CL) | |
| runs-on: ubuntu-24.04-arm | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v6 | |
| # Inputs passed to the repo-local setup action. | |
| # NOTE(review): tinydreno presumably installs the compiler this job needs - confirm in the action | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: compile-qcomcl | |
| deps: testing_unit | |
| tinydreno: 'true' | |
| # Appends DEV=NULL:QCOMCL:a630 and NULL_ALLOW_COPYOUT=1 to $GITHUB_ENV for the following steps | |
| - name: Set env | |
| shell: bash | |
| run: printf "DEV=NULL:QCOMCL:a630\nNULL_ALLOW_COPYOUT=1" >> $GITHUB_ENV | |
| # Asserts the default device is 'NULL', runs test_add once with DEBUG=4, then runs the whole | |
| # test_ops file under pytest (-n=auto: parallel workers; --durations=20: 20 slowest tests). | |
| - name: Run test_ops | |
| shell: bash | |
| run: | | |
| python -c "from tinygrad import Device; assert Device.DEFAULT == 'NULL'" | |
| DEBUG=4 python3 test/backend/test_ops.py TestOps.test_add | |
| python -m pytest -n=auto test/backend/test_ops.py --durations=20 | |
| # Re-runs test_ops with IMAGE=1 and a step-level DEV that adds IMAGE_PITCH_ALIGNMENT=64. | |
| # The grep doubles as an assertion: it exits non-zero when the DEBUG=4 output of test_gemm | |
| # contains no "read_imagef", which fails the step before the full pytest run. | |
| - name: Run test_ops (IMAGE) | |
| shell: bash | |
| env: | |
| IMAGE: 1 | |
| DEV: "NULL:QCOMCL:a630,IMAGE_PITCH_ALIGNMENT=64" | |
| run: | | |
| DEBUG=4 python test/backend/test_ops.py TestOps.test_gemm | grep read_imagef | |
| python -m pytest -n=auto test/backend/test_ops.py --durations=20 | |