Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .bazelignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.direnv
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
.direnv/
.pre-commit-config.yaml
MODULE.bazel.lock
bazel-*
compile_commands.json
lre.bazelrc
32 changes: 32 additions & 0 deletions finetuning_on_cpu/.bazelrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Pick up system specific configs.
common --enable_platform_specific_config

# Don't inherit PATH and LD_LIBRARY_PATH.
build --incompatible_strict_action_env

# Forbid network access unless explicitly enabled.
build --sandbox_default_allow_network=false

# Use correct runfile locations.
build --nolegacy_external_runfiles

# Enable sandboxing for exclusive tests like GPU performance tests.
test --incompatible_exclusive_test_sandboxed

# Use rules_python's script-based bootstrap to launch Python binaries.
common --@rules_python//python/config_settings:bootstrap_impl=script

# Remote optimizations.
build --remote_download_minimal

# Keep profiles small. Careful: disabling this might explode remote cache usage.
build --slim_profile
build --experimental_profile_include_target_label
build --noexperimental_profile_include_primary_output

# Use the container image specified in `platforms/BUILD.bazel`.
build:remote --extra_execution_platforms=@//platforms:python-remote
build:remote --extra_toolchains=@zig_sdk//toolchain:linux_amd64_musl

# Add user specific flags.
try-import %workspace%/user.bazelrc
1 change: 1 addition & 0 deletions finetuning_on_cpu/.bazelversion
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
8.2.1
4 changes: 4 additions & 0 deletions finetuning_on_cpu/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.DS_Store
bazel-*
.env
user.bazelrc
19 changes: 19 additions & 0 deletions finetuning_on_cpu/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
load("@rules_python//python/uv:lock.bzl", "lock")

exports_files(
["python_provider.sh"],
visibility = ["//visibility:public"],
)

# Declare one uv lock target per supported OS: `requirements_linux` and
# `requirements_macos`. Each target takes `pyproject.toml` as its source and
# writes `requirements_<os>.lock`, constrained to `@platforms//os:<os>`.
# The lockfiles may be regenerated via `bazel run //:requirements_<os>.update`.
[
lock(
name = "requirements_{}".format(os),
srcs = ["pyproject.toml"],
out = "requirements_{}.lock".format(os),
build_constraints = ["@platforms//os:{}".format(os)],
)
for os in [
"linux",
"macos",
]
]
42 changes: 42 additions & 0 deletions finetuning_on_cpu/MODULE.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
bazel_dep(name = "hermetic_cc_toolchain", version = "4.0.0")
bazel_dep(name = "platforms", version = "0.0.11")
bazel_dep(name = "rules_cc", version = "0.1.1")
bazel_dep(name = "rules_python", version = "1.4.1")
bazel_dep(name = "rules_shell", version = "0.4.1")
bazel_dep(name = "bazel_skylib", version = "1.7.1")

# Minimal C++ toolchain required internally by rules_python.
cc_toolchains = use_extension("@hermetic_cc_toolchain//toolchain:ext.bzl", "toolchains")
use_repo(cc_toolchains, "zig_sdk")

# Python configuration
python = use_extension("@rules_python//python/extensions:python.bzl", "python")
python.toolchain(python_version = "3.13")

# Python packages
pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip")
pip.parse(
# CPU-only torch wheels aren't available in the regular PyPI repositories, so
# torch is fetched from the PyTorch CPU wheel index via the override below.
experimental_index_url = "https://pypi.org/simple",
experimental_index_url_overrides = {
"torch": "https://download.pytorch.org/whl/cpu",
},
hub_name = "pypi",
python_version = "3.13",
# This file may be regenerated via `bazel run //:requirements_macos.update`.
requirements_darwin = "//:requirements_macos.lock",
# This file may be regenerated via `bazel run //:requirements_linux.update`.
requirements_linux = "//:requirements_linux.lock",
)
use_repo(pip, "pypi")

# UV toolchain config for regenerating the lockfile
uv = use_extension(
"@rules_python//python/uv:uv.bzl",
"uv",
dev_dependency = True,
)
uv.configure(
# Latest version that supports the old URL schema used in rules_python.
version = "0.6.5",
)
380 changes: 380 additions & 0 deletions finetuning_on_cpu/MODULE.bazel.lock

Large diffs are not rendered by default.

67 changes: 67 additions & 0 deletions finetuning_on_cpu/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Train a machine learning model with Bazel and NativeLink

An example project for training a machine learning model with Bazel.

## ❄️ Setup

1. Either have Bazelisk installed or enter the nix flake from this repository.
2. Create a `user.bazelrc` in this directory and paste the values from the
`Quickstart` section at <https://app.nativelink.com> into it:

```bash
# user.bazelrc
build --remote_cache=grpcs://TODO
build --remote_header=x-nativelink-api-key=TODO
build --bes_backend=grpcs://TODO
build --bes_header=x-nativelink-api-key=TODO
build --bes_results_url=https://app.nativelink.com/TODO
build --remote_timeout=600
build --remote_executor=grpcs://TODO
```

## 🚄 Training

Run a local invocation:

```bash
bazel run training
```

Now run a remote invocation. The first time you do this it'll be slow (~15min)
as the runner needs to spin up and fetch the various toolchains:

```bash
bazel test training --config=remote --test_output=all
```

Run the above command a second time. Since the runner is now warm it'll
immediately start the test (~5min).

## ✨ Keeping dependencies up-to-date

The dependencies for this project are tracked in `pyproject.toml` and locked in
`requirements_{linux|macos}.lock` via a Bazel-wrapped `uv` toolchain. To
regenerate the lockfiles:

```bash
# On linux
bazel run //:requirements_linux.update

# On macos
bazel run //:requirements_macos.update
```

On darwin you might have to disable remote execution temporarily for this.

## 🧹 Fixing formatting and linting

Use the flake if you develop on this code. Don't use local ruff installations as
versions other than the one used in the flake might format code differently.

```bash
# Check lints
ruff check

# Format
ruff format
```
12 changes: 12 additions & 0 deletions finetuning_on_cpu/platforms/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Execution platform for remote builds targeting Linux on x86_64.
# The `remote` config in `.bazelrc` registers it through
# `--extra_execution_platforms=@//platforms:python-remote`.
platform(
name = "python-remote",
constraint_values = [
"@platforms//os:linux",
"@platforms//cpu:x86_64",
],
# NOTE(review): presumably the remote execution service matches these
# properties to select a worker and the container image to run actions in —
# confirm against the NativeLink worker configuration.
exec_properties = {
"OSFamily": "Linux",
"ISA": "x86-64",
"container-image": "ubuntu:24.04",
},
)
103 changes: 103 additions & 0 deletions finetuning_on_cpu/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
[project]
name = "finetuning_on_cpu"
version = "0.1.0"
description = "An example ML training setup using Bazel"
requires-python = ">=3.13"
dependencies = [
"numpy",
"datasets",
"torch",
"transformers[torch]",
]

[tool.hatch.build.targets.wheel]
packages = ["training"]

[project.optional-dependencies]
dev = [
"ruff"
]

[tool.ruff]
line-length = 80
target-version = "py313"
namespace-packages = ["training"]

[tool.ruff.lint]
select = [
"AIR", # Airflow
"ERA", # eradicate
"FAST", # FastAPI
"YTT", # flake8-2020
"ANN", # flake8-annotations
"ASYNC", # flake8-async
"S", # flake8-bandit
"BLE", # flake8-blind-except
"FBT", # flake8-boolean-trap
"B", # flake8-bugbear
"A", # flake8-builtins
"COM", # flake8-commas
"C4", # flake8-comprehensions
"DTZ", # flake8-datetimez
"T10", # flake8-debugger
"DJ", # flake8-django
"EM", # flake8-errmsg
"EXE", # flake8-executable
"FIX", # flake8-fixme
"FA", # flake8-future-annotations
"INT", # flake8-gettext
"ISC", # flake8-implicit-str-concat
"ICN", # flake8-import-conventions
"LOG", # flake8-logging
"G", # flake8-logging-format
"INP", # flake8-no-pep420
"PIE", # flake8-pie
"T20", # flake8-print
"PYI", # flake8-pyi
"PT", # flake8-pytest-style
"Q", # flake8-quotes
"RSE", # flake8-raise
"RET", # flake8-return
"SLF", # flake8-self
"SIM", # flake8-simplify
"SLOT", # flake8-slots
"TID", # flake8-tidy-imports
"TD", # flake8-todos
"TC", # flake8-type-checking
"ARG", # flake8-unused-arguments
"PTH", # flake8-use-pathlib
"FLY", # flynt
"I", # isort
"C90", # mccabe
"NPY", # NumPy-specific rules
"PD", # pandas-vet
"N", # pep8-naming
"PERF", # Perflint
"E", # pycodestyle errors
"W", # pycodestyle warnings
"D", # pydocstyle
"F", # Pyflakes
"PGH", # pygrep-hooks
"PLC", # Pylint convention
"PLE", # Pylint error
"PLR", # Pylint refactor
"PLW", # Pylint warning
"UP", # pyupgrade
"FURB", # refurb
"RUF", # Ruff-specific rules
"TRY", # tryceratops
]
ignore = [
"COM812" # Incompatible with the formatter
]

[tool.ruff.lint.pydocstyle]
convention = "google"

[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
explicit = true

[tool.uv.sources]
torch = { index = "pytorch-cpu" }
30 changes: 30 additions & 0 deletions finetuning_on_cpu/python_provider.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
# Generic Python test wrapper.
#
# Usage: python_provider.sh <python-script-or-module> [args...]
#
# Runs the first argument with `python3`: as a script when it names an existing
# file, otherwise as a module via `python3 -m`. All remaining arguments are
# forwarded unchanged, and the interpreter's exit code becomes the exit code of
# this wrapper.
#
# NOTE(review): this wrapper was originally described as handling
# cross-architecture execution; nothing below is architecture-specific beyond
# using whichever `python3` is found on PATH — confirm the intended scope.

# Fail early with a usage hint instead of letting `shift` and `python3 -m ""`
# produce confusing errors when no target is given.
if [[ $# -lt 1 || -z "$1" ]]; then
    echo "Usage: $0 <python-script-or-module> [args...]" >&2
    exit 2
fi

# First argument is the Python module or script to run
PYTHON_SCRIPT="$1"
# Remaining arguments are passed to the Python script
shift

echo "Running Python test: $PYTHON_SCRIPT"
echo "Python version:"
python3 --version

# Check if the argument is a file or a module. The exit code is captured
# directly after each interpreter call so later edits (e.g. extra logging
# inside a branch) cannot silently overwrite it.
if [[ -f "$PYTHON_SCRIPT" ]]; then
    # Run as a script if it's a file
    echo "Running as script"
    python3 "$PYTHON_SCRIPT" "$@"
    exit_code=$?
else
    # Run as a module if it's not a file
    echo "Running as module"
    python3 -m "$PYTHON_SCRIPT" "$@"
    exit_code=$?
fi

# Report and return the interpreter's exit code
echo "Python test completed with exit code: $exit_code"
exit "$exit_code"
Loading