Skip to content

Commit 081916f

Browse files
Implement python training code
Co-authored-by: Evaan Yaser Ahmed <ahmedeva@grinnell.edu>
1 parent ee0ecec commit 081916f

17 files changed

Lines changed: 2415 additions & 1 deletion

.bazelignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.direnv

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
.direnv/
22
.pre-commit-config.yaml
3-
MODULE.bazel.lock
43
bazel-*
54
compile_commands.json
65
lre.bazelrc

finetuning_on_cpu/.bazelrc

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Pick up system specific configs.
2+
common --enable_platform_specific_config
3+
4+
# Don't inherit PATH and LD_LIBRARY_PATH.
5+
build --incompatible_strict_action_env
6+
7+
# Forbid network access unless explicitly enabled.
8+
build --sandbox_default_allow_network=false
9+
10+
# Use correct runfile locations.
11+
build --nolegacy_external_runfiles
12+
13+
# Enable sandboxing for exclusive tests like GPU performance tests.
14+
test --incompatible_exclusive_test_sandboxed
15+
16+
# Simulate rules_python's initialization with a script.
17+
common --@rules_python//python/config_settings:bootstrap_impl=script
18+
19+
# Remote optimizations.
20+
build --remote_download_minimal
21+
22+
# Emit slimmer profiles. Careful: disabling this might drastically increase remote cache usage.
23+
build --slim_profile
24+
build --experimental_profile_include_target_label
25+
build --noexperimental_profile_include_primary_output
26+
27+
# Use the container image specified in `platforms/BUILD.bazel`.
28+
build:remote --extra_execution_platforms=@//platforms:python-remote
29+
build:remote --extra_toolchains=@zig_sdk//toolchain:linux_amd64_musl
30+
31+
# Add user specific flags.
32+
try-import %workspace%/user.bazelrc

finetuning_on_cpu/.bazelversion

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
8.2.1

finetuning_on_cpu/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
.DS_Store
2+
bazel-*
3+
.env
4+
user.bazelrc

finetuning_on_cpu/BUILD.bazel

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
load("@rules_python//python/uv:lock.bzl", "lock")
2+
3+
exports_files(
4+
["python_provider.sh"],
5+
visibility = ["//visibility:public"],
6+
)
7+
8+
[
9+
lock(
10+
name = "requirements_{}".format(os),
11+
srcs = ["pyproject.toml"],
12+
out = "requirements_{}.lock".format(os),
13+
build_constraints = ["@platforms//os:{}".format(os)],
14+
)
15+
for os in [
16+
"linux",
17+
"macos",
18+
]
19+
]

finetuning_on_cpu/MODULE.bazel

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
bazel_dep(name = "hermetic_cc_toolchain", version = "4.0.0")
2+
bazel_dep(name = "platforms", version = "0.0.11")
3+
bazel_dep(name = "rules_cc", version = "0.1.1")
4+
bazel_dep(name = "rules_python", version = "1.4.1")
5+
bazel_dep(name = "rules_shell", version = "0.4.1")
6+
bazel_dep(name = "bazel_skylib", version = "1.7.1")
7+
8+
# Minimal C++ toolchain required internally by rules_python.
9+
cc_toolchains = use_extension("@hermetic_cc_toolchain//toolchain:ext.bzl", "toolchains")
10+
use_repo(cc_toolchains, "zig_sdk")
11+
12+
# Python configuration
13+
python = use_extension("@rules_python//python/extensions:python.bzl", "python")
14+
python.toolchain(python_version = "3.13")
15+
16+
# Python packages
17+
pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip")
18+
pip.parse(
19+
# CPU-only torch isn't available from the regular PyPI index, so it's fetched from the PyTorch CPU wheel index instead.
20+
experimental_index_url = "https://pypi.org/simple",
21+
experimental_index_url_overrides = {
22+
"torch": "https://download.pytorch.org/whl/cpu",
23+
},
24+
hub_name = "pypi",
25+
python_version = "3.13",
26+
# This file may be regenerated via `bazel run //:requirements_macos.update`.
27+
requirements_darwin = "//:requirements_macos.lock",
28+
# This file may be regenerated via `bazel run //:requirements_linux.update`.
29+
requirements_linux = "//:requirements_linux.lock",
30+
)
31+
use_repo(pip, "pypi")
32+
33+
# UV toolchain config for regenerating the lockfile
34+
uv = use_extension(
35+
"@rules_python//python/uv:uv.bzl",
36+
"uv",
37+
dev_dependency = True,
38+
)
39+
uv.configure(
40+
# Latest version that supports the old URL schema used in rules_python.
41+
version = "0.6.5",
42+
)

finetuning_on_cpu/MODULE.bazel.lock

Lines changed: 380 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

finetuning_on_cpu/README.md

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Train a machine learning model with Bazel and NativeLink
2+
3+
An example project for training a machine learning model with Bazel.
4+
5+
## ❄️ Setup
6+
7+
1. Either have Bazelisk installed or enter the nix flake from this repository.
8+
2. Create a `user.bazelrc` in this directory and paste the values from the
9+
`Quickstart` section at <https://app.nativelink.com> into it:
10+
11+
```bash
12+
# user.bazelrc
13+
build --remote_cache=grpcs://TODO
14+
build --remote_header=x-nativelink-api-key=TODO
15+
build --bes_backend=grpcs://TODO
16+
build --bes_header=x-nativelink-api-key=TODO
17+
build --bes_results_url=https://app.nativelink.com/TODO
18+
build --remote_timeout=600
19+
build --remote_executor=grpcs://TODO
20+
```
21+
22+
## 🚄 Training
23+
24+
Run a local invocation:
25+
26+
```bash
27+
bazel run training
28+
```
29+
30+
Now run a remote invocation. The first time you do this it'll be slow (~15min)
31+
as the runner needs to spin up and fetch the various toolchains:
32+
33+
```bash
34+
bazel test training --config=remote --test_output=all
35+
```
36+
37+
Run the above command a second time. Since the runner is now warm it'll
38+
immediately start the test (~5min).
39+
40+
## ✨ Keeping dependencies up-to-date
41+
42+
The dependencies for this project are tracked in `pyproject.toml` and locked in
43+
`requirements_{linux|macos}.lock` via a Bazel-wrapped `uv` toolchain. To
44+
regenerate the lockfiles:
45+
46+
```bash
47+
# On linux
48+
bazel run //:requirements_linux.update
49+
50+
# On macos
51+
bazel run //:requirements_macos.update
52+
```
53+
54+
On macOS you might have to temporarily disable remote execution for this.
55+
56+
## 🧹 Fixing formatting and linting
57+
58+
Use the flake if you develop on this code. Don't use local ruff installations as
59+
versions other than the one used in the flake might format code differently.
60+
61+
```bash
62+
# Check lints
63+
ruff check
64+
65+
# Format
66+
ruff format
67+
```
finetuning_on_cpu/platforms/BUILD.bazel

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
platform(
2+
name = "python-remote",
3+
constraint_values = [
4+
"@platforms//os:linux",
5+
"@platforms//cpu:x86_64",
6+
],
7+
exec_properties = {
8+
"OSFamily": "Linux",
9+
"ISA": "x86-64",
10+
"container-image": "ubuntu:24.04",
11+
},
12+
)

0 commit comments

Comments
 (0)