kaito-project
diff --git a/‎AGENTS.md‎
Lines changed: 41 additions & 0 deletions b/‎AGENTS.md‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎CLAUDE.md‎
Lines changed: 0 additions & 41 deletions b/‎CLAUDE.md‎
Lines changed: 0 additions & 41 deletions
diff --git a/‎CLAUDE.md‎
Lines changed: 1 addition & 0 deletions b/‎CLAUDE.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎CONTRIBUTING.md‎
Lines changed: 3 additions & 0 deletions b/‎CONTRIBUTING.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎Makefile‎
Lines changed: 5 additions & 0 deletions b/‎Makefile‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 11 additions & 3 deletions b/‎README.md‎
Lines changed: 11 additions & 3 deletions
diff --git a/‎charts/aikit/values.yaml‎
Lines changed: 1 addition & 1 deletion b/‎charts/aikit/values.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pkg/aikit2llb/inference/backend.go‎
Lines changed: 21 additions & 1 deletion b/‎pkg/aikit2llb/inference/backend.go‎
Lines changed: 21 additions & 1 deletion
diff --git a/‎pkg/aikit2llb/inference/backend_test.go‎
Lines changed: 9 additions & 0 deletions b/‎pkg/aikit2llb/inference/backend_test.go‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎pkg/aikit2llb/inference/convert.go‎
Lines changed: 48 additions & 2 deletions b/‎pkg/aikit2llb/inference/convert.go‎
Lines changed: 48 additions & 2 deletions
@@ -0,0 +1,41 @@
+# AGENTS.md
+
+## Build & Test Commands
+
+- `make test` — run all unit tests (`go test -v ./... -race -coverprofile=coverage.txt -covermode=atomic`)
+- `make lint` — run linter (`golangci-lint run -v ./... --timeout 5m`)
+- `make build-aikit` — build the AIKit Docker image via `docker buildx`
+- `make build-test-model` — build a test model image from a YAML aikitfile
+- Run `go mod tidy` after changing dependencies; CI verifies `go.mod`/`go.sum` are clean
+
+## Code Style & Formatting
+
+- golangci-lint v2 with formatters: `gofmt`, `gofumpt`, `goimports`, `gci` (import ordering)
+- Key linters enforced: `errcheck`, `errorlint`, `gosec`, `govet`, `staticcheck`, `revive`, `goconst`, `gocritic`, `godot`, `forcetypeassert`, `unconvert`, `unused`, `whitespace`, `misspell` (US locale)
+- Max line length: 200 characters
+- End every comment with a period (enforced by `godot`)
+- All files must end with a newline and have no trailing whitespace (pre-commit hooks)
+
+## Commit Conventions
+
+- PR titles must follow conventional commits: `feat`, `fix`, `build`, `chore`, `ci`, `docs`, `perf`, `refactor`, `revert`, `style`, `test`
+- Pre-commit hooks run: `gitleaks` (secret scanning), `golangci-lint`, `shellcheck`, `typos`
+
+## Go Conventions
+
+- Module path: `github.com/kaito-project/aikit`
+- Go 1.24.3 minimum, toolchain go1.26.1
+- Use `github.com/pkg/errors` for error wrapping (not `fmt.Errorf` with `%w`)
+- Logging via `github.com/sirupsen/logrus`
+- YAML parsing via `gopkg.in/yaml.v2`
+
+## Architecture Quick Reference
+
+- `cmd/frontend/` — BuildKit frontend entrypoint
+- `pkg/aikit/config/` — aikitfile YAML config structs and parsing
+- `pkg/aikit2llb/` — converts aikitfile configs to BuildKit LLB (inference/ and finetune/ subdirs)
+- `pkg/build/` — build orchestration and validation
+- `pkg/packager/` — OCI artifact packaging following CNCF ModelPack spec
+- `models/` — pre-made model YAML configs
+- `runners/` — runner definition YAMLs (llama-cpp-cpu, llama-cpp-cuda, vllm-cuda, diffusers-cuda)
+- `test/` — test aikitfile YAML fixtures
@@ -0,0 +1 @@
+AGENTS.md
@@ -57,6 +57,9 @@ This will automatically run linting and formatting checks before each commit.
 
 ## Building AIKit
 
+> [!TIP]
+> Build targets default to multi-platform (`linux/amd64,linux/arm64`). For local development, pass your host architecture to speed up builds and avoid multi-platform issues — e.g. `make build-aikit PLATFORMS=linux/amd64`. You should also use the `default` buildx builder (`docker buildx use default`) so that locally built images are available to subsequent builds via the `#syntax=` directive.
+
 ### Build the AIKit Binary
 
 ```bash
 
@@ -51,6 +51,11 @@ run-test-model:
 run-test-model-gpu:
 	docker run --rm -p 8080:8080 --gpus all ${REGISTRY}${REPOSITORY}/${TEST_IMAGE_NAME}:${TAG}
 
+# Run the test model image with AMD GPU access: exposes /dev/kfd and /dev/dri to the container and adds the
+# `video` group plus the numeric group that owns /dev/dri/renderD128 on the host (looked up with `stat`).
+.PHONY: run-test-model-rocm
+run-test-model-rocm:
+	docker run --rm -p 8080:8080 --device /dev/kfd --device /dev/dri --group-add video --group-add $$(stat -c '%g' /dev/dri/renderD128) \
+		${REGISTRY}${REPOSITORY}/${TEST_IMAGE_NAME}:${TAG}
+
 .PHONY: run-test-model-applesilicon
 run-test-model-applesilicon:
 	podman run --rm -p 8080:8080 --device /dev/dri ${REGISTRY}${REPOSITORY}/${TEST_IMAGE_NAME}:${TAG}
 
@@ -29,7 +29,7 @@ AIKit offers three main capabilities:
 - 🦙 Support for GGUF ([`llama`](https://github.com/ggerganov/llama.cpp)) and GGML ([`llama-ggml`](https://github.com/ggerganov/llama.cpp)) models
 - 🚢 [Kubernetes deployment ready](https://kaito-project.github.io/aikit/docs/kubernetes)
 - 📚 Supports multiple models with a single image
-- 🖥️ Supports [AMD64 and ARM64](https://kaito-project.github.io/aikit/docs/create-images#multi-platform-support) CPUs and [GPU-accelerated inferencing with NVIDIA GPUs](https://kaito-project.github.io/aikit/docs/gpu)
+- 🖥️ Supports [AMD64 and ARM64](https://kaito-project.github.io/aikit/docs/create-images#multi-platform-support) CPUs and [GPU-accelerated inferencing with NVIDIA CUDA and AMD ROCm](https://kaito-project.github.io/aikit/docs/gpu)
 - 🔐 Ensure [supply chain security](https://kaito-project.github.io/aikit/docs/security) with SBOMs, Provenance attestations, and signed images
 - 🌈 Supports air-gapped environments with self-hosted, local, or any remote container registries to store model images for inference on the edge.
 
@@ -107,9 +107,9 @@ If it doesn't include a specific model, you can always [create your own images](
 ### NVIDIA CUDA
 
 > [!NOTE]
-> To enable GPU acceleration, please see [GPU Acceleration](https://kaito-project.github.io/aikit/docs/gpu).
+> To enable NVIDIA GPU acceleration, please see [GPU Acceleration](https://kaito-project.github.io/aikit/docs/gpu).
 >
-> Please note that only difference between CPU and GPU section is the `--gpus all` flag in the command to enable GPU acceleration.
+> Published pre-made GPU images include NVIDIA CUDA libraries. For the NVIDIA CUDA commands below, the only difference from the CPU section is the `--gpus all` flag.
 
 | Model           | Optimization  | Parameters | Command                                                                                | Model Name               | License                                                                                                                     |
 | --------------- | ------------- | ---------- | -------------------------------------------------------------------------------------- | ------------------------ | --------------------------------------------------------------------------------------------------------------------------- |
@@ -127,6 +127,14 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🤖 GPT-OSS       |               | 120B       | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/kaito-project/aikit/gpt-oss:120b`  | `gpt-oss-120b`           | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/)                                                               |
 
 
+### AMD ROCm (experimental)
+
+> [!NOTE]
+> AMD GPU acceleration is currently available for custom `llama-cpp` images built with `runtime: rocm`. Published pre-made model images are currently CUDA-based, so for AMD GPUs please [create your own image](https://kaito-project.github.io/aikit/docs/create-images) and follow the ROCm instructions in [GPU Acceleration](https://kaito-project.github.io/aikit/docs/gpu).
+>
+> ROCm support currently applies to the `llama-cpp` backend on `linux/amd64`.
+
+
 ### Apple Silicon (experimental)
 
 > [!NOTE]
 
@@ -81,7 +81,7 @@ postInstall:
     enabled: true
     image:
       repository: registry.k8s.io/kubectl
-      tag: v1.35.2
+      tag: v1.35.4
       pullPolicy: IfNotPresent
       pullSecrets: []
     podSecurity: ["pod-security.kubernetes.io/audit=restricted",
 
@@ -81,7 +81,7 @@ func getBackendTag(backend, runtime string, platform specs.Platform) string {
 	baseTag := getBackendVersion(backend, runtime, platform)
 	backendName := getEffectiveBackend(backend, runtime, platform)
 
-	// Handle Apple Silicon - use Vulkan llama-cpp
+	// Handle Apple Silicon - use Vulkan llama-cpp.
 	if runtime == utils.RuntimeAppleSilicon {
 		return fmt.Sprintf("%s-%s", baseTag, vulkanLlamaCppBackend)
 	}
@@ -101,6 +101,12 @@ func getBackendTag(backend, runtime string, platform specs.Platform) string {
 		}
 	}
 
+	// Handle ROCm runtime.
+	if runtime == utils.RuntimeROCm && platform.Architecture == utils.PlatformAMD64 {
+		return fmt.Sprintf("%s-gpu-rocm-hipblas-llama-cpp", localAIROCmBackendVersion)
+	}
+
+	// Handle CPU runtime (default).
 	return fmt.Sprintf("%s-cpu-llama-cpp", baseTag)
 }
 
@@ -131,6 +137,12 @@ func getBackendName(backend, runtime string, platform specs.Platform) string {
 		}
 	}
 
+	// Handle ROCm runtime
+	if runtime == utils.RuntimeROCm && platform.Architecture == utils.PlatformAMD64 {
+		// Only llama-cpp backend is supported for ROCm
+		return "hipblas-llama-cpp"
+	}
+
 	// Handle CPU runtime (default)
 	return cpuLlamaCppBackend
 }
@@ -220,6 +232,14 @@ func installBackends(c *config.InferenceConfig, platform specs.Platform, s llb.S
 			cpuConfig.Runtime = "cpu" // Use CPU runtime to force CPU backend installation
 			merge = installBackend(backend, &cpuConfig, platform, s, merge)
 		}
+
+		// For llama-cpp backend with ROCm runtime, also install the CPU version for fallback
+		if backend == utils.BackendLlamaCpp && c.Runtime == utils.RuntimeROCm && platform.Architecture == utils.PlatformAMD64 {
+			// Create a modified config with CPU runtime to install the CPU version
+			cpuConfig := *c
+			cpuConfig.Runtime = "cpu" // Use CPU runtime to force CPU backend installation
+			merge = installBackend(backend, &cpuConfig, platform, s, merge)
+		}
 	}
 
 	return merge
 
@@ -98,6 +98,15 @@ func TestGetBackendTag(t *testing.T) {
 			},
 			want: fmt.Sprintf("%s-gpu-nvidia-cuda-12-llama-cpp", localAILlamaCppBackendVersion),
 		},
+		{
+			name:    "ROCm llama-cpp",
+			backend: utils.BackendLlamaCpp,
+			runtime: utils.RuntimeROCm,
+			platform: specs.Platform{
+				Architecture: utils.PlatformAMD64,
+			},
+			want: fmt.Sprintf("%s-gpu-rocm-hipblas-llama-cpp", localAIROCmBackendVersion),
+		},
 		{
 			name:    "Empty backend name defaults to CPU llama-cpp",
 			backend: "",
 
@@ -17,16 +17,22 @@ const (
 	localAIBinaryVersion          = "v4.0.0"
 	localAILlamaCppBackendVersion = localAIBinaryVersion
 	localAILegacyBackendVersion   = "v3.12.1"
+	localAIROCmBackendVersion     = "rocm7"
 	localAIRepo                   = "ghcr.io/kaito-project/aikit/localai:"
 	cudaVersion                   = "12-5"
+	rocmVersion                   = "7.2"
 )
 
 // Aikit2LLB converts an InferenceConfig to an LLB state.
 func Aikit2LLB(c *config.InferenceConfig, platform *specs.Platform) (llb.State, *specs.Image, error) {
 	var merge, state llb.State
-	if c.Runtime == utils.RuntimeAppleSilicon {
+	switch c.Runtime {
+	case utils.RuntimeAppleSilicon:
 		state = llb.Image(utils.AppleSiliconBase, llb.Platform(*platform))
-	} else {
+	case utils.RuntimeROCm:
+		// Use Ubuntu 24.04 for ROCm to match noble repository
+		state = llb.Image(utils.Ubuntu24Base, llb.Platform(*platform))
+	default:
 		state = llb.Image(utils.UbuntuBase, llb.Platform(*platform))
 	}
 	base := getBaseImage(c, platform)
@@ -55,6 +61,11 @@ func Aikit2LLB(c *config.InferenceConfig, platform *specs.Platform) (llb.State,
 		state, merge = installCuda(c, state, merge)
 	}
 
+	// install rocm if runtime is rocm and architecture is amd64
+	if c.Runtime == utils.RuntimeROCm && platform.Architecture == utils.PlatformAMD64 {
+		state, merge = installRocm(c, state, merge)
+	}
+
 	// install backend dependencies
 	merge = installBackends(c, *platform, state, merge)
 
@@ -67,6 +78,10 @@ func getBaseImage(c *config.InferenceConfig, platform *specs.Platform) llb.State
 	if c.Runtime == utils.RuntimeAppleSilicon {
 		return llb.Image(utils.AppleSiliconBase, llb.Platform(*platform))
 	}
+	if c.Runtime == utils.RuntimeROCm {
+		// Use Ubuntu 24.04 for ROCm to match noble repository.
+		return llb.Image(utils.Ubuntu24Base, llb.Platform(*platform))
+	}
 	if len(c.Backends) > 0 {
 		return llb.Image(utils.UbuntuBase, llb.Platform(*platform))
 	}
@@ -155,6 +170,37 @@ func installCuda(c *config.InferenceConfig, s llb.State, merge llb.State) (llb.S
 	return s, llb.Merge([]llb.State{merge, diff})
 }
 
+// installRocm registers the AMD ROCm apt repositories on top of s and, when the llama-cpp backend is in use
+// (no backends configured, or llama-cpp listed explicitly), installs the ROCm packages and pciutils.
+//
+// It returns the updated state together with merge extended by the filesystem diff between the incoming
+// state and the updated one.
+func installRocm(c *config.InferenceConfig, s llb.State, merge llb.State) (llb.State, llb.State) {
+	savedState := s
+
+	// Install the tools needed to download and register the ROCm repository key.
+	s = s.Run(utils.Sh("apt-get update && apt-get install --no-install-recommends -y ca-certificates curl gnupg"), llb.IgnoreCache).Root()
+
+	// Add the ROCm GPG key and the rocm and graphics apt sources for rocmVersion.
+	s = s.Run(utils.Sh("curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm.gpg")).Root()
+	s = s.Run(utils.Shf("echo 'deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm.gpg] https://repo.radeon.com/rocm/apt/%s/ noble main' >> /etc/apt/sources.list.d/rocm.list", rocmVersion)).Root()
+	s = s.Run(utils.Shf("echo 'deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm.gpg] https://repo.radeon.com/graphics/%s/ubuntu noble main' >> /etc/apt/sources.list.d/rocm.list", rocmVersion)).Root()
+
+	// Pin packages originating from repo.radeon.com at priority 600.
+	rocmPinning := `
+Package: *
+Pin: release o=repo.radeon.com
+Pin-Priority: 600
+`
+	s = s.Run(utils.Shf("echo '%s' > /etc/apt/preferences.d/repo-radeon-pin-600", rocmPinning)).Root()
+	s = s.Run(utils.Sh("apt-get update"), llb.IgnoreCache).Root()
+
+	// Install rocm libraries and pciutils for gpu detection when using the default
+	// llama-cpp backend or when it is configured explicitly.
+	if len(c.Backends) == 0 || slices.Contains(c.Backends, utils.BackendLlamaCpp) {
+		s = s.Run(utils.Sh("apt-get install -y pciutils rocm && apt-get clean")).Root()
+
+		// hipblaslt soname compatibility: backend may be linked against .so.0 while ROCm 7.2 ships .so.1.
+		// This runs only after the rocm install above: /opt/rocm/lib is presumably created by that package,
+		// so running the `set -e; cd` unconditionally would fail the build when rocm was not installed.
+		s = s.Run(utils.Sh("set -e; cd /opt/rocm/lib; [ -e libhipblaslt.so.0 ] || ln -sf libhipblaslt.so.1 libhipblaslt.so.0")).Root()
+	}
+
+	diff := llb.Diff(savedState, s)
+	return s, llb.Merge([]llb.State{merge, diff})
+}
+
 // addLocalAI adds the LocalAI binary to the image.
 func addLocalAI(c *config.InferenceConfig, s llb.State, merge llb.State, platform specs.Platform) (llb.State, llb.State, error) {
 	artifactVersion := getLocalAIArtifactVersion(c, platform)
Original file line number	Diff line number	Diff line change
`@@ -81,7 +81,7 @@ func getBackendTag(backend, runtime string, platform specs.Platform) string {`
`81`	`81`	`baseTag := getBackendVersion(backend, runtime, platform)`
`82`	`82`	`backendName := getEffectiveBackend(backend, runtime, platform)`
`83`	`83`
`84`		`- // Handle Apple Silicon - use Vulkan llama-cpp`
	`84`	`+ // Handle Apple Silicon - use Vulkan llama-cpp.`
`85`	`85`	`if runtime == utils.RuntimeAppleSilicon {`
`86`	`86`	`return fmt.Sprintf("%s-%s", baseTag, vulkanLlamaCppBackend)`
`87`	`87`	`}`
`@@ -101,6 +101,12 @@ func getBackendTag(backend, runtime string, platform specs.Platform) string {`
`101`	`101`	`}`
`102`	`102`	`}`
`103`	`103`
	`104`	`+ // Handle ROCm runtime.`
	`105`	`+ if runtime == utils.RuntimeROCm && platform.Architecture == utils.PlatformAMD64 {`
	`106`	`+ return fmt.Sprintf("%s-gpu-rocm-hipblas-llama-cpp", localAIROCmBackendVersion)`
	`107`	`+ }`
	`108`	`+`
	`109`	`+ // Handle CPU runtime (default).`
`104`	`110`	`return fmt.Sprintf("%s-cpu-llama-cpp", baseTag)`
`105`	`111`	`}`
`106`	`112`
`@@ -131,6 +137,12 @@ func getBackendName(backend, runtime string, platform specs.Platform) string {`
`131`	`137`	`}`
`132`	`138`	`}`
`133`	`139`
	`140`	`+ // Handle ROCm runtime`
	`141`	`+ if runtime == utils.RuntimeROCm && platform.Architecture == utils.PlatformAMD64 {`
	`142`	`+ // Only llama-cpp backend is supported for ROCm`
	`143`	`+ return "hipblas-llama-cpp"`
	`144`	`+ }`
	`145`	`+`
`134`	`146`	`// Handle CPU runtime (default)`
`135`	`147`	`return cpuLlamaCppBackend`
`136`	`148`	`}`
`@@ -220,6 +232,14 @@ func installBackends(c *config.InferenceConfig, platform specs.Platform, s llb.S`
`220`	`232`	`cpuConfig.Runtime = "cpu" // Use CPU runtime to force CPU backend installation`
`221`	`233`	`merge = installBackend(backend, &cpuConfig, platform, s, merge)`
`222`	`234`	`}`
	`235`	`+`
	`236`	`+ // For llama-cpp backend with ROCm runtime, also install the CPU version for fallback`
	`237`	`+ if backend == utils.BackendLlamaCpp && c.Runtime == utils.RuntimeROCm && platform.Architecture == utils.PlatformAMD64 {`
	`238`	`+ // Create a modified config with CPU runtime to install the CPU version`
	`239`	`+ cpuConfig := *c`
	`240`	`+ cpuConfig.Runtime = "cpu" // Use CPU runtime to force CPU backend installation`
	`241`	`+ merge = installBackend(backend, &cpuConfig, platform, s, merge)`
	`242`	`+ }`
`223`	`243`	`}`
`224`	`244`
`225`	`245`	`return merge`