kaito-project
diff --git a/‎CONTRIBUTING.md‎
Lines changed: 3 additions & 0 deletions b/‎CONTRIBUTING.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎Makefile‎
Lines changed: 5 additions & 0 deletions b/‎Makefile‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 11 additions & 3 deletions b/‎README.md‎
Lines changed: 11 additions & 3 deletions
diff --git a/‎pkg/aikit2llb/inference/backend.go‎
Lines changed: 21 additions & 1 deletion b/‎pkg/aikit2llb/inference/backend.go‎
Lines changed: 21 additions & 1 deletion
diff --git a/‎pkg/aikit2llb/inference/backend_test.go‎
Lines changed: 9 additions & 0 deletions b/‎pkg/aikit2llb/inference/backend_test.go‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎pkg/aikit2llb/inference/convert.go‎
Lines changed: 48 additions & 2 deletions b/‎pkg/aikit2llb/inference/convert.go‎
Lines changed: 48 additions & 2 deletions
diff --git a/‎pkg/aikit2llb/inference/convert_test.go‎
Lines changed: 63 additions & 0 deletions b/‎pkg/aikit2llb/inference/convert_test.go‎
Lines changed: 63 additions & 0 deletions
diff --git a/‎pkg/aikit2llb/inference/image.go‎
Lines changed: 9 additions & 0 deletions b/‎pkg/aikit2llb/inference/image.go‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎pkg/build/build.go‎
Lines changed: 14 additions & 1 deletion b/‎pkg/build/build.go‎
Lines changed: 14 additions & 1 deletion
diff --git a/‎pkg/build/build_test.go‎
Lines changed: 37 additions & 0 deletions b/‎pkg/build/build_test.go‎
Lines changed: 37 additions & 0 deletions
@@ -57,6 +57,9 @@ This will automatically run linting and formatting checks before each commit.
 
 ## Building AIKit
 
+> [!TIP]
+> Build targets default to multi-platform (`linux/amd64,linux/arm64`). For local development, pass your host architecture to speed up builds and avoid multi-platform issues — e.g. `make build-aikit PLATFORMS=linux/amd64`. You should also use the `default` buildx builder (`docker buildx use default`) so that locally built images are available to subsequent builds via the `#syntax=` directive.
+
 ### Build the AIKit Binary
 
 ```bash
 
@@ -51,6 +51,11 @@ run-test-model:
 run-test-model-gpu:
 	docker run --rm -p 8080:8080 --gpus all ${REGISTRY}${REPOSITORY}/${TEST_IMAGE_NAME}:${TAG}
 
+# Run the test model image on port 8080 with the /dev/kfd and /dev/dri device
+# nodes passed through, adding the video group and the group that owns
+# /dev/dri/renderD128 (looked up with stat at invocation time).
+.PHONY: run-test-model-rocm
+run-test-model-rocm:
+	docker run --rm -p 8080:8080 --device /dev/kfd --device /dev/dri --group-add video --group-add $$(stat -c '%g' /dev/dri/renderD128) \
+		${REGISTRY}${REPOSITORY}/${TEST_IMAGE_NAME}:${TAG}
+
 .PHONY: run-test-model-applesilicon
 run-test-model-applesilicon:
 	podman run --rm -p 8080:8080 --device /dev/dri ${REGISTRY}${REPOSITORY}/${TEST_IMAGE_NAME}:${TAG}
 
@@ -29,7 +29,7 @@ AIKit offers three main capabilities:
 - 🦙 Support for GGUF ([`llama`](https://github.com/ggerganov/llama.cpp)) and GGML ([`llama-ggml`](https://github.com/ggerganov/llama.cpp)) models
 - 🚢 [Kubernetes deployment ready](https://kaito-project.github.io/aikit/docs/kubernetes)
 - 📚 Supports multiple models with a single image
-- 🖥️ Supports [AMD64 and ARM64](https://kaito-project.github.io/aikit/docs/create-images#multi-platform-support) CPUs and [GPU-accelerated inferencing with NVIDIA GPUs](https://kaito-project.github.io/aikit/docs/gpu)
+- 🖥️ Supports [AMD64 and ARM64](https://kaito-project.github.io/aikit/docs/create-images#multi-platform-support) CPUs and [GPU-accelerated inferencing with NVIDIA CUDA and AMD ROCm support](https://kaito-project.github.io/aikit/docs/gpu)
 - 🔐 Ensure [supply chain security](https://kaito-project.github.io/aikit/docs/security) with SBOMs, Provenance attestations, and signed images
 - 🌈 Supports air-gapped environments with self-hosted, local, or any remote container registries to store model images for inference on the edge.
 
@@ -107,9 +107,9 @@ If it doesn't include a specific model, you can always [create your own images](
 ### NVIDIA CUDA
 
 > [!NOTE]
-> To enable GPU acceleration, please see [GPU Acceleration](https://kaito-project.github.io/aikit/docs/gpu).
+> To enable NVIDIA GPU acceleration, please see [GPU Acceleration](https://kaito-project.github.io/aikit/docs/gpu).
 >
-> Please note that only difference between CPU and GPU section is the `--gpus all` flag in the command to enable GPU acceleration.
+> Published pre-made GPU images include NVIDIA CUDA libraries. For the NVIDIA CUDA commands below, the only difference from the CPU section is the `--gpus all` flag.
 
 | Model           | Optimization  | Parameters | Command                                                                                | Model Name               | License                                                                                                                     |
 | --------------- | ------------- | ---------- | -------------------------------------------------------------------------------------- | ------------------------ | --------------------------------------------------------------------------------------------------------------------------- |
@@ -127,6 +127,14 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🤖 GPT-OSS       |               | 120B       | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/kaito-project/aikit/gpt-oss:120b`  | `gpt-oss-120b`           | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/)                                                               |
 
 
+### AMD ROCm (experimental)
+
+> [!NOTE]
+> AMD GPU acceleration is currently available for custom `llama-cpp` images built with `runtime: rocm`. Published pre-made model images are currently CUDA-based, so for AMD GPUs please [create your own image](https://kaito-project.github.io/aikit/docs/create-images) and follow the ROCm instructions in [GPU Acceleration](https://kaito-project.github.io/aikit/docs/gpu).
+>
+> ROCm support currently applies to the `llama-cpp` backend on `linux/amd64`.
+
+
 ### Apple Silicon (experimental)
 
 > [!NOTE]
 
@@ -81,7 +81,7 @@ func getBackendTag(backend, runtime string, platform specs.Platform) string {
 	baseTag := getBackendVersion(backend, runtime, platform)
 	backendName := getEffectiveBackend(backend, runtime, platform)
 
-	// Handle Apple Silicon - use Vulkan llama-cpp
+	// Handle Apple Silicon - use Vulkan llama-cpp.
 	if runtime == utils.RuntimeAppleSilicon {
 		return fmt.Sprintf("%s-%s", baseTag, vulkanLlamaCppBackend)
 	}
@@ -101,6 +101,12 @@ func getBackendTag(backend, runtime string, platform specs.Platform) string {
 		}
 	}
 
+	// Handle ROCm runtime.
+	if runtime == utils.RuntimeROCm && platform.Architecture == utils.PlatformAMD64 {
+		return fmt.Sprintf("%s-gpu-rocm-hipblas-llama-cpp", localAIROCmBackendVersion)
+	}
+
+	// Handle CPU runtime (default).
 	return fmt.Sprintf("%s-cpu-llama-cpp", baseTag)
 }
 
@@ -131,6 +137,12 @@ func getBackendName(backend, runtime string, platform specs.Platform) string {
 		}
 	}
 
+	// Handle ROCm runtime
+	if runtime == utils.RuntimeROCm && platform.Architecture == utils.PlatformAMD64 {
+		// Only llama-cpp backend is supported for ROCm
+		return "hipblas-llama-cpp"
+	}
+
 	// Handle CPU runtime (default)
 	return cpuLlamaCppBackend
 }
@@ -220,6 +232,14 @@ func installBackends(c *config.InferenceConfig, platform specs.Platform, s llb.S
 			cpuConfig.Runtime = "cpu" // Use CPU runtime to force CPU backend installation
 			merge = installBackend(backend, &cpuConfig, platform, s, merge)
 		}
+
+		// For llama-cpp backend with ROCm runtime, also install the CPU version for fallback
+		if backend == utils.BackendLlamaCpp && c.Runtime == utils.RuntimeROCm && platform.Architecture == utils.PlatformAMD64 {
+			// Create a modified config with CPU runtime to install the CPU version
+			cpuConfig := *c
+			cpuConfig.Runtime = "cpu" // Use CPU runtime to force CPU backend installation
+			merge = installBackend(backend, &cpuConfig, platform, s, merge)
+		}
 	}
 
 	return merge
 
@@ -98,6 +98,15 @@ func TestGetBackendTag(t *testing.T) {
 			},
 			want: fmt.Sprintf("%s-gpu-nvidia-cuda-12-llama-cpp", localAILlamaCppBackendVersion),
 		},
+		{
+			name:    "ROCm llama-cpp",
+			backend: utils.BackendLlamaCpp,
+			runtime: utils.RuntimeROCm,
+			platform: specs.Platform{
+				Architecture: utils.PlatformAMD64,
+			},
+			want: fmt.Sprintf("%s-gpu-rocm-hipblas-llama-cpp", localAIROCmBackendVersion),
+		},
 		{
 			name:    "Empty backend name defaults to CPU llama-cpp",
 			backend: "",
 
@@ -17,16 +17,22 @@ const (
 	localAIBinaryVersion          = "v4.0.0"
 	localAILlamaCppBackendVersion = localAIBinaryVersion
 	localAILegacyBackendVersion   = "v3.12.1"
+	localAIROCmBackendVersion     = "rocm7"
 	localAIRepo                   = "ghcr.io/kaito-project/aikit/localai:"
 	cudaVersion                   = "12-5"
+	rocmVersion                   = "7.2"
 )
 
 // Aikit2LLB converts an InferenceConfig to an LLB state.
 func Aikit2LLB(c *config.InferenceConfig, platform *specs.Platform) (llb.State, *specs.Image, error) {
 	var merge, state llb.State
-	if c.Runtime == utils.RuntimeAppleSilicon {
+	switch c.Runtime {
+	case utils.RuntimeAppleSilicon:
 		state = llb.Image(utils.AppleSiliconBase, llb.Platform(*platform))
-	} else {
+	case utils.RuntimeROCm:
+		// Use Ubuntu 24.04 for ROCm to match noble repository
+		state = llb.Image(utils.Ubuntu24Base, llb.Platform(*platform))
+	default:
 		state = llb.Image(utils.UbuntuBase, llb.Platform(*platform))
 	}
 	base := getBaseImage(c, platform)
@@ -55,6 +61,11 @@ func Aikit2LLB(c *config.InferenceConfig, platform *specs.Platform) (llb.State,
 		state, merge = installCuda(c, state, merge)
 	}
 
+	// install rocm if runtime is rocm and architecture is amd64
+	if c.Runtime == utils.RuntimeROCm && platform.Architecture == utils.PlatformAMD64 {
+		state, merge = installRocm(c, state, merge)
+	}
+
 	// install backend dependencies
 	merge = installBackends(c, *platform, state, merge)
 
@@ -67,6 +78,10 @@ func getBaseImage(c *config.InferenceConfig, platform *specs.Platform) llb.State
 	if c.Runtime == utils.RuntimeAppleSilicon {
 		return llb.Image(utils.AppleSiliconBase, llb.Platform(*platform))
 	}
+	if c.Runtime == utils.RuntimeROCm {
+		// Use Ubuntu 24.04 for ROCm to match noble repository.
+		return llb.Image(utils.Ubuntu24Base, llb.Platform(*platform))
+	}
 	if len(c.Backends) > 0 {
 		return llb.Image(utils.UbuntuBase, llb.Platform(*platform))
 	}
@@ -155,6 +170,37 @@ func installCuda(c *config.InferenceConfig, s llb.State, merge llb.State) (llb.S
 	return s, llb.Merge([]llb.State{merge, diff})
 }
 
+// installRocm registers the AMD ROCm apt repositories on top of s and, when the
+// llama-cpp backend is in use (implicitly or explicitly), installs the ROCm
+// packages plus pciutils.
+//
+// It returns the updated state together with merge extended by the filesystem
+// diff between the incoming state and the updated one.
+func installRocm(c *config.InferenceConfig, s llb.State, merge llb.State) (llb.State, llb.State) {
+	savedState := s
+
+	// Tools needed to download and dearmor the repository signing key.
+	s = s.Run(utils.Sh("apt-get update && apt-get install --no-install-recommends -y ca-certificates curl gnupg"), llb.IgnoreCache).Root()
+
+	// Add ROCm GPG key and repository
+	s = s.Run(utils.Sh("curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm.gpg")).Root()
+	s = s.Run(utils.Shf("echo 'deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm.gpg] https://repo.radeon.com/rocm/apt/%s/ noble main' >> /etc/apt/sources.list.d/rocm.list", rocmVersion)).Root()
+	s = s.Run(utils.Shf("echo 'deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm.gpg] https://repo.radeon.com/graphics/%s/ubuntu noble main' >> /etc/apt/sources.list.d/rocm.list", rocmVersion)).Root()
+
+	// Prefer packages from repo.radeon.com over the distribution's own.
+	rocmPinning := `
+Package: *
+Pin: release o=repo.radeon.com
+Pin-Priority: 600
+`
+	s = s.Run(utils.Shf("echo '%s' > /etc/apt/preferences.d/repo-radeon-pin-600", rocmPinning)).Root()
+	s = s.Run(utils.Sh("apt-get update"), llb.IgnoreCache).Root()
+
+	// install rocm libraries and pciutils for gpu detection when using the default
+	// llama-cpp backend or when it is configured explicitly
+	if len(c.Backends) == 0 || slices.Contains(c.Backends, utils.BackendLlamaCpp) {
+		s = s.Run(utils.Sh("apt-get install -y pciutils rocm && apt-get clean")).Root()
+
+		// hipblaslt soname compatibility: backend may be linked against .so.0 while ROCm 7.2 ships .so.1
+		// The shim lives in this branch because the command changes into
+		// /opt/rocm/lib under `set -e`; running it without the packages
+		// installed above would fail the build if that directory is absent.
+		s = s.Run(utils.Sh("set -e; cd /opt/rocm/lib; [ -e libhipblaslt.so.0 ] || ln -sf libhipblaslt.so.1 libhipblaslt.so.0")).Root()
+	}
+
+	diff := llb.Diff(savedState, s)
+	return s, llb.Merge([]llb.State{merge, diff})
+}
+
 // addLocalAI adds the LocalAI binary to the image.
 func addLocalAI(c *config.InferenceConfig, s llb.State, merge llb.State, platform specs.Platform) (llb.State, llb.State, error) {
 	artifactVersion := getLocalAIArtifactVersion(c, platform)
 
@@ -0,0 +1,63 @@
+package inference
+
+import (
+	"context"
+	"strings"
+	"testing"
+
+	"github.com/kaito-project/aikit/pkg/aikit/config"
+	"github.com/kaito-project/aikit/pkg/utils"
+	"github.com/moby/buildkit/client/llb"
+)
+
+func TestInstallRocmInstallsPciutilsForLlamaCpp(t *testing.T) {
+	tests := []struct {
+		name     string
+		backends []string
+	}{
+		{
+			name:     "implicit default llama-cpp backend",
+			backends: nil,
+		},
+		{
+			name:     "explicit llama-cpp backend",
+			backends: []string{utils.BackendLlamaCpp},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			cfg := &config.InferenceConfig{
+				Runtime:  utils.RuntimeROCm,
+				Backends: tt.backends,
+			}
+
+			base := llb.Image(utils.Ubuntu24Base)
+			_, merged := installRocm(cfg, base, base)
+
+			def, err := merged.Marshal(context.Background())
+			if err != nil {
+				t.Fatalf("marshal failed: %v", err)
+			}
+
+			combined := marshalDefinitionToString(def)
+			wantInstall := "apt-get install -y pciutils rocm && apt-get clean"
+			if !strings.Contains(combined, wantInstall) {
+				t.Fatalf("expected ROCm install to contain %q, got: %s", wantInstall, combined)
+			}
+		})
+	}
+}
+
+// marshalDefinitionToString concatenates every serialized entry of def's
+// protobuf form into a single string. A nil definition yields "".
+func marshalDefinitionToString(def *llb.Definition) string {
+	var out strings.Builder
+	if def != nil {
+		ops := def.ToPB().Def
+		for i := range ops {
+			out.Write(ops[i])
+		}
+	}
+	return out.String()
+}
@@ -77,5 +77,14 @@ func emptyImage(c *config.InferenceConfig, platform *specs.Platform) *specs.Imag
 		)
 	}
 
+	rocmEnv := []string{
+		"PATH=" + system.DefaultPathEnv(utils.PlatformLinux) + ":/opt/rocm/bin",
+		"LD_LIBRARY_PATH=/opt/rocm/lib:/opt/rocm/lib64:/opt/rocm/llvm/lib",
+		"LOCALAI_FORCE_META_BACKEND_CAPABILITY=amd",
+	}
+	if c.Runtime == utils.RuntimeROCm && platform.Architecture == "amd64" {
+		img.Config.Env = append(img.Config.Env, rocmEnv...)
+	}
+
 	return img
 }
@@ -491,14 +491,22 @@ func validateInferenceConfig(c *config.InferenceConfig) error {
 		return errors.New("runner mode (backends without models) is not supported on apple silicon runtime")
 	}
 
+	if c.Runtime == utils.RuntimeROCm && len(c.Backends) > 0 {
+		for _, backend := range c.Backends {
+			if backend != utils.BackendLlamaCpp {
+				return errors.New("rocm runtime only supports llama-cpp backend")
+			}
+		}
+	}
+
 	backends := []string{utils.BackendLlamaCpp, utils.BackendDiffusers, utils.BackendVLLM}
 	for _, b := range c.Backends {
 		if !slices.Contains(backends, b) {
 			return errors.Errorf("backend %s is not supported", b)
 		}
 	}
 
-	runtimes := []string{"", utils.RuntimeNVIDIA, utils.RuntimeAppleSilicon}
+	runtimes := []string{"", utils.RuntimeNVIDIA, utils.RuntimeROCm, utils.RuntimeAppleSilicon}
 	if !slices.Contains(runtimes, c.Runtime) {
 		return errors.Errorf("runtime %s is not supported", c.Runtime)
 	}
@@ -517,6 +525,11 @@ func validateBackendPlatformCompatibility(c *config.InferenceConfig, targetPlatf
 		}
 	}
 
+	// ROCm runtime only supports amd64.
+	if c.Runtime == utils.RuntimeROCm && hasARM64Platform {
+		return errors.New("rocm runtime is only supported on linux/amd64 platform")
+	}
+
 	// If we have ARM64 platforms, validate backend compatibility
 	if hasARM64Platform {
 		for _, backend := range c.Backends {
 
@@ -241,6 +241,43 @@ func Test_validateBackendPlatformCompatibility(t *testing.T) {
 			},
 			wantErr: true,
 		},
+		{
+			name: "rocm runtime with amd64 platform - should pass",
+			config: &config.InferenceConfig{
+				APIVersion: "v1alpha1",
+				Runtime:    "rocm",
+				Backends:   []string{"llama-cpp"},
+			},
+			targetPlatforms: []*specs.Platform{
+				{Architecture: "amd64", OS: "linux"},
+			},
+			wantErr: false,
+		},
+		{
+			name: "rocm runtime with arm64 platform - should fail",
+			config: &config.InferenceConfig{
+				APIVersion: "v1alpha1",
+				Runtime:    "rocm",
+				Backends:   []string{"llama-cpp"},
+			},
+			targetPlatforms: []*specs.Platform{
+				{Architecture: "arm64", OS: "linux"},
+			},
+			wantErr: true,
+		},
+		{
+			name: "rocm runtime with mixed platforms - should fail",
+			config: &config.InferenceConfig{
+				APIVersion: "v1alpha1",
+				Runtime:    "rocm",
+				Backends:   []string{"llama-cpp"},
+			},
+			targetPlatforms: []*specs.Platform{
+				{Architecture: "amd64", OS: "linux"},
+				{Architecture: "arm64", OS: "linux"},
+			},
+			wantErr: true,
+		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
Original file line number	Diff line number	Diff line change
`@@ -81,7 +81,7 @@ func getBackendTag(backend, runtime string, platform specs.Platform) string {`
`81`	`81`	`baseTag := getBackendVersion(backend, runtime, platform)`
`82`	`82`	`backendName := getEffectiveBackend(backend, runtime, platform)`
`83`	`83`
`84`		`- // Handle Apple Silicon - use Vulkan llama-cpp`
	`84`	`+ // Handle Apple Silicon - use Vulkan llama-cpp.`
`85`	`85`	`if runtime == utils.RuntimeAppleSilicon {`
`86`	`86`	`return fmt.Sprintf("%s-%s", baseTag, vulkanLlamaCppBackend)`
`87`	`87`	`}`
`@@ -101,6 +101,12 @@ func getBackendTag(backend, runtime string, platform specs.Platform) string {`
`101`	`101`	`}`
`102`	`102`	`}`
`103`	`103`
	`104`	`+ // Handle ROCm runtime.`
	`105`	`+ if runtime == utils.RuntimeROCm && platform.Architecture == utils.PlatformAMD64 {`
	`106`	`+ return fmt.Sprintf("%s-gpu-rocm-hipblas-llama-cpp", localAIROCmBackendVersion)`
	`107`	`+ }`
	`108`	`+`
	`109`	`+ // Handle CPU runtime (default).`
`104`	`110`	`return fmt.Sprintf("%s-cpu-llama-cpp", baseTag)`
`105`	`111`	`}`
`106`	`112`
`@@ -131,6 +137,12 @@ func getBackendName(backend, runtime string, platform specs.Platform) string {`
`131`	`137`	`}`
`132`	`138`	`}`
`133`	`139`
	`140`	`+ // Handle ROCm runtime`
	`141`	`+ if runtime == utils.RuntimeROCm && platform.Architecture == utils.PlatformAMD64 {`
	`142`	`+ // Only llama-cpp backend is supported for ROCm`
	`143`	`+ return "hipblas-llama-cpp"`
	`144`	`+ }`
	`145`	`+`
`134`	`146`	`// Handle CPU runtime (default)`
`135`	`147`	`return cpuLlamaCppBackend`
`136`	`148`	`}`
`@@ -220,6 +232,14 @@ func installBackends(c *config.InferenceConfig, platform specs.Platform, s llb.S`
`220`	`232`	`cpuConfig.Runtime = "cpu" // Use CPU runtime to force CPU backend installation`
`221`	`233`	`merge = installBackend(backend, &cpuConfig, platform, s, merge)`
`222`	`234`	`}`
	`235`	`+`
	`236`	`+ // For llama-cpp backend with ROCm runtime, also install the CPU version for fallback`
	`237`	`+ if backend == utils.BackendLlamaCpp && c.Runtime == utils.RuntimeROCm && platform.Architecture == utils.PlatformAMD64 {`
	`238`	`+ // Create a modified config with CPU runtime to install the CPU version`
	`239`	`+ cpuConfig := *c`
	`240`	`+ cpuConfig.Runtime = "cpu" // Use CPU runtime to force CPU backend installation`
	`241`	`+ merge = installBackend(backend, &cpuConfig, platform, s, merge)`
	`242`	`+ }`
`223`	`243`	`}`
`224`	`244`
`225`	`245`	`return merge`
Original file line number	Diff line number	Diff line change
`@@ -77,5 +77,14 @@ func emptyImage(c config.InferenceConfig, platform specs.Platform) *specs.Imag`
`77`	`77`	`)`
`78`	`78`	`}`
`79`	`79`
	`80`	`+ rocmEnv := []string{`
	`81`	`+ "PATH=" + system.DefaultPathEnv(utils.PlatformLinux) + ":/opt/rocm/bin",`
	`82`	`+ "LD_LIBRARY_PATH=/opt/rocm/lib:/opt/rocm/lib64:/opt/rocm/llvm/lib",`
	`83`	`+ "LOCALAI_FORCE_META_BACKEND_CAPABILITY=amd",`
	`84`	`+ }`
	`85`	`+ if c.Runtime == utils.RuntimeROCm && platform.Architecture == "amd64" {`
	`86`	`+ img.Config.Env = append(img.Config.Env, rocmEnv...)`
	`87`	`+ }`
	`88`	`+`
`80`	`89`	`return img`
`81`	`90`	`}`
Original file line number	Diff line number	Diff line change
`@@ -491,14 +491,22 @@ func validateInferenceConfig(c *config.InferenceConfig) error {`
`491`	`491`	`return errors.New("runner mode (backends without models) is not supported on apple silicon runtime")`
`492`	`492`	`}`
`493`	`493`
	`494`	`+ if c.Runtime == utils.RuntimeROCm && len(c.Backends) > 0 {`
	`495`	`+ for _, backend := range c.Backends {`
	`496`	`+ if backend != utils.BackendLlamaCpp {`
	`497`	`+ return errors.New("rocm runtime only supports llama-cpp backend")`
	`498`	`+ }`
	`499`	`+ }`
	`500`	`+ }`
	`501`	`+`
`494`	`502`	`backends := []string{utils.BackendLlamaCpp, utils.BackendDiffusers, utils.BackendVLLM}`
`495`	`503`	`for _, b := range c.Backends {`
`496`	`504`	`if !slices.Contains(backends, b) {`
`497`	`505`	`return errors.Errorf("backend %s is not supported", b)`
`498`	`506`	`}`
`499`	`507`	`}`
`500`	`508`
`501`		`- runtimes := []string{"", utils.RuntimeNVIDIA, utils.RuntimeAppleSilicon}`
	`509`	`+ runtimes := []string{"", utils.RuntimeNVIDIA, utils.RuntimeROCm, utils.RuntimeAppleSilicon}`
`502`	`510`	`if !slices.Contains(runtimes, c.Runtime) {`
`503`	`511`	`return errors.Errorf("runtime %s is not supported", c.Runtime)`
`504`	`512`	`}`
`@@ -517,6 +525,11 @@ func validateBackendPlatformCompatibility(c *config.InferenceConfig, targetPlatf`
`517`	`525`	`}`
`518`	`526`	`}`
`519`	`527`
	`528`	`+ // ROCm runtime only supports amd64.`
	`529`	`+ if c.Runtime == utils.RuntimeROCm && hasARM64Platform {`
	`530`	`+ return errors.New("rocm runtime is only supported on linux/amd64 platform")`
	`531`	`+ }`
	`532`	`+`
`520`	`533`	`// If we have ARM64 platforms, validate backend compatibility`
`521`	`534`	`if hasARM64Platform {`
`522`	`535`	`for _, backend := range c.Backends {`