huggingface · sayakpaul · Jun 2, 2026 · Jun 2, 2026 · Jun 4, 2026 · Jun 4, 2026
diff --git a/docs/source/builder/writing-kernels.md b/docs/source/builder/writing-kernels.md
@@ -239,6 +239,19 @@ options:
   For an example, see the [`relu-torch-stable-abi`](https://github.com/huggingface/kernels/tree/main/examples/kernels/relu-torch-stable-abi)
   example kernel.
 
+### `torch-noarch`
+
+This section marks a kernel as a "noarch" Torch kernel (pure
+Python/Triton, with no compile step). Because nothing is compiled, the
+supported GPU architectures cannot be detected automatically the way they
+are for arch kernels. Declare them here instead; they are exported to
+each variant's `metadata.json` (as `backend.archs`):
+
+- `cuda-capabilities` (optional): a list of CUDA compute capabilities the
+  kernel supports (e.g. `["9.0", "10.0"]`).
+- `rocm-archs` (optional): a list of ROCm architectures the kernel supports
+  (e.g. `["gfx942"]`).
+
 ### `kernel.<name>`
 
 Specification of a kernel with the name `<name>`. Multiple `kernel.<name>`

diff --git a/kernel-builder/src/pyproject/common.rs b/kernel-builder/src/pyproject/common.rs
@@ -3,7 +3,7 @@ use std::path::PathBuf;
 use eyre::Result;
 use itertools::Itertools;
 
-use kernels_data::config::{Backend, General};
+use kernels_data::config::{Backend, General, TorchNoarch};
 use kernels_data::metadata::{BackendInfo, Metadata};
 
 use crate::pyproject::ops_identifier::KernelIdentifier;
@@ -24,6 +24,28 @@ pub fn write_metadata(
     general: &General,
     kernel_id: &KernelIdentifier,
     file_set: &mut FileSet,
+) -> Result<()> {
+    write_metadata_impl(general, None, kernel_id, file_set)
+}
+
+/// Like [`write_metadata`], but also exports the GPU architectures that a
+/// noarch kernel declares in its `[torch-noarch]` section. Arch kernels
+/// detect their architectures at build time instead, so they use
+/// [`write_metadata`].
+pub fn write_noarch_metadata(
+    general: &General,
+    noarch: &TorchNoarch,
+    kernel_id: &KernelIdentifier,
+    file_set: &mut FileSet,
+) -> Result<()> {
+    write_metadata_impl(general, Some(noarch), kernel_id, file_set)
+}
+
+fn write_metadata_impl(
+    general: &General,
+    noarch: Option<&TorchNoarch>,
+    kernel_id: &KernelIdentifier,
+    file_set: &mut FileSet,
 ) -> Result<()> {
     for backend in &Backend::all() {
         let writer = file_set.entry(format!("metadata-{backend}.json"));
@@ -46,7 +68,9 @@ pub fn write_metadata(
             upstream: general.upstream.clone(),
             python_depends,
             backend: BackendInfo {
-                archs: None,
+                archs: noarch
+                    .and_then(|noarch| noarch.backend_archs(*backend))
+                    .cloned(),
                 backend_type: *backend,
             },
             digest: None,
@@ -85,3 +109,74 @@ pub fn write_cmake_file(file_set: &mut FileSet, filename: &str, content: &[u8])
     path.push(filename);
     file_set.entry(path).extend_from_slice(content);
 }
+
+#[cfg(test)]
+mod tests {
+    use kernels_data::config::{Backend, General, KernelName, TorchNoarch};
+    use kernels_data::metadata::Metadata;
+
+    use super::write_noarch_metadata;
+    use crate::pyproject::fileset::FileSet;
+    use crate::pyproject::ops_identifier::KernelIdentifier;
+
+    fn general() -> General {
+        General {
+            name: KernelName::new("my-kernel").unwrap(),
+            version: 1,
+            license: "Apache-2.0".to_string(),
+            upstream: None,
+            backends: vec![Backend::Cuda, Backend::Rocm, Backend::Cpu],
+            hub: None,
+            python_depends: None,
+            cuda: None,
+            neuron: None,
+            xpu: None,
+        }
+    }
+
+    fn metadata_for(noarch: &TorchNoarch, backend: &str) -> Metadata {
+        let general = general();
+        let kernel_id =
+            KernelIdentifier::new(".", general.name.to_string(), Some("abc1234".into()));
+
+        let mut file_set = FileSet::default();
+        write_noarch_metadata(&general, noarch, &kernel_id, &mut file_set).unwrap();
+
+        let dir = tempfile::tempdir().unwrap();
+        file_set.write(dir.path(), false).unwrap();
+        Metadata::from_reader(
+            std::fs::File::open(dir.path().join(format!("metadata-{backend}.json"))).unwrap(),
+        )
+        .unwrap()
+    }
+
+    #[test]
+    fn noarch_capabilities_are_exported_to_metadata() {
+        let noarch = TorchNoarch {
+            cuda_capabilities: Some(vec!["9.0".to_string(), "10.0".to_string()]),
+            rocm_archs: Some(vec!["gfx942".to_string()]),
+        };
+
+        assert_eq!(
+            metadata_for(&noarch, "cuda").backend.archs.as_deref(),
+            Some(["9.0".to_string(), "10.0".to_string()].as_slice()),
+        );
+        assert_eq!(
+            metadata_for(&noarch, "rocm").backend.archs.as_deref(),
+            Some(["gfx942".to_string()].as_slice()),
+        );
+        // Backends without declared archs (and CPU, which has no arch concept)
+        // leave `archs` unset.
+        assert_eq!(metadata_for(&noarch, "cpu").backend.archs, None);
+    }
+
+    #[test]
+    fn metadata_archs_unset_when_not_declared() {
+        let noarch = TorchNoarch {
+            cuda_capabilities: None,
+            rocm_archs: None,
+        };
+
+        assert_eq!(metadata_for(&noarch, "cuda").backend.archs, None);
+    }
+}
diff --git a/kernel-builder/src/pyproject/torch/noarch.rs b/kernel-builder/src/pyproject/torch/noarch.rs
@@ -6,7 +6,7 @@ use kernels_data::config::{Backend, Build, General, Torch};
 use minijinja::{context, Environment};
 
 use crate::pyproject::common::write_compat_py;
-use crate::pyproject::common::write_metadata;
+use crate::pyproject::common::{write_metadata, write_noarch_metadata};
 use crate::pyproject::fileset::FileSet;
 use crate::pyproject::ops_identifier::KernelIdentifier;
 
@@ -28,7 +28,10 @@ pub fn write_torch_ext_noarch(
     )?;
     write_pyproject_toml(env, build.framework.torch(), &build.general, &mut file_set)?;
     write_setup_py(&mut file_set)?;
-    write_metadata(&build.general, kernel_id, &mut file_set)?;
+    match build.framework.torch_noarch() {
+        Some(noarch) => write_noarch_metadata(&build.general, noarch, kernel_id, &mut file_set)?,
+        None => write_metadata(&build.general, kernel_id, &mut file_set)?,
+    }
 
     Ok(file_set)
 }

diff --git a/kernels-data/src/config/mod.rs b/kernels-data/src/config/mod.rs
@@ -48,6 +48,13 @@ impl Framework {
             _ => None,
         }
     }
+
+    pub fn torch_noarch(&self) -> Option<&TorchNoarch> {
+        match self {
+            Framework::TorchNoarch(torch_noarch) => Some(torch_noarch),
+            _ => None,
+        }
+    }
 }
 
 impl Build {
@@ -188,7 +195,23 @@ impl Torch {
         data_extensions(self.pyext.as_deref())
     }
 }
-pub struct TorchNoarch {}
+/// A noarch kernel has no compile step, so it cannot detect the GPU
+/// architectures it supports the way arch kernels do. Instead it declares
+/// them here, and they are exported to each variant's `metadata.json`.
+pub struct TorchNoarch {
+    pub cuda_capabilities: Option<Vec<String>>,
+    pub rocm_archs: Option<Vec<String>>,
+}
+
+impl TorchNoarch {
+    pub fn backend_archs(&self, backend: Backend) -> Option<&Vec<String>> {
+        match backend {
+            Backend::Cuda => self.cuda_capabilities.as_ref(),
+            Backend::Rocm => self.rocm_archs.as_ref(),
+            _ => None,
+        }
+    }
+}
 
 pub struct TvmFfi {
     pub include: Option<Vec<String>>,

diff --git a/kernels-data/src/config/v3.rs b/kernels-data/src/config/v3.rs
@@ -171,7 +171,10 @@ impl TryFrom<Build> for super::Build {
         let framework = match build.framework {
             Some(Framework::Torch(torch)) => super::Framework::Torch(torch.into()),
             Some(Framework::TvmFfi(tvm_ffi)) => super::Framework::TvmFfi(tvm_ffi.into()),
-            None => super::Framework::TorchNoarch(super::TorchNoarch {}),
+            None => super::Framework::TorchNoarch(super::TorchNoarch {
+                cuda_capabilities: None,
+                rocm_archs: None,
+            }),
         };
 
         Ok(Self {

diff --git a/kernels-data/src/config/v4.rs b/kernels-data/src/config/v4.rs
@@ -92,8 +92,11 @@ pub struct Torch {
 }
 
 #[derive(Debug, Deserialize, Clone, Serialize)]
-#[serde(deny_unknown_fields)]
-pub struct TorchNoarch {}
+#[serde(deny_unknown_fields, rename_all = "kebab-case")]
+pub struct TorchNoarch {
+    pub cuda_capabilities: Option<Vec<String>>,
+    pub rocm_archs: Option<Vec<String>>,
+}
 
 #[derive(Debug, Deserialize, Clone, Serialize)]
 #[serde(deny_unknown_fields)]
@@ -255,8 +258,11 @@ impl From<Torch> for super::Torch {
 }
 
 impl From<TorchNoarch> for super::TorchNoarch {
-    fn from(_torch_noarch: TorchNoarch) -> Self {
-        Self {}
+    fn from(torch_noarch: TorchNoarch) -> Self {
+        Self {
+            cuda_capabilities: torch_noarch.cuda_capabilities,
+            rocm_archs: torch_noarch.rocm_archs,
+        }
     }
 }
 
@@ -449,8 +455,11 @@ impl From<super::Torch> for Torch {
     }
 }
 impl From<super::TorchNoarch> for TorchNoarch {
-    fn from(_torch_noarch: super::TorchNoarch) -> Self {
-        Self {}
+    fn from(torch_noarch: super::TorchNoarch) -> Self {
+        Self {
+            cuda_capabilities: torch_noarch.cuda_capabilities,
+            rocm_archs: torch_noarch.rocm_archs,
+        }
     }
 }