Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions docs/source/builder/writing-kernels.md
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,19 @@ options:
For an example, see the [`relu-torch-stable-abi`](https://github.com/huggingface/kernels/tree/main/examples/kernels/relu-torch-stable-abi)
example kernel.

### `torch-noarch`

This marks a kernel as a "noarch" Torch kernel (pure Python/Triton,
with no compile step). Since there is no compilation, the supported GPU
architectures cannot be detected automatically as they are for arch
kernels. They can instead be declared here and are exported to each
variant's `metadata.json` (`backend.archs`):

- `cuda-capabilities` (optional): a list of CUDA compute capabilities the
kernel supports (e.g. `["9.0", "10.0"]`).
- `rocm-archs` (optional): a list of ROCm architectures the kernel supports
(e.g. `["gfx942"]`).

### `kernel.<name>`

Specification of a kernel with the name `<name>`. Multiple `kernel.<name>`
Expand Down
99 changes: 97 additions & 2 deletions kernel-builder/src/pyproject/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::path::PathBuf;
use eyre::Result;
use itertools::Itertools;

use kernels_data::config::{Backend, General};
use kernels_data::config::{Backend, General, TorchNoarch};
use kernels_data::metadata::{BackendInfo, Metadata};

use crate::pyproject::ops_identifier::KernelIdentifier;
Expand All @@ -24,6 +24,28 @@ pub fn write_metadata(
general: &General,
kernel_id: &KernelIdentifier,
file_set: &mut FileSet,
) -> Result<()> {
write_metadata_impl(general, None, kernel_id, file_set)
}

/// Writes the per-backend metadata files for a noarch kernel.
///
/// Behaves like [`write_metadata`], except that the GPU architectures
/// declared in the kernel's `[torch-noarch]` section are passed along so that
/// they end up in the generated metadata. Arch kernels detect their
/// architectures at build time and therefore go through [`write_metadata`].
pub fn write_noarch_metadata(
    general: &General,
    noarch: &TorchNoarch,
    kernel_id: &KernelIdentifier,
    file_set: &mut FileSet,
) -> Result<()> {
    let declared_archs = Some(noarch);
    write_metadata_impl(general, declared_archs, kernel_id, file_set)
}

fn write_metadata_impl(
general: &General,
noarch: Option<&TorchNoarch>,

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not very keen on this, because it adds noarch-specific things to agnostic/common functions.

I think we should keep it like this for this PR, but I have to think a bit about it. I like a certain amount of parallelism between arch/noarch, but I am not sure yet how to do that nicely (maybe by writing the bits to setup.py and extending the metadata from there).

Generally, it becomes hard to understand how the generation works if we do things in different phases (create-pyproject, configure, build, install) for different frameworks (torch, torch-noarch, tvm-ffi). So even though updating the metadata from Python might be slightly less elegant, it would be clean, because it would be the same in all cases and we wouldn't need different metadata-writing paths.

I can do a follow-up PR once I have a clearer idea of what it should look like.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for this insight. I agree with that. I actually want to hold off on merging this PR now that I have a better picture from you. It leads to an unpleasant solution that we might have to discard anyway. I will also think it over a bit and see if I can repurpose this PR.

kernel_id: &KernelIdentifier,
file_set: &mut FileSet,
) -> Result<()> {
for backend in &Backend::all() {
let writer = file_set.entry(format!("metadata-{backend}.json"));
Expand All @@ -46,7 +68,9 @@ pub fn write_metadata(
upstream: general.upstream.clone(),
python_depends,
backend: BackendInfo {
archs: None,
archs: noarch
.and_then(|noarch| noarch.backend_archs(*backend))
.cloned(),
backend_type: *backend,
},
digest: None,
Expand Down Expand Up @@ -85,3 +109,74 @@ pub fn write_cmake_file(file_set: &mut FileSet, filename: &str, content: &[u8])
path.push(filename);
file_set.entry(path).extend_from_slice(content);
}

#[cfg(test)]
mod tests {
    use kernels_data::config::{Backend, General, KernelName, TorchNoarch};
    use kernels_data::metadata::Metadata;

    use super::write_noarch_metadata;
    use crate::pyproject::fileset::FileSet;
    use crate::pyproject::ops_identifier::KernelIdentifier;

    /// Minimal `General` configuration shared by every test in this module.
    fn general() -> General {
        General {
            name: KernelName::new("my-kernel").unwrap(),
            version: 1,
            license: "Apache-2.0".to_string(),
            upstream: None,
            backends: vec![Backend::Cuda, Backend::Rocm, Backend::Cpu],
            hub: None,
            python_depends: None,
            cuda: None,
            neuron: None,
            xpu: None,
        }
    }

    /// Generates the noarch metadata for `noarch`, writes the resulting file
    /// set into a temporary directory and parses `metadata-<backend>.json`
    /// back into a [`Metadata`] value.
    fn metadata_for(noarch: &TorchNoarch, backend: &str) -> Metadata {
        let general = general();
        let kernel_id =
            KernelIdentifier::new(".", general.name.to_string(), Some("abc1234".into()));

        let mut files = FileSet::default();
        write_noarch_metadata(&general, noarch, &kernel_id, &mut files).unwrap();

        // The temporary directory must outlive the read below; it is removed
        // when `out_dir` goes out of scope at the end of this function.
        let out_dir = tempfile::tempdir().unwrap();
        files.write(out_dir.path(), false).unwrap();

        let metadata_path = out_dir.path().join(format!("metadata-{backend}.json"));
        let reader = std::fs::File::open(metadata_path).unwrap();
        Metadata::from_reader(reader).unwrap()
    }

    #[test]
    fn noarch_capabilities_are_exported_to_metadata() {
        let cuda_caps = vec!["9.0".to_string(), "10.0".to_string()];
        let rocm_targets = vec!["gfx942".to_string()];
        let noarch = TorchNoarch {
            cuda_capabilities: Some(cuda_caps.clone()),
            rocm_archs: Some(rocm_targets.clone()),
        };

        let cuda = metadata_for(&noarch, "cuda");
        assert_eq!(cuda.backend.archs.as_deref(), Some(cuda_caps.as_slice()));

        let rocm = metadata_for(&noarch, "rocm");
        assert_eq!(rocm.backend.archs.as_deref(), Some(rocm_targets.as_slice()));

        // CPU has no arch list to declare, so `archs` stays unset for it.
        let cpu = metadata_for(&noarch, "cpu");
        assert_eq!(cpu.backend.archs, None);
    }

    #[test]
    fn metadata_archs_unset_when_not_declared() {
        let nothing_declared = TorchNoarch {
            cuda_capabilities: None,
            rocm_archs: None,
        };

        let cuda = metadata_for(&nothing_declared, "cuda");
        assert_eq!(cuda.backend.archs, None);
    }
}
7 changes: 5 additions & 2 deletions kernel-builder/src/pyproject/torch/noarch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use kernels_data::config::{Backend, Build, General, Torch};
use minijinja::{context, Environment};

use crate::pyproject::common::write_compat_py;
use crate::pyproject::common::write_metadata;
use crate::pyproject::common::{write_metadata, write_noarch_metadata};
use crate::pyproject::fileset::FileSet;
use crate::pyproject::ops_identifier::KernelIdentifier;

Expand All @@ -28,7 +28,10 @@ pub fn write_torch_ext_noarch(
)?;
write_pyproject_toml(env, build.framework.torch(), &build.general, &mut file_set)?;
write_setup_py(&mut file_set)?;
write_metadata(&build.general, kernel_id, &mut file_set)?;
match build.framework.torch_noarch() {
Some(noarch) => write_noarch_metadata(&build.general, noarch, kernel_id, &mut file_set)?,
None => write_metadata(&build.general, kernel_id, &mut file_set)?,
}

Ok(file_set)
}
Expand Down
25 changes: 24 additions & 1 deletion kernels-data/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ impl Framework {
_ => None,
}
}

/// Returns the noarch Torch configuration when this framework is
/// [`Framework::TorchNoarch`], and `None` for every other framework.
pub fn torch_noarch(&self) -> Option<&TorchNoarch> {
    if let Framework::TorchNoarch(noarch) = self {
        Some(noarch)
    } else {
        None
    }
}
}

impl Build {
Expand Down Expand Up @@ -188,7 +195,23 @@ impl Torch {
data_extensions(self.pyext.as_deref())
}
}
pub struct TorchNoarch {}
/// Configuration of a noarch Torch kernel.
///
/// A noarch kernel has no compile step, so it cannot detect the GPU
/// architectures it supports the way arch kernels do. Instead it declares
/// them here, and they are exported to each variant's `metadata.json`.
pub struct TorchNoarch {
/// CUDA compute capabilities the kernel supports (e.g. `["9.0", "10.0"]`),
/// or `None` when the kernel does not declare any.
pub cuda_capabilities: Option<Vec<String>>,
/// ROCm architectures the kernel supports (e.g. `["gfx942"]`), or `None`
/// when the kernel does not declare any.
pub rocm_archs: Option<Vec<String>>,
}

impl TorchNoarch {
    /// Returns the architectures declared for `backend`, if any.
    ///
    /// CUDA maps to `cuda_capabilities` and ROCm to `rocm_archs`. Every other
    /// backend yields `None`, as does a backend whose list was not declared.
    pub fn backend_archs(&self, backend: Backend) -> Option<&Vec<String>> {
        let declared = match backend {
            Backend::Cuda => &self.cuda_capabilities,
            Backend::Rocm => &self.rocm_archs,
            _ => return None,
        };
        declared.as_ref()
    }
}

pub struct TvmFfi {
pub include: Option<Vec<String>>,
Expand Down
5 changes: 4 additions & 1 deletion kernels-data/src/config/v3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,10 @@ impl TryFrom<Build> for super::Build {
let framework = match build.framework {
Some(Framework::Torch(torch)) => super::Framework::Torch(torch.into()),
Some(Framework::TvmFfi(tvm_ffi)) => super::Framework::TvmFfi(tvm_ffi.into()),
None => super::Framework::TorchNoarch(super::TorchNoarch {}),
None => super::Framework::TorchNoarch(super::TorchNoarch {
cuda_capabilities: None,
rocm_archs: None,
}),
};

Ok(Self {
Expand Down
21 changes: 15 additions & 6 deletions kernels-data/src/config/v4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,11 @@ pub struct Torch {
}

#[derive(Debug, Deserialize, Clone, Serialize)]
#[serde(deny_unknown_fields)]
pub struct TorchNoarch {}
#[serde(deny_unknown_fields, rename_all = "kebab-case")]
pub struct TorchNoarch {
pub cuda_capabilities: Option<Vec<String>>,
pub rocm_archs: Option<Vec<String>>,
}

#[derive(Debug, Deserialize, Clone, Serialize)]
#[serde(deny_unknown_fields)]
Expand Down Expand Up @@ -255,8 +258,11 @@ impl From<Torch> for super::Torch {
}

impl From<TorchNoarch> for super::TorchNoarch {
fn from(_torch_noarch: TorchNoarch) -> Self {
Self {}
fn from(torch_noarch: TorchNoarch) -> Self {
Self {
cuda_capabilities: torch_noarch.cuda_capabilities,
rocm_archs: torch_noarch.rocm_archs,
}
}
}

Expand Down Expand Up @@ -449,8 +455,11 @@ impl From<super::Torch> for Torch {
}
}
impl From<super::TorchNoarch> for TorchNoarch {
fn from(_torch_noarch: super::TorchNoarch) -> Self {
Self {}
fn from(torch_noarch: super::TorchNoarch) -> Self {
Self {
cuda_capabilities: torch_noarch.cuda_capabilities,
rocm_archs: torch_noarch.rocm_archs,
}
}
}

Expand Down
Loading