Merge pull request #4 from erwei-xilinx/update-readme-and-fix-urls

erwei-xilinx · web-flow · commit 0f2c2d2fdc21 · 2026-02-23T21:29:28.000-08:00
Update README with project overview and fix obsolete URLs
diff --git a/README.md b/README.md
@@ -1,12 +1,38 @@
 # Triton-XDNA
-This repository contains a plugin for building AIR as Triton's compiler backend.
+
+**An experimental open-source project demonstrating compiler-driven kernel generation for AMD XDNA NPUs using [Triton](https://github.com/triton-lang/triton) and [MLIR-AIR](https://github.com/Xilinx/mlir-air).**
+
+Triton-XDNA provides an end-to-end compilation flow that lowers standard Triton kernels directly to AMD NPU hardware — no prebuilt kernel libraries required. It bridges Triton's high-level parallel programming model with AMD's MLIR-AIR/AIE compilation stack, producing XRT-compatible binaries for AMD AI Engine architectures (AIE2 and AIE2P).
+
+### How it works
+
+Triton kernels are first lowered to compact Linalg compute graphs via [triton-shared](https://github.com/microsoft/triton-shared), then tiled and mapped onto parallel NPU cores using the MLIR Transform dialect, and finally compiled through [MLIR-AIR](https://github.com/Xilinx/mlir-air) and [MLIR-AIE](https://github.com/Xilinx/mlir-aie) to produce device binaries.
+
+```
+Triton kernel (@triton.jit)
+  -> triton-shared (Linalg)
+    -> MLIR Transform dialect (tiling, bufferization, vectorization)
+      -> MLIR-AIR / MLIR-AIE
+        -> XRT binary (aie.xclbin)
+```
+
+### Key results
+
+- For dense matrix multiplication (I8/I16/BF16), compiler-generated kernels achieve **performance parity with handwritten NPU implementations**
+- Over **90% of tested matmul configurations reach at least 90% of baseline throughput**; no configuration falls below 80% of baseline
+- Currently supports matrix multiplication, elementwise operations, softmax, and layer normalization
+- Complex compute graphs with reductions and broadcasts are mapped onto parallel NPU tiles
+
+### Contributing
+
+This is an experimental project and we welcome community contributions. Whether you want to add support for new kernel types, improve performance, or extend platform support, we'd love to collaborate.
 
 ## Usage
 
 ### Clone the repository
 ```
-git clone https://github.com/AARInternal/triton-xdna.git
-cd triton-xdna
+git clone https://github.com/amd/Triton-XDNA.git
+cd Triton-XDNA
 git submodule update --init
 ```
 
@@ -27,7 +53,7 @@ python3 -m pip install --upgrade pip
 
 # Install triton-xdna from GitHub Releases
 pip install triton-xdna \
-  --find-links https://github.com/AARInternal/triton-xdna/releases/expanded_assets/latest-wheels \
+  --find-links https://github.com/amd/Triton-XDNA/releases/expanded_assets/latest-wheels \
   --find-links https://github.com/Xilinx/mlir-aie/releases/expanded_assets/latest-wheels-no-rtti \
   --find-links https://github.com/Xilinx/llvm-aie/releases/expanded_assets/nightly \
   --find-links https://github.com/Xilinx/mlir-air/releases/expanded_assets/latest-air-wheels-no-rtti
@@ -52,7 +78,7 @@ python3 -m pip install --upgrade pip
 pip install cmake pybind11 nanobind wheel ninja pytest setuptools Cython
 
 # Install triton-xdna from source and all dependencies automatically
-pip install . \
+pip install . --no-build-isolation \
   --find-links https://github.com/Xilinx/mlir-aie/releases/expanded_assets/latest-wheels-no-rtti \
   --find-links https://github.com/Xilinx/llvm-aie/releases/expanded_assets/nightly \
   --find-links https://github.com/Xilinx/mlir-air/releases/expanded_assets/latest-air-wheels-no-rtti
@@ -74,7 +100,7 @@ python3 -m pip install --upgrade pip
 pip install cmake pybind11 nanobind wheel ninja pytest setuptools Cython
 source utils/env_setup.sh
 
-cmake cmake -GNinja -S . -Bbuild
+cmake -GNinja -S . -Bbuild
 cd build
 ninja
 ```
diff --git a/ci/docker-based/loop_docker_ci.sh b/ci/docker-based/loop_docker_ci.sh
@@ -6,8 +6,8 @@
 set -x 
 
 IMAGE_NAME="triton-xdna-public-dev-github-runner"
-GITHUB_OWNER="AARInternal"
-GITHUB_REPO="triton-xdna"
+GITHUB_OWNER="amd"
+GITHUB_REPO="Triton-XDNA"
 
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 GITHUB_PAT=$(cat "${SCRIPT_DIR}/secret_github_token")
diff --git a/ci/docker-based/test_docker_ci.sh b/ci/docker-based/test_docker_ci.sh
@@ -4,8 +4,8 @@
 # SPDX-License-Identifier: MIT
 
 IMAGE_NAME="triton-xdna-public-dev-github-runner"
-GITHUB_OWNER="AARInternal"
-GITHUB_REPO="triton-xdna"
+GITHUB_OWNER="amd"
+GITHUB_REPO="Triton-XDNA"
 
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 GITHUB_PAT=$(cat "${SCRIPT_DIR}/secret_github_token")
diff --git a/pyproject.toml b/pyproject.toml
@@ -26,14 +26,14 @@ classifiers = [
 
 # Installation command:
 # pip install triton-xdna \
-#   --find-links https://github.com/AARInternal/triton-xdna/releases/expanded_assets/latest-wheels \
+#   --find-links https://github.com/amd/Triton-XDNA/releases/expanded_assets/latest-wheels \
 #   --find-links https://github.com/Xilinx/mlir-aie/releases/expanded_assets/latest-wheels-no-rtti \
 #   --find-links https://github.com/Xilinx/llvm-aie/releases/expanded_assets/nightly \
 #   --find-links https://github.com/Xilinx/mlir-air/releases/expanded_assets/latest-air-wheels-no-rtti
 
 [project.urls]
-Homepage = "https://github.com/AARInternal/triton-xdna"
-Repository = "https://github.com/AARInternal/triton-xdna.git"
+Homepage = "https://github.com/amd/Triton-XDNA"
+Repository = "https://github.com/amd/Triton-XDNA.git"
 
 [project.entry-points."triton.backends"]
 amd_triton_npu = "triton.backends.amd_triton_npu"
diff --git a/setup.py b/setup.py
@@ -805,7 +805,7 @@ def run(self):
     description="Triton compiler with MLIR-AIR backend for AMD NPU devices",
     long_description=(BASE_DIR / "README.md").read_text(),
     long_description_content_type="text/markdown",
-    url="https://github.com/AARInternal/triton-xdna",
+    url="https://github.com/amd/Triton-XDNA",
     license="MIT",
     packages=[],  # No packages - we build from triton
     install_requires=get_install_requires(),