Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1720,3 +1720,20 @@ if(EXISTS "${_GGUF_CAPS_TEST_SRC}")
include(CTest)
add_test(NAME GgufCapabilitiesTest COMMAND test_gguf_capabilities)
endif()

# Standalone unit test for lemon::normalize_rocm_family. The unit under test is
# header-only (lemon/rocm_arch.h), so the test source is the only file compiled.
# The target is created only when the test source is present in the tree.
set(_ROCM_ARCH_TEST_SRC
    "${CMAKE_CURRENT_SOURCE_DIR}/test/cpp/test_rocm_arch.cpp"
)
if(EXISTS "${_ROCM_ARCH_TEST_SRC}")
    add_executable(test_rocm_arch
        test/cpp/test_rocm_arch.cpp
    )
    target_include_directories(test_rocm_arch PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/src/cpp/include
        ${CMAKE_CURRENT_BINARY_DIR}/include
    )

    # Enable CTest here instead of relying on an earlier test block: the
    # preceding include(CTest) is itself guarded by an if(EXISTS ...) on a
    # different test source, so it may not have run. Without testing enabled,
    # add_test() would not yield a test that ctest can run. Including the
    # module again when it is already loaded is harmless.
    include(CTest)
    add_test(NAME RocmArchTest COMMAND test_rocm_arch)
endif()
43 changes: 43 additions & 0 deletions src/cpp/include/lemon/rocm_arch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#pragma once

#include <string>

namespace lemon {

// Map a specific RDNA dGPU gfx target onto the ROCm "family" download target
// used by the backend support set and the install filenames:
//   gfx1030 .. gfx1036 -> gfx103X  (RDNA2, Radeon RX 6000)
//   gfx1100 .. gfx1103 -> gfx110X  (RDNA3, Radeon RX 7000)
//   gfx1200 .. gfx1201 -> gfx120X  (RDNA4, Radeon RX 9000)
// Every other string is returned unchanged. That includes the iGPU targets
// (gfx1150/gfx1151/gfx1152), which ship as exact binaries, CDNA / exact-package
// targets such as gfx90a, already-collapsed families, and the empty string.
//
// The RDNA2 range follows backend_versions.json `url_mapping`, which points
// every gfx1030-gfx1036 target at the published `gfx103X-all` archive.
//
// History: #2093 (commit 2a7aa18c) removed ROCM_ARCH_MAPPING and with it this
// specific->family step. identify_rocm_arch_from_name then returned the specific
// arch (e.g. gfx1100, gfx1201) from its gfx-regex and KFD numeric-ISA paths,
// which did not match the gfx103X/gfx110X/gfx120X entries in the support set,
// so ROCm was reported "unsupported" for RDNA2/3/4 dGPUs on those paths (#2319).
//
// The comparison is exact and case-sensitive.
inline std::string normalize_rocm_family(const std::string& arch) {
    // One row per collapsed family: the six leading characters shared by its
    // members, the highest step digit that belongs to it, and the family name.
    struct FamilyRule {
        const char* stem;
        char max_step;
        const char* family;
    };
    static constexpr FamilyRule kRules[] = {
        {"gfx103", '6', "gfx103X"},
        {"gfx110", '3', "gfx110X"},
        {"gfx120", '1', "gfx120X"},
    };
    constexpr std::string::size_type kStemLen = 6;

    // Members of a collapsed family are exactly the stem plus one step digit.
    if (arch.size() != kStemLen + 1) {
        return arch;
    }

    const char step = arch.back();
    for (const FamilyRule& rule : kRules) {
        const bool stem_matches = arch.compare(0, kStemLen, rule.stem) == 0;
        if (stem_matches && step >= '0' && step <= rule.max_step) {
            return rule.family;
        }
    }

    // iGPU exact targets, CDNA targets and anything unrecognized pass through.
    return arch;
}

} // namespace lemon
11 changes: 9 additions & 2 deletions src/cpp/server/system_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "lemon/utils/json_utils.h"
#include "lemon/utils/process_manager.h"
#include "lemon/backends/backend_utils.h"
#include "lemon/rocm_arch.h"
#include <filesystem>
#include <fstream>
#include <sstream>
Expand Down Expand Up @@ -1846,8 +1847,12 @@ std::string identify_rocm_arch_from_name(const std::string& device_name) {

std::smatch gfx_match;
// Match 3- or 4-digit gfx tokens; the trailing nibble can be hex (e.g. gfx90a).
// The 3-digit/hex form intentionally also captures CDNA exact targets
// (gfx90a, gfx908, gfx942), which normalize_rocm_family passes through unchanged.
if (std::regex_search(device_lower, gfx_match, std::regex(R"((gfx[0-9a-f]{3,4}))"))) {
return gfx_match[1].str();
// Collapse the specific arch (gfx1100, gfx1201, ...) to its ROCm family
// download target (gfx110X, gfx120X) so it matches the support set (#2319).
return normalize_rocm_family(gfx_match[1].str());
}

// Linux will pass the ISA from KFD, transform it to what the rest of lemonade expects
Expand All @@ -1866,7 +1871,9 @@ std::string identify_rocm_arch_from_name(const std::string& device_name) {

char buf[16];
std::snprintf(buf, sizeof(buf), "gfx%d%x%x", major, minor, step);
return std::string(buf);
// KFD hands the ISA through as a number (e.g. 110000 -> gfx1100); collapse
// it to the ROCm family target so it matches the support set (#2319).
return normalize_rocm_family(std::string(buf));
}

if (device_lower.find("radeon") == std::string::npos &&
Expand Down
67 changes: 67 additions & 0 deletions test/cpp/test_rocm_arch.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Standalone test for lemon::normalize_rocm_family (src/cpp/include/lemon/rocm_arch.h).
//
// Guards the regression in #2319: identify_rocm_arch_from_name() must return the
// ROCm *family* download target (gfx103X / gfx110X / gfx120X) for RDNA2/3/4 dGPUs,
// not the specific arch (gfx1030 / gfx1100 / gfx1201). Commit 2a7aa18c (#2093)
// removed ROCM_ARCH_MAPPING, so the gfx-regex and KFD numeric-ISA detection paths
// began returning the specific arch, which no longer matched the support set and
// made ROCm report "Unsupported GPU: gfx1100" for e.g. an RX 7900 XT. This header
// restores the specific->family normalization those paths apply.
//
// Compile with: cl /std:c++17 /EHsc /I src/cpp/include test/cpp/test_rocm_arch.cpp
// or: g++ -std=c++17 -I src/cpp/include test/cpp/test_rocm_arch.cpp -o rocm_arch_test

#include "lemon/rocm_arch.h"

#include <cstdio>
#include <string>

using lemon::normalize_rocm_family;

// Number of expectations that have failed so far; main() turns this into the
// process exit code.
static int g_failures = 0;

// Compare `got` against `want`, print one PASS/FAIL line labelled with `name`,
// and bump g_failures when the two strings differ.
static void expect(const char* name, const std::string& got, const std::string& want) {
    const char* verdict = "PASS";
    if (got != want) {
        verdict = "FAIL";
        ++g_failures;
    }
    std::printf("[%s] %s (got \"%s\", want \"%s\")\n",
                verdict, name, got.c_str(), want.c_str());
}

// Runs every normalize_rocm_family expectation in order and returns 0 when all
// of them pass, 1 otherwise.
int main() {
    // One row per expectation: the label printed in the report, the arch handed
    // to normalize_rocm_family, and the string it must return.
    struct Case {
        const char* label;
        const char* arch;
        const char* want;
    };
    static const Case kCases[] = {
        // RDNA3 (gfx110X): the family from the #2319 report, including the
        // RX 7900 XT reproduction.
        {"gfx1100 -> gfx110X", "gfx1100", "gfx110X"},
        {"gfx1101 -> gfx110X", "gfx1101", "gfx110X"},
        {"gfx1102 -> gfx110X", "gfx1102", "gfx110X"},
        {"gfx1103 -> gfx110X", "gfx1103", "gfx110X"},

        // RDNA2 (gfx103X): the full gfx1030-gfx1036 range, all served by the
        // gfx103X-all archive in backend_versions.json. Per the #2319 review,
        // gfx1033/1035/1036 must not be dropped because a published bundle
        // covers them.
        {"gfx1030 -> gfx103X", "gfx1030", "gfx103X"},
        {"gfx1031 -> gfx103X", "gfx1031", "gfx103X"},
        {"gfx1032 -> gfx103X", "gfx1032", "gfx103X"},
        {"gfx1033 -> gfx103X", "gfx1033", "gfx103X"},
        {"gfx1034 -> gfx103X", "gfx1034", "gfx103X"},
        {"gfx1035 -> gfx103X", "gfx1035", "gfx103X"},
        {"gfx1036 -> gfx103X", "gfx1036", "gfx103X"},

        // RDNA4 (gfx120X).
        {"gfx1200 -> gfx120X", "gfx1200", "gfx120X"},
        {"gfx1201 -> gfx120X", "gfx1201", "gfx120X"},

        // iGPU exact targets ship as exact binaries, so they stay as they are.
        {"gfx1150 unchanged", "gfx1150", "gfx1150"},
        {"gfx1151 unchanged", "gfx1151", "gfx1151"},
        {"gfx1152 unchanged", "gfx1152", "gfx1152"},

        // Already-collapsed families must survive a second pass (the
        // name-heuristic paths already return families), and unrelated or
        // empty input passes straight through.
        {"gfx110X idempotent", "gfx110X", "gfx110X"},
        {"gfx90a unchanged", "gfx90a", "gfx90a"},
        {"empty unchanged", "", ""},
    };

    std::printf("=== normalize_rocm_family tests ===\n");

    for (const Case& test_case : kCases) {
        expect(test_case.label, normalize_rocm_family(test_case.arch), test_case.want);
    }

    const bool all_passed = (g_failures == 0);
    std::printf("\n%s\n", all_passed ? "ALL PASS" : "FAILURES PRESENT");
    return all_passed ? 0 : 1;
}
Loading