Skip to content

Commit 07ab2ba

Browse files
ianbmacdonaldclaudeGLM-5.2codex
committed
fix(server): restore ROCm gfx family normalization for dGPU detection
identify_rocm_arch_from_name() returned the specific gfx arch (e.g. gfx1100, gfx1201) from the gfx-regex and KFD numeric-ISA detection paths instead of the ROCm family download target (gfx110X, gfx120X) that the backend support set expects. Commit 2a7aa18 (#2093) removed ROCM_ARCH_MAPPING, which regressed ROCm availability for every RDNA2/3/4 dGPU detected via those paths: the server reported "Unsupported GPU: gfx1100" for e.g. an RX 7900 XT.

Restore the specific->family normalization in a small header-only helper (lemon/rocm_arch.h), applied to both regressed paths:

  gfx1030-gfx1036 -> gfx103X
  gfx1100-gfx1103 -> gfx110X
  gfx1200/gfx1201 -> gfx120X

The RDNA2 range mirrors backend_versions.json url_mapping (all gfx1030-gfx1036 map to the published gfx103X-all archive). gfx115x iGPUs and CDNA targets pass through as exact package IDs.

Add a standalone unit test (CTest RocmArchTest).

Fixes #2319

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Co-Authored-By: GLM-5.2 <noreply@zhipuai.cn>
Co-Authored-By: GPT-5.5 <noreply@openai.com>
1 parent aab5528 commit 07ab2ba

4 files changed

Lines changed: 136 additions & 2 deletions

File tree

CMakeLists.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1720,3 +1720,20 @@ if(EXISTS "${_GGUF_CAPS_TEST_SRC}")
17201720
include(CTest)
17211721
add_test(NAME GgufCapabilitiesTest COMMAND test_gguf_capabilities)
17221722
endif()
1723+
1724+
# Unit test for lemon::normalize_rocm_family. The unit under test is header-only
# (lemon/rocm_arch.h), so the test executable is built from a single source file.
set(_ROCM_ARCH_TEST_SRC
    "${CMAKE_CURRENT_SOURCE_DIR}/test/cpp/test_rocm_arch.cpp"
)
if(EXISTS "${_ROCM_ARCH_TEST_SRC}")
    # Build from the same path that was just checked for existence, so the
    # guard and the target can never drift apart.
    add_executable(test_rocm_arch
        "${_ROCM_ARCH_TEST_SRC}"
    )
    target_include_directories(test_rocm_arch PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/src/cpp/include
        ${CMAKE_CURRENT_BINARY_DIR}/include
    )

    # Include CTest here as well instead of relying on an earlier test block:
    # those blocks are guarded by their own if(EXISTS ...) checks, and including
    # the module more than once is harmless.
    include(CTest)
    add_test(NAME RocmArchTest COMMAND test_rocm_arch)
endif()

src/cpp/include/lemon/rocm_arch.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#pragma once
2+
3+
#include <string>
4+
5+
namespace lemon {
6+
7+
// Collapse a specific RDNA dGPU gfx target to the ROCm "family" download target
8+
// that the backend support set and install filenames expect:
9+
// gfx1030-gfx1036 -> gfx103X (RDNA2, Radeon RX 6000)
10+
// gfx1100-gfx1103 -> gfx110X (RDNA3, Radeon RX 7000)
11+
// gfx1200/gfx1201 -> gfx120X (RDNA4, Radeon RX 9000)
12+
// iGPU targets (gfx1150/gfx1151/gfx1152) ship as exact binaries and pass through
13+
// unchanged, as do CDNA / exact-package targets (e.g. gfx90a). Anything else
14+
// passes through unchanged.
15+
//
16+
// The RDNA2 set mirrors backend_versions.json `url_mapping`, which maps every
17+
// gfx1030-gfx1036 to the published `gfx103X-all` archive.
18+
//
19+
// This restores the specific->family normalization that #2093 (commit 2a7aa18c)
20+
// dropped when it removed ROCM_ARCH_MAPPING. Without it, identify_rocm_arch_from_name
21+
// returns the specific arch (e.g. gfx1100, gfx1201) from the gfx-regex and KFD
22+
// numeric-ISA detection paths, which no longer matches the gfx103X/gfx110X/gfx120X
23+
// families in the support set -> ROCm reported "unsupported" for every RDNA2/3/4
24+
// dGPU on those paths (#2319).
25+
inline std::string normalize_rocm_family(const std::string& arch) {
    // One rule per RDNA dGPU family: the six-character stem shared by every
    // member, the highest trailing digit that belongs to the family, and the
    // collapsed ROCm download target returned for a match.
    struct FamilyRule {
        const char* stem;
        char last_step;
        const char* family;
    };
    static constexpr FamilyRule kRules[] = {
        {"gfx103", '6', "gfx103X"},  // RDNA2: gfx1030 .. gfx1036
        {"gfx110", '3', "gfx110X"},  // RDNA3: gfx1100 .. gfx1103
        {"gfx120", '1', "gfx120X"},  // RDNA4: gfx1200 .. gfx1201
    };

    // Every collapsible target is exactly "gfx" plus four characters; any
    // other length (including the empty string) is returned untouched.
    if (arch.size() != 7) {
        return arch;
    }

    const char step = arch.back();
    for (const FamilyRule& rule : kRules) {
        const bool same_stem = (arch.compare(0, 6, rule.stem) == 0);
        if (same_stem && step >= '0' && step <= rule.last_step) {
            return rule.family;
        }
    }

    // iGPU exact targets (gfx1150/gfx1151/gfx1152), CDNA targets (gfx90a, ...),
    // already-collapsed families and anything unrecognised pass through as-is.
    return arch;
}
42+
43+
} // namespace lemon

src/cpp/server/system_info.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "lemon/utils/json_utils.h"
88
#include "lemon/utils/process_manager.h"
99
#include "lemon/backends/backend_utils.h"
10+
#include "lemon/rocm_arch.h"
1011
#include <filesystem>
1112
#include <fstream>
1213
#include <sstream>
@@ -1846,8 +1847,12 @@ std::string identify_rocm_arch_from_name(const std::string& device_name) {
18461847

18471848
std::smatch gfx_match;
18481849
// Match 3- or 4-digit gfx tokens; the trailing nibble can be hex (e.g. gfx90a).
1850+
// The 3-digit/hex form intentionally also captures CDNA exact targets
1851+
// (gfx90a, gfx908, gfx942), which normalize_rocm_family passes through unchanged.
18491852
if (std::regex_search(device_lower, gfx_match, std::regex(R"((gfx[0-9a-f]{3,4}))"))) {
1850-
return gfx_match[1].str();
1853+
// Collapse the specific arch (gfx1100, gfx1201, ...) to its ROCm family
1854+
// download target (gfx110X, gfx120X) so it matches the support set (#2319).
1855+
return normalize_rocm_family(gfx_match[1].str());
18511856
}
18521857

18531858
// Linux will pass the ISA from KFD, transform it to what the rest of lemonade expects
@@ -1866,7 +1871,9 @@ std::string identify_rocm_arch_from_name(const std::string& device_name) {
18661871

18671872
char buf[16];
18681873
std::snprintf(buf, sizeof(buf), "gfx%d%x%x", major, minor, step);
1869-
return std::string(buf);
1874+
// KFD hands the ISA through as a number (e.g. 110000 -> gfx1100); collapse
1875+
// it to the ROCm family target so it matches the support set (#2319).
1876+
return normalize_rocm_family(std::string(buf));
18701877
}
18711878

18721879
if (device_lower.find("radeon") == std::string::npos &&

test/cpp/test_rocm_arch.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// Standalone test for lemon::normalize_rocm_family (src/cpp/include/lemon/rocm_arch.h).
2+
//
3+
// Guards the regression in #2319: identify_rocm_arch_from_name() must return the
4+
// ROCm *family* download target (gfx103X / gfx110X / gfx120X) for RDNA2/3/4 dGPUs,
5+
// not the specific arch (gfx1030 / gfx1100 / gfx1201). Commit 2a7aa18c (#2093)
6+
// removed ROCM_ARCH_MAPPING, so the gfx-regex and KFD numeric-ISA detection paths
7+
// began returning the specific arch, which no longer matched the support set and
8+
// made ROCm report "Unsupported GPU: gfx1100" for e.g. an RX 7900 XT. This header
9+
// restores the specific->family normalization those paths apply.
10+
//
11+
// Compile with: cl /std:c++17 /EHsc /I src/cpp/include test/cpp/test_rocm_arch.cpp
12+
// or: g++ -std=c++17 -I src/cpp/include test/cpp/test_rocm_arch.cpp -o rocm_arch_test
13+
14+
#include "lemon/rocm_arch.h"
15+
16+
#include <cstdio>
17+
#include <string>
18+
19+
using lemon::normalize_rocm_family;
20+
21+
// Number of failed expectations so far; main() derives the exit code from it.
static int g_failures = 0;

// Compares `got` with `want`, prints one PASS/FAIL line labelled `name`, and
// bumps g_failures on a mismatch.
static void expect(const char* name, const std::string& got, const std::string& want) {
    const char* verdict = "PASS";
    if (got != want) {
        verdict = "FAIL";
        g_failures += 1;
    }
    std::printf("[%s] %s (got \"%s\", want \"%s\")\n",
                verdict, name, got.c_str(), want.c_str());
}
29+
30+
int main() {
    // Each row is one expectation: the label printed for it, the arch string
    // handed to normalize_rocm_family, and the result it must produce.
    struct Case {
        const char* label;
        const char* input;
        const char* want;
    };
    static const Case kCases[] = {
        // RDNA3 (gfx110X): the family from the #2319 report (RX 7900 XT repro).
        {"gfx1100 -> gfx110X", "gfx1100", "gfx110X"},
        {"gfx1101 -> gfx110X", "gfx1101", "gfx110X"},
        {"gfx1102 -> gfx110X", "gfx1102", "gfx110X"},
        {"gfx1103 -> gfx110X", "gfx1103", "gfx110X"},

        // RDNA2 (gfx103X): the whole gfx1030-gfx1036 range maps to the
        // gfx103X-all archive in backend_versions.json, so gfx1033/1035/1036
        // must collapse too rather than being dropped.
        {"gfx1030 -> gfx103X", "gfx1030", "gfx103X"},
        {"gfx1031 -> gfx103X", "gfx1031", "gfx103X"},
        {"gfx1032 -> gfx103X", "gfx1032", "gfx103X"},
        {"gfx1033 -> gfx103X", "gfx1033", "gfx103X"},
        {"gfx1034 -> gfx103X", "gfx1034", "gfx103X"},
        {"gfx1035 -> gfx103X", "gfx1035", "gfx103X"},
        {"gfx1036 -> gfx103X", "gfx1036", "gfx103X"},

        // RDNA4 (gfx120X).
        {"gfx1200 -> gfx120X", "gfx1200", "gfx120X"},
        {"gfx1201 -> gfx120X", "gfx1201", "gfx120X"},

        // iGPU exact targets ship as exact binaries and must not be collapsed.
        {"gfx1150 unchanged", "gfx1150", "gfx1150"},
        {"gfx1151 unchanged", "gfx1151", "gfx1151"},
        {"gfx1152 unchanged", "gfx1152", "gfx1152"},

        // Already-collapsed families stay put (the name-heuristic paths return
        // families directly); unrelated and empty inputs pass through.
        {"gfx110X idempotent", "gfx110X", "gfx110X"},
        {"gfx90a unchanged", "gfx90a", "gfx90a"},
        {"empty unchanged", "", ""},
    };

    std::printf("=== normalize_rocm_family tests ===\n");

    for (const Case& tc : kCases) {
        expect(tc.label, normalize_rocm_family(tc.input), tc.want);
    }

    // A non-zero exit status is what makes CTest report the run as failed.
    const bool all_passed = (g_failures == 0);
    std::printf("\n%s\n", all_passed ? "ALL PASS" : "FAILURES PRESENT");
    return all_passed ? 0 : 1;
}

0 commit comments

Comments
 (0)