Skip to content

Commit adaeafe

Browse files
committed
Fix Adreno FLOPS estimate
1 parent b636abd commit adaeafe

1 file changed

Lines changed: 16 additions & 2 deletions

File tree

client/gpu_opencl.cpp

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,11 @@ static bool is_intel(char* vendor) {
109109
return false;
110110
}
111111

112+
// Return true if the given vendor string contains "QUALCOMM"
// (case-insensitive substring match via strcasestr), false otherwise.
// vendor is handed straight to strcasestr, so it must be a valid
// NUL-terminated string; no null check is done here.
// NOTE(review): "ardeno" is presumably a misspelling of Qualcomm's
// "Adreno" GPU family - confirm before renaming, since callers use this name.
static bool is_ardeno(char* vendor) {
113+
if (strcasestr(vendor, "QUALCOMM")) return true;
114+
return false;
115+
}
116+
112117
#ifdef __APPLE__
113118
static bool is_apple(char* vendor) {
114119
if (strcasestr(vendor, "apple")) return true;
@@ -742,8 +747,17 @@ void COPROCS::get_opencl(
742747
//
743748
prop.peak_flops = 0;
744749
if (prop.max_compute_units) {
745-
double freq = ((double)prop.max_clock_frequency) * MEGA;
746-
prop.peak_flops = ((double)prop.max_compute_units) * freq;
750+
double freq = ((double)prop.max_clock_frequency);
751+
if (is_ardeno(prop.vendor)) {
752+
if (freq == 1.0) {
753+
freq = 1000.0; // Estimate 1 GHz if driver returns 1 Mhz
754+
}
755+
}
756+
// may be inaccurate
757+
int simd_width = 128; // 128-bit vector unit
758+
int simd_per_compute_unit = 4; // 4 × FP32 ops per cycle
759+
760+
prop.peak_flops = ((double)prop.max_compute_units) * freq * simd_width * simd_per_compute_unit * 1e6;
747761
}
748762
if (prop.peak_flops <= 0 || prop.peak_flops > GPU_MAX_PEAK_FLOPS) {
749763
char buf2[256];

0 commit comments

Comments
 (0)