You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
- [peak performance on GPUs](#single-gpu-benchmarks) (datacenter/gaming/professional/laptop), validated with roofline model
208
212
- [DDF-shifting](https://www.researchgate.net/publication/362275548_Accuracy_and_performance_of_the_lattice_Boltzmann_method_with_64-bit_32-bit_and_customized_16-bit_number_formats) and other algebraic optimization to minimize round-off error
uint cores=0u; // for CPUs, compute_units is the number of threads (twice the number of cores with hyperthreading)
29
30
float tflops=0.0f; // estimated device FP32 floating point performance in TeraFLOPs/s
@@ -63,6 +64,19 @@ struct Device_Info {
63
64
constfloat arm = (float)(contains(to_lower(vendor), "arm"))*(is_gpu?8.0f:1.0f); // ARM GPUs usually have 8 cores/CU, ARM CPUs have 1 core/CU
64
65
cores = to_uint((float)compute_units*(nvidia+amd+intel+apple+arm)); // for CPUs, compute_units is the number of threads (twice the number of cores with hyperthreading)
65
66
tflops = 1E-6f*(float)cores*(float)ipc*(float)clock_frequency; // estimated device floating point performance in TeraFLOPs/s
67
+
if(intel==8.0f) { // fix wrong global memory reporting for Intel Arc GPUs
68
+
if(contains_any(name, {"A770", "0x56a0"})&&(memory==12992u)) memory = 16240u; // fix wrong (80% on Windows) memory reporting on Intel Arc A770 16GB
69
+
if(contains_any(name, {"A770", "0x56a0"})&&(memory== 6476u)) memory = 8096u; // fix wrong (80% on Windows) memory reporting on Intel Arc A770 8GB
70
+
if(contains_any(name, {"A750", "0x56a1"})&&(memory== 6476u)) memory = 8096u; // fix wrong (80% on Windows) memory reporting on Intel Arc A750 8GB
71
+
if(contains_any(name, {"A580", "0x56a2"})&&(memory== 6476u)) memory = 8096u; // fix wrong (80% on Windows) memory reporting on Intel Arc A580 8GB
72
+
if(contains_any(name, {"A380", "0x56a5"})&&(memory== 4844u)) memory = 6056u; // fix wrong (80% on Windows) memory reporting on Intel Arc A380 6GB
73
+
if(contains_any(name, {"A770", "0x56a0"})&&(memory==15473u)) memory = 16288u; // fix wrong (95% on Linux) memory reporting on Intel Arc A770 16GB
74
+
if(contains_any(name, {"A770", "0x56a0"})&&(memory== 7721u)) memory = 8128u; // fix wrong (95% on Linux) memory reporting on Intel Arc A770 8GB
75
+
if(contains_any(name, {"A750", "0x56a1"})&&(memory== 7721u)) memory = 8128u; // fix wrong (95% on Linux) memory reporting on Intel Arc A750 8GB
76
+
if(contains_any(name, {"A580", "0x56a2"})&&(memory== 7721u)) memory = 8128u; // fix wrong (95% on Linux) memory reporting on Intel Arc A580 8GB
77
+
if(contains_any(name, {"A380", "0x56a5"})&&(memory== 5783u)) memory = 6088u; // fix wrong (95% on Linux) memory reporting on Intel Arc A380 6GB; the two name patterns must be separate list entries (adjacent string literals would otherwise merge into one pattern that never matches)
78
+
}
79
+
intel_gpu_above_4gb_patch = (intel==8.0f)&&(memory>4096); // enable memory allocations greater than 4GB for Intel GPUs with >4GB VRAM
if(device.info.memory_used>device.info.memory) print_error("Device \""+device.info.name+"\" does not have enough memory. Allocating another "+to_string((uint)(capacity()/1048576ull))+" MB would use a total of "+to_string(device.info.memory_used)+" MB / "+to_string(device.info.memory)+" MB.");
device_buffer = cl::Buffer(device.get_cl_context(), CL_MEM_READ_WRITE|((int)device.info.intel_gpu_above_4gb_patch<<23), capacity(), nullptr, &error);// for Intel GPUs, set flag CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL = (1<<23)
214
229
if(error==-61) print_error("Memory size is too large at "+to_string((uint)(capacity()/1048576ull))+" MB. Device \""+device.info.name+"\" accepts a maximum buffer size of "+to_string(device.info.max_global_buffer)+" MB."); // -61 is CL_INVALID_BUFFER_SIZE in the OpenCL headers: requested size (reported in MB, bytes/1048576) exceeds what the device accepts for a single buffer
215
230
elseif(error) print_error("Device buffer allocation failed with error code "+to_string(error)+".");
0 commit comments