@@ -54,21 +54,21 @@ sudo shutdown -r now
54
54
)"+string(" \033 [96m" )+R"(.-----------------------------------------------------------------------------.
55
55
| CPU Option 1 : Intel CPU Runtime for OpenCL (works for both AMD/Intel CPUs) |
56
56
'-----------------------------------------------------------------------------'
57
- export OCLCPUEXP_VERSION ="2024.18.6.0.02_rel"
58
- export ONEAPI_TBB_VERSION ="2021.13.0"
57
+ export OCLV ="2024.18.6.0.02_rel"
58
+ export TBBV ="2021.13.0"
59
59
sudo apt update && sudo apt upgrade -y
60
60
sudo apt install -y g++ git make ocl-icd-libopencl1 ocl-icd-opencl-dev
61
- sudo mkdir -p ~/cpuruntime /opt/intel/oclcpuexp_${OCLCPUEXP_VERSION } /etc/OpenCL/vendors /etc/ld.so.conf.d
62
- sudo wget -P ~/cpuruntime https:// github.com/intel/llvm/releases/download/2024-WW25/oclcpuexp-${OCLCPUEXP_VERSION }.tar.gz
63
- sudo wget -P ~/cpuruntime https:// github.com/oneapi-src/oneTBB/releases/download/v${ONEAPI_TBB_VERSION }/oneapi-tbb-${ONEAPI_TBB_VERSION }-lin.tgz
64
- sudo tar -zxvf ~/cpuruntime/oclcpuexp-${OCLCPUEXP_VERSION }.tar.gz -C /opt/intel/oclcpuexp_${OCLCPUEXP_VERSION }
65
- sudo tar -zxvf ~/cpuruntime/oneapi-tbb-${ONEAPI_TBB_VERSION }-lin.tgz -C /opt/intel
66
- echo /opt/intel/oclcpuexp_${OCLCPUEXP_VERSION }/x64/libintelocl.so | sudo tee /etc/OpenCL/vendors/intel_expcpu.icd
67
- echo /opt/intel/oclcpuexp_${OCLCPUEXP_VERSION }/x64 | sudo tee /etc/ld.so.conf.d/libintelopenclexp.conf
68
- sudo ln -sf /opt/intel/oneapi-tbb-${ONEAPI_TBB_VERSION }/lib/intel64/gcc4.8 /libtbb.so /opt/intel/oclcpuexp_${OCLCPUEXP_VERSION }/x64
69
- sudo ln -sf /opt/intel/oneapi-tbb-${ONEAPI_TBB_VERSION }/lib/intel64/gcc4.8 /libtbbmalloc.so /opt/intel/oclcpuexp_${OCLCPUEXP_VERSION }/x64
70
- sudo ln -sf /opt/intel/oneapi-tbb-${ONEAPI_TBB_VERSION }/lib/intel64/gcc4.8 /libtbb.so.12 /opt/intel/oclcpuexp_${OCLCPUEXP_VERSION }/x64
71
- sudo ln -sf /opt/intel/oneapi-tbb-${ONEAPI_TBB_VERSION }/lib/intel64/gcc4.8 /libtbbmalloc.so.2 /opt/intel/oclcpuexp_${OCLCPUEXP_VERSION }/x64
61
+ sudo mkdir -p ~/cpuruntime /opt/intel/oclcpuexp_${OCLV } /etc/OpenCL/vendors /etc/ld.so.conf.d
62
+ sudo wget -P ~/cpuruntime https:// github.com/intel/llvm/releases/download/2024-WW25/oclcpuexp-${OCLV }.tar.gz
63
+ sudo wget -P ~/cpuruntime https:// github.com/oneapi-src/oneTBB/releases/download/v${TBBV }/oneapi-tbb-${TBBV }-lin.tgz
64
+ sudo tar -zxvf ~/cpuruntime/oclcpuexp-${OCLV }.tar.gz -C /opt/intel/oclcpuexp_${OCLV }
65
+ sudo tar -zxvf ~/cpuruntime/oneapi-tbb-${TBBV }-lin.tgz -C /opt/intel
66
+ echo /opt/intel/oclcpuexp_${OCLV }/x64/libintelocl.so | sudo tee /etc/OpenCL/vendors/intel_expcpu.icd
67
+ echo /opt/intel/oclcpuexp_${OCLV }/x64 | sudo tee /etc/ld.so.conf.d/libintelopenclexp.conf
68
+ sudo ln -sf /opt/intel/oneapi-tbb-${TBBV }/lib/intel64/gcc4.8 /libtbb.so /opt/intel/oclcpuexp_${OCLV }/x64
69
+ sudo ln -sf /opt/intel/oneapi-tbb-${TBBV }/lib/intel64/gcc4.8 /libtbbmalloc.so /opt/intel/oclcpuexp_${OCLV }/x64
70
+ sudo ln -sf /opt/intel/oneapi-tbb-${TBBV }/lib/intel64/gcc4.8 /libtbb.so.12 /opt/intel/oclcpuexp_${OCLV }/x64
71
+ sudo ln -sf /opt/intel/oneapi-tbb-${TBBV }/lib/intel64/gcc4.8 /libtbbmalloc.so.2 /opt/intel/oclcpuexp_${OCLV }/x64
72
72
sudo ldconfig -f /etc/ld.so.conf.d/libintelopenclexp.conf
73
73
sudo rm -r ~/cpuruntime
74
74
@@ -85,20 +85,20 @@ struct Device_Info {
85
85
cl::Device cl_device; // OpenCL device
86
86
cl::Context cl_context; // multiple devices in the same context can communicate buffers
87
87
uint id = 0u ; // unique device ID assigned by get_devices()
88
- string name, vendor; // device name, vendor
89
- string driver_version, opencl_c_version; // device driver version, OpenCL C version
90
- uint memory= 0u ; // global memory in MB
91
- uint memory_used= 0u ; // track global memory usage in MB
88
+ string name= " " , vendor= " " ; // device name, vendor
89
+ string driver_version= " " , opencl_c_version= " " ; // device driver version, OpenCL C version
90
+ uint memory = 0u ; // global memory in MB
91
+ uint memory_used = 0u ; // track global memory usage in MB
92
92
uint global_cache=0u , local_cache=0u ; // global cache in KB, local cache in KB
93
93
uint max_global_buffer=0u , max_constant_buffer=0u ; // maximum global buffer size in MB, maximum constant buffer size in KB
94
- uint compute_units= 0u ; // compute units (CUs) can contain multiple cores depending on the microarchitecture
95
- uint clock_frequency= 0u ; // in MHz
94
+ uint compute_units = 0u ; // compute units (CUs) can contain multiple cores depending on the microarchitecture
95
+ uint clock_frequency = 0u ; // in MHz
96
96
bool is_cpu=false , is_gpu=false ;
97
97
bool intel_gpu_above_4gb_patch = false ; // memory allocations greater than 4GB need to be specifically enabled on Intel GPUs
98
98
bool legacy_gpu_fma_patch = false ; // some old GPUs have terrible fma performance, so replace with a*b+c
99
99
uint is_fp64_capable=0u , is_fp32_capable=0u , is_fp16_capable=0u , is_int64_capable=0u , is_int32_capable=0u , is_int16_capable=0u , is_int8_capable=0u ;
100
- uint cores= 0u ; // for CPUs, compute_units is the number of threads (twice the number of cores with hyperthreading)
101
- float tflops= 0 .0f ; // estimated device FP32 floating point performance in TeraFLOPs/s
100
+ uint cores = 0u ; // for CPUs, compute_units is the number of threads (twice the number of cores with hyperthreading)
101
+ float tflops = 0 .0f ; // estimated device FP32 floating point performance in TeraFLOPs/s
102
102
inline Device_Info (const cl::Device& cl_device, const cl::Context& cl_context, const uint id) {
103
103
this ->cl_device = cl_device; // see https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
104
104
this ->cl_context = cl_context;
@@ -565,14 +565,14 @@ class Kernel {
565
565
if (!device.is_initialized ()) print_error (" No OpenCL Device selected. Call Device constructor." );
566
566
this ->name = name;
567
567
cl_kernel = cl::Kernel (device.get_cl_program (), name.c_str ());
568
- link_parameters (number_of_parameters , parameters...); // expand variadic template to link kernel parameters
568
+ link_parameters (0u , parameters...); // expand variadic template to link kernel parameters
569
569
set_ranges (N);
570
570
cl_queue = device.get_cl_queue ();
571
571
}
572
572
template <class ... T> inline Kernel (const Device& device, const ulong N, const uint workgroup_size, const string& name, const T&... parameters) { // accepts Memory<T> objects and fundamental data type constants
573
573
if (!device.is_initialized ()) print_error (" No OpenCL Device selected. Call Device constructor." );
574
574
cl_kernel = cl::Kernel (device.get_cl_program (), name.c_str ());
575
- link_parameters (number_of_parameters , parameters...); // expand variadic template to link kernel parameters
575
+ link_parameters (0u , parameters...); // expand variadic template to link kernel parameters
576
576
set_ranges (N, (ulong)workgroup_size);
577
577
cl_queue = device.get_cl_queue ();
578
578
}
0 commit comments