Skip to content

[BOUNTY $25] Kernel error for MonteCarlo on NVIDIA and AMD GPUs #10

Open
@freemo

Description

@freemo

From @jjfumero on February 17, 2016 10:23

I am running a Monte Carlo simulation with Aparapi on JDK 1.8.0_65. For testing I am using Intel OpenCL locally. There, the kernel that Aparapi generates is correct, and the result matches the sequential code. However, if I use a GPU (NVIDIA or AMD), the generated kernel is not correct: the type is missing from one variable declaration.

My understanding is that Aparapi generates the OpenCL kernel independently of the underlying architecture (bytecode -> OpenCL C). Is that correct, or is there any communication with the device during code generation?

Here are the details. This is the Aparapi kernel:

public static class MonteCarloKernel extends Kernel {

        private int size;
        private float[] result;

        public MonteCarloKernel(int size) {
            this.size = size;
            result = new float[size];
        }

        @Override
        public void run() {
            int idx = getGlobalId();
            int iter = 25000;

            long seed = idx;
            float sum = 0.0f;

            for (int j = 0; j < iter; ++j) {
                // generate a pseudo random number (you do need it twice)
                seed = (seed * 0x5DEECE66DL + 0xBL) & ((1L << 48) - 1);
                seed = (seed * 0x5DEECE66DL + 0xBL) & ((1L << 48) - 1);

                // this generates a number between 0 and 1 (with an awful entropy)
                float x = ((float) (seed & 0x0FFFFFFF)) / 268435455f;

                // repeat for y
                seed = (seed * 0x5DEECE66DL + 0xBL) & ((1L << 48) - 1);
                seed = (seed * 0x5DEECE66DL + 0xBL) & ((1L << 48) - 1);
                float y = ((float) (seed & 0x0FFFFFFF)) / 268435455f;

                float dist = (float) Math.sqrt(x * x + y * y);
                if (dist <= 1.0f)
                    sum += 1.0f;
            }
            sum *= 4;
            result[idx] = (float) sum / (float) iter;
        }

        public boolean checkResult(float[] seq) {
            for (int i = 0; i < seq.length; i++) {
                if (Math.abs( (float)(result[i] - seq[i])) > 0.001) {
                    return false;
                }
            }
            return true;
        }

        public float[] getResult() {
            return result;
        }

        public int getSize() {
            return size;
        }
    }


If I use Intel OpenCL:

NAME: Intel(R) Core(TM) i5-3470 CPU @ 3.20GHz
VENDOR: Intel(R) Corporation
TYPE: CPU
DRIVER: 1.2.0.57

This is the kernel Aparapi generates (the correct kernel):


#pragma OPENCL EXTENSION cl_khr_fp64 : enable

typedef struct This_s{
   __global float *result;
   int passid;
}This;
int get_pass_id(This *this){
   return this->passid;
}
__kernel void run(
   __global float *result, 
   int passid
){
   This thisStruct;
   This* this=&thisStruct;
   this->result = result;
   this->passid = passid;
   {
      int idx = get_global_id(0);
      int iter = 25000;
      long seed = (long)idx;
      float sum = 0.0f;
      for (int j = 0; j<iter; j++){
         seed = ((seed * 25214903917L) + 11L) & 281474976710655L;
         seed = ((seed * 25214903917L) + 11L) & 281474976710655L;
         float x = (float)(seed & 268435455L) / 2.68435456E8f;
         seed = ((seed * 25214903917L) + 11L) & 281474976710655L;
         seed = ((seed * 25214903917L) + 11L) & 281474976710655L;
         float y = (float)(seed & 268435455L) / 2.68435456E8f;
         float dist = (float)sqrt((double)((x * x) + (y * y)));
         if (dist<=1.0f){
            sum = sum + 1.0f;
         }
      }
      sum = sum * 4.0f;
      this->result[idx]  = sum / (float)iter;
      return;
   }
}

When I use Aparapi on NVIDIA or AMD GPUs (same JVM, JDK 1.8.0_65, but a different OpenCL driver), I get this kernel:

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

typedef struct This_s{
   __global float *result;
   int passid;
}This;
int get_pass_id(This *this){
   return this->passid;
}
__kernel void run(
   __global float *result, 
   int passid
){
   This thisStruct;
   This* this=&thisStruct;
   this->result = result;
   this->passid = passid;
   {
      int i_1 = get_global_id(0);
      int i_2 = 25000;
       l_3 = (long)i_1;
      float f_5 = 0.0f;
      int i_6 = 0;
      for (; i_6<i_2; i_6++){
         l_3 = ((l_3 * 25214903917L) + 11L) & 281474976710655L;
         l_3 = ((l_3 * 25214903917L) + 11L) & 281474976710655L;
         float f_7 = (float)(l_3 & 268435455L) / 2.68435456E8f;
         l_3 = ((l_3 * 25214903917L) + 11L) & 281474976710655L;
         l_3 = ((l_3 * 25214903917L) + 11L) & 281474976710655L;
         float f_8 = (float)(l_3 & 268435455L) / 2.68435456E8f;
         float f_9 = (float)sqrt((double)((f_7 * f_7) + (f_8 * f_8)));
         if (f_9<=1.0f){
            f_5 = f_5 + 1.0f;
         }
      }
      f_5 = f_5 * 4.0f;
      this->result[i_1]  = f_5 / (float)i_2;
      return;
   }
}

Building this kernel fails with the following error, because `l_3` is assigned without ever being declared:

clBuildProgram failed
************************************************
:21:8: error: use of undeclared identifier 'l_3'
       l_3 = (long)i_1;
       ^

Note:
NVIDIA-SMI 331.79 Driver Version: 331.79

AMD:

Name: Hawaii
Vendor: Advanced Micro Devices, Inc.
Device OpenCL C version: OpenCL C 1.2
Driver version: 1598.5 (VM)

Copied from original issue: aparapi/aparapi#25

Metadata

Metadata

Assignees

No one assigned

    Labels

    bounty $$$ (Cash reward!) · bug (Fix something that is broken)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions