JDuchniewicz · Komal0103 · Nov 8, 2022 · Nov 12, 2022 · Nov 12, 2022 · Nov 12, 2022
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -2,6 +2,7 @@ set(GPGPU_GLES_EXAMPLES
     array_add_float
     fir_conv_float
     array_add_fixed16
+    mult_mat_int
     # chain API
     chain_simple_float
     chain_conv2d_float

diff --git a/examples/mult_mat_int/CMakeLists.txt b/examples/mult_mat_int/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Example: integer matrix multiplication on the GPU via the GPGPU_GLES library.
+cmake_minimum_required(VERSION 3.7)
+project(GPGPUGLESMatrixMultiplyInt VERSION 1.0)
+
+# Place the example binary under <build>/bin.
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
+
+# Provides the imported target GPGPU_GLES::GPGPU_GLES linked below.
+find_package(GPGPU_GLES REQUIRED)
+
+add_executable(${PROJECT_NAME}
+    "${CMAKE_CURRENT_LIST_DIR}/mult_mat_int.c"
+)
+
+target_link_libraries(${PROJECT_NAME}
+    PRIVATE
+        GPGPU_GLES::GPGPU_GLES
+)
diff --git a/examples/mult_mat_int/mult_mat_int.c b/examples/mult_mat_int/mult_mat_int.c
@@ -0,0 +1,68 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "gpgpu_gles.h"
+
+#define HEIGHT 4
+#define WIDTH HEIGHT
+
+// Prints a matrix of WIDTH * HEIGHT ints, WIDTH values per output line.
+static void print_matrix(const int* m)
+{
+    for (int i = 0; i < WIDTH * HEIGHT; ++i)
+    {
+        printf("%d ", m[i]);
+        if ((i + 1) % WIDTH == 0)
+            printf("\n");
+    }
+    printf("\n");
+}
+
+// Fills two WIDTH x HEIGHT int matrices with 0, 1, 2, ..., multiplies them with
+// gpgpu_matrixMultiplication() and prints the input and the result.
+// Returns EXIT_SUCCESS, or EXIT_FAILURE when any step fails.
+int main(void)
+{
+    int status = EXIT_FAILURE;
+
+    if (gpgpu_init(HEIGHT, WIDTH) != 0)
+    {
+        printf("Could not initialize the API\n");
+        return EXIT_FAILURE;
+    }
+
+    // two int input matrices and one output matrix; sizeof *ptr keeps the
+    // allocation size tied to the element type
+    int* a = malloc(WIDTH * HEIGHT * sizeof *a);
+    int* b = malloc(WIDTH * HEIGHT * sizeof *b);
+    int* res = malloc(WIDTH * HEIGHT * sizeof *res);
+    if (a == NULL || b == NULL || res == NULL)
+    {
+        printf("Could not allocate the matrices\n");
+        goto cleanup;
+    }
+
+    for (int i = 0; i < WIDTH * HEIGHT; ++i)
+    {
+        a[i] = b[i] = i;
+    }
+
+    printf("Data before computation: \n");
+    print_matrix(a);
+
+    if (gpgpu_matrixMultiplication(a, b, WIDTH, res) != 0)
+    {
+        printf("Could not do the matrix multiplication\n");
+        goto cleanup; // res was never filled in, so there is nothing to print
+    }
+
+    printf("Contents after multiplication: \n");
+    print_matrix(res);
+    status = EXIT_SUCCESS;
+
+cleanup:
+    gpgpu_deinit();
+    free(a);
+    free(b);
+    free(res);
+    return status;
+}
diff --git a/include/defines.h b/include/defines.h
@@ -36,6 +36,7 @@ typedef enum
     SUBTRACT_ARRAY_FLOAT,
     MULTIPLY_ARRAY_FLOAT,
     DIVIDE_ARRAY_FLOAT,
+    MAT_MULT_INT,
     // a x a
     //
     FIR_CONV2D_FLOAT,

diff --git a/shaders/mult_mat_int.fs b/shaders/mult_mat_int.fs
@@ -0,0 +1,101 @@
+// Fragment shader multiplying two 4x4 matrices stored in texture0 and texture1.
+// Every texel carries the four bytes of one IEEE754 float (see pack/unpack).
+#ifdef GL_FRAGMENT_PRECISION_HIGH
+    precision highp float;
+#else
+    precision mediump float;
+#endif
+
+uniform sampler2D texture0;
+uniform sampler2D texture1;
+
+varying vec2 vTexCoord;
+
+// Matrix dimension; the textures are expected to be DIM x DIM texels.
+const float DIM = 4.0;
+
+// Encodes value as the four bytes of an IEEE754 single-precision float,
+// each byte scaled to [0, 1] (byte / 255.0).
+vec4 pack(float value)
+{
+    // zero has no normalized representation; all-zero bytes encode +0.0
+    if (value == 0.0) return vec4(0);
+
+    float exponent;
+    float mantissa;
+    vec4 result;
+    float sgn;
+
+    sgn = step(0.0, -value);        // 1.0 for negative input, 0.0 otherwise
+    value = abs(value);
+
+    exponent = floor(log2(value));
+    mantissa = value * pow(2.0, -exponent) - 1.0;   // fraction in [0, 1)
+    exponent = exponent + 127.0;                    // biased exponent
+    result = vec4(0);
+
+    // a: sign bit and the upper 7 bits of the exponent
+    result.a = floor(exponent / 2.0);
+    exponent = exponent - result.a * 2.0;
+    result.a = result.a + 128.0 * sgn;
+
+    // b: lowest exponent bit and the upper 7 bits of the mantissa
+    result.b = floor(mantissa * 128.0);
+    mantissa = mantissa - result.b / 128.0;
+    result.b = result.b + exponent * 128.0;
+
+    // g: next 8 bits of the mantissa
+    result.g = floor(mantissa * 32768.0);
+    mantissa = mantissa - result.g / 32768.0;
+
+    // r: lowest 8 bits of the mantissa
+    result.r = floor(mantissa * 8388608.0);
+
+    return result / 255.0;
+}
+
+// Decodes four byte values (each in [0, 255]) produced by pack() back into a float.
+float unpack(vec4 texel)
+{
+    // pack() encodes 0.0 as all-zero bytes; without this the result would be 2^-127
+    if (texel == vec4(0.0)) return 0.0;
+
+    float exponent;
+    float mantissa;
+    float value;
+    float sgn;
+
+    sgn = -step(128.0, texel.a);    // -1.0 when the sign bit is set, 0.0 otherwise
+    texel.a += 128.0 * sgn;
+
+    exponent = step(128.0, texel.b);
+    texel.b -= exponent * 128.0;
+    exponent += 2.0 * texel.a - 127.0;
+
+    mantissa = texel.b * 65536.0 + texel.g * 256.0 + texel.r;
+    // 1.0 + 2.0 * sgn is +1.0 or -1.0; pow(-1.0, sgn) is undefined for a negative base
+    value = (1.0 + 2.0 * sgn) * exp2(exponent) * (1.0 + mantissa * exp2(-23.0));
+
+    return value;
+}
+
+// Computes one element of the product: the dot product of the texture0 texels in
+// column vTexCoord.s with the texture1 texels in row vTexCoord.t.
+// NOTE(review): if the matrices are uploaded row-major this yields texture1 * texture0;
+// confirm the intended operand order against gpgpu_make_texture().
+void main (void)
+{
+    float i = vTexCoord.s;
+    float j = vTexCoord.t;
+    float result = 0.0;
+    for (float k = 0.0; k < DIM; k += 1.0)
+    {
+        // sample at the texel center; k / DIM lies on the edge between two texels
+        float c = (k + 0.5) / DIM;
+        // round to whole bytes: normalized texel values times 255.0 need not be exact
+        vec4 lhs = floor(texture2D(texture0, vec2(i, c)) * 255.0 + 0.5);
+        vec4 rhs = floor(texture2D(texture1, vec2(c, j)) * 255.0 + 0.5);
+        result += unpack(lhs) * unpack(rhs);
+    }
+    gl_FragColor = pack(result);
+}
diff --git a/src/gpgpu_gles.c b/src/gpgpu_gles.c
@@ -320,8 +320,127 @@ int GPGPU_API gpgpu_firConvolution2D(float* data, float* kernel, int size, float
 
+// Multiplies two square integer matrices on the GPU.
+//
+// a, b: input matrices, g_helper.width * g_helper.height elements each.
+// size: matrix dimension; must equal g_helper.width and g_helper.height.
+// res:  receives the product, each element rounded to the nearest integer.
+//
+// Returns 0 on success; on failure ERR() is expected to set the error code.
+//
+// NOTE(review): shaders/mult_mat_int.fs loops over exactly 4 elements, so only
+// 4x4 matrices are multiplied correctly for now.
 int GPGPU_API gpgpu_matrixMultiplication(int* a, int* b, int size, int* res)
 {
+    int ret = 0;
+    // Everything released at bail is initialized here, before the first ERR():
+    // ERR() is expected to jump to bail, which must never see indeterminate values.
+    unsigned char* buffer = NULL;
+    float* fa = NULL;
+    float* fb = NULL;
+    GLuint texId0 = 0, texId1 = 0;
+    GLuint geometry = 0;
+    int count = 0;
 
-    return 0;
+    if (g_helper.state != READY) // TODO: probably no need to set the states in single-shot API
+        ERR("Call gpgpu_init() first!");
+    if (a == NULL || b == NULL || res == NULL)
+        ERR("Matrix pointers must not be NULL");
+    if (size <= 0 || size != g_helper.width || size != g_helper.height)
+        ERR("Matrix size must match the initialized width and height");
+
+    count = g_helper.width * g_helper.height;
+    buffer = malloc((size_t)4 * count);
+    fa = malloc(count * sizeof *fa);
+    fb = malloc(count * sizeof *fb);
+    if (buffer == NULL || fa == NULL || fb == NULL)
+        ERR("Could not allocate the transfer buffers");
+
+    // The shader decodes every texel as the bytes of an IEEE754 float, so the
+    // integers are converted by value first; uploading the raw int bytes would be
+    // decoded as denormal floats.
+    for (int i = 0; i < count; ++i)
+    {
+        fa[i] = (float)a[i];
+        fb[i] = (float)b[i];
+    }
+
+    gpgpu_make_texture(fa, g_helper.width, g_helper.height, &texId0);
+    gpgpu_make_texture(fb, g_helper.width, g_helper.height, &texId1);
+
+#if DEBUG
+    printf("RAW contents before multiplication: \n");
+    for (int i = 0; i < 4 * count; ++i)
+    {
+        printf("%d ", *((unsigned char*)fa + i));
+        if ((i + 1)  % (4 * g_helper.width) == 0)
+            printf("\n");
+    }
+    printf("\n");
+#endif
+
+    // inputs are float textures, output is a vec4 of unsigned bytes representing the float result of one texel
+    // we need to extract the bits following the IEEE754 floating point format because GLES 2.0 does not have bit extraction
+    gpgpu_build_program(REGULAR, MULT_MAT_INT);
+
+    // create the geometry to draw the texture on
+    glGenBuffers(1, &geometry);
+    glBindBuffer(GL_ARRAY_BUFFER, geometry);
+    glBufferData(GL_ARRAY_BUFFER, 20*sizeof(float), gpgpu_geometry, GL_STATIC_DRAW);
+
+    // setup the vertex position as the attribute of vertex shader
+    gpgpu_add_attribute("position", 3, 20, 0);
+    gpgpu_add_attribute("texCoord", 2, 20, 3);
+    glBindBuffer(GL_ARRAY_BUFFER, 0);
+
+    // bind textures to their respective texturing units
+    // add texture uniforms to fragment shader
+    glActiveTexture(GL_TEXTURE0);
+    glBindTexture(GL_TEXTURE_2D, texId0);
+    gpgpu_add_uniform("texture0", 0, "uniform1i");
+
+    glActiveTexture(GL_TEXTURE0 + 1);
+    glBindTexture(GL_TEXTURE_2D, texId1);
+    gpgpu_add_uniform("texture1", 1, "uniform1i");
+
+    if (gpgpu_report_glError(glGetError()) != 0)
+        ERR("Could not prepare textures");
+
+    // finally draw it; the fragment shader does the actual computation
+    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+    // read the result back as four bytes per texel
+    glReadPixels(0, 0, g_helper.width, g_helper.height, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
+
+#if DEBUG
+    printf("RAW contents after multiplication: \n");
+    for (int i = 0; i < 4 * count; ++i)
+    {
+        printf("%d ", buffer[i]);
+        if ((i + 1)  % (4 * g_helper.width) == 0)
+            printf("\n");
+    }
+    printf("\n");
+#endif
+
+    // The four bytes of a texel are the bytes of one float. GPU pow/log2 are not
+    // exact, so round to the nearest integer instead of truncating.
+    for (int i = 0; i < count; ++i)
+    {
+        float value = ((const float*)buffer)[i];
+        res[i] = (int)(value >= 0.0f ? value + 0.5f : value - 0.5f);
+    }
+
+bail:
+    // ids are still 0 when the failure happened before they were created
+    if (geometry != 0)
+        glDeleteBuffers(1, &geometry);
+    if (texId0 != 0)
+        glDeleteTextures(1, &texId0);
+    if (texId1 != 0)
+        glDeleteTextures(1, &texId1);
+    free(fa);
+    free(fb);
+    free(buffer);
+    return ret;
 }
 
 int GPGPU_API gpgpu_noop(float* a1, float* res)

diff --git a/src/include/shaders.h b/src/include/shaders.h
@@ -10,7 +10,7 @@ typedef enum
     FIR_CONV2D_FLOAT_5,
     FIR_CONV2D_FLOAT_BBB_3, // special unrolled version for SGX
     FIR_CONV2D_FLOAT_BBB_5,
-    MAT_MULT_FLOAT,
+    MULT_MAT_INT,
     // chain operations
     CHAIN_ADD_SCALAR_FLOAT,
     CHAIN_SUBTRACT_SCALAR_FLOAT,
@@ -52,7 +52,7 @@ static const struct FFileName fFileNames[] = {
     { .type = FIR_CONV2D_FLOAT_5,             .filename = "../shaders/fir_conv2d_float_5.fs" },
     { .type = FIR_CONV2D_FLOAT_BBB_3,         .filename = "../shaders/fir_conv2d_float_bbb_3.fs" },
     { .type = FIR_CONV2D_FLOAT_BBB_5,         .filename = "../shaders/fir_conv2d_float_bbb_5.fs" },
-    { .type = MAT_MULT_FLOAT,                 .filename = "mat_mult_float.fs" },
+    { .type = MULT_MAT_INT,                   .filename = "../shaders/mult_mat_int.fs" },
     // chain operations
     { .type = CHAIN_ADD_SCALAR_FLOAT,         .filename = "../shaders/chain_add_scalar_float.fs" },
     { .type = CHAIN_SUBTRACT_SCALAR_FLOAT,    .filename = "../shaders/chain_subtract_scalar_float.fs" },