Skip to content

Added Matrix Multiplication #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ set(GPGPU_GLES_EXAMPLES
array_add_float
fir_conv_float
array_add_fixed16
mult_mat_int
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not floats? ints make little sense here

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have changed the entire operation on ints to that on floats

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so change the name to float please :)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, done.

# chain API
chain_simple_float
chain_conv2d_float
Expand Down
11 changes: 11 additions & 0 deletions examples/mult_mat_int/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Build script for the mult_mat_int example executable.
cmake_minimum_required(VERSION 3.7)
project(GPGPUGLESMatrixMultiplyInt VERSION 1.0)

# Executables are emitted into <build dir>/bin.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

# The example links against the GPGPU_GLES package, which must be discoverable.
find_package(GPGPU_GLES REQUIRED)

# The executable target carries the same name as the project.
add_executable(${PROJECT_NAME} ${CMAKE_CURRENT_LIST_DIR}/mult_mat_int.c)
target_link_libraries(${PROJECT_NAME} PRIVATE GPGPU_GLES::GPGPU_GLES)
51 changes: 51 additions & 0 deletions examples/mult_mat_int/mult_mat_int.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#include <stdio.h>
#include <stdlib.h>
#include "gpgpu_gles.h"

#define HEIGHT 4
#define WIDTH HEIGHT

// Prints a WIDTH x HEIGHT matrix stored row by row: one row per output line,
// followed by an empty line.
static void print_matrix(const int* m)
{
    for (int i = 0; i < WIDTH * HEIGHT; ++i)
    {
        printf("%d ", m[i]);
        if ((i + 1) % WIDTH == 0)
            printf("\n");
    }
    printf("\n");
}

// Example program: fills two WIDTH x HEIGHT int matrices with 0..N-1,
// multiplies them with gpgpu_matrixMultiplication() and prints the first
// operand and the product.
// Returns 0 on success and 1 when initialization, allocation or the
// multiplication itself fails.
int main()
{
    int status = 1; // stays non-zero until every step has succeeded
    int* a = NULL;
    int* b = NULL;
    int* res = NULL;

    if (gpgpu_init(HEIGHT, WIDTH) != 0)
    {
        printf("Could not initialize the API\n");
        return 1;
    }

    // create the two operand matrices and the result matrix;
    // the element size is taken from the pointers so it always matches their type
    a = malloc(WIDTH * HEIGHT * sizeof *a);
    b = malloc(WIDTH * HEIGHT * sizeof *b);
    res = malloc(WIDTH * HEIGHT * sizeof *res);
    if (a == NULL || b == NULL || res == NULL)
    {
        printf("Could not allocate the matrices\n");
        goto cleanup;
    }

    for (int i = 0; i < WIDTH * HEIGHT; ++i)
    {
        a[i] = b[i] = i;
    }

    printf("Data before computation: \n");
    print_matrix(a);

    // WIDTH == HEIGHT, so HEIGHT is the dimension of the square matrices
    if (gpgpu_matrixMultiplication(a, b, HEIGHT, res) != 0)
    {
        printf("Could not do the matrix multiplication\n");
        // res was never written, so do not print it
        goto cleanup;
    }

    printf("Contents after multiplication: \n");
    print_matrix(res);

    status = 0;

cleanup:
    gpgpu_deinit();
    free(a); // free(NULL) is a no-op, so partial allocation failures are fine
    free(b);
    free(res);
    return status;
}
1 change: 1 addition & 0 deletions include/defines.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ typedef enum
SUBTRACT_ARRAY_FLOAT,
MULTIPLY_ARRAY_FLOAT,
DIVIDE_ARRAY_FLOAT,
MAT_MULT_INT,
// a x a
//
FIR_CONV2D_FLOAT,
Expand Down
75 changes: 75 additions & 0 deletions shaders/mult_mat_int.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif

uniform sampler2D texture0;
uniform sampler2D texture1;

varying vec2 vTexCoord;

// Encodes a float into four channel values that correspond to the four bytes
// of its IEEE 754 single-precision bit pattern (r = lowest byte, a = highest
// byte: sign + upper exponent bits). Each byte value (0..255) is divided by
// 255.0 so it can be written out as a normalized color.
// 0.0 is encoded as an all-zero vec4.
// NOTE(review): there is no visible handling for Inf/NaN or for values whose
// exponent falls outside the normal range - confirm inputs never need it.
vec4 pack(float value)
{
// zero has no leading 1 bit, so log2() below cannot be used for it
if (value == 0.0) return vec4(0);

float exponent;
float mantissa;
vec4 result;
float sgn;

// step(0.0, -value) is 1.0 when -value >= 0.0, i.e. for negative input here
sgn = step(0.0, -value);
value = abs(value);

// split into power-of-two exponent and fractional part: value = 2^exponent * (1 + mantissa)
exponent = floor(log2(value));
mantissa = value * pow(2.0, -exponent) - 1.0;
// apply the IEEE 754 single-precision exponent bias
exponent = exponent + 127.0;
result = vec4(0);

// a: upper 7 bits of the biased exponent, plus the sign as the top bit (128)
result.a = floor(exponent / 2.0);
// keep only the lowest exponent bit for the next byte
exponent = exponent - result.a * 2.0;
result.a = result.a + 128.0 * sgn;

// b: top 7 bits of the mantissa, plus the lowest exponent bit as the top bit (128)
result.b = floor(mantissa * 128.0);
mantissa = mantissa - result.b / 128.0;
result.b = result.b + exponent * 128.0;

// g: next 8 bits of the mantissa
result.g = floor(mantissa * 32768.0);
mantissa = mantissa - result.g / 32768.0;

// r: lowest 8 bits of the mantissa (2^23 = 8388608)
result.r = floor(mantissa * 8388608.0);

// scale the byte values into the 0..1 range
return result / 255.0;
}

// Rebuilds a float from a texel whose channels hold the four bytes of an
// IEEE 754 single-precision value as numbers in 0..255 (r = lowest byte,
// a = highest byte: sign + upper exponent bits). This is the inverse of pack();
// callers are expected to scale the sampled color by 255.0 first.
float unpack(vec4 texel)
{
    // pack() encodes 0.0 as an all-zero texel; without this guard the formula
    // below would yield 2^-127 instead of 0.0
    if (texel == vec4(0.0)) return 0.0;

    // top bit of a is the sign: 1.0 for negative values, 0.0 otherwise
    float negative = step(128.0, texel.a);
    texel.a -= 128.0 * negative;

    // top bit of b is the lowest exponent bit; the remaining 7 exponent bits are in a
    float exponent = step(128.0, texel.b);
    texel.b -= exponent * 128.0;
    // reassemble the biased exponent and remove the bias of 127
    exponent += 2.0 * texel.a - 127.0;

    // the 23 mantissa bits: 7 from b, 8 from g, 8 from r
    float mantissa = texel.b * 65536.0 + texel.g * 256.0 + texel.r;

    // The sign is computed arithmetically (+1.0 or -1.0) rather than with
    // pow(-1.0, ...): pow() has undefined results for a negative base.
    float sign = 1.0 - 2.0 * negative;

    return sign * exp2(exponent) * (1.0 + mantissa * exp2(-23.0));
}

void main (void)
{
float i = vTexCoord.s;
float j = vTexCoord.t;
float result = 0.0;
for (float k = 0.0; k < 4.0; ++k)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why 4? why k is a float?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To bring k into the range 0 to 1, we divide it by 4. This ensures that the coordinates of the texture element we are multiplying lie in the range 0 to 1 of the texture coordinate system.
For the texture2D function, the second parameter is a vec2, whose elements are floats. This is the reason k is a floating-point value.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @JDuchniewicz, is there any update on merging this matrix multiplication addition upstream?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, sorry got bogged down by other work. I still do not understand why 4.0 and if 4.0 is necessary (I presume the matrix size?). If this is indeed the matrix size, it should be configurable as a uniform and not hard-coded. Please change that.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi Jakub, yes, it is the matrix size.
We are testing a generalized version and will update soon. Could we use something like a const kspan to represent this 4.0, since the code has been tested for this size?

{
result += unpack(texture2D(texture0, vec2(i, k / 4.0)) * 255.0) * unpack(texture2D(texture1, vec2(k / 4.0, j)) * 255.0);
}
gl_FragColor = pack(result);
}
92 changes: 91 additions & 1 deletion src/gpgpu_gles.c
Original file line number Diff line number Diff line change
Expand Up @@ -320,8 +320,98 @@ int GPGPU_API gpgpu_firConvolution2D(float* data, float* kernel, int size, float

// Multiplies the matrices a and b on the GPU by uploading them as textures,
// drawing a quad with the MULT_MAT_INT fragment shader and reading the
// rendered pixels back into res.
//
// a, b  - input matrices, uploaded as g_helper.width x g_helper.height textures
// size  - matrix dimension; currently NOT used here
//         NOTE(review): the shader hard-codes its loop bound - confirm and
//         pass size as a uniform
// res   - output, receives g_helper.width * g_helper.height values
//
// Returns ret, which is 0 unless ERR() reported a failure.
// NOTE(review): ERR() is defined elsewhere; it presumably records the failure
// in ret and jumps to bail - confirm.
int GPGPU_API gpgpu_matrixMultiplication(int* a, int* b, int size, int* res)
{
    int ret = 0;
    // declared and nulled before the first ERR() so that the cleanup at bail
    // never inspects an uninitialized pointer
    unsigned char* buffer = NULL;
    GLuint texId0, texId1;
    GLuint geometry;

    (void)size;

    if (g_helper.state != READY) // TODO: probably no need to set the states in single-shot API
        ERR("Call gpgpu_init() first!");

    // one RGBA texel (4 bytes) per output element
    buffer = malloc(4 * g_helper.width * g_helper.height);
    if (buffer == NULL)
        ERR("Could not allocate the readback buffer");

    gpgpu_make_texture(a, g_helper.width, g_helper.height, &texId0);
    gpgpu_make_texture(b, g_helper.width, g_helper.height, &texId1);

#if DEBUG
    printf("RAW contents before multiplication: \n");
    for (int i = 0; i < 4 * g_helper.width * g_helper.height; ++i)
    {
        printf("%d ", *((unsigned char*)a + i));
        if ((i + 1) % (4 * g_helper.width) == 0)
            printf("\n");
    }
    printf("\n");
#endif

    // inputs are float textures, output is a vec4 of unsigned bytes representing the float result of one texel
    // we need to extract the bits following the IEEE754 floating point format because GLES 2.0 does not have bit extraction
    gpgpu_build_program(REGULAR, MULT_MAT_INT);

    // create the geometry to draw the texture on
    // NOTE(review): neither this buffer nor the two textures are released in
    // this function - confirm whether gpgpu_deinit() takes care of them
    glGenBuffers(1, &geometry);
    glBindBuffer(GL_ARRAY_BUFFER, geometry);
    glBufferData(GL_ARRAY_BUFFER, 20 * sizeof(float), gpgpu_geometry, GL_STATIC_DRAW);

    // setup the vertex position and texture coordinate as attributes of the vertex shader
    gpgpu_add_attribute("position", 3, 20, 0);
    gpgpu_add_attribute("texCoord", 2, 20, 3);
    glBindBuffer(GL_ARRAY_BUFFER, 0);

    // bind textures to their respective texturing units
    // and add the texture uniforms to the fragment shader
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, texId0);
    gpgpu_add_uniform("texture0", 0, "uniform1i");

    glActiveTexture(GL_TEXTURE0 + 1);
    glBindTexture(GL_TEXTURE_2D, texId1);
    gpgpu_add_uniform("texture1", 1, "uniform1i");

    if (gpgpu_report_glError(glGetError()) != 0)
        ERR("Could not prepare textures");

    // do the actual computation: draw the quad, the fragment shader computes every output texel
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

    // read the rendered texels back as raw RGBA bytes
    glReadPixels(0, 0, g_helper.width, g_helper.height, GL_RGBA, GL_UNSIGNED_BYTE, buffer);

#if DEBUG
    printf("RAW contents after multiplication: \n");
    for (int i = 0; i < 4 * g_helper.width * g_helper.height; ++i)
    {
        printf("%d ", buffer[i]);
        if ((i + 1) % (4 * g_helper.width) == 0)
            printf("\n");
    }
    printf("\n");
#endif

    // every 4 bytes of the readback hold the bit pattern of one float;
    // storing it into the int output converts the value (truncating any fraction)
    for (int i = 0; i < g_helper.width * g_helper.height; ++i)
    {
        res[i] = *((float*)buffer + i);
    }

bail:
    // TODO: what should be released upon failure?
    if (buffer)
        free(buffer);
    return ret;
}

int GPGPU_API gpgpu_noop(float* a1, float* res)
Expand Down
4 changes: 2 additions & 2 deletions src/include/shaders.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ typedef enum
FIR_CONV2D_FLOAT_5,
FIR_CONV2D_FLOAT_BBB_3, // special unrolled version for SGX
FIR_CONV2D_FLOAT_BBB_5,
MAT_MULT_FLOAT,
MULT_MAT_INT,
// chain operations
CHAIN_ADD_SCALAR_FLOAT,
CHAIN_SUBTRACT_SCALAR_FLOAT,
Expand Down Expand Up @@ -52,7 +52,7 @@ static const struct FFileName fFileNames[] = {
{ .type = FIR_CONV2D_FLOAT_5, .filename = "../shaders/fir_conv2d_float_5.fs" },
{ .type = FIR_CONV2D_FLOAT_BBB_3, .filename = "../shaders/fir_conv2d_float_bbb_3.fs" },
{ .type = FIR_CONV2D_FLOAT_BBB_5, .filename = "../shaders/fir_conv2d_float_bbb_5.fs" },
{ .type = MAT_MULT_FLOAT, .filename = "mat_mult_float.fs" },
{ .type = MULT_MAT_INT, .filename = "../shaders/mult_mat_int.fs" },
// chain operations
{ .type = CHAIN_ADD_SCALAR_FLOAT, .filename = "../shaders/chain_add_scalar_float.fs" },
{ .type = CHAIN_SUBTRACT_SCALAR_FLOAT, .filename = "../shaders/chain_subtract_scalar_float.fs" },
Expand Down