Feat: Added a matrix_multiply wrapper function that transposes B when needed

jaywyawhare · jaywyawhare · commit dc8eac5dd9f7 · 2025-04-12T06:16:57.000+05:30
diff --git a/include/Core/matrix_multiply.h b/include/Core/matrix_multiply.h
@@ -6,6 +6,37 @@
 /**
  * @brief Performs a matrix multiplication using SIMD instructions (AVX).
  *
+ * Note: The second matrix (B_T) must be transposed before calling this function.
+ *
+ * C = A * transpose(B_T) * scale
+ *
+ * @param A Pointer to the first matrix (M x K).
+ * @param B_T Pointer to the transposed second matrix (N x K).
+ * @param C Pointer to the result matrix (M x N).
+ * @param M Number of rows in matrix A.
+ * @param N Number of columns in matrix B.
+ * @param K Number of columns in matrix A and rows in matrix B.
+ * @param scale Scaling factor to apply to the result.
+ */
+void matrix_multiply_simd(const float *A, const float *B_T, float *C,
+                          int M, int N, int K, float scale);
+
+/**
+ * @brief Transposes a row-major matrix into a new row-major matrix with swapped dimensions.
+ *        Input: B (K x N), Output: B_T (N x K)
+ *
+ * Note: If a pointer is NULL or a dimension is not positive, an error is printed to stderr and B_T is left unmodified.
+ *
+ * @param B Original matrix
+ * @param B_T Transposed matrix
+ * @param K Rows of original B
+ * @param N Columns of original B
+ */
+void transpose_matrix(const float *B, float *B_T, int K, int N);
+
+/**
+ * @brief Performs a matrix multiplication, transposing the second matrix first unless it is already transposed.
+ *
  * C = A * B * scale
  *
  * @param A Pointer to the first matrix (M x K).
@@ -15,8 +46,9 @@
  * @param N Number of columns in matrix B.
  * @param K Number of columns in matrix A and rows in matrix B.
  * @param scale Scaling factor to apply to the result.
+ * @param is_transposed Non-zero if B is already transposed (stored as N x K); 0 if B is stored as K x N.
  */
-void matrix_multiply_simd(const float *A, const float *B, float *C,
-                         int M, int N, int K, float scale);
+void matrix_multiply(const float *A, const float *B, float *C,
+                     int M, int N, int K, float scale, int is_transposed);
 
-#endif  
+#endif
diff --git a/src/Core/matrix_multiply.c b/src/Core/matrix_multiply.c
@@ -1,6 +1,7 @@
 #include <immintrin.h>
 #include <omp.h>
 #include <stdio.h>
+#include "../../include/Core/matrix_multiply.h"
 
 /**
  * @brief Performs a matrix multiplication using SIMD instructions (AVX).
@@ -18,6 +19,18 @@
 void matrix_multiply_simd(const float *A, const float *B_T, float *C,
                           int M, int N, int K, float scale)
 {
+    if (A == NULL || B_T == NULL || C == NULL)
+    {
+        fprintf(stderr, "Error: Null pointer passed to matrix_multiply_simd.\n");
+        return;
+    }
+
+    if (M <= 0 || N <= 0 || K <= 0)
+    {
+        fprintf(stderr, "Error: Invalid matrix dimensions passed to matrix_multiply_simd.\n");
+        return;
+    }
+
 #pragma omp parallel for collapse(2)
     for (int i = 0; i < M; i++)
     {
@@ -29,7 +42,7 @@ void matrix_multiply_simd(const float *A, const float *B_T, float *C,
             for (k = 0; k <= K - 8; k += 8)
             {
                 __m256 a = _mm256_loadu_ps(&A[i * K + k]);
-                __m256 b = _mm256_loadu_ps(&B_T[j * K + k]);  // Access row in transposed B
+                __m256 b = _mm256_loadu_ps(&B_T[j * K + k]);
                 sum = _mm256_add_ps(sum, _mm256_mul_ps(a, b));
             }
 
@@ -62,8 +75,70 @@ void matrix_multiply_simd(const float *A, const float *B_T, float *C,
  */
 void transpose_matrix(const float *B, float *B_T, int K, int N)
 {
+    if (B == NULL || B_T == NULL) // both buffers are indexed by the copy loop below
+    {
+        fprintf(stderr, "Error: Null pointer passed to transpose_matrix.\n");
+        return;
+    }
+
+    if (K <= 0 || N <= 0) // non-positive shapes are reported and B_T is left unwritten
+    {
+        fprintf(stderr, "Error: Invalid matrix dimensions passed to transpose_matrix.\n");
+        return;
+    }
+
#pragma omp parallel for collapse(2)
     for (int i = 0; i < K; ++i)
         for (int j = 0; j < N; ++j)
             B_T[j * K + i] = B[i * N + j];
 }
+
+/**
+ * @brief Computes C = A * B * scale, transposing B into a temporary buffer unless it is already transposed.
+ *
+ * On a NULL pointer, a non-positive dimension, or allocation failure, an error is printed to stderr and C is not written.
+ *
+ * @param A Pointer to the first matrix (M x K).
+ * @param B Pointer to the second matrix (K x N, or N x K when is_transposed is non-zero).
+ * @param C Pointer to the result matrix (M x N).
+ * @param M Number of rows in matrix A.
+ * @param N Number of columns in matrix B.
+ * @param K Number of columns in matrix A and rows in matrix B.
+ * @param scale Scaling factor to apply to the result.
+ * @param is_transposed Non-zero if B is already transposed (N x K), 0 if B is K x N.
+ */
+void matrix_multiply(const float *A, const float *B, float *C,
+                     int M, int N, int K, float scale, int is_transposed)
+{
+    if (A == NULL || B == NULL || C == NULL)
+    {
+        fprintf(stderr, "Error: Null pointer passed to matrix_multiply.\n");
+        return;
+    }
+
+    if (M <= 0 || N <= 0 || K <= 0)
+    {
+        fprintf(stderr, "Error: Invalid matrix dimensions passed to matrix_multiply.\n");
+        return;
+    }
+
+    if (is_transposed)
+    {
+        matrix_multiply_simd(A, B, C, M, N, K, scale);
+    }
+    else
+    {
+        // aligned_alloc requires a size that is a multiple of the alignment; size_t math avoids int overflow in N * K.
+        size_t bytes = ((size_t)N * (size_t)K * sizeof(float) + 31u) & ~(size_t)31u;
+        float *B_T = (float *)aligned_alloc(32, bytes);
+        if (B_T == NULL)
+        {
+            fprintf(stderr, "Error: Memory allocation failed for transposed matrix in matrix_multiply.\n");
+            return;
+        }
+
+        transpose_matrix(B, B_T, K, N);
+        matrix_multiply_simd(A, B_T, C, M, N, K, scale);
+        free(B_T);
+    }
+}