Fix cten_elemwise_broadcast function: treat dimensions beyond the tensor's rank as size 1

Numbers0689 · Numbers0689 · commit 94d453ac167c · 2025-03-07T00:58:05.000+05:30
diff --git a/src/utils.c b/src/utils.c
@@ -33,46 +33,66 @@ void cten_assert_dim(const char* title, int a, int b) {
 bool cten_elemwise_broadcast(Tensor* a, Tensor* b) {
     int a_dim = TensorShape_dim(a->shape);
     int b_dim = TensorShape_dim(b->shape);
-    if(a_dim != b_dim) return false;
+
+    if (a_dim != b_dim) return false;
+
     int a_broadcast = -1;
-    for(int i = 0; i < a_dim; i++) {
-        if(a->shape[i] == b->shape[i]) continue;
-        if(a->shape[i] == 1) {
-            if(a_broadcast == 0) return false;
+
+    for (int i = 0; i < a_dim; i++) {
+        if (a->shape[i] == b->shape[i]) continue;
+        if (a->shape[i] == 1) {
+            if (a_broadcast == 0) return false;
             a_broadcast = 1;
-        } else if(b->shape[i] == 1) {
-            if(a_broadcast == 1) return false;
+        } else if (b->shape[i] == 1) {
+            if (a_broadcast == 1) return false;
             a_broadcast = 0;
         } else {
             return false;
         }
     }
-    if(a_broadcast != -1) {
-        if(a_broadcast == 0) {
+
+    if (a_broadcast != -1) {
+        if (a_broadcast == 0) { 
             Tensor* tmp = a;
             a = b;
             b = tmp;
             a_broadcast = 1;
         }
-        Tensor a_ = Tensor_new(b->shape, a->node != NULL);
-        for(int i = 0; i < a_.shape[0]; i++) {
-            int i_ = a->shape[0] == 1 ? 0 : i;
-            for(int j = 0; j < a_.shape[1]; j++) {
-                int j_ = a->shape[1] == 1 ? 0 : j;
-                for(int k = 0; k < a_.shape[2]; k++) {
-                    int k_ = a->shape[2] == 1 ? 0 : k;
-                    for(int l = 0; l < a_.shape[3]; l++) {
-                        int l_ = a->shape[3] == 1 ? 0 : l;
-                        // a_[i][j][k][l] = a[i_][j_][k_][l_]
-                        a_.data->flex[i * a_.shape[1] * a_.shape[2] * a_.shape[3] +
-                                      j * a_.shape[2] * a_.shape[3] + k * a_.shape[3] + l] =
-                            a->data->flex[i_ * a->shape[1] * a->shape[2] * a->shape[3] +
-                                          j_ * a->shape[2] * a->shape[3] + k_ * a->shape[3] + l_];
+
+        Tensor a_ = Tensor_zeros(b->shape, a->node != NULL);
+
+        int stride_a_1 = (a_dim > 1) ? a->shape[1] : 1;
+        int stride_a_2 = (a_dim > 2) ? a->shape[2] : 1;
+        int stride_a_3 = (a_dim > 3) ? a->shape[3] : 1;
+
+        int stride_a_1_new = (a_dim > 1) ? a_.shape[1] : 1;
+        int stride_a_2_new = (a_dim > 2) ? a_.shape[2] : 1;
+        int stride_a_3_new = (a_dim > 3) ? a_.shape[3] : 1;
+
+        for (int i = 0; i < a_.shape[0]; i++) {
+            int i_ = (a->shape[0] == 1) ? 0 : i;
+            for (int j = 0; j < ((a_dim > 1) ? a_.shape[1] : 1); j++) {
+                int j_ = (a_dim > 1 && a->shape[1] == 1) ? 0 : j;
+                for (int k = 0; k < ((a_dim > 2) ? a_.shape[2] : 1); k++) {
+                    int k_ = (a_dim > 2 && a->shape[2] == 1) ? 0 : k;
+                    for (int l = 0; l < ((a_dim > 3) ? a_.shape[3] : 1); l++) {
+                        int l_ = (a_dim > 3 && a->shape[3] == 1) ? 0 : l;
+
+                        int dst_idx = i * stride_a_1_new * stride_a_2_new * stride_a_3_new +
+                                      j * stride_a_2_new * stride_a_3_new +
+                                      k * stride_a_3_new + l;
+
+                        int src_idx = i_ * stride_a_1 * stride_a_2 * stride_a_3 +
+                                      j_ * stride_a_2 * stride_a_3 +
+                                      k_ * stride_a_3 + l_;
+
+                        a_.data->flex[dst_idx] = a->data->flex[src_idx];
                     }
                 }
             }
         }
         *a = a_;
     }
+
     return true;
-}
+}