@@ -2096,10 +2096,14 @@ static void ggml_compute_forward_gelu_f32(
 
     const ggml_tensor * src0 = dst->src[0];
 
-    assert(ggml_is_contiguous_1(src0));
-    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_is_contiguous_rows(src0));
     assert(ggml_are_same_shape(src0, dst));
 
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb)
+    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne)
+    GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb)
+
     const int ith = params->ith;
     const int nth = params->nth;
 
@@ -2113,10 +2117,14 @@ static void ggml_compute_forward_gelu_f32(
     const int ir0 = dr*ith;
     const int ir1 = MIN(ir0 + dr, nr);
 
-    for (int i1 = ir0; i1 < ir1; i1++) {
+    for (int ir = ir0; ir < ir1; ++ir) {
+        const int i3 = ir/(ne02*ne01);
+        const int i2 = (ir - i3*ne02*ne01)/ne01;
+        const int i1 = (ir - i3*ne02*ne01 - i2*ne01);
+
         ggml_vec_gelu_f32(nc,
-                (float *) ((char *) dst->data  + i1*( dst->nb[1])),
-                (float *) ((char *) src0->data + i1*(src0->nb[1])));
+                (float *) ((char *) dst->data  + i3*nb3  + i2*nb2  + i1*nb1),
+                (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01));
 
 #ifndef NDEBUG
         for (int k = 0; k < nc; k++) {
@@ -2135,10 +2143,14 @@ static void ggml_compute_forward_gelu_f16(
 
     const ggml_tensor * src0 = dst->src[0];
 
-    assert(ggml_is_contiguous_1(src0));
-    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_is_contiguous_rows(src0));
     assert(ggml_are_same_shape(src0, dst));
 
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb)
+    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne)
+    GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb)
+
     const int ith = params->ith;
     const int nth = params->nth;
 
@@ -2152,10 +2164,14 @@ static void ggml_compute_forward_gelu_f16(
     const int ir0 = dr*ith;
     const int ir1 = MIN(ir0 + dr, nr);
 
-    for (int i1 = ir0; i1 < ir1; i1++) {
+    for (int ir = ir0; ir < ir1; ++ir) {
+        const int i3 = ir/(ne02*ne01);
+        const int i2 = (ir - i3*ne02*ne01)/ne01;
+        const int i1 = (ir - i3*ne02*ne01 - i2*ne01);
+
         ggml_vec_gelu_f16(nc,
-                (ggml_fp16_t *) ((char *) dst->data  + i1*( dst->nb[1])),
-                (ggml_fp16_t *) ((char *) src0->data + i1*(src0->nb[1])));
+                (ggml_fp16_t *) ((char *) dst->data  + i3*nb3  + i2*nb2  + i1*nb1),
+                (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01));
 
 #ifndef NDEBUG
         for (int k = 0; k < nc; k++) {
@@ -2276,10 +2292,14 @@ static void ggml_compute_forward_gelu_erf_f32(
 
     const ggml_tensor * src0 = dst->src[0];
 
-    assert(ggml_is_contiguous_1(src0));
-    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_is_contiguous_rows(src0));
     assert(ggml_are_same_shape(src0, dst));
 
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb)
+    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne)
+    GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb)
+
     const int ith = params->ith;
     const int nth = params->nth;
 
@@ -2293,10 +2313,14 @@ static void ggml_compute_forward_gelu_erf_f32(
     const int ir0 = dr*ith;
     const int ir1 = MIN(ir0 + dr, nr);
 
-    for (int i1 = ir0; i1 < ir1; i1++) {
+    for (int ir = ir0; ir < ir1; ++ir) {
+        const int i3 = ir/(ne02*ne01);
+        const int i2 = (ir - i3*ne02*ne01)/ne01;
+        const int i1 = (ir - i3*ne02*ne01 - i2*ne01);
+
         ggml_vec_gelu_erf_f32(nc,
-                (float *) ((char *) dst->data  + i1*( dst->nb[1])),
-                (float *) ((char *) src0->data + i1*(src0->nb[1])));
+                (float *) ((char *) dst->data  + i3*nb3  + i2*nb2  + i1*nb1),
+                (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01));
 
 #ifndef NDEBUG
         for (int k = 0; k < nc; k++) {
@@ -2315,10 +2339,14 @@ static void ggml_compute_forward_gelu_erf_f16(
 
     const ggml_tensor * src0 = dst->src[0];
 
-    assert(ggml_is_contiguous_1(src0));
-    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_is_contiguous_rows(src0));
     assert(ggml_are_same_shape(src0, dst));
 
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb)
+    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne)
+    GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb)
+
     const int ith = params->ith;
     const int nth = params->nth;
 
@@ -2332,10 +2360,14 @@ static void ggml_compute_forward_gelu_erf_f16(
     const int ir0 = dr*ith;
     const int ir1 = MIN(ir0 + dr, nr);
 
-    for (int i1 = ir0; i1 < ir1; i1++) {
+    for (int ir = ir0; ir < ir1; ++ir) {
+        const int i3 = ir/(ne02*ne01);
+        const int i2 = (ir - i3*ne02*ne01)/ne01;
+        const int i1 = (ir - i3*ne02*ne01 - i2*ne01);
+
         ggml_vec_gelu_erf_f16(nc,
-                (ggml_fp16_t *) ((char *) dst->data  + i1*( dst->nb[1])),
-                (ggml_fp16_t *) ((char *) src0->data + i1*(src0->nb[1])));
+                (ggml_fp16_t *) ((char *) dst->data  + i3*nb3  + i2*nb2  + i1*nb1),
+                (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01));
 
 #ifndef NDEBUG
         for (int k = 0; k < nc; k++) {
@@ -2379,10 +2411,14 @@ static void ggml_compute_forward_gelu_quick_f32(
 
     const ggml_tensor * src0 = dst->src[0];
 
-    assert(ggml_is_contiguous_1(src0));
-    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_is_contiguous_rows(src0));
     assert(ggml_are_same_shape(src0, dst));
 
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb)
+    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne)
+    GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb)
+
     const int ith = params->ith;
     const int nth = params->nth;
 
@@ -2396,10 +2432,14 @@ static void ggml_compute_forward_gelu_quick_f32(
     const int ir0 = dr*ith;
     const int ir1 = MIN(ir0 + dr, nr);
 
-    for (int i1 = ir0; i1 < ir1; i1++) {
+    for (int ir = ir0; ir < ir1; ++ir) {
+        const int i3 = ir/(ne02*ne01);
+        const int i2 = (ir - i3*ne02*ne01)/ne01;
+        const int i1 = (ir - i3*ne02*ne01 - i2*ne01);
+
         ggml_vec_gelu_quick_f32(nc,
-                (float *) ((char *) dst->data  + i1*( dst->nb[1])),
-                (float *) ((char *) src0->data + i1*(src0->nb[1])));
+                (float *) ((char *) dst->data  + i3*nb3  + i2*nb2  + i1*nb1),
+                (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01));
 
 #ifndef NDEBUG
         for (int k = 0; k < nc; k++) {
@@ -2418,10 +2458,14 @@ static void ggml_compute_forward_gelu_quick_f16(
 
     const ggml_tensor * src0 = dst->src[0];
 
-    assert(ggml_is_contiguous_1(src0));
-    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_is_contiguous_rows(src0));
     assert(ggml_are_same_shape(src0, dst));
 
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb)
+    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne)
+    GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb)
+
     const int ith = params->ith;
     const int nth = params->nth;
 
@@ -2435,10 +2479,14 @@ static void ggml_compute_forward_gelu_quick_f16(
     const int ir0 = dr*ith;
     const int ir1 = MIN(ir0 + dr, nr);
 
-    for (int i1 = ir0; i1 < ir1; i1++) {
+    for (int ir = ir0; ir < ir1; ++ir) {
+        const int i3 = ir/(ne02*ne01);
+        const int i2 = (ir - i3*ne02*ne01)/ne01;
+        const int i1 = (ir - i3*ne02*ne01 - i2*ne01);
+
         ggml_vec_gelu_quick_f16(nc,
-                (ggml_fp16_t *) ((char *) dst->data  + i1*( dst->nb[1])),
-                (ggml_fp16_t *) ((char *) src0->data + i1*(src0->nb[1])));
+                (ggml_fp16_t *) ((char *) dst->data  + i3*nb3  + i2*nb2  + i1*nb1),
+                (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01));
 
 #ifndef NDEBUG
         for (int k = 0; k < nc; k++) {
@@ -2482,10 +2530,14 @@ static void ggml_compute_forward_silu_f32(
 
     const ggml_tensor * src0 = dst->src[0];
 
-    assert(ggml_is_contiguous_1(src0));
-    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_is_contiguous_rows(src0));
     assert(ggml_are_same_shape(src0, dst));
 
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb)
+    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne)
+    GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb)
+
     const int ith = params->ith;
     const int nth = params->nth;
 
@@ -2499,10 +2551,14 @@ static void ggml_compute_forward_silu_f32(
     const int ir0 = dr*ith;
     const int ir1 = MIN(ir0 + dr, nr);
 
-    for (int i1 = ir0; i1 < ir1; i1++) {
+    for (int ir = ir0; ir < ir1; ++ir) {
+        const int i3 = ir/(ne02*ne01);
+        const int i2 = (ir - i3*ne02*ne01)/ne01;
+        const int i1 = (ir - i3*ne02*ne01 - i2*ne01);
+
         ggml_vec_silu_f32(nc,
-                (float *) ((char *) dst->data  + i1*( dst->nb[1])),
-                (float *) ((char *) src0->data + i1*(src0->nb[1])));
+                (float *) ((char *) dst->data  + i3*nb3  + i2*nb2  + i1*nb1),
+                (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01));
 
 #ifndef NDEBUG
         for (int k = 0; k < nc; k++) {
@@ -2521,10 +2577,14 @@ static void ggml_compute_forward_silu_f16(
 
     const ggml_tensor * src0 = dst->src[0];
 
-    assert(ggml_is_contiguous_1(src0));
-    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_is_contiguous_rows(src0));
     assert(ggml_are_same_shape(src0, dst));
 
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb)
+    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne)
+    GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb)
+
     const int ith = params->ith;
     const int nth = params->nth;
 
@@ -2538,10 +2598,14 @@ static void ggml_compute_forward_silu_f16(
     const int ir0 = dr*ith;
     const int ir1 = MIN(ir0 + dr, nr);
 
-    for (int i1 = ir0; i1 < ir1; i1++) {
+    for (int ir = ir0; ir < ir1; ++ir) {
+        const int i3 = ir/(ne02*ne01);
+        const int i2 = (ir - i3*ne02*ne01)/ne01;
+        const int i1 = (ir - i3*ne02*ne01 - i2*ne01);
+
         ggml_vec_silu_f16(nc,
-                (ggml_fp16_t *) ((char *) dst->data  + i1*( dst->nb[1])),
-                (ggml_fp16_t *) ((char *) src0->data + i1*(src0->nb[1])));
+                (ggml_fp16_t *) ((char *) dst->data  + i3*nb3  + i2*nb2  + i1*nb1),
+                (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01));
 
 #ifndef NDEBUG
         for (int k = 0; k < nc; k++) {
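
Note on the loop change above: each hunk replaces the single row index i1 (which assumed rows are laid out back to back) with a flattened row index ir that is decomposed into (i1, i2, i3) coordinates and combined with the per-dimension byte strides nb1/nb2/nb3 for dst and nb01/nb02/nb03 for src0, introduced by the GGML_TENSOR_LOCALS lines. The following standalone sketch illustrates only that index arithmetic; the shape, the stride values, and the main() driver are invented for the example and are not part of the patch.

// sketch: decompose a flattened row index ir into (i1, i2, i3) and compute the
// byte offset of that row from per-dimension strides, as the patched loops do
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

int main(void) {
    // hypothetical 4D f32 tensor: ne[i] = elements per dim, nb[i] = byte strides
    const int64_t ne[4] = {8, 3, 2, 2};
    const size_t  nb[4] = {sizeof(float), 8*sizeof(float), 3*8*sizeof(float), 2*3*8*sizeof(float)};

    const int64_t nr = ne[1]*ne[2]*ne[3]; // total number of rows

    for (int64_t ir = 0; ir < nr; ++ir) {
        // invert the flattening ir = (i3*ne[2] + i2)*ne[1] + i1
        const int64_t i3 = ir/(ne[2]*ne[1]);
        const int64_t i2 = (ir - i3*ne[2]*ne[1])/ne[1];
        const int64_t i1 =  ir - i3*ne[2]*ne[1] - i2*ne[1];

        // byte offset of the first element of row (i1, i2, i3)
        const size_t offs = i3*nb[3] + i2*nb[2] + i1*nb[1];
        printf("row %2lld -> i1=%lld i2=%lld i3=%lld, offset=%zu bytes\n",
               (long long) ir, (long long) i1, (long long) i2, (long long) i3, offs);
    }
    return 0;
}

Because each row is addressed through its own stride rather than assuming dst->nb[1]-spaced rows, the kernels can now operate on tensors whose rows are contiguous but whose higher dimensions are strided, which is what the relaxed ggml_is_contiguous_rows assertion permits.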