Skip to content

Commit 9021fb7

Browse files
committed
Remove non-critical bound check for 8/16-bit operations
1 parent e088861 commit 9021fb7

File tree

2 files changed

+3
-48
lines changed

2 files changed

+3
-48
lines changed

src/layer/arm/shufflechannel_arm.cpp

+1 -16
Original file line number | Diff line number | Diff line change
@@ -374,7 +374,7 @@ int ShuffleChannel_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blo
374374

375375
ptr1 += 4;
376376

377-
for (int i = 0; i < size - 1; i++)
377+
for (int i = 0; i < size; i++)
378378
{
379379
uint16x4_t _p0 = vld1_u16(ptr0);
380380
uint16x4_t _p1 = vld1_u16(ptr1);
@@ -388,21 +388,6 @@ int ShuffleChannel_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blo
388388
ptr1 += 8;
389389
outptr0 += 8;
390390
}
391-
392-
for (int i = 0; i < 8; i++)
393-
{
394-
if (i % 2)
395-
{
396-
*outptr0 = *ptr1;
397-
ptr1 += 1;
398-
}
399-
else
400-
{
401-
*outptr0 = *ptr0;
402-
ptr0 += 1;
403-
}
404-
outptr0 += 1;
405-
}
406391
}
407392

408393
return 0;

src/layer/x86/shufflechannel_x86.cpp

+2 -32
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,7 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
116116

117117
ptr1 += 8;
118118

119-
for (int i = 0; i < size - 1; i++)
119+
for (int i = 0; i < size; i++)
120120
{
121121
__m256 _p0 = _mm256_loadu_ps(ptr0);
122122
__m256 _p1 = _mm256_loadu_ps(ptr1);
@@ -134,21 +134,6 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
134134
ptr1 += 16;
135135
outptr += 16;
136136
}
137-
138-
for (int i = 0; i < 16; i++)
139-
{
140-
if (i % 2)
141-
{
142-
*outptr = *ptr1;
143-
ptr1 += 1;
144-
}
145-
else
146-
{
147-
*outptr = *ptr0;
148-
ptr0 += 1;
149-
}
150-
outptr += 1;
151-
}
152137
}
153138

154139
return 0;
@@ -387,7 +372,7 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
387372

388373
ptr1 += 4;
389374

390-
for (int i = 0; i < size - 1; i++)
375+
for (int i = 0; i < size; i++)
391376
{
392377
__m128 _p0 = _mm_loadu_ps(ptr0);
393378
__m128 _p1 = _mm_loadu_ps(ptr1);
@@ -402,21 +387,6 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
402387
ptr1 += 8;
403388
outptr += 8;
404389
}
405-
406-
for (int i = 0; i < 8; i++)
407-
{
408-
if (i % 2)
409-
{
410-
*outptr = *ptr1;
411-
ptr1 += 1;
412-
}
413-
else
414-
{
415-
*outptr = *ptr0;
416-
ptr0 += 1;
417-
}
418-
outptr += 1;
419-
}
420390
}
421391

422392
return 0;

0 commit comments

Comments
 (0)