Skip to content

Commit 3a35f46

Browse files
rjogrady and copybara-github
authored and committed
Support loop unroll pragma in gcc.
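Inside a function template, GCC warns that it will ignore the loop-unrolling pragma unless the for loop is structured in a very specific way. Work around that by emitting the compiler-specific pragma through an UNROLL_LOOP macro and by hoisting parameters.size_bytes.size() into the loop initializer, so the loop condition becomes a plain `i < size` comparison. PiperOrigin-RevId: 790786630. Change-Id: I1308ee0bc11c8f4719e1db798357f0bf02c77f04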
1 parent f4e26c8 commit 3a35f46

File tree

1 file changed

+18
-8
lines changed

1 file changed

+18
-8
lines changed

fleetbench/libc/mem_benchmark.cc

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,16 @@
4141
#include "fleetbench/dynamic_registrar.h"
4242
#include "fleetbench/libc/utils.h"
4343

44+
// Emits `#pragma <x>` from inside a macro expansion: `#x` stringizes the
// argument and the `_Pragma` operator turns that string into a pragma.
#define FLEETBENCH_PRAGMA(x) _Pragma(#x)
45+
46+
// UNROLL_LOOP(x): written on the line directly before a `for` loop to pass an
// unroll request with argument `x` to the compiler. Expands to
// `#pragma unroll x` on Clang, to `#pragma GCC unroll x` on GCC, and to
// nothing on any other compiler. Clang is tested first because it also
// defines __GNUC__.
#if defined(__clang__)
47+
#define UNROLL_LOOP(x) FLEETBENCH_PRAGMA(unroll x)
48+
#elif defined(__GNUC__)
49+
#define UNROLL_LOOP(x) FLEETBENCH_PRAGMA(GCC unroll x)
50+
#else
51+
#define UNROLL_LOOP(x)
52+
#endif
53+
4454
namespace fleetbench {
4555
namespace libc {
4656
// Number of buffers needed by the memory operators.
@@ -91,8 +101,8 @@ void MemcpyFunction(benchmark::State &state,
91101
// sensitivity by partially unrolling the loop, which reduces the number of
92102
// branch instructions and moves the remaining ones to different offsets in
93103
// different iterations.
94-
#pragma unroll 8
95-
for (int i = 0; i < parameters.size_bytes.size(); i++) {
104+
UNROLL_LOOP(8)
105+
for (int i = 0, size = parameters.size_bytes.size(); i < size; i++) {
96106
auto res =
97107
memcpy(dst + parameters.dst_offset[i], src + parameters.src_offset[i],
98108
parameters.size_bytes[i]);
@@ -110,8 +120,8 @@ void MemmoveFunction(benchmark::State &state,
110120
int64_t warmup = 10;
111121
// Run benchmark and call memmove function
112122
while ((warmup-- > 0) || state.KeepRunningBatch(batch_size)) {
113-
#pragma unroll 8
114-
for (int i = 0; i < parameters.size_bytes.size(); i++) {
123+
UNROLL_LOOP(8)
124+
for (int i = 0, size = parameters.size_bytes.size(); i < size; i++) {
115125
auto res =
116126
memmove(buffer + parameters.dst_offset[i],
117127
buffer + parameters.src_offset[i], parameters.size_bytes[i]);
@@ -129,8 +139,8 @@ void CmpFunction(benchmark::State &state, const BM_Mem_Parameters &parameters,
129139
int64_t warmup = 10;
130140
// Run benchmark and call cmp function
131141
while ((warmup-- > 0) || state.KeepRunningBatch(batch_size)) {
132-
#pragma unroll 8
133-
for (int i = 0; i < parameters.size_bytes.size(); i++) {
142+
UNROLL_LOOP(8)
143+
for (int i = 0, size = parameters.size_bytes.size(); i < size; i++) {
134144
MemoryBuffers::mark(buffer, parameters.dst_offset[i],
135145
parameters.mismatch_pos[i]);
136146
auto res =
@@ -152,8 +162,8 @@ void MemsetFunction(benchmark::State &state,
152162
int64_t warmup = 10;
153163
// Run benchmark and call memset function
154164
while ((warmup-- > 0) || state.KeepRunningBatch(batch_size)) {
155-
#pragma unroll 8
156-
for (int i = 0; i < parameters.size_bytes.size(); i++) {
165+
UNROLL_LOOP(8)
166+
for (int i = 0, size = parameters.size_bytes.size(); i < size; i++) {
157167
auto res = memset(dst + parameters.dst_offset[i],
158168
parameters.memset_value[i], parameters.size_bytes[i]);
159169
benchmark::DoNotOptimize(res);

0 commit comments

Comments
 (0)