Skip to content

Commit 77e1a70

Browse files
abadams authored and alexreinking committed
Use optimization fences in the base class too
Before:

Computing best tile sizes for each type
.................................................
bytes, tile width, tile height, bandwidth (GB/s):
1 8 8 20.9997
1 16 8 20.8329
1 8 16 18.5702
1 8 32 17.2463
1 8 64 14.312
2 8 16 19.2047
2 8 8 18.8368
2 16 8 17.0593
2 8 32 17.0591
2 4 8 15.7681
4 8 8 24.9364
4 4 16 22.9699
4 8 16 22.5743
4 4 32 22.255
4 4 8 20.4468
8 8 8 38.4094
8 16 4 28.4167
8 16 8 27.6184
8 8 4 27.6062
8 8 16 26.8693

After:

Computing best tile sizes for each type
.................................................
bytes, tile width, tile height, bandwidth (GB/s):
1 16 32 34.1921
1 16 16 31.8399
1 8 16 25.575
1 16 64 25.1665
1 32 16 25.0061
2 8 32 28.2635
2 8 16 27.7648
2 16 16 27.2126
2 16 32 23.9034
2 8 8 23.6345
4 8 16 34.5303
4 8 8 28.3653
4 16 8 26.8521
4 8 32 26.084
4 16 16 24.4519
8 8 8 33.7163
8 8 4 29.1339
8 4 16 26.418
8 16 4 25.4663
8 2 8 24.3949
1 parent 3d6417e commit 77e1a70

1 file changed

Lines changed: 5 additions & 2 deletions

File tree

src/CodeGen_LLVM.cpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2195,7 +2195,10 @@ Value *CodeGen_LLVM::optimization_fence(Value *v) {
21952195
internal_assert(!t->isScalableTy())
21962196
<< "optimization_fence does not support scalable vectors yet";
21972197
const int bits = t->getPrimitiveSizeInBits();
2198-
llvm::Type *float_type = llvm_type_of(Float(64, bits / 64));
2198+
if (bits % 16) {
2199+
return v;
2200+
}
2201+
llvm::Type *float_type = llvm_type_of(Float(16, bits / 16));
21992202
v = builder->CreateBitCast(v, float_type);
22002203
v = builder->CreateArithmeticFence(v, float_type);
22012204
return builder->CreateBitCast(v, t);
@@ -2217,7 +2220,7 @@ Value *CodeGen_LLVM::interleave_vectors(const std::vector<Value *> &vecs) {
22172220
for (int i = 0; i < vec_elements * 2; i++) {
22182221
indices[i] = i % 2 == 0 ? i / 2 : i / 2 + vec_elements;
22192222
}
2220-
return shuffle_vectors(a, b, indices);
2223+
return optimization_fence(shuffle_vectors(a, b, indices));
22212224
} else {
22222225
// Grab the even and odd elements of vecs.
22232226
vector<Value *> even_vecs;

0 commit comments

Comments
 (0)