Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
d274861
Fix Vulkan interleave SPIRV codegen. Fix a bug in Simplify_Shuffle. F…
mcourteaux May 24, 2025
4fde938
Vector Legalization Pass. Useful for vectorizing to GPU backends with…
mcourteaux May 27, 2025
e88e66c
Fix Makefile.
mcourteaux May 27, 2025
2182bd1
Cleanup.
mcourteaux May 27, 2025
b345929
Cleanup vector legalization.
mcourteaux May 27, 2025
488426c
Try to fix the compiler complaint around visibility.
mcourteaux May 28, 2025
c44a130
GCC-9 does not understand a complete switch?
mcourteaux May 28, 2025
17a8c0a
Do not lift Let out to LetStmt if we are not in a loop with lane limi…
mcourteaux Jun 5, 2025
306b616
Improve error message for reinterpret.
mcourteaux Jun 5, 2025
2a50d11
Only run vector legalization mutators on device loops that require it.
mcourteaux Jun 5, 2025
963f510
Move required simplifier logic for the vector legalization to the act…
mcourteaux Jun 14, 2025
f381af0
Remove special handling of strict_float, as those got overhauled.
mcourteaux Jun 14, 2025
9e1329a
Hexagon codegen for vdelta fix regarding dont-care values in shuffle …
mcourteaux Aug 29, 2025
43ed906
Clang-format
mcourteaux Aug 29, 2025
4cb5c2c
Satisfy clang-tidy
mcourteaux Oct 12, 2025
3034e92
Revive.
mcourteaux Dec 13, 2025
70debb1
Restore case-insensitive sorting order.
mcourteaux Jan 28, 2026
f29344d
Feedback from Andrew.
mcourteaux Feb 21, 2026
ef2274a
Unify my own ExtractLanes and the existing Deinterleaver.
mcourteaux Mar 1, 2026
d9184c8
Don't use designated initializers. We're not on C++20 yet... :(
mcourteaux Mar 1, 2026
9601159
clang-format
mcourteaux Mar 1, 2026
cbc0031
unrelated clang-format???
mcourteaux Mar 1, 2026
d747743
Slightly better early-outing of the ExtractLanes mutator.
mcourteaux Mar 3, 2026
1937797
Forgot brackets.
mcourteaux Mar 3, 2026
4c5fc2f
Merge branch 'main' into fix-vulkan-interleave
mcourteaux Mar 3, 2026
a2f084b
Clang-tidy.
mcourteaux Mar 3, 2026
202d5c0
Two bugs identified by Gemini in CodeGen_Hexagon
mcourteaux Mar 3, 2026
0f22d87
Fix the shuffle bug that's causing everything to fail.
mcourteaux Mar 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,9 @@ xcuserdata
# NeoVim + clangd
.cache

# CCLS
.ccls-cache

# Emacs
tags
TAGS
Expand Down
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,7 @@ SOURCE_FILES = \
IRVisitor.cpp \
JITModule.cpp \
Lambda.cpp \
LegalizeVectors.cpp \
Lerp.cpp \
LICM.cpp \
LLVM_Output.cpp \
Expand Down Expand Up @@ -737,6 +738,7 @@ HEADER_FILES = \
WasmExecutor.h \
JITModule.h \
Lambda.h \
LegalizeVectors.h \
Lerp.h \
LICM.h \
LLVM_Output.h \
Expand Down
15 changes: 9 additions & 6 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ endif ()
set_target_properties(Halide PROPERTIES POSITION_INDEPENDENT_CODE ON)

##
# Lists of source files. Keep ALL lists sorted in alphabetical order.
# Lists of source files. Keep ALL lists sorted in case-insensitive alphabetical order.
# (neo)vim users can use ":sort i" in visual line mode.
Comment on lines +40 to +41
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should consider using https://github.com/google/keep-sorted for this.

##

# The externally-visible header files that go into making Halide.h.
Expand All @@ -63,9 +64,9 @@ target_sources(
AsyncProducers.h
AutoScheduleUtils.h
BoundaryConditions.h
BoundConstantExtentLoops.h
Bounds.h
BoundsInference.h
BoundConstantExtentLoops.h
BoundSmallAllocations.h
Buffer.h
Callable.h
Expand All @@ -87,9 +88,9 @@ target_sources(
CodeGen_WebGPU_Dev.h
CompilerLogger.h
ConciseCasts.h
CPlusPlusMangle.h
ConstantBounds.h
ConstantInterval.h
CPlusPlusMangle.h
CSE.h
Debug.h
DebugArguments.h
Expand Down Expand Up @@ -144,6 +145,7 @@ target_sources(
IRVisitor.h
JITModule.h
Lambda.h
LegalizeVectors.h
Lerp.h
LICM.h
LLVM_Output.h
Expand Down Expand Up @@ -241,9 +243,9 @@ target_sources(
AsyncProducers.cpp
AutoScheduleUtils.cpp
BoundaryConditions.cpp
BoundConstantExtentLoops.cpp
Bounds.cpp
BoundsInference.cpp
BoundConstantExtentLoops.cpp
BoundSmallAllocations.cpp
Buffer.cpp
Callable.cpp
Expand All @@ -269,9 +271,9 @@ target_sources(
CodeGen_WebGPU_Dev.cpp
CodeGen_X86.cpp
CompilerLogger.cpp
CPlusPlusMangle.cpp
ConstantBounds.cpp
ConstantInterval.cpp
CPlusPlusMangle.cpp
CSE.cpp
Debug.cpp
DebugArguments.cpp
Expand Down Expand Up @@ -320,6 +322,7 @@ target_sources(
IRVisitor.cpp
JITModule.cpp
Lambda.cpp
LegalizeVectors.cpp
Lerp.cpp
LICM.cpp
LLVM_Output.cpp
Expand Down Expand Up @@ -364,7 +367,6 @@ target_sources(
Simplify_Add.cpp
Simplify_And.cpp
Simplify_Call.cpp
Simplify_Reinterpret.cpp
Simplify_Cast.cpp
Simplify_Div.cpp
Simplify_EQ.cpp
Expand All @@ -377,6 +379,7 @@ target_sources(
Simplify_Mul.cpp
Simplify_Not.cpp
Simplify_Or.cpp
Simplify_Reinterpret.cpp
Simplify_Select.cpp
Simplify_Shuffle.cpp
Simplify_Stmts.cpp
Expand Down
5 changes: 5 additions & 0 deletions src/CSE.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ bool should_extract(const Expr &e, bool lift_all) {
return false;
}

if (const Call *c = e.as<Call>()) {
// Calls with side effects should not be moved: only pure calls and
// calls of type Call::Halide are eligible for extraction.
return c->is_pure() || c->call_type == Call::Halide;
}

if (lift_all) {
return true;
}
Expand Down
30 changes: 18 additions & 12 deletions src/CodeGen_Hexagon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1158,8 +1158,8 @@ Value *CodeGen_Hexagon::shuffle_vectors(Value *a, Value *b,
llvm::Type *result_ty = get_vector_type(element_ty, result_elements);

// Try to rewrite shuffles that only access the elements of b.
int min = indices[0];
for (size_t i = 1; i < indices.size(); i++) {
int min = INT_MAX;
for (size_t i = 0; i < indices.size(); i++) {
if (indices[i] != -1 && indices[i] < min) {
min = indices[i];
}
Expand All @@ -1171,7 +1171,7 @@ Value *CodeGen_Hexagon::shuffle_vectors(Value *a, Value *b,
i -= a_elements;
}
}
return shuffle_vectors(b, shifted_indices);
return shuffle_vectors(b, b, shifted_indices);
}

// Try to rewrite shuffles that only access the elements of a.
Expand All @@ -1186,15 +1186,16 @@ Value *CodeGen_Hexagon::shuffle_vectors(Value *a, Value *b,
create_bitcast(a_call->getArgOperand(1), native_ty),
create_bitcast(a_call->getArgOperand(0), native_ty), indices);
} else if (ShuffleVectorInst *a_shuffle = dyn_cast<ShuffleVectorInst>(a)) {
bool is_identity = true;
for (int i = 0; i < a_elements; i++) {
int mask_i = a_shuffle->getMaskValue(i);
is_identity = is_identity && (mask_i == i || mask_i == -1);
}
if (is_identity) {
return shuffle_vectors(a_shuffle->getOperand(0),
a_shuffle->getOperand(1), indices);
std::vector<int> new_indices(indices.size());
for (size_t i = 0; i < indices.size(); i++) {
if (indices[i] != -1) {
new_indices[i] = a_shuffle->getMaskValue(indices[i]);
} else {
new_indices[i] = -1;
}
}
return shuffle_vectors(a_shuffle->getOperand(0),
a_shuffle->getOperand(1), new_indices);
}
}

Expand Down Expand Up @@ -1516,7 +1517,11 @@ Value *CodeGen_Hexagon::vdelta(Value *lut, const vector<int> &indices) {
vector<int> i8_indices(indices.size() * replicate);
for (size_t i = 0; i < indices.size(); i++) {
for (int j = 0; j < replicate; j++) {
i8_indices[i * replicate + j] = indices[i] * replicate + j;
if (indices[i] == -1) {
i8_indices[i * replicate + j] = -1; // Replicate the don't-care.
} else {
i8_indices[i * replicate + j] = indices[i] * replicate + j;
}
}
}
Value *result = vdelta(i8_lut, i8_indices);
Expand Down Expand Up @@ -1556,6 +1561,7 @@ Value *CodeGen_Hexagon::vdelta(Value *lut, const vector<int> &indices) {
Value *ret = nullptr;
for (int i = 0; i < lut_elements; i += native_elements) {
Value *lut_i = slice_vector(lut, i, native_elements);
internal_assert(get_vector_num_elements(lut_i->getType()) == native_elements);
vector<int> indices_i(native_elements);
vector<Constant *> mask(native_elements);
bool all_used = true;
Expand Down
3 changes: 2 additions & 1 deletion src/CodeGen_LLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5030,10 +5030,11 @@ Value *CodeGen_LLVM::shuffle_vectors(Value *a, Value *b,
}
// Check for type identity *after* normalizing to fixed vectors
internal_assert(a->getType() == b->getType());
int elements_a = get_vector_num_elements(a->getType());
vector<Constant *> llvm_indices(indices.size());
for (size_t i = 0; i < llvm_indices.size(); i++) {
if (indices[i] >= 0) {
internal_assert(indices[i] < get_vector_num_elements(a->getType()) * 2);
internal_assert(indices[i] < elements_a * 2) << indices[i] << " " << elements_a * 2;
llvm_indices[i] = ConstantInt::get(i32_t, indices[i]);
} else {
// Only let -1 be undef.
Expand Down
24 changes: 7 additions & 17 deletions src/CodeGen_Vulkan_Dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2080,31 +2080,21 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Shuffle *op) {
debug(3) << "\n";

if (arg_ids.size() == 1) {

// 1 argument, just do a simple assignment via a cast
SpvId result_id = cast_type(op->type, op->vectors[0].type(), arg_ids[0]);
builder.update_id(result_id);

} else if (arg_ids.size() == 2) {

// 2 arguments, use a composite insert to update even and odd indices
uint32_t even_idx = 0;
uint32_t odd_idx = 1;
SpvFactory::Indices even_indices;
SpvFactory::Indices odd_indices;
for (int i = 0; i < op_lanes; ++i) {
even_indices.push_back(even_idx);
odd_indices.push_back(odd_idx);
even_idx += 2;
odd_idx += 2;
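// 2 arguments: interleave them with a single vector-shuffle. The indices address the
// concatenation (vec1[0], vec1[1], ..., vec2[0], vec2[1], ...), so lane i of vec1 is
// index i and lane i of vec2 is index i + arg_lanes.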
SpvFactory::Indices logical_indices;
for (int i = 0; i < arg_lanes; ++i) {
logical_indices.push_back(uint32_t(i));
logical_indices.push_back(uint32_t(i + arg_lanes));
}

SpvId type_id = builder.declare_type(op->type);
SpvId value_id = builder.declare_null_constant(op->type);
SpvId partial_id = builder.reserve_id(SpvResultId);
SpvId result_id = builder.reserve_id(SpvResultId);
builder.append(SpvFactory::composite_insert(type_id, partial_id, arg_ids[0], value_id, even_indices));
builder.append(SpvFactory::composite_insert(type_id, result_id, arg_ids[1], partial_id, odd_indices));
builder.append(SpvFactory::vector_shuffle(type_id, result_id, arg_ids[0], arg_ids[1], logical_indices));
builder.update_id(result_id);

} else {
Expand Down Expand Up @@ -2134,7 +2124,7 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Shuffle *op) {
} else if (op->is_extract_element()) {
int idx = op->indices[0];
internal_assert(idx >= 0);
internal_assert(idx <= op->vectors[0].type().lanes());
internal_assert(idx < op->vectors[0].type().lanes());
if (op->vectors[0].type().is_vector()) {
SpvFactory::Indices indices = {(uint32_t)idx};
SpvId type_id = builder.declare_type(op->type);
Expand Down
Loading
Loading