halide
diff --git a/‎cmake/FindHexagonSDK.cmake
Lines changed: 92 additions & 0 deletions b/‎cmake/FindHexagonSDK.cmake
Lines changed: 92 additions & 0 deletions
diff --git a/‎src/FindIntrinsics.cpp
Lines changed: 27 additions & 24 deletions b/‎src/FindIntrinsics.cpp
Lines changed: 27 additions & 24 deletions
diff --git a/‎src/IR.h
Lines changed: 7 additions & 0 deletions b/‎src/IR.h
Lines changed: 7 additions & 0 deletions
diff --git a/‎src/IRMatch.h
Lines changed: 1 addition & 0 deletions b/‎src/IRMatch.h
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/LLVM_Output.cpp
Lines changed: 4 additions & 0 deletions b/‎src/LLVM_Output.cpp
Lines changed: 4 additions & 0 deletions
diff --git a/‎src/Simplify_Exprs.cpp
Lines changed: 18 additions & 15 deletions b/‎src/Simplify_Exprs.cpp
Lines changed: 18 additions & 15 deletions
diff --git a/‎src/runtime/CMakeLists.txt
Lines changed: 4 additions & 0 deletions b/‎src/runtime/CMakeLists.txt
Lines changed: 4 additions & 0 deletions
diff --git a/‎src/runtime/hexagon_host.cpp
Lines changed: 5 additions & 1 deletion b/‎src/runtime/hexagon_host.cpp
Lines changed: 5 additions & 1 deletion
@@ -0,0 +1,92 @@
+include(FindPackageHandleStandardArgs)
+
+##
+# Find the Hexagon SDK root
+
+# We use the presence of the Hexagon toolchain file to determine the SDK
+# root. Other files have names that are either too generic (like readme.txt)
+# or platform-specific (like setup_sdk_env.source), so they can't be used to
+# autodetect the path. Plus, we need to find this file anyway.
+#
+# The HEXAGON_SDK_ROOT environment variable is used as a search hint; the
+# result is stored in the HEXAGON_SDK_ROOT cache variable.
+
+find_path(
+    HEXAGON_SDK_ROOT
+    NAMES build/cmake/hexagon_toolchain.cmake
+    HINTS ENV HEXAGON_SDK_ROOT
+    DOC "Root directory of the Hexagon SDK"
+)
+
+##
+# Detect the installed Hexagon tools version
+#
+# Precedence: a non-empty HEXAGON_TOOLS_VER variable (normal or cache), then
+# the HEXAGON_TOOLS_VER environment variable, then the first directory found
+# under <SDK root>/tools/HEXAGON_Tools.
+#
+# The checks test for an empty value rather than `NOT DEFINED`: the cache
+# entry created below is defined even when detection found nothing, and an
+# empty cached value must not disable detection on later configure runs.
+
+if ("${HEXAGON_TOOLS_VER}" STREQUAL "" AND DEFINED ENV{HEXAGON_TOOLS_VER})
+    set(HEXAGON_TOOLS_VER "$ENV{HEXAGON_TOOLS_VER}")
+endif ()
+
+# Skip the directory listing entirely when the SDK root was not found.
+if ("${HEXAGON_TOOLS_VER}" STREQUAL "" AND HEXAGON_SDK_ROOT)
+    # No other way to list a directory; no need for CONFIGURE_DEPENDS here
+    # since this is just used to initialize a cache variable.
+    file(
+        GLOB tools_versions
+        RELATIVE "${HEXAGON_SDK_ROOT}/tools/HEXAGON_Tools"
+        "${HEXAGON_SDK_ROOT}/tools/HEXAGON_Tools/*"
+    )
+    # Take the first entry that is a directory, so that a stray file in
+    # HEXAGON_Tools is never mistaken for a tools version.
+    foreach (tools_version IN LISTS tools_versions)
+        if (IS_DIRECTORY "${HEXAGON_SDK_ROOT}/tools/HEXAGON_Tools/${tools_version}")
+            set(HEXAGON_TOOLS_VER "${tools_version}")
+            break()
+        endif ()
+    endforeach ()
+endif ()
+
+# Expose the version as a user-editable cache entry. An existing cache value
+# is never overwritten (no FORCE).
+set(HEXAGON_TOOLS_VER "${HEXAGON_TOOLS_VER}"
+    CACHE STRING "Version of the Hexagon tools to use")
+
+set(HEXAGON_TOOLS_ROOT "${HEXAGON_SDK_ROOT}/tools/HEXAGON_Tools/${HEXAGON_TOOLS_VER}")
+
+##
+# Set known paths
+#
+# These locations are derived directly from the SDK root rather than searched
+# for.
+# NOTE(review): the qaic host directory (Ubuntu16) and the NDK release (r19c)
+# are hard-coded here -- confirm they match the SDK versions being targeted.
+
+set(HEXAGON_TOOLCHAIN "${HEXAGON_SDK_ROOT}/build/cmake/hexagon_toolchain.cmake")
+set(HEXAGON_QAIC "${HEXAGON_SDK_ROOT}/ipc/fastrpc/qaic/Ubuntu16/qaic")
+
+set(ANDROID_NDK_ROOT "${HEXAGON_SDK_ROOT}/tools/android-ndk-r19c")
+set(ANDROID_NDK_TOOLCHAIN "${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake")
+
+##
+# Find ISS wrapper library and headers
+#
+# Both searches are hinted at the selected tools root and try several
+# candidate sub-directory layouts beneath it.
+
+find_library(
+    HEXAGON_ISS_WRAPPER_LIBRARY
+    NAMES wrapper
+    HINTS "${HEXAGON_TOOLS_ROOT}"
+    PATH_SUFFIXES Tools/lib/iss lib/iss iss
+)
+
+find_path(
+    HEXAGON_ISS_WRAPPER_INCLUDE_DIRECTORY
+    NAMES HexagonWrapper.h
+    HINTS "${HEXAGON_TOOLS_ROOT}"
+    PATH_SUFFIXES Tools/include/iss include/iss iss
+)
+
+##
+# Validate we found everything correctly
+#
+# HexagonSDK_FOUND is set only if every variable listed below holds a usable
+# value.
+
+find_package_handle_standard_args(
+    HexagonSDK
+    REQUIRED_VARS HEXAGON_SDK_ROOT HEXAGON_TOOLS_ROOT HEXAGON_TOOLCHAIN
+                  HEXAGON_ISS_WRAPPER_LIBRARY HEXAGON_ISS_WRAPPER_INCLUDE_DIRECTORY
+    HANDLE_COMPONENTS
+)
+
+##
+# Create imported targets
+#
+# HexagonSDK::wrapper wraps the ISS wrapper library together with its header
+# directory. The TARGET guard keeps repeated find_package() calls from
+# redefining it.
+
+if (HexagonSDK_FOUND AND NOT TARGET HexagonSDK::wrapper)
+    add_library(HexagonSDK::wrapper UNKNOWN IMPORTED)
+    set_property(
+        TARGET HexagonSDK::wrapper
+        PROPERTY IMPORTED_LOCATION "${HEXAGON_ISS_WRAPPER_LIBRARY}"
+    )
+    set_property(
+        TARGET HexagonSDK::wrapper
+        PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${HEXAGON_ISS_WRAPPER_INCLUDE_DIRECTORY}"
+    )
+endif ()
@@ -157,16 +157,19 @@ Expr to_rounding_shift(const Call *c) {
                 return rounding_shift(cast(add->type, add->args[0]), b);
             }
         }
-        // Also need to handle the annoying case of a reinterpret wrapping a widen_right_add
+
+        // Also need to handle the annoying case of a reinterpret cast wrapping a widen_right_add
         // TODO: this pattern makes me want to change the semantics of this op.
-        if (const Reinterpret *reinterp = a.as<Reinterpret>()) {
-            if (reinterp->type.bits() == reinterp->value.type().bits()) {
-                if (const Call *add = Call::as_intrinsic(reinterp->value, {Call::widen_right_add})) {
+        if (const Cast *cast = a.as<Cast>()) {
+            if (cast->is_reinterpret()) {
+                if (const Call *add = Call::as_intrinsic(cast->value, {Call::widen_right_add})) {
                     if (can_prove(lower_intrinsics(add->args[1] == round))) {
-                        // We expect the first operand to be a reinterpet.
-                        const Reinterpret *reinterp_a = add->args[0].as<Reinterpret>();
-                        internal_assert(reinterp_a) << "Failed: " << add->args[0] << "\n";
-                        return rounding_shift(reinterp_a->value, b);
+                        // We expect the first operand to be a reinterpret cast.
+                        if (const Cast *cast_a = add->args[0].as<Cast>()) {
+                            if (cast_a->is_reinterpret()) {
+                                return rounding_shift(cast_a->value, b);
+                            }
+                        }
                     }
                 }
             }
@@ -245,9 +248,9 @@ class FindIntrinsics : public IRMutator {
                     if (b.type().code() != narrow_a.type().code()) {
                         // Need to do a safe reinterpret.
                         Type t = b.type().with_code(code);
-                        result = widen_right_add(reinterpret(t, b), narrow_a);
+                        result = widen_right_add(cast(t, b), narrow_a);
                         internal_assert(result.type() != op->type);
-                        result = reinterpret(op->type, result);
+                        result = cast(op->type, result);
                     } else {
                         result = widen_right_add(b, narrow_a);
                     }
@@ -258,9 +261,9 @@ class FindIntrinsics : public IRMutator {
                     if (a.type().code() != narrow_b.type().code()) {
                         // Need to do a safe reinterpret.
                         Type t = a.type().with_code(code);
-                        result = widen_right_add(reinterpret(t, a), narrow_b);
+                        result = widen_right_add(cast(t, a), narrow_b);
                         internal_assert(result.type() != op->type);
-                        result = reinterpret(op->type, result);
+                        result = cast(op->type, result);
                     } else {
                         result = widen_right_add(a, narrow_b);
                     }
@@ -328,9 +331,9 @@ class FindIntrinsics : public IRMutator {
                     if (a.type().code() != narrow_b.type().code()) {
                         // Need to do a safe reinterpret.
                         Type t = a.type().with_code(code);
-                        result = widen_right_sub(reinterpret(t, a), narrow_b);
+                        result = widen_right_sub(cast(t, a), narrow_b);
                         internal_assert(result.type() != op->type);
-                        result = reinterpret(op->type, result);
+                        result = cast(op->type, result);
                     } else {
                         result = widen_right_sub(a, narrow_b);
                     }
@@ -410,9 +413,9 @@ class FindIntrinsics : public IRMutator {
                     if (b.type().code() != narrow_a.type().code()) {
                         // Need to do a safe reinterpret.
                         Type t = b.type().with_code(code);
-                        result = widen_right_mul(reinterpret(t, b), narrow_a);
+                        result = widen_right_mul(cast(t, b), narrow_a);
                         internal_assert(result.type() != op->type);
-                        result = reinterpret(op->type, result);
+                        result = cast(op->type, result);
                     } else {
                         result = widen_right_mul(b, narrow_a);
                     }
@@ -423,9 +426,9 @@ class FindIntrinsics : public IRMutator {
                     if (a.type().code() != narrow_b.type().code()) {
                         // Need to do a safe reinterpret.
                         Type t = a.type().with_code(code);
-                        result = widen_right_mul(reinterpret(t, a), narrow_b);
+                        result = widen_right_mul(cast(t, a), narrow_b);
                         internal_assert(result.type() != op->type);
-                        result = reinterpret(op->type, result);
+                        result = cast(op->type, result);
                     } else {
                         result = widen_right_mul(a, narrow_b);
                     }
@@ -1261,8 +1264,8 @@ Expr lower_saturating_add(const Expr &a, const Expr &b) {
         return select(sum < a, a.type().max(), sum);
     } else if (a.type().is_int()) {
         Type u = a.type().with_code(halide_type_uint);
-        Expr ua = reinterpret(u, a);
-        Expr ub = reinterpret(u, b);
+        Expr ua = cast(u, a);
+        Expr ub = cast(u, b);
         Expr upper = make_const(u, (uint64_t(1) << (a.type().bits() - 1)) - 1);
         Expr lower = make_const(u, (uint64_t(1) << (a.type().bits() - 1)));
         Expr sum = ua + ub;
@@ -1272,7 +1275,7 @@ Expr lower_saturating_add(const Expr &a, const Expr &b) {
         // a + b >= 0 === a >= -b === a >= ~b + 1 === a > ~b
         Expr pos_result = min(sum, upper);
         Expr neg_result = max(sum, lower);
-        return simplify(reinterpret(a.type(), select(~b < a, pos_result, neg_result)));
+        return simplify(cast(a.type(), select(~b < a, pos_result, neg_result)));
     } else {
         internal_error << "Bad type for saturating_add: " << a.type() << "\n";
         return Expr();
@@ -1288,8 +1291,8 @@ Expr lower_saturating_sub(const Expr &a, const Expr &b) {
     } else if (a.type().is_int()) {
         // Do the math in unsigned, to avoid overflow in the simplifier.
         Type u = a.type().with_code(halide_type_uint);
-        Expr ua = reinterpret(u, a);
-        Expr ub = reinterpret(u, b);
+        Expr ua = cast(u, a);
+        Expr ub = cast(u, b);
         Expr upper = make_const(u, (uint64_t(1) << (a.type().bits() - 1)) - 1);
         Expr lower = make_const(u, (uint64_t(1) << (a.type().bits() - 1)));
         Expr diff = ua - ub;
@@ -1300,7 +1303,7 @@ Expr lower_saturating_sub(const Expr &a, const Expr &b) {
         // and saturate the negative difference to be at least -2^31 + 2^32 = 2^31
         Expr neg_diff = max(lower, diff);
         // Then select between them, and cast back to the signed type.
-        return simplify(reinterpret(a.type(), select(b <= a, pos_diff, neg_diff)));
+        return simplify(cast(a.type(), select(b <= a, pos_diff, neg_diff)));
     } else if (a.type().is_uint()) {
         return simplify(select(b < a, a - b, make_zero(a.type())));
     } else {
 
@@ -32,6 +32,13 @@ struct Cast : public ExprNode<Cast> {
     static Expr make(Type t, Expr v);
 
     static const IRNodeType _node_type = IRNodeType::Cast;
+
+    /** Check if the cast is equivalent to a reinterpret. Returns true only
+     * when both the destination type and the type of the value being cast
+     * satisfy is_int_or_uint() and the two types have the same number of
+     * bits; in every other case it returns false. */
+    bool is_reinterpret() const {
+        return (type.is_int_or_uint() &&
+                value.type().is_int_or_uint() &&
+                type.bits() == value.type().bits());
+    }
 };
 
 /** Reinterpret value as another type, without affecting any of the bits
 
@@ -2101,6 +2101,7 @@ struct SliceOp {
         }
         const Shuffle &v = (const Shuffle &)e;
         return v.vectors.size() == 1 &&
+               v.is_slice() &&
                vec.template match<bound>(*v.vectors[0].get(), state) &&
                base.template match<bound | bindings<Vec>::mask>(v.slice_begin(), state) &&
                stride.template match<bound | bindings<Vec>::mask | bindings<Base>::mask>(v.slice_stride(), state) &&
 
@@ -593,7 +593,11 @@ void create_static_library(const std::vector<std::string> &src_files_in, const T
         return;
     }
 
+#if LLVM_VERSION >= 180
+    const llvm::SymtabWritingMode write_symtab = llvm::SymtabWritingMode::NormalSymtab;
+#else
     const bool write_symtab = true;
+#endif
     const auto kind = Internal::get_triple_for_target(target).isOSDarwin() ? llvm::object::Archive::K_BSD : llvm::object::Archive::K_GNU;
     const bool thin = false;
     auto result = llvm::writeArchive(dst_file, new_members,
 
@@ -314,23 +314,26 @@ Expr Simplify::visit(const Load *op, ExprInfo *bounds) {
     ExprInfo index_info;
     Expr index = mutate(op->index, &index_info);
 
-    // If the load is fully out of bounds, replace it with undef.
-    // This should only occur inside branches that make the load unreachable,
-    // but perhaps the branch was hard to prove constant true or false. This
-    // provides an alternative mechanism to simplify these unreachable loads.
-    string alloc_extent_name = op->name + ".total_extent_bytes";
-    if (bounds_and_alignment_info.contains(alloc_extent_name)) {
-        if (index_info.max_defined && index_info.max < 0) {
-            in_unreachable = true;
-            return unreachable(op->type);
-        }
-        const ExprInfo &alloc_info = bounds_and_alignment_info.get(alloc_extent_name);
-        if (alloc_info.max_defined && index_info.min_defined) {
-            int index_min_bytes = index_info.min * op->type.bytes();
-            if (index_min_bytes > alloc_info.max) {
+    // If an unpredicated load is fully out of bounds, replace it with an
+    // unreachable intrinsic.  This should only occur inside branches that make
+    // the load unreachable, but perhaps the branch was hard to prove constant
+    // true or false. This provides an alternative mechanism to simplify these
+    // unreachable loads.
+    if (is_const_one(op->predicate)) {
+        string alloc_extent_name = op->name + ".total_extent_bytes";
+        if (bounds_and_alignment_info.contains(alloc_extent_name)) {
+            if (index_info.max_defined && index_info.max < 0) {
                 in_unreachable = true;
                 return unreachable(op->type);
             }
+            const ExprInfo &alloc_info = bounds_and_alignment_info.get(alloc_extent_name);
+            if (alloc_info.max_defined && index_info.min_defined) {
+                int index_min_bytes = index_info.min * op->type.bytes();
+                if (index_min_bytes > alloc_info.max) {
+                    in_unreachable = true;
+                    return unreachable(op->type);
+                }
+            }
         }
     }
 
@@ -347,7 +350,7 @@ Expr Simplify::visit(const Load *op, ExprInfo *bounds) {
     const Shuffle *s_index = index.as<Shuffle>();
     if (is_const_zero(predicate)) {
         // Predicate is always false
-        return undef(op->type);
+        return make_zero(op->type);
     } else if (b_index && is_const_one(predicate)) {
         // Load of a broadcast should be broadcast of the load
         Expr new_index = b_index->value;
 
@@ -338,4 +338,8 @@ add_library(Halide_Runtime INTERFACE)
 add_library(Halide::Runtime ALIAS Halide_Runtime)
 target_include_directories(Halide_Runtime INTERFACE $<BUILD_INTERFACE:${Halide_BINARY_DIR}/include>)
 set_target_properties(Halide_Runtime PROPERTIES EXPORT_NAME Runtime)
+option(Halide_BUILD_HEXAGON_REMOTE_RUNTIME "Build the hexagon remote runtime for offloading to Hexagon (HVX)" OFF)
 
+if (Halide_BUILD_HEXAGON_REMOTE_RUNTIME AND NOT Halide_CLANG_TIDY_BUILD)
+  add_subdirectory(hexagon_remote)
+endif ()
@@ -147,7 +147,11 @@ WEAK int init_hexagon_runtime(void *user_context) {
     if (!host_lib) {
         host_lib = halide_load_library("libhalide_hexagon_host.dll");
     }
-
+    if (!host_lib) {
+        // This will now cause a more specific error 'halide_error_code_symbol_not_found' down the line.
+        // So, just print this message and continue on instead of returning a generic error here.
+        error(user_context) << "Hexagon: unable to load libhalide_hexagon_host.so";
+    }
     debug(user_context) << "Hexagon: init_hexagon_runtime (user_context: " << user_context << ")\n";
 
     // Get the symbols we need from the library.
Original file line number	Diff line number	Diff line change
`@@ -2101,6 +2101,7 @@ struct SliceOp {`
`2101`	`2101`	`}`
`2102`	`2102`	`const Shuffle &v = (const Shuffle &)e;`
`2103`	`2103`	`return v.vectors.size() == 1 &&`
	`2104`	`+ v.is_slice() &&`
`2104`	`2105`	`vec.template match<bound>(*v.vectors[0].get(), state) &&`
`2105`	`2106`	`base.template match<bound \| bindings<Vec>::mask>(v.slice_begin(), state) &&`
`2106`	`2107`	`stride.template match<bound \| bindings<Vec>::mask \| bindings<Base>::mask>(v.slice_stride(), state) &&`