halide
diff --git a/‎.github/workflows/pip.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/pip.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎python_bindings/src/halide/halide_/PyHalide.cpp‎
Lines changed: 10 additions & 0 deletions b/‎python_bindings/src/halide/halide_/PyHalide.cpp‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎src/IROperator.cpp‎
Lines changed: 16 additions & 5 deletions b/‎src/IROperator.cpp‎
Lines changed: 16 additions & 5 deletions
diff --git a/‎src/SlidingWindow.cpp‎
Lines changed: 28 additions & 7 deletions b/‎src/SlidingWindow.cpp‎
Lines changed: 28 additions & 7 deletions
diff --git a/‎src/Solve.cpp‎
Lines changed: 64 additions & 2 deletions b/‎src/Solve.cpp‎
Lines changed: 64 additions & 2 deletions
diff --git a/‎test/correctness/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎test/correctness/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
@@ -47,7 +47,7 @@ jobs:
           fetch-tags: true
 
       - uses: ilammy/msvc-dev-cmd@v1
-      - uses: lukka/get-cmake@v4.3.2
+      - uses: lukka/get-cmake@v4.3.3
         with:
           cmakeVersion: "~3.28.0"
 
 
@@ -38,6 +38,16 @@
 PYBIND11_MODULE(HALIDE_PYBIND_MODULE_NAME, m) {
     using namespace Halide::PythonBindings;
 
+#if PY_VERSION_HEX >= 0x030E0000
+    // CPython 3.14 caches C stack limits in the thread state, which does not
+    // interact well with Halide's user-space compiler stack when warnings call
+    // back into Python. Stay on Python's stack by default; keep Halide's existing
+    // behavior only when the user opts in by setting HL_COMPILER_STACK_SIZE.
+    if (Halide::Internal::get_env_variable("HL_COMPILER_STACK_SIZE").empty()) {
+        Halide::set_compiler_stack_size(0);
+    }
+#endif
+
     // Order of definitions matters somewhat:
     // things used for default arguments must be registered
     // prior to that usage.
 
@@ -609,12 +609,23 @@ Expr lossless_negate(const Expr &x) {
     } else if (const FloatImm *f = x.as<FloatImm>()) {
         return FloatImm::make(f->type, -f->value);
     } else if (const Cast *c = x.as<Cast>()) {
-        Expr value = lossless_negate(c->value);
-        if (value.defined()) {
-            // This logic is only sound if we know the cast can't overflow.
-            value = lossless_cast(c->type, value);
+        // Unsigned inner types wrap modularly (-uint8(65) = 191), and signed
+        // integer inner types wrap at the type minimum (-int8(-128) = -128), so
+        // both can make cast(outer, -inner) != -cast(outer, inner). Floats are exact.
+        // For signed integers, only proceed when a known lower bound excludes the type minimum.
+        bool inner_negation_safe = c->value.type().is_float();
+        if (!inner_negation_safe && c->value.type().is_int()) {
+            ConstantInterval ci = constant_integer_bounds(c->value);
+            inner_negation_safe = ci.min_defined && !c->value.type().is_min(ci.min);
+        }
+        if (inner_negation_safe) {
+            Expr value = lossless_negate(c->value);
             if (value.defined()) {
-                return value;
+                // This logic is only sound if we know the cast can't overflow.
+                value = lossless_cast(c->type, value);
+                if (value.defined()) {
+                    return value;
+                }
             }
         }
     } else if (const Ramp *r = x.as<Ramp>()) {
 
@@ -1,6 +1,7 @@
 #include "SlidingWindow.h"
 
 #include "Bounds.h"
+#include "CSE.h"
 #include "CompilerLogger.h"
 #include "Debug.h"
 #include "ExprUsesVar.h"
@@ -86,7 +87,7 @@ class ExpandExpr : public IRMutator {
 // Perform all the substitutions in a scope
 Expr expand_expr(const Expr &e, const Scope<Expr> &scope) {
     ExpandExpr ee(scope);
-    Expr result = ee(e);
+    Expr result = common_subexpression_elimination(ee(e));
     debug(4) << "Expanded " << e << " into " << result << "\n";
     return result;
 }
@@ -223,6 +224,7 @@ class SlidingWindowOnFunctionAndLoop : public IRMutator {
     Expr loop_min;
     set<int> &slid_dimensions;
     Scope<Expr> scope;
+    Scope<Interval> &bounds_scope;
 
     // For loops strictly between the loop being slid over and the current
     // node (not including the loop being slid over itself).
@@ -282,8 +284,8 @@ class SlidingWindowOnFunctionAndLoop : public IRMutator {
                 internal_assert(min_val && max_val);
                 Expr min_req = *min_val;
                 Expr max_req = *max_val;
-                min_req = expand_expr(min_req, scope);
-                max_req = expand_expr(max_req, scope);
+                min_req = simplify(expand_expr(min_req, scope), bounds_scope);
+                max_req = simplify(expand_expr(max_req, scope), bounds_scope);
 
                 debug(3) << func_args[i] << ":" << min_req << ", " << max_req << "\n";
                 if (expr_depends_on_var(min_req, loop_var) ||
@@ -594,7 +596,10 @@ class SlidingWindowOnFunctionAndLoop : public IRMutator {
     }
 
     Stmt visit(const LetStmt *op) override {
-        ScopedBinding<Expr> bind(scope, op->name, simplify(expand_expr(op->value, scope)));
+        ScopedBinding<Interval> bind_bounds(bounds_scope, op->name,
+                                            bounds_of_expr_in_scope(op->value, bounds_scope));
+        ScopedBinding<Expr> bind(scope, op->name, simplify(expand_expr(op->value, scope), bounds_scope));
+
         Stmt new_body = mutate(op->body);
 
         Expr value = op->value;
@@ -613,8 +618,10 @@ class SlidingWindowOnFunctionAndLoop : public IRMutator {
     }
 
 public:
-    SlidingWindowOnFunctionAndLoop(Function f, string v, Expr v_min, set<int> &slid_dimensions)
-        : func(std::move(f)), loop_var(std::move(v)), loop_min(std::move(v_min)), slid_dimensions(slid_dimensions) {
+    SlidingWindowOnFunctionAndLoop(Function f, string v, Expr v_min, set<int> &slid_dimensions,
+                                   Scope<Interval> &bounds_scope)
+        : func(std::move(f)), loop_var(std::move(v)), loop_min(std::move(v_min)),
+          slid_dimensions(slid_dimensions), bounds_scope(bounds_scope) {
     }
 
     Expr new_loop_min;
@@ -755,9 +762,16 @@ class SlidingWindow : public IRMutator {
     // Keep track of realizations we want to slide, from innermost to
     // outermost.
     list<Function> sliding;
+    Scope<Interval> bounds_scope;
 
     using IRMutator::visit;
 
+    Stmt visit(const LetStmt *op) override {
+        ScopedBinding<Interval> bind(bounds_scope, op->name,
+                                     bounds_of_expr_in_scope(op->value, bounds_scope));
+        return IRMutator::visit(op);
+    }
+
     Stmt visit(const Realize *op) override {
         // Find the args for this function
         map<string, Function>::const_iterator iter = env.find(op->name);
@@ -827,7 +841,14 @@ class SlidingWindow : public IRMutator {
 
             set<int> &slid_dims = slid_dimensions[func.name()];
             size_t old_slid_dims_size = slid_dims.size();
-            SlidingWindowOnFunctionAndLoop slider(func, name, prev_loop_min, slid_dims);
+
+            Interval min_bounds = bounds_of_expr_in_scope(loop_min, bounds_scope);
+            Interval max_bounds = bounds_of_expr_in_scope(loop_max, bounds_scope);
+            ScopedBinding<Interval> bind_bounds(bounds_scope, op->name,
+                                                Interval(min_bounds.min, max_bounds.max));
+
+            SlidingWindowOnFunctionAndLoop slider(func, name, prev_loop_min, slid_dims, bounds_scope);
+
             body = slider(body);
 
             if (func.schedule().memory_type() == MemoryType::Register &&
 
@@ -312,6 +312,12 @@ class SolveExpression : public IRMutator {
             } else if (mul_a && mul_b && equal(mul_a->b, mul_b->b)) {
                 // f(x)*a - g(x)*a -> (f(x) - g(x))*a;
                 expr = mutate((mul_a->a - mul_b->a) * mul_a->b);
+            } else if (mul_a && equal(mul_a->a, b)) {
+                // f(x)*a - f(x) -> f(x) * (a - 1)
+                expr = mutate(b * (mul_a->b - 1));
+            } else if (mul_b && equal(mul_b->a, a)) {
+                // f(x) - f(x)*a -> f(x) * (1 - a)
+                expr = mutate(a * (make_one(a.type()) - mul_b->b));
             } else if (div_a && !a_failed && no_overflow_int(op->type) && can_prove(div_a->b != 0)) {
                 // f(x)/a - g(x) -> (f(x) - g(x) * a) / a
                 // Same overflow and div-by-zero concerns as the Add case above.
@@ -1053,7 +1059,35 @@ class SolveForInterval : public IRVisitor {
         if (!already_solved) {
             SolverResult solved = solve_expression(le, var, scope);
             if (!solved.fully_solved) {
-                fail();
+                // solve_expression failed; try direct max/min decomposition on the LHS.
+                if (const Max *max_fallback = le->a.as<Max>()) {
+                    // max(a, b) <= c <==> a <= c && b <= c
+                    (max_fallback->a <= le->b && max_fallback->b <= le->b).accept(this);
+                } else if (const Min *min_fallback = le->a.as<Min>()) {
+                    // min(a, b) <= c <==> a <= c || b <= c
+                    (min_fallback->a <= le->b || min_fallback->b <= le->b).accept(this);
+                } else if (const Mul *mul_fallback = le->a.as<Mul>()) {
+                    // With a positive constant pos_c as either operand: max(a, b)*pos_c <= rhs splits with &&, min(a, b)*pos_c <= rhs with ||.
+                    const Max *mxf = mul_fallback->a.as<Max>();
+                    const Min *mnf = mul_fallback->a.as<Min>();
+                    Expr factor = mul_fallback->b;
+                    if (!mxf && !mnf) {
+                        mxf = mul_fallback->b.as<Max>();
+                        mnf = mul_fallback->b.as<Min>();
+                        factor = mul_fallback->a;
+                    }
+                    if (mxf && is_positive_const(factor)) {
+                        // max(a, b) * pos_c <= rhs <==> a*pos_c <= rhs && b*pos_c <= rhs
+                        (mxf->a * factor <= le->b && mxf->b * factor <= le->b).accept(this);
+                    } else if (mnf && is_positive_const(factor)) {
+                        // min(a, b) * pos_c <= rhs <==> a*pos_c <= rhs || b*pos_c <= rhs
+                        (mnf->a * factor <= le->b || mnf->b * factor <= le->b).accept(this);
+                    } else {
+                        fail();
+                    }
+                } else {
+                    fail();
+                }
             } else {
                 already_solved = true;
                 solved.result.accept(this);
@@ -1110,7 +1144,35 @@ class SolveForInterval : public IRVisitor {
         if (!already_solved) {
             SolverResult solved = solve_expression(ge, var, scope);
             if (!solved.fully_solved) {
-                fail();
+                // solve_expression failed; try direct max/min decomposition on the LHS.
+                if (const Max *max_fallback = ge->a.as<Max>()) {
+                    // max(a, b) >= c <==> a >= c || b >= c
+                    (max_fallback->a >= ge->b || max_fallback->b >= ge->b).accept(this);
+                } else if (const Min *min_fallback = ge->a.as<Min>()) {
+                    // min(a, b) >= c <==> a >= c && b >= c
+                    (min_fallback->a >= ge->b && min_fallback->b >= ge->b).accept(this);
+                } else if (const Mul *mul_fallback = ge->a.as<Mul>()) {
+                    // With a positive constant pos_c as either operand: max(a, b)*pos_c >= rhs splits with ||, min(a, b)*pos_c >= rhs with &&.
+                    const Max *mxf = mul_fallback->a.as<Max>();
+                    const Min *mnf = mul_fallback->a.as<Min>();
+                    Expr factor = mul_fallback->b;
+                    if (!mxf && !mnf) {
+                        mxf = mul_fallback->b.as<Max>();
+                        mnf = mul_fallback->b.as<Min>();
+                        factor = mul_fallback->a;
+                    }
+                    if (mxf && is_positive_const(factor)) {
+                        // max(a, b) * pos_c >= rhs <==> a*pos_c >= rhs || b*pos_c >= rhs
+                        (mxf->a * factor >= ge->b || mxf->b * factor >= ge->b).accept(this);
+                    } else if (mnf && is_positive_const(factor)) {
+                        // min(a, b) * pos_c >= rhs <==> a*pos_c >= rhs && b*pos_c >= rhs
+                        (mnf->a * factor >= ge->b && mnf->b * factor >= ge->b).accept(this);
+                    } else {
+                        fail();
+                    }
+                } else {
+                    fail();
+                }
             } else {
                 already_solved = true;
                 solved.result.accept(this);
 
@@ -300,6 +300,7 @@ tests(
     sliding_over_guard_with_if.cpp
     sliding_reduction.cpp
     sliding_window.cpp
+    sliding_window_cascade.cpp
     solve.cpp
     sort_exprs.cpp
     specialize.cpp