stumpy-dev · NimaSarajpoor · Jan 8, 2026 · Jan 8, 2026 · Jan 8, 2026 · Jan 9, 2026
diff --git a/sdp/challenger_sdp.py b/sdp/challenger_sdp.py
@@ -1,14 +1,92 @@
+import math
 import numpy as np
+from scipy.special import lambertw
+from scipy.fft import next_fast_len
+from scipy.fft._pocketfft.basic import r2c, c2r
+
+
+def _compute_block_size(m, n, block_size=None):
+    """
+    Return a block size for the overlap-add method.
+
+    Parameters
+    ----------
+    m : int
+        Length of the query
+
+    n : int
+        Length of the time series
+
+    block_size : int, default None
+        Requested block size. When `None`, a size is derived from `m` and `n`.
+
+    Returns
+    -------
+    block_size : int
+        The block size, raised to at least `max(m, 2 * (m - 1))` and then
+        capped at `n`. A value of `n` means "no blocking".
+    """
+    overlap = m - 1
+
+    if block_size is None:
+        # Find optimal block_size based on m and n
+        if overlap == 0 or m >= n / 2:
+            # `overlap == 0` must be caught here: the closed form below divides
+            # by `overlap`.
+            block_size = n  # i.e. no blocking
+        else:
+            # NOTE(review): this looks like the same closed form used by
+            # `scipy.signal.oaconvolve` to pick its block length -- confirm.
+            opt_size = -overlap * lambertw(-1 / (2 * math.e * overlap), k=-1).real
+            block_size = next_fast_len(math.ceil(opt_size), real=True)
+
+    # A block must hold the whole query, and each block has to contribute at
+    # least `overlap` fresh samples so that the tail of one block only spills
+    # into the block that immediately follows it.
+    block_size = max(block_size, m, 2 * overlap)
+
+    return min(block_size, n)
+
+
+def _rfft_irfft_r2c2r_block(Q, T, block_size):
+    """
+    Multiply the spectrum of every zero-padded chunk of `T` with the spectrum of
+    the reversed `Q` and transform the products back, all in one batched call.
+
+    `T` is cut into consecutive chunks of `block_size - (len(Q) - 1)` samples
+    (the last chunk may be shorter). Row `i` of the returned 2D array, which has
+    `block_size` columns, belongs to chunk `i`. Combining the rows into a single
+    sequence is left to the caller.
+    """
+    query_len = Q.shape[0]
+    series_len = T.shape[0]
+    chunk_len = block_size - (query_len - 1)
+    n_chunks = math.ceil(series_len / chunk_len)
+    n_full = n_chunks - 1
+    tail_start = n_full * chunk_len
+    tail_len = series_len - tail_start
+
+    # One row per chunk of T plus a final row for the reversed query, so that a
+    # single `r2c` call transforms everything.
+    buf = np.empty((n_chunks + 1, block_size), dtype=np.float64)
+
+    # reversed query, zero-padded, in the last row
+    buf[-1, :query_len] = Q[::-1]
+    buf[-1, query_len:] = 0.0
+
+    # leading chunks, zero-padded
+    buf[:n_full, :chunk_len] = T[:tail_start].reshape(n_full, chunk_len)
+    buf[:n_full, chunk_len:] = 0.0
+
+    # final chunk, zero-padded
+    buf[n_full, :tail_len] = T[tail_start:]
+    buf[n_full, tail_len:] = 0.0
+
+    spectra = r2c(True, buf, axis=-1)
+    product = spectra[:-1] * spectra[-1:]
+
+    return c2r(False, product, n=block_size)
+
+
+def _sliding_dot_product_r2c2r(Q, T):
+    """
+    Compute the sliding dot product of `Q` with `T` using one forward and one
+    inverse real transform over the whole of `T` (no blocking).
+
+    Both inputs are zero-padded to `next_fast_len(len(T), real=True)`. Only the
+    entries `len(Q) - 1` through `len(T) - 1` of the inverse transform are
+    returned, i.e. `len(T) - len(Q) + 1` values.
+    """
+    query_len = len(Q)
+    series_len = len(T)
+    fft_len = next_fast_len(series_len, real=True)
+
+    # Row 0 holds the reversed query, row 1 the time series; both rows are
+    # transformed together by a single `r2c` call.
+    buf = np.empty((2, fft_len))
+    q_row, t_row = buf
+    q_row[:query_len] = Q[::-1]
+    q_row[query_len:] = 0.0
+    t_row[:series_len] = T
+    t_row[series_len:] = 0.0
+
+    q_spec, t_spec = r2c(True, buf, axis=-1)
+    conv = c2r(False, q_spec * t_spec, n=fft_len)
+
+    return conv[query_len - 1 : series_len]
+
+
+def _sliding_dot_product(Q, T, block_size):
+    """
+    Compute the sliding dot product of `Q` with `T` block-wise (overlap-add).
+
+    Parameters
+    ----------
+    Q : numpy.ndarray
+        1D query of length `m`
+
+    T : numpy.ndarray
+        1D time series of length `n`
+
+    block_size : int
+        Length of each transformed block. Must be at least
+        `max(m, 2 * (m - 1))`.
+
+    Returns
+    -------
+    out : numpy.ndarray
+        The entries `m - 1` through `n - 1` of the recombined sequence, i.e.
+        `n - m + 1` values.
+
+    Raises
+    ------
+    ValueError
+        If `block_size` is too small for the overlap-add recombination.
+    """
+    m = Q.shape[0]
+    n = T.shape[0]
+
+    overlap = m - 1
+    if block_size < max(m, 2 * overlap):
+        # Below this bound the tail of a block would be wider than the fresh
+        # part of the next block and could not be added onto it.
+        raise ValueError(
+            f"block_size must be at least {max(m, 2 * overlap)}, got {block_size}"
+        )
+
+    ret = _rfft_irfft_r2c2r_block(Q, T, block_size)
+
+    # Keep the first `block_size - overlap` columns of every row. This is
+    # spelled with a positive stop because `ret[:, :-overlap]` would be empty
+    # for `overlap == 0`.
+    out = ret[:, : block_size - overlap]
+    if overlap > 0:
+        # Add the tail of each block onto the head of the following block.
+        # `out` is a view of `ret`; the columns read and written are disjoint.
+        out[1:, :overlap] += ret[:-1, -overlap:]
+    out = np.reshape(out, (-1,))
+
+    return out[m - 1 : n]
 
 
 def setup(Q, T):
+    # No-op: `Q` and `T` are accepted but unused, and nothing is returned.
+    # NOTE(review): presumably a hook for implementations that need one-time
+    # preparation before `sliding_dot_product` is called -- confirm against
+    # the caller.
     return
 
 
-def sliding_dot_product(Q, T):
-    m = len(Q)
-    l = T.shape[0] - m + 1
-    out = np.empty(l)
-    for i in range(l):
-        out[i] = np.dot(Q, T[i : i + m])
-    return out
+def sliding_dot_product(Q, T, block_size=None):
+    """
+    Compute the sliding dot product of the query `Q` with the time series `T`.
+
+    Parameters
+    ----------
+    Q : numpy.ndarray
+        1D query of length `m`
+
+    T : numpy.ndarray
+        1D time series of length `n`
+
+    block_size : int, default None
+        Block size for the overlap-add method. When `None`, a size is chosen
+        from `m` and `n`. If the resulting size is `n` or larger, the whole of
+        `T` is transformed at once.
+
+    Returns
+    -------
+    out : numpy.ndarray
+        1D array of length `n - m + 1`
+    """
+    m = Q.shape[0]
+    n = T.shape[0]
+    if m == n:
+        # Single window. `np.dot` on two 1D arrays yields a scalar, so wrap it
+        # to keep the 1D, length `n - m + 1` return shape of the other paths.
+        return np.atleast_1d(np.dot(Q, T))
+
+    block_size = _compute_block_size(m, n, block_size=block_size)
+    if block_size >= n:
+        return _sliding_dot_product_r2c2r(Q, T)
+    else:
+        return _sliding_dot_product(Q, T, block_size)
diff --git a/test.py b/test.py
@@ -210,3 +210,18 @@ def test_pyfftw_sdp_max_n():
     np.testing.assert_allclose(comp, ref)
 
     return
+
+
+def test_oaconvolve_sdp_blocksize():
+    """
+    Check that `sliding_dot_product` with an explicit `block_size` matches the
+    naive reference on random input.
+    """
+    from sdp.challenger_sdp import sliding_dot_product
+
+    n, m = 2**10, 2**8
+    T = np.random.rand(n)
+    Q = np.random.rand(m)
+
+    # m <= block_size < n, so the block-wise branch is the one exercised
+    comp = sliding_dot_product(Q, T, block_size=2**9)
+    ref = naive_sliding_dot_product(Q, T)
+    np.testing.assert_allclose(comp, ref)
+
+    return