@@ -593,113 +593,6 @@ fn buildTileGrid(npoints: usize, nbits: usize, ncpus: usize, tiles: []Tile) usiz
593593 return total ;
594594}
595595
/// Multi-scalar multiplication on G1 (pubkeys) using tiled Pippenger.
/// Falls back to single-threaded Pippenger for small inputs or when no pool is available.
/// `points` and `scalars` are parallel arrays of length `npoints`; each scalar is
/// `(nbits + 7) / 8` bytes. Returns the sum of `scalars[i] * points[i]` as a Jacobian G1 point.
pub fn multP1(pool: *ThreadPool, points: [*]const c.blst_p1_affine, npoints: usize, scalars: [*]const u8, nbits: usize) c.blst_p1 {
    const nbytes = (nbits + 7) / 8;
    const ncpus = pool.n_workers;

    // Single-threaded fallback for small inputs or single worker
    if (ncpus < 2 or npoints < 32) {
        // NOTE(review): blst's scratch_sizeof functions conventionally return a size in
        // *bytes*; allocating `scratch_size` u64s would over-allocate ~8x. Harmless if so,
        // but TODO confirm the intended unit.
        const scratch_size = c.blst_p1s_mult_pippenger_scratch_sizeof(npoints);
        const scratch = pool.allocator.alloc(u64, scratch_size) catch {
            // If allocation fails, try stack-based single-threaded
            // (passing a null scratch pointer to blst; presumably blst then uses an
            // internal/slower path -- verify against the blst API).
            var ret: c.blst_p1 = std.mem.zeroes(c.blst_p1);
            // blst takes arrays-of-pointers; the trailing `null` follows its
            // null-terminated list convention so a single contiguous array can be passed.
            const pts: [2]?*const c.blst_p1_affine = .{ &points[0], null };
            const sca: [2]?*const u8 = .{ &scalars[0], null };
            c.blst_p1s_mult_pippenger(&ret, @ptrCast(&pts), npoints, @ptrCast(&sca), nbits, null);
            return ret;
        };
        defer pool.allocator.free(scratch);

        var ret: c.blst_p1 = std.mem.zeroes(c.blst_p1);
        const pts: [2]?*const c.blst_p1_affine = .{ &points[0], null };
        const sca: [2]?*const u8 = .{ &scalars[0], null };
        c.blst_p1s_mult_pippenger(&ret, @ptrCast(&pts), npoints, @ptrCast(&sca), nbits, scratch.ptr);
        return ret;
    }

    // Tiled parallel Pippenger: partition the (points x bit-window) space into tiles,
    // compute each tile's partial sum on the pool, then reduce.
    var tiles: [MAX_TILES]Tile = undefined;
    const total = buildTileGrid(npoints, nbits, ncpus, &tiles);
    const bd = breakdown(nbits, pippenger_window_size(npoints), ncpus);

    var results: [MAX_TILES]c.blst_p1 = undefined;
    var work_items: [MAX_WORKERS]TileP1WorkItem = undefined;
    var work_ptrs: [MAX_WORKERS]*WorkItem = undefined;

    // Shared job descriptor; workers claim tiles via the atomic counter.
    // Stack-allocated: safe only because submitAndWait blocks until all workers finish.
    var job = TileP1Job{
        .points = points,
        .scalars = scalars,
        .nbytes = nbytes,
        .nbits = nbits,
        .tiles = tiles[0..total],
        .results = results[0..total],
        .counter = std.atomic.Value(usize).init(0),
    };

    // One work item per worker (capped by tile count); each repeatedly pulls tiles
    // from `job.counter` until the grid is exhausted.
    const n_work = @min(ncpus, total);
    for (0..n_work) |i| {
        work_items[i] = .{ .base = .{ .exec_fn = TileP1WorkItem.exec }, .job = &job };
        work_ptrs[i] = &work_items[i].base;
    }
    pool.submitAndWait(work_ptrs[0..n_work]);

    // Combine per-tile partial sums into the final point.
    return reduceTilesP1(tiles[0..total], results[0..total], bd.nx, bd.ny, bd.wnd);
}
650-
/// Multi-scalar multiplication on G2 (signatures) using tiled Pippenger.
/// Falls back to single-threaded Pippenger for small inputs or when no pool is available.
/// Mirrors `multP1` but over G2; `points` and `scalars` are parallel arrays of length
/// `npoints`, each scalar `(nbits + 7) / 8` bytes. Returns a Jacobian G2 point.
pub fn multP2(pool: *ThreadPool, points: [*]const c.blst_p2_affine, npoints: usize, scalars: [*]const u8, nbits: usize) c.blst_p2 {
    const nbytes = (nbits + 7) / 8;
    const ncpus = pool.n_workers;

    // Single-threaded fallback for small inputs or single worker.
    if (ncpus < 2 or npoints < 32) {
        // NOTE(review): as in multP1, `scratch_sizeof` likely returns bytes while this
        // allocates that many u64s (~8x over-allocation) -- TODO confirm intended unit.
        const scratch_size = c.blst_p2s_mult_pippenger_scratch_sizeof(npoints);
        const scratch = pool.allocator.alloc(u64, scratch_size) catch {
            // Allocation failed: call blst with a null scratch pointer instead.
            var ret: c.blst_p2 = std.mem.zeroes(c.blst_p2);
            // Null-terminated single-entry pointer arrays, per blst's list convention.
            const pts: [2]?*const c.blst_p2_affine = .{ &points[0], null };
            const sca: [2]?*const u8 = .{ &scalars[0], null };
            c.blst_p2s_mult_pippenger(&ret, @ptrCast(&pts), npoints, @ptrCast(&sca), nbits, null);
            return ret;
        };
        defer pool.allocator.free(scratch);

        var ret: c.blst_p2 = std.mem.zeroes(c.blst_p2);
        const pts: [2]?*const c.blst_p2_affine = .{ &points[0], null };
        const sca: [2]?*const u8 = .{ &scalars[0], null };
        c.blst_p2s_mult_pippenger(&ret, @ptrCast(&pts), npoints, @ptrCast(&sca), nbits, scratch.ptr);
        return ret;
    }

    // Tiled parallel Pippenger over the (points x bit-window) grid.
    var tiles: [MAX_TILES]Tile = undefined;
    const total = buildTileGrid(npoints, nbits, ncpus, &tiles);
    const bd = breakdown(nbits, pippenger_window_size(npoints), ncpus);

    var results: [MAX_TILES]c.blst_p2 = undefined;
    var work_items: [MAX_WORKERS]TileP2WorkItem = undefined;
    var work_ptrs: [MAX_WORKERS]*WorkItem = undefined;

    // Shared job descriptor; workers claim tiles via the atomic counter.
    // Stack-allocated: safe only because submitAndWait blocks until all workers finish.
    var job = TileP2Job{
        .points = points,
        .scalars = scalars,
        .nbytes = nbytes,
        .nbits = nbits,
        .tiles = tiles[0..total],
        .results = results[0..total],
        .counter = std.atomic.Value(usize).init(0),
    };

    // One work item per worker, capped by the number of tiles.
    const n_work = @min(ncpus, total);
    for (0..n_work) |i| {
        work_items[i] = .{ .base = .{ .exec_fn = TileP2WorkItem.exec }, .job = &job };
        work_ptrs[i] = &work_items[i].base;
    }
    pool.submitAndWait(work_ptrs[0..n_work]);

    // Combine per-tile partial sums into the final point.
    return reduceTilesP2(tiles[0..total], results[0..total], bd.nx, bd.ny, bd.wnd);
}
702-
703596/// Reduce tile results: for each row (same y), add across x; then double-and-add across rows.
704597fn reduceTilesP1 (tiles : []const Tile , results : []c.blst_p1 , nx : usize , ny : usize , window : usize ) c.blst_p1 {
705598 var ret : c.blst_p1 = std .mem .zeroes (c .blst_p1 );
0 commit comments