Skip to content

Commit c32826e

Browse files
committed
Revert "use tiled pippenger if pool is available"
This reverts commit f744e0c.
1 parent f744e0c commit c32826e

File tree

2 files changed

+43
-156
lines changed

2 files changed

+43
-156
lines changed

bindings/napi/blst.zig

Lines changed: 43 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -879,56 +879,50 @@ fn asyncAggregateExecute(_: napi.Env, data: *AsyncAggregateData) void {
879879
}
880880
}
881881

882-
// Use thread pool for parallel tiled Pippenger when available.
883-
if (thread_pool) |pool| {
884-
const p1_ret = pool.multP1(@ptrCast(data.pks.ptr), n, &scalars, nbits);
885-
bls.c.blst_p1_to_affine(&data.result_pk.point, &p1_ret);
886-
887-
const p2_ret = pool.multP2(@ptrCast(data.sigs.ptr), n, &scalars, nbits);
888-
bls.c.blst_p2_to_affine(&data.result_sig.point, &p2_ret);
889-
} else {
890-
// Fallback: single-threaded Pippenger (no thread pool initialized)
891-
var pk_ptrs: [MAX_AGGREGATE_PER_JOB]*const bls.c.blst_p1_affine = undefined;
892-
var sig_ptrs: [MAX_AGGREGATE_PER_JOB]*const bls.c.blst_p2_affine = undefined;
893-
var sca_ptrs: [MAX_AGGREGATE_PER_JOB]*const u8 = undefined;
894-
for (0..n) |i| {
895-
pk_ptrs[i] = &data.pks[i].point;
896-
sig_ptrs[i] = &data.sigs[i].point;
897-
sca_ptrs[i] = &scalars[i * nbytes];
898-
}
899-
900-
const scratch_size = @max(
901-
bls.c.blst_p1s_mult_pippenger_scratch_sizeof(n),
902-
bls.c.blst_p2s_mult_pippenger_scratch_sizeof(n),
903-
);
904-
const scratch = allocator.alloc(u64, scratch_size) catch {
905-
data.err = true;
906-
return;
907-
};
908-
defer allocator.free(scratch);
909-
910-
var p1_ret: bls.c.blst_p1 = std.mem.zeroes(bls.c.blst_p1);
911-
bls.c.blst_p1s_mult_pippenger(
912-
&p1_ret,
913-
@ptrCast(&pk_ptrs),
914-
n,
915-
@ptrCast(&sca_ptrs),
916-
nbits,
917-
scratch.ptr,
918-
);
919-
bls.c.blst_p1_to_affine(&data.result_pk.point, &p1_ret);
920-
921-
var p2_ret: bls.c.blst_p2 = std.mem.zeroes(bls.c.blst_p2);
922-
bls.c.blst_p2s_mult_pippenger(
923-
&p2_ret,
924-
@ptrCast(&sig_ptrs),
925-
n,
926-
@ptrCast(&sca_ptrs),
927-
nbits,
928-
scratch.ptr,
929-
);
930-
bls.c.blst_p2_to_affine(&data.result_sig.point, &p2_ret);
882+
// Build pointer arrays for Pippenger API
883+
var pk_ptrs: [MAX_AGGREGATE_PER_JOB]*const bls.c.blst_p1_affine = undefined;
884+
var sig_ptrs: [MAX_AGGREGATE_PER_JOB]*const bls.c.blst_p2_affine = undefined;
885+
var sca_ptrs: [MAX_AGGREGATE_PER_JOB]*const u8 = undefined;
886+
for (0..n) |i| {
887+
pk_ptrs[i] = &data.pks[i].point;
888+
sig_ptrs[i] = &data.sigs[i].point;
889+
sca_ptrs[i] = &scalars[i * nbytes];
931890
}
891+
892+
// Per-call scratch allocation
893+
const scratch_size = @max(
894+
bls.c.blst_p1s_mult_pippenger_scratch_sizeof(n),
895+
bls.c.blst_p2s_mult_pippenger_scratch_sizeof(n),
896+
);
897+
const scratch = allocator.alloc(u64, scratch_size) catch {
898+
data.err = true;
899+
return;
900+
};
901+
defer allocator.free(scratch);
902+
903+
// Pippenger multi-scalar multiplication on G1 (pubkeys)
904+
var p1_ret: bls.c.blst_p1 = std.mem.zeroes(bls.c.blst_p1);
905+
bls.c.blst_p1s_mult_pippenger(
906+
&p1_ret,
907+
@ptrCast(&pk_ptrs),
908+
n,
909+
@ptrCast(&sca_ptrs),
910+
nbits,
911+
scratch.ptr,
912+
);
913+
bls.c.blst_p1_to_affine(&data.result_pk.point, &p1_ret);
914+
915+
// Pippenger multi-scalar multiplication on G2 (signatures)
916+
var p2_ret: bls.c.blst_p2 = std.mem.zeroes(bls.c.blst_p2);
917+
bls.c.blst_p2s_mult_pippenger(
918+
&p2_ret,
919+
@ptrCast(&sig_ptrs),
920+
n,
921+
@ptrCast(&sca_ptrs),
922+
nbits,
923+
scratch.ptr,
924+
);
925+
bls.c.blst_p2_to_affine(&data.result_sig.point, &p2_ret);
932926
}
933927

934928
fn asyncAggregateComplete(env: napi.Env, _: napi.status.Status, data: *AsyncAggregateData) void {

src/bls/ThreadPool.zig

Lines changed: 0 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -593,113 +593,6 @@ fn buildTileGrid(npoints: usize, nbits: usize, ncpus: usize, tiles: []Tile) usiz
593593
return total;
594594
}
595595

596-
/// Multi-scalar multiplication on G1 (pubkeys) using tiled Pippenger.
597-
/// Falls back to single-threaded Pippenger for small inputs or when no pool is available.
598-
pub fn multP1(pool: *ThreadPool, points: [*]const c.blst_p1_affine, npoints: usize, scalars: [*]const u8, nbits: usize) c.blst_p1 {
599-
const nbytes = (nbits + 7) / 8;
600-
const ncpus = pool.n_workers;
601-
602-
// Single-threaded fallback for small inputs or single worker
603-
if (ncpus < 2 or npoints < 32) {
604-
const scratch_size = c.blst_p1s_mult_pippenger_scratch_sizeof(npoints);
605-
const scratch = pool.allocator.alloc(u64, scratch_size) catch {
606-
// If allocation fails, try stack-based single-threaded
607-
var ret: c.blst_p1 = std.mem.zeroes(c.blst_p1);
608-
const pts: [2]?*const c.blst_p1_affine = .{ &points[0], null };
609-
const sca: [2]?*const u8 = .{ &scalars[0], null };
610-
c.blst_p1s_mult_pippenger(&ret, @ptrCast(&pts), npoints, @ptrCast(&sca), nbits, null);
611-
return ret;
612-
};
613-
defer pool.allocator.free(scratch);
614-
615-
var ret: c.blst_p1 = std.mem.zeroes(c.blst_p1);
616-
const pts: [2]?*const c.blst_p1_affine = .{ &points[0], null };
617-
const sca: [2]?*const u8 = .{ &scalars[0], null };
618-
c.blst_p1s_mult_pippenger(&ret, @ptrCast(&pts), npoints, @ptrCast(&sca), nbits, scratch.ptr);
619-
return ret;
620-
}
621-
622-
// Tiled parallel Pippenger
623-
var tiles: [MAX_TILES]Tile = undefined;
624-
const total = buildTileGrid(npoints, nbits, ncpus, &tiles);
625-
const bd = breakdown(nbits, pippenger_window_size(npoints), ncpus);
626-
627-
var results: [MAX_TILES]c.blst_p1 = undefined;
628-
var work_items: [MAX_WORKERS]TileP1WorkItem = undefined;
629-
var work_ptrs: [MAX_WORKERS]*WorkItem = undefined;
630-
631-
var job = TileP1Job{
632-
.points = points,
633-
.scalars = scalars,
634-
.nbytes = nbytes,
635-
.nbits = nbits,
636-
.tiles = tiles[0..total],
637-
.results = results[0..total],
638-
.counter = std.atomic.Value(usize).init(0),
639-
};
640-
641-
const n_work = @min(ncpus, total);
642-
for (0..n_work) |i| {
643-
work_items[i] = .{ .base = .{ .exec_fn = TileP1WorkItem.exec }, .job = &job };
644-
work_ptrs[i] = &work_items[i].base;
645-
}
646-
pool.submitAndWait(work_ptrs[0..n_work]);
647-
648-
return reduceTilesP1(tiles[0..total], results[0..total], bd.nx, bd.ny, bd.wnd);
649-
}
650-
651-
/// Multi-scalar multiplication on G2 (signatures) using tiled Pippenger.
652-
/// Falls back to single-threaded Pippenger for small inputs or when no pool is available.
653-
pub fn multP2(pool: *ThreadPool, points: [*]const c.blst_p2_affine, npoints: usize, scalars: [*]const u8, nbits: usize) c.blst_p2 {
654-
const nbytes = (nbits + 7) / 8;
655-
const ncpus = pool.n_workers;
656-
657-
if (ncpus < 2 or npoints < 32) {
658-
const scratch_size = c.blst_p2s_mult_pippenger_scratch_sizeof(npoints);
659-
const scratch = pool.allocator.alloc(u64, scratch_size) catch {
660-
var ret: c.blst_p2 = std.mem.zeroes(c.blst_p2);
661-
const pts: [2]?*const c.blst_p2_affine = .{ &points[0], null };
662-
const sca: [2]?*const u8 = .{ &scalars[0], null };
663-
c.blst_p2s_mult_pippenger(&ret, @ptrCast(&pts), npoints, @ptrCast(&sca), nbits, null);
664-
return ret;
665-
};
666-
defer pool.allocator.free(scratch);
667-
668-
var ret: c.blst_p2 = std.mem.zeroes(c.blst_p2);
669-
const pts: [2]?*const c.blst_p2_affine = .{ &points[0], null };
670-
const sca: [2]?*const u8 = .{ &scalars[0], null };
671-
c.blst_p2s_mult_pippenger(&ret, @ptrCast(&pts), npoints, @ptrCast(&sca), nbits, scratch.ptr);
672-
return ret;
673-
}
674-
675-
var tiles: [MAX_TILES]Tile = undefined;
676-
const total = buildTileGrid(npoints, nbits, ncpus, &tiles);
677-
const bd = breakdown(nbits, pippenger_window_size(npoints), ncpus);
678-
679-
var results: [MAX_TILES]c.blst_p2 = undefined;
680-
var work_items: [MAX_WORKERS]TileP2WorkItem = undefined;
681-
var work_ptrs: [MAX_WORKERS]*WorkItem = undefined;
682-
683-
var job = TileP2Job{
684-
.points = points,
685-
.scalars = scalars,
686-
.nbytes = nbytes,
687-
.nbits = nbits,
688-
.tiles = tiles[0..total],
689-
.results = results[0..total],
690-
.counter = std.atomic.Value(usize).init(0),
691-
};
692-
693-
const n_work = @min(ncpus, total);
694-
for (0..n_work) |i| {
695-
work_items[i] = .{ .base = .{ .exec_fn = TileP2WorkItem.exec }, .job = &job };
696-
work_ptrs[i] = &work_items[i].base;
697-
}
698-
pool.submitAndWait(work_ptrs[0..n_work]);
699-
700-
return reduceTilesP2(tiles[0..total], results[0..total], bd.nx, bd.ny, bd.wnd);
701-
}
702-
703596
/// Reduce tile results: for each row (same y), add across x; then double-and-add across rows.
704597
fn reduceTilesP1(tiles: []const Tile, results: []c.blst_p1, nx: usize, ny: usize, window: usize) c.blst_p1 {
705598
var ret: c.blst_p1 = std.mem.zeroes(c.blst_p1);

0 commit comments

Comments
 (0)