@@ -593,113 +593,6 @@ fn buildTileGrid(npoints: usize, nbits: usize, ncpus: usize, tiles: []Tile) usiz
593593 return total ;
594594}
595595
/// Multi-scalar multiplication on G1 (pubkeys) using tiled Pippenger.
/// Falls back to single-threaded Pippenger for small inputs or when no pool is available.
/// `points` and `scalars` are parallel arrays of length `npoints`; each scalar is
/// `(nbits + 7) / 8` bytes. Returns the sum of `scalars[i] * points[i]` as a Jacobian G1 point.
pub fn multP1(pool: *ThreadPool, points: [*]const c.blst_p1_affine, npoints: usize, scalars: [*]const u8, nbits: usize) c.blst_p1 {
    const nbytes = (nbits + 7) / 8;
    const ncpus = pool.n_workers;

    // Single-threaded fallback for small inputs or single worker
    if (ncpus < 2 or npoints < 32) {
        // NOTE(review): blst's scratch_sizeof functions conventionally return a size in
        // *bytes*; allocating `scratch_size` u64s would over-allocate ~8x. Harmless if so,
        // but TODO confirm the intended unit.
        const scratch_size = c.blst_p1s_mult_pippenger_scratch_sizeof(npoints);
        const scratch = pool.allocator.alloc(u64, scratch_size) catch {
            // If allocation fails, try stack-based single-threaded
            // (passing a null scratch pointer to blst; presumably blst then uses an
            // internal/slower path -- verify against the blst API).
            var ret: c.blst_p1 = std.mem.zeroes(c.blst_p1);
            // blst takes arrays-of-pointers; the trailing `null` follows its
            // null-terminated list convention so a single contiguous array can be passed.
            const pts: [2]?*const c.blst_p1_affine = .{ &points[0], null };
            const sca: [2]?*const u8 = .{ &scalars[0], null };
            c.blst_p1s_mult_pippenger(&ret, @ptrCast(&pts), npoints, @ptrCast(&sca), nbits, null);
            return ret;
        };
        defer pool.allocator.free(scratch);

        var ret: c.blst_p1 = std.mem.zeroes(c.blst_p1);
        const pts: [2]?*const c.blst_p1_affine = .{ &points[0], null };
        const sca: [2]?*const u8 = .{ &scalars[0], null };
        c.blst_p1s_mult_pippenger(&ret, @ptrCast(&pts), npoints, @ptrCast(&sca), nbits, scratch.ptr);
        return ret;
    }

    // Tiled parallel Pippenger: partition the (points x bit-window) space into tiles,
    // compute each tile's partial sum on the pool, then reduce.
    var tiles: [MAX_TILES]Tile = undefined;
    const total = buildTileGrid(npoints, nbits, ncpus, &tiles);
    const bd = breakdown(nbits, pippenger_window_size(npoints), ncpus);

    var results: [MAX_TILES]c.blst_p1 = undefined;
    var work_items: [MAX_WORKERS]TileP1WorkItem = undefined;
    var work_ptrs: [MAX_WORKERS]*WorkItem = undefined;

    // Shared job descriptor; workers claim tiles via the atomic counter.
    // Stack-allocated: safe only because submitAndWait blocks until all workers finish.
    var job = TileP1Job{
        .points = points,
        .scalars = scalars,
        .nbytes = nbytes,
        .nbits = nbits,
        .tiles = tiles[0..total],
        .results = results[0..total],
        .counter = std.atomic.Value(usize).init(0),
    };

    // One work item per worker (capped by tile count); each repeatedly pulls tiles
    // from `job.counter` until the grid is exhausted.
    const n_work = @min(ncpus, total);
    for (0..n_work) |i| {
        work_items[i] = .{ .base = .{ .exec_fn = TileP1WorkItem.exec }, .job = &job };
        work_ptrs[i] = &work_items[i].base;
    }
    pool.submitAndWait(work_ptrs[0..n_work]);

    // Combine per-tile partial sums into the final point.
    return reduceTilesP1(tiles[0..total], results[0..total], bd.nx, bd.ny, bd.wnd);
}
650-
/// Multi-scalar multiplication on G2 (signatures) using tiled Pippenger.
/// Falls back to single-threaded Pippenger for small inputs or when no pool is available.
/// Mirrors `multP1` but over G2; `points` and `scalars` are parallel arrays of length
/// `npoints`, each scalar `(nbits + 7) / 8` bytes. Returns a Jacobian G2 point.
pub fn multP2(pool: *ThreadPool, points: [*]const c.blst_p2_affine, npoints: usize, scalars: [*]const u8, nbits: usize) c.blst_p2 {
    const nbytes = (nbits + 7) / 8;
    const ncpus = pool.n_workers;

    // Single-threaded fallback for small inputs or single worker.
    if (ncpus < 2 or npoints < 32) {
        // NOTE(review): as in multP1, `scratch_sizeof` likely returns bytes while this
        // allocates that many u64s (~8x over-allocation) -- TODO confirm intended unit.
        const scratch_size = c.blst_p2s_mult_pippenger_scratch_sizeof(npoints);
        const scratch = pool.allocator.alloc(u64, scratch_size) catch {
            // Allocation failed: call blst with a null scratch pointer instead.
            var ret: c.blst_p2 = std.mem.zeroes(c.blst_p2);
            // Null-terminated single-entry pointer arrays, per blst's list convention.
            const pts: [2]?*const c.blst_p2_affine = .{ &points[0], null };
            const sca: [2]?*const u8 = .{ &scalars[0], null };
            c.blst_p2s_mult_pippenger(&ret, @ptrCast(&pts), npoints, @ptrCast(&sca), nbits, null);
            return ret;
        };
        defer pool.allocator.free(scratch);

        var ret: c.blst_p2 = std.mem.zeroes(c.blst_p2);
        const pts: [2]?*const c.blst_p2_affine = .{ &points[0], null };
        const sca: [2]?*const u8 = .{ &scalars[0], null };
        c.blst_p2s_mult_pippenger(&ret, @ptrCast(&pts), npoints, @ptrCast(&sca), nbits, scratch.ptr);
        return ret;
    }

    // Tiled parallel Pippenger over the (points x bit-window) grid.
    var tiles: [MAX_TILES]Tile = undefined;
    const total = buildTileGrid(npoints, nbits, ncpus, &tiles);
    const bd = breakdown(nbits, pippenger_window_size(npoints), ncpus);

    var results: [MAX_TILES]c.blst_p2 = undefined;
    var work_items: [MAX_WORKERS]TileP2WorkItem = undefined;
    var work_ptrs: [MAX_WORKERS]*WorkItem = undefined;

    // Shared job descriptor; workers claim tiles via the atomic counter.
    // Stack-allocated: safe only because submitAndWait blocks until all workers finish.
    var job = TileP2Job{
        .points = points,
        .scalars = scalars,
        .nbytes = nbytes,
        .nbits = nbits,
        .tiles = tiles[0..total],
        .results = results[0..total],
        .counter = std.atomic.Value(usize).init(0),
    };

    // One work item per worker, capped by the number of tiles.
    const n_work = @min(ncpus, total);
    for (0..n_work) |i| {
        work_items[i] = .{ .base = .{ .exec_fn = TileP2WorkItem.exec }, .job = &job };
        work_ptrs[i] = &work_items[i].base;
    }
    pool.submitAndWait(work_ptrs[0..n_work]);

    // Combine per-tile partial sums into the final point.
    return reduceTilesP2(tiles[0..total], results[0..total], bd.nx, bd.ny, bd.wnd);
}
702-
703596/// Reduce tile results: for each row (same y), add across x; then double-and-add across rows.
704597fn reduceTilesP1 (tiles : []const Tile , results : []c.blst_p1 , nx : usize , ny : usize , window : usize ) c.blst_p1 {
705598 var ret : c.blst_p1 = std .mem .zeroes (c .blst_p1 );
0 commit comments