@@ -122,13 +122,13 @@ fn maskne64(x: u64, y: u64) -> u64 {
122122}
123123
124124/// Return a `u32::MAX` mask if `condition` is non-zero, otherwise return zero for a zero input.
125- #[ cfg( not( any( target_arch = "arm" , target_arch = "riscv32" ) ) ) ]
125+ #[ cfg( not( any( target_arch = "arm" , target_arch = "riscv32" , target_arch = "riscv64" ) ) ) ]
126126fn masknz32 ( condition : u32 ) -> u32 {
127127 masknz ! ( condition: u32 )
128128}
129129
130130/// Return a `u64::MAX` mask if `condition` is non-zero, otherwise return zero for a zero input.
131- #[ cfg( not( any( target_arch = "arm" , target_arch = "riscv32" ) ) ) ]
131+ #[ cfg( not( any( target_arch = "arm" , target_arch = "riscv32" , target_arch = "riscv64" ) ) ) ]
132132fn masknz64 ( condition : u64 ) -> u64 {
133133 masknz ! ( condition: u64 )
134134}
@@ -169,6 +169,28 @@ fn masknz32(condition: u32) -> u32 {
169169 mask
170170}
171171
/// 32-bit mask generation for riscv64 targets, implemented on top of the 64-bit version.
///
/// Returns `u32::MAX` if `condition` is non-zero, otherwise returns zero.
#[cfg(target_arch = "riscv64")]
fn masknz32(condition: u32) -> u32 {
    // `masknz64` yields either all ones or all zeroes, so the low 32 bits are themselves a
    // valid mask; masking first makes the narrowing cast explicitly lossless.
    (masknz64(condition.into()) & 0xFFFF_FFFF) as u32
}
177+
/// Optimized mask generation for riscv64 targets.
///
/// Returns `u64::MAX` if `condition` is non-zero, otherwise returns zero.
#[cfg(target_arch = "riscv64")]
fn masknz64(condition: u64) -> u64 {
    // Initialized exactly once by the assembly output below, so no `mut` is needed.
    let mask: u64;
    // SAFETY: the assembly only reads the `condition` register and writes the `mask`
    // register; it touches neither memory nor the stack, matching `nomem` and `nostack`.
    unsafe {
        core::arch::asm!(
            "seqz {0}, {1}",     // Set-if-equal-to-zero pseudo-instruction: 1 if zero, else 0
            "addi {0}, {0}, -1", // Subtract 1: 1 becomes all zeroes, 0 wraps to all ones
            // `lateout` lets the output share a register with the input; that is fine here
            // because `{1}` is only read by the first instruction.
            lateout(reg) mask,
            in(reg) condition,
            options(nostack, nomem),
        );
    }
    mask
}
193+
172194/// 64-bit wrapper for targets that implement 32-bit mask generation in assembly.
173195#[ cfg( any( target_arch = "arm" , target_arch = "riscv32" ) ) ]
174196fn masknz64 ( condition : u64 ) -> u64 {
0 commit comments