|
23 | 23 | core::{ |
24 | 24 | array, |
25 | 25 | convert::{Infallible, TryFrom}, |
26 | | - fmt, mem, |
| 26 | + fmt, |
| 27 | + hash::{Hash, Hasher}, |
| 28 | + mem, |
27 | 29 | str::{from_utf8, FromStr}, |
28 | 30 | }, |
29 | 31 | num_traits::{FromPrimitive, ToPrimitive}, |
@@ -158,10 +160,163 @@ impl From<u64> for PubkeyError { |
158 | 160 | #[cfg_attr(all(feature = "borsh", feature = "std"), derive(BorshSchema))] |
159 | 161 | #[cfg_attr(feature = "serde", derive(Deserialize, Serialize))] |
160 | 162 | #[cfg_attr(feature = "bytemuck", derive(Pod, Zeroable))] |
161 | | -#[derive(Clone, Copy, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| 163 | +#[derive(Clone, Copy, Default, Eq, Ord, PartialEq, PartialOrd)] |
162 | 164 | #[cfg_attr(feature = "dev-context-only-utils", derive(Arbitrary))] |
163 | 165 | pub struct Pubkey(pub(crate) [u8; 32]); |
164 | 166 |
|
| 167 | +/// Custom impl of Hash for Pubkey |
| 168 | +/// allows us to skip hashing the length of the pubkey |
| 169 | +/// which is always the same anyway |
| 170 | +impl Hash for Pubkey { |
| 171 | + fn hash<H: Hasher>(&self, state: &mut H) { |
| 172 | + state.write(self.as_array()); |
| 173 | + } |
| 174 | +} |
| 175 | + |
#[cfg(all(feature = "rand", not(target_os = "solana")))]
mod hasher {
    use {
        crate::PUBKEY_BYTES,
        core::{
            cell::Cell,
            hash::{BuildHasher, Hasher},
        },
        rand::{thread_rng, Rng},
    };

    /// Largest start index at which an 8-byte window still fits inside a pubkey.
    const MAX_OFFSET: usize = PUBKEY_BYTES - size_of::<u64>();

    /// A faster, but less collision resistant hasher for pubkeys.
    ///
    /// Specialized hasher that uses an 8-byte subslice of the pubkey,
    /// starting at `offset`, as the hash value. Should not be used when
    /// collisions might be used to mount DOS attacks.
    ///
    /// Every call to `write` replaces the stored value, so only the most
    /// recently written input determines what `finish` returns.
    ///
    /// Using this results in about 4x faster lookups in a typical hashmap.
    #[derive(Default)]
    pub struct PubkeyHasher {
        /// Start index of the 8-byte window read from each written pubkey.
        offset: usize,
        /// The value that `finish` reports.
        state: u64,
    }

    impl Hasher for PubkeyHasher {
        /// Returns the 8 bytes captured by the last `write`, or the initial
        /// state if nothing has been written.
        #[inline]
        fn finish(&self) -> u64 {
            self.state
        }

        /// Stores the 8-byte window of `bytes` starting at `offset`.
        ///
        /// # Panics
        ///
        /// In debug builds, panics if `bytes` is not exactly `PUBKEY_BYTES`
        /// long. In any build, panics if the window does not fit in `bytes`.
        #[inline]
        fn write(&mut self, bytes: &[u8]) {
            debug_assert_eq!(
                bytes.len(),
                PUBKEY_BYTES,
                "This hasher is intended to be used with pubkeys and nothing else"
            );
            // For pubkey-sized input this cannot panic: offset is at most
            // MAX_OFFSET (inclusive), so the window ends at or before PUBKEY_BYTES.
            let chunk: &[u8; size_of::<u64>()] = bytes[self.offset..self.offset + size_of::<u64>()]
                .try_into()
                .expect("a slice of exactly 8 bytes always converts to [u8; 8]");
            self.state = u64::from_ne_bytes(*chunk);
        }
    }

    /// A builder for faster, but less collision resistant hasher for pubkeys.
    ///
    /// Initializes `PubkeyHasher` instances that use an 8-byte
    /// slice of the pubkey as the hash value. Should not be used when
    /// collisions might be used to mount DOS attacks.
    ///
    /// Using this results in about 4x faster lookups in a typical hashmap.
    #[derive(Clone)]
    pub struct PubkeyHasherBuilder {
        /// Window start index handed to every hasher this builder creates.
        offset: usize,
    }

    impl Default for PubkeyHasherBuilder {
        /// Default construct the PubkeyHasherBuilder.
        ///
        /// The position of the slice is determined initially
        /// through random draw and then by incrementing a thread-local
        /// counter, wrapping back to zero once the window would no longer
        /// fit inside a pubkey. This way each hashmap can be expected to use
        /// a slightly different slice. This is essentially the same
        /// mechanism as what is used by `RandomState`.
        fn default() -> Self {
            std::thread_local!(static OFFSET: Cell<usize> = {
                let mut rng = thread_rng();
                Cell::new(rng.gen_range(0..MAX_OFFSET))
            });

            let offset = OFFSET.with(|offset| {
                // Advance first, then wrap, so the result is always in 0..=MAX_OFFSET.
                let mut next_offset = offset.get() + 1;
                if next_offset > MAX_OFFSET {
                    next_offset = 0;
                }
                offset.set(next_offset);
                next_offset
            });
            PubkeyHasherBuilder { offset }
        }
    }

    impl BuildHasher for PubkeyHasherBuilder {
        type Hasher = PubkeyHasher;

        /// Creates a hasher that reads its window at this builder's offset.
        #[inline]
        fn build_hasher(&self) -> Self::Hasher {
            PubkeyHasher {
                offset: self.offset,
                state: 0,
            }
        }
    }

    #[cfg(test)]
    mod tests {
        use {
            super::PubkeyHasherBuilder,
            crate::Pubkey,
            core::hash::{BuildHasher, Hasher},
        };

        #[test]
        fn test_pubkey_hasher_builder() {
            let key = Pubkey::new_unique();
            let builder = PubkeyHasherBuilder::default();
            let mut hasher1 = builder.build_hasher();
            let mut hasher2 = builder.build_hasher();
            hasher1.write(key.as_array());
            hasher2.write(key.as_array());
            assert_eq!(
                hasher1.finish(),
                hasher2.finish(),
                "Hashers made with same builder should be identical"
            );
            // Make sure that when we make new builders we get different slices
            // chosen for hashing. A fresh builder is created on every attempt
            // so that each iteration really samples a new offset.
            for _ in 0..64 {
                let builder2 = PubkeyHasherBuilder::default();
                let mut hasher3 = builder2.build_hasher();
                hasher3.write(key.as_array());
                if hasher1.finish() != hasher3.finish() {
                    return;
                }
            }
            panic!("Hashers built with different builder should be different due to random offset");
        }

        #[test]
        fn test_pubkey_hasher() {
            let key1 = Pubkey::new_unique();
            let key2 = Pubkey::new_unique();
            let builder = PubkeyHasherBuilder::default();
            let mut hasher1 = builder.build_hasher();
            let mut hasher2 = builder.build_hasher();
            hasher1.write(key1.as_array());
            hasher2.write(key2.as_array());
            assert_ne!(hasher1.finish(), hasher2.finish());
        }
    }
}
| 317 | +#[cfg(all(feature = "rand", not(target_os = "solana")))] |
| 318 | +pub use hasher::{PubkeyHasher, PubkeyHasherBuilder}; |
| 319 | + |
// Empty impl body: `Pubkey` overrides nothing and relies entirely on the
// `Sanitize` trait's provided defaults.
impl solana_sanitize::Sanitize for Pubkey {}
166 | 321 |
|
167 | 322 | // Use strum when testing to ensure our FromPrimitive |
@@ -322,12 +477,33 @@ impl Pubkey { |
322 | 477 | pub fn new_unique() -> Self { |
323 | 478 | use solana_atomic_u64::AtomicU64; |
324 | 479 | static I: AtomicU64 = AtomicU64::new(1); |
325 | | - |
326 | | - let mut b = [0u8; 32]; |
327 | | - let i = I.fetch_add(1); |
| 480 | + type T = u32; |
| 481 | + const COUNTER_BYTES: usize = size_of::<T>(); |
| 482 | + let mut b = [0u8; PUBKEY_BYTES]; |
| 483 | + let mut i = I.fetch_add(1) as T; |
328 | 484 | // use big endian representation to ensure that recent unique pubkeys |
329 | | - // are always greater than less recent unique pubkeys |
330 | | - b[0..8].copy_from_slice(&i.to_be_bytes()); |
| 485 | + // are always greater than less recent unique pubkeys. |
| 486 | + b[0..COUNTER_BYTES].copy_from_slice(&i.to_be_bytes()); |
| 487 | + // fill the rest of the pubkey with pseudorandom numbers to make |
| 488 | + // data statistically similar to real pubkeys. |
| 489 | + #[cfg(any(feature = "std", target_arch = "wasm32"))] |
| 490 | + { |
| 491 | + extern crate std; |
| 492 | + let mut hash = std::hash::DefaultHasher::new(); |
| 493 | + for slice in b[COUNTER_BYTES..].chunks_mut(COUNTER_BYTES) { |
| 494 | + hash.write_u32(i); |
| 495 | + i += 1; |
| 496 | + slice.copy_from_slice(&hash.finish().to_ne_bytes()[0..COUNTER_BYTES]); |
| 497 | + } |
| 498 | + } |
| 499 | + // if std is not available, just replicate last byte of the counter. |
| 500 | + // this is not as good as a proper hash, but at least it is uniform |
| 501 | + #[cfg(not(any(feature = "std", target_arch = "wasm32")))] |
| 502 | + { |
| 503 | + for b in b[COUNTER_BYTES..].iter_mut() { |
| 504 | + *b = (i & 0xFF) as u8; |
| 505 | + } |
| 506 | + } |
331 | 507 | Self::from(b) |
332 | 508 | } |
333 | 509 |
|
|
0 commit comments