3535#define SQRT_2 0x5a827999
3636#define SQRT_3 0x6ed9eba1
3737
38+ /*
39+ * If enabled, will check bitmap after calculating just the
40+ * first 32 bits of 'b' (does not apply to nt-long-opencl).
41+ */
42+ #define EARLY_REJECT 1
43+
3844#if USE_LOCAL_BITMAPS
3945#define BITMAPS_TYPE __local
4046#else
5056#define CACHE_TYPE __global
5157#endif
5258
53- #if BITMAP_SIZE_BITS_LESS_ONE < 0xffffffff
54- #define BITMAP_SIZE_BITS (BITMAP_SIZE_BITS_LESS_ONE + 1)
59+ #if BITMAP_MASK < 0xffffffff
60+ #define BITMAP_SIZE_BITS (BITMAP_MASK + 1)
5561#else
56- #error BITMAP_SIZE_BITS_LESS_ONE too large
62+ #error BITMAP_MASK too large
5763#endif
5864
59- inline void nt_crypt (uint * hash , uint * nt_buffer , uint md4_size )
65+ inline int nt_crypt (uint * hash , uint * nt_buffer , uint md4_size , BITMAPS_TYPE uint * bitmaps )
6066{
6167 /* Round 1 */
6268 hash [0 ] = 0xFFFFFFFF + nt_buffer [0 ] ; hash [0 ] = rotate (hash [0 ], 3u );
@@ -130,19 +136,38 @@ inline void nt_crypt(uint *hash, uint *nt_buffer, uint md4_size)
130136 hash [3 ] += MD4_H2 (hash [0 ], hash [1 ], hash [2 ]) + nt_buffer [9 ] + SQRT_3 ; hash [3 ] = rotate (hash [3 ], 9u );
131137 hash [2 ] += MD4_H (hash [3 ], hash [0 ], hash [1 ]) + nt_buffer [5 ] + SQRT_3 ; hash [2 ] = rotate (hash [2 ], 11u );
132138 hash [1 ] += MD4_H2 (hash [2 ], hash [3 ], hash [0 ]) + nt_buffer [13 ];
139+
140+ #if EARLY_REJECT && PLAINTEXT_LENGTH <= 27
141+ uint bitmap_index = hash [1 ] & BITMAP_MASK ;
142+ uint tmp = (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 0 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
143+ #if SELECT_CMP_STEPS == 8
144+ bitmap_index = (hash [1 ] >> 8 ) & BITMAP_MASK ;
145+ tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 1 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
146+ bitmap_index = (hash [1 ] >> 16 ) & BITMAP_MASK ;
147+ tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 2 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
148+ bitmap_index = (hash [1 ] >> 24 ) & BITMAP_MASK ;
149+ tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 3 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
150+ #elif SELECT_CMP_STEPS == 4
151+ bitmap_index = (hash [1 ] >> 16 ) & BITMAP_MASK ;
152+ tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 1 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
153+ #endif /* SELECT_CMP_STEPS == 8 */
154+ if (likely (!tmp ))
155+ return 0 ;
156+ #endif /* EARLY_REJECT && PLAINTEXT_LENGTH <= 27 */
157+
133158 uint hash1 = hash [1 ] + SQRT_3 ; hash1 = rotate (hash1 , 15u );
134159
135160 hash [0 ] += MD4_H (hash [3 ], hash [2 ], hash1 ) + nt_buffer [3 ] + SQRT_3 ; hash [0 ] = rotate (hash [0 ], 3u );
136- hash [3 ] += MD4_H2 (hash [2 ], hash1 , hash [0 ]) + nt_buffer [11 ] + SQRT_3 ; hash [3 ] = rotate (hash [3 ], 9u );
137- hash [2 ] += MD4_H (hash1 , hash [0 ], hash [3 ]) + nt_buffer [7 ] + SQRT_3 ; hash [2 ] = rotate (hash [2 ], 11u );
138161
139162#if PLAINTEXT_LENGTH > 27
140163 if (likely (md4_size <= (27 << 4 )))
141- return ;
164+ return 1 ;
142165
143166 /*
144167 * Complete the first of a multi-block MD4 (reversing steps not possible).
145168 */
169+ hash [3 ] += MD4_H2 (hash [2 ], hash1 , hash [0 ]) + nt_buffer [11 ] + SQRT_3 ; hash [3 ] = rotate (hash [3 ], 9u );
170+ hash [2 ] += MD4_H (hash1 , hash [0 ], hash [3 ]) + nt_buffer [7 ] + SQRT_3 ; hash [2 ] = rotate (hash [2 ], 11u );
146171 hash [1 ] = hash1 + MD4_H2 (hash [2 ], hash [3 ], hash [0 ]) + nt_buffer [15 ] + SQRT_3 ; hash [1 ] = rotate (hash [1 ], 15u );
147172 hash [0 ] += INIT_A ;
148173 hash [1 ] += INIT_B ;
@@ -233,15 +258,16 @@ inline void nt_crypt(uint *hash, uint *nt_buffer, uint md4_size)
233258 * This bogus reverse adds a little work to long crypts instead
234259 * of losing the real reverse for single block crypts.
235260 */
236- hash [0 ] -= INIT_A ;
237- hash [1 ] -= INIT_B ;
238- hash [2 ] -= INIT_C ;
239261 hash [3 ] -= INIT_D ;
262+ hash [2 ] -= INIT_C ;
263+ hash [1 ] -= INIT_B ;
264+ hash [0 ] -= INIT_A ;
240265 hash [1 ] = (hash [1 ] >> 15 ) | (hash [1 ] << 17 );
241- hash [1 ] -= SQRT_3 + (hash [2 ] ^ hash [3 ] ^ hash [0 ]);
242- hash [1 ] = (hash [1 ] >> 15 ) | ( hash [ 1 ] << 17 );
266+ hash [1 ] -= SQRT_3 + MD4_H2 (hash [2 ], hash [3 ], hash [0 ]);
267+ hash [1 ] = rotate (hash [1 ], -15u );
243268 hash [1 ] -= SQRT_3 ;
244269#endif
270+ return 1 ;
245271}
246272
247273#if __OS_X__ && (cpu (DEVICE_INFO ) || gpu_nvidia (DEVICE_INFO ))
@@ -341,47 +367,32 @@ inline uint prepare_key(__global uint *key, uint length, uint *nt_buffer)
341367#endif /* UTF_8 */
342368
343369inline void cmp_final (uint gid ,
344- uint iter ,
345- uint * hash ,
346- __global uint * offset_table ,
347- __global uint * hash_table ,
348- __global uint * return_hashes ,
349- volatile __global uint * output ,
350- volatile __global uint * bitmap_dupe ) {
351-
352- uint t , offset_table_index , hash_table_index ;
353- unsigned long LO , HI ;
354- unsigned long p ;
355-
356- HI = ((unsigned long )hash [3 ] << 32 ) | (unsigned long )hash [2 ];
357- LO = ((unsigned long )hash [1 ] << 32 ) | (unsigned long )hash [0 ];
358-
359- p = (HI % OFFSET_TABLE_SIZE ) * SHIFT64_OT_SZ ;
360- p += LO % OFFSET_TABLE_SIZE ;
361- p %= OFFSET_TABLE_SIZE ;
362- offset_table_index = (unsigned int )p ;
363-
364- //error: chances of overflow is extremely low.
365- LO += (unsigned long )offset_table [offset_table_index ];
366-
367- p = (HI % HASH_TABLE_SIZE ) * SHIFT64_HT_SZ ;
368- p += LO % HASH_TABLE_SIZE ;
369- p %= HASH_TABLE_SIZE ;
370- hash_table_index = (unsigned int )p ;
371-
372- if (hash_table [hash_table_index ] == hash [0 ])
373- if (hash_table [HASH_TABLE_SIZE + hash_table_index ] == hash [1 ])
374- {
375- /*
376- * Prevent duplicate keys from cracking same hash
377- */
378- if (!(atomic_or (& bitmap_dupe [hash_table_index /32 ], (1U << (hash_table_index % 32 ))) & (1U << (hash_table_index % 32 )))) {
370+ uint iter ,
371+ uint * hash ,
372+ __global uint * offset_table ,
373+ __global uint * hash_table ,
374+ volatile __global uint * output ,
375+ volatile __global uint * bitmap_dupe )
376+ {
377+
378+ uint t , hash_table_index ;
379+ ulong hash64 ;
380+
381+ hash64 = ((ulong )hash [1 ] << 32 ) | (ulong )hash [0 ];
382+ hash64 += (ulong )offset_table [hash64 % OFFSET_TABLE_SIZE ];
383+ hash_table_index = hash64 % HASH_TABLE_SIZE ;
384+
385+ if (hash_table [hash_table_index ] == hash [0 ] &&
386+ hash_table [hash_table_index + HASH_TABLE_SIZE ] == hash [1 ]) {
387+ /*
388+ * Prevent duplicate keys from cracking same hash
389+ */
390+ if (!(atomic_or (& bitmap_dupe [hash_table_index / 32 ],
391+ (1U << (hash_table_index % 32 ))) & (1U << (hash_table_index % 32 )))) {
379392 t = atomic_inc (& output [0 ]);
380- output [1 + 3 * t ] = gid ;
381- output [2 + 3 * t ] = iter ;
382- output [3 + 3 * t ] = hash_table_index ;
383- return_hashes [2 * t ] = hash [2 ];
384- return_hashes [2 * t + 1 ] = hash [3 ];
393+ output [3 * t + 1 ] = gid ;
394+ output [3 * t + 2 ] = iter ;
395+ output [3 * t + 3 ] = hash_table_index ;
385396 }
386397 }
387398}
@@ -392,55 +403,59 @@ inline void cmp(uint gid,
392403 BITMAPS_TYPE uint * bitmaps ,
393404 __global uint * offset_table ,
394405 __global uint * hash_table ,
395- __global uint * return_hashes ,
396406 volatile __global uint * output ,
397407 volatile __global uint * bitmap_dupe )
398408{
399409 uint bitmap_index , tmp = 1 ;
400410
401- /* hash[0] += 0x67452301;
402- hash[1] += 0xefcdab89;
403- hash[2] += 0x98badcfe;
404- hash[3] += 0x10325476;*/
405-
406- #if SELECT_CMP_STEPS > 4
407- bitmap_index = hash [0 ] & (BITMAP_SIZE_BITS - 1 );
408- tmp &= (bitmaps [bitmap_index >> 5 ] >> (bitmap_index & 31 )) & 1U ;
409- bitmap_index = (hash [0 ] >> 16 ) & (BITMAP_SIZE_BITS - 1 );
410- tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
411- bitmap_index = hash [1 ] & (BITMAP_SIZE_BITS - 1 );
412- tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 4 ) + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
413- bitmap_index = (hash [1 ] >> 16 ) & (BITMAP_SIZE_BITS - 1 );
411+ #if SELECT_CMP_STEPS == 8
412+ #if !EARLY_REJECT || PLAINTEXT_LENGTH > 27
413+ bitmap_index = hash [1 ] & BITMAP_MASK ;
414+ tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 0 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
415+ bitmap_index = (hash [1 ] >> 8 ) & BITMAP_MASK ;
416+ tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 1 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
417+ bitmap_index = (hash [1 ] >> 16 ) & BITMAP_MASK ;
418+ tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 2 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
419+ bitmap_index = (hash [1 ] >> 24 ) & BITMAP_MASK ;
414420 tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 3 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
415- bitmap_index = hash [2 ] & (BITMAP_SIZE_BITS - 1 );
416- tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 3 ) + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
417- bitmap_index = (hash [2 ] >> 16 ) & (BITMAP_SIZE_BITS - 1 );
421+ #endif
422+ bitmap_index = hash [0 ] & BITMAP_MASK ;
423+ tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 4 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
424+ bitmap_index = (hash [0 ] >> 8 ) & BITMAP_MASK ;
418425 tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 5 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
419- bitmap_index = hash [3 ] & ( BITMAP_SIZE_BITS - 1 ) ;
426+ bitmap_index = ( hash [0 ] >> 16 ) & BITMAP_MASK ;
420427 tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 6 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
421- bitmap_index = (hash [3 ] >> 16 ) & ( BITMAP_SIZE_BITS - 1 ) ;
428+ bitmap_index = (hash [0 ] >> 24 ) & BITMAP_MASK ;
422429 tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 7 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
423- #elif SELECT_CMP_STEPS > 2
424- bitmap_index = hash [3 ] & (BITMAP_SIZE_BITS - 1 );
425- tmp &= (bitmaps [bitmap_index >> 5 ] >> (bitmap_index & 31 )) & 1U ;
426- bitmap_index = hash [2 ] & (BITMAP_SIZE_BITS - 1 );
427- tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
428- bitmap_index = hash [1 ] & (BITMAP_SIZE_BITS - 1 );
429- tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 4 ) + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
430- bitmap_index = hash [0 ] & (BITMAP_SIZE_BITS - 1 );
430+
431+ #elif SELECT_CMP_STEPS == 4
432+ #if !EARLY_REJECT || PLAINTEXT_LENGTH > 27
433+ bitmap_index = hash [1 ] & BITMAP_MASK ;
434+ tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 0 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
435+ bitmap_index = (hash [1 ] >> 16 ) & BITMAP_MASK ;
436+ tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 1 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
437+ #endif
438+ bitmap_index = hash [0 ] & BITMAP_MASK ;
439+ tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 2 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
440+ bitmap_index = (hash [0 ] >> 16 ) & BITMAP_MASK ;
431441 tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 3 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
432- #elif SELECT_CMP_STEPS > 1
433- bitmap_index = hash [3 ] & (BITMAP_SIZE_BITS - 1 );
434- tmp &= (bitmaps [bitmap_index >> 5 ] >> (bitmap_index & 31 )) & 1U ;
435- bitmap_index = hash [2 ] & (BITMAP_SIZE_BITS - 1 );
436- tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
437- #else
438- bitmap_index = hash [3 ] & BITMAP_SIZE_BITS_LESS_ONE ;
439- tmp &= (bitmaps [bitmap_index >> 5 ] >> (bitmap_index & 31 )) & 1U ;
442+
443+ #elif SELECT_CMP_STEPS == 2
444+ #if !EARLY_REJECT || PLAINTEXT_LENGTH > 27
445+ bitmap_index = hash [1 ] & BITMAP_MASK ;
446+ tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 0 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
440447#endif
448+ bitmap_index = hash [0 ] & BITMAP_MASK ;
449+ tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 1 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
450+
451+ #elif !EARLY_REJECT || PLAINTEXT_LENGTH > 27 /* SELECT_CMP_STEPS == 1 */
452+ bitmap_index = hash [1 ] & BITMAP_MASK ;
453+ tmp &= (bitmaps [(BITMAP_SIZE_BITS >> 5 ) * 0 + (bitmap_index >> 5 )] >> (bitmap_index & 31 )) & 1U ;
454+
455+ #endif /* SELECT_CMP_STEPS == 8 */
441456
442457 if (tmp )
443- cmp_final (gid , iter , hash , offset_table , hash_table , return_hashes , output , bitmap_dupe );
458+ cmp_final (gid , iter , hash , offset_table , hash_table , output , bitmap_dupe );
444459}
445460
446461/*
@@ -455,7 +470,6 @@ __kernel void nt(__global uint *keys,
455470 __global uint * bitmaps ,
456471 __global uint * offset_table ,
457472 __global uint * hash_table ,
458- __global uint * return_hashes ,
459473 volatile __global uint * out_hash_ids ,
460474 volatile __global uint * bitmap_dupe )
461475{
@@ -503,8 +517,8 @@ __kernel void nt(__global uint *keys,
503517 uint lws = get_local_size (0 );
504518 __local uint s_bitmaps [(BITMAP_SIZE_BITS >> 5 ) * SELECT_CMP_STEPS ];
505519
506- for (i = 0 ; i < ((( BITMAP_SIZE_BITS >> 5 ) * SELECT_CMP_STEPS ) / lws ) ; i ++ )
507- s_bitmaps [i * lws + lid ] = bitmaps [i * lws + lid ];
520+ for (i = lid ; i < (BITMAP_SIZE_BITS >> 5 ) * SELECT_CMP_STEPS ; i += lws )
521+ s_bitmaps [i ] = bitmaps [i ];
508522
509523 barrier (CLK_LOCAL_MEM_FENCE );
510524
@@ -540,7 +554,7 @@ __kernel void nt(__global uint *keys,
540554#endif
541555#endif
542556#endif
543- nt_crypt (hash , nt_buffer , md4_size );
544- cmp (gid , i , hash , BITMAPS , offset_table , hash_table , return_hashes , out_hash_ids , bitmap_dupe );
557+ if ( nt_crypt (hash , nt_buffer , md4_size , BITMAPS ))
558+ cmp (gid , i , hash , BITMAPS , offset_table , hash_table , out_hash_ids , bitmap_dupe );
545559 }
546560}
0 commit comments