Skip to content

Commit 26f01de

Browse files
committed
NT-OpenCL: Early partial transfer of keybuffer
This technique is already well tested in other formats. It gives about a 10% speed boost on a 2080 Ti, measured against 5300 hashes with a pure wordlist and no mask.
1 parent 0d2ca1b commit 26f01de

File tree

1 file changed

+22
-4
lines changed

1 file changed

+22
-4
lines changed

src/opencl_nt_fmt_plug.c

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,9 @@ static int static_gpu_locations[MASK_FMT_INT_PLHDR];
196196

197197
static unsigned int shift64_ht_sz, shift64_ot_sz;
198198

199-
static unsigned int key_idx = 0;
199+
static size_t key_idx;
200+
static size_t key_offset, idx_offset;
201+
200202
static struct fmt_main *self;
201203

202204
#define STEP 0
@@ -530,6 +532,8 @@ static int get_hash_6(int index) { return hash_table_128[hash_ids[3 + 3 * index]
530532
static void clear_keys(void)
531533
{
532534
key_idx = 0;
535+
key_offset = 0;
536+
idx_offset = 0;
533537
}
534538

535539
static void set_key(char *_key, int index)
@@ -560,6 +564,20 @@ static void set_key(char *_key, int index)
560564
}
561565
if (len)
562566
saved_plain[key_idx++] = *key & (0xffffffffU >> (32 - (len << 3)));
567+
568+
/* Early partial transfer to GPU every 2 MB */
569+
if (4 * key_idx - key_offset > (2 << 20)) {
570+
HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_keys, CL_FALSE, key_offset, 4 * key_idx - key_offset, saved_plain + key_offset / 4, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_keys.");
571+
HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_idx, CL_FALSE, idx_offset, 4 * (index + 1) - idx_offset, saved_idx + idx_offset / 4, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_idx.");
572+
573+
if (!mask_gpu_is_static)
574+
HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_int_key_loc, CL_FALSE, idx_offset, 4 * (index + 1) - idx_offset, saved_int_key_loc + idx_offset / 4, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_int_key_loc.");
575+
576+
HANDLE_CLERROR(clFlush(queue[gpu_id]), "failed in clFlush");
577+
578+
key_offset = 4 * key_idx;
579+
idx_offset = 4 * (index + 1);
580+
}
563581
}
564582

565583
static char *get_key(int index)
@@ -619,12 +637,12 @@ static int crypt_all(int *pcount, struct db_salt *salt)
619637

620638
// copy keys to the device
621639
if (key_idx)
622-
BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_keys, CL_FALSE, 0, 4 * key_idx, saved_plain, 0, NULL, multi_profilingEvent[0]), "failed in clEnqueueWriteBuffer buffer_keys.");
640+
BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_keys, CL_FALSE, key_offset, 4 * key_idx - key_offset, saved_plain + key_offset / 4, 0, NULL, multi_profilingEvent[0]), "failed in clEnqueueWriteBuffer buffer_keys.");
623641

624-
BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_idx, CL_FALSE, 0, 4 * gws, saved_idx, 0, NULL, multi_profilingEvent[1]), "failed in clEnqueueWriteBuffer buffer_idx.");
642+
BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_idx, CL_FALSE, idx_offset, 4 * gws - idx_offset, saved_idx + idx_offset / 4, 0, NULL, multi_profilingEvent[1]), "failed in clEnqueueWriteBuffer buffer_idx.");
625643

626644
if (!mask_gpu_is_static)
627-
BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_int_key_loc, CL_FALSE, 0, 4 * gws, saved_int_key_loc, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_int_key_loc.");
645+
BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_int_key_loc, CL_FALSE, idx_offset, 4 * gws - idx_offset, saved_int_key_loc + idx_offset / 4, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_int_key_loc.");
628646

629647
return ocl_hc_128_extract_info(salt, set_kernel_args, set_kernel_args_kpc, init_kernel, gws, lws, pcount);
630648
}

0 commit comments

Comments
 (0)