Skip to content
This repository was archived by the owner on Jan 13, 2025. It is now read-only.

Commit 539a67a

Browse files
glupescusakridge
authored and committed
Add support for OpenCL framework
1 parent 9148749 commit 539a67a

File tree

12 files changed

+8211
-0
lines changed

12 files changed

+8211
-0
lines changed

src/Makefile

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,20 @@ CHACHA_TEST_BIN=cuda_chacha_test
1616
ECC_TEST_BIN=cuda_ed25519_verify
1717
LIB=cuda-crypt
1818

19+
# Name of the OpenCL ed25519 verification test binary.
CL_ECC_TEST_BIN=cl_ed25519_verify
20+
# Base name of the OpenCL library; the rules further down build lib$(CL_LIB).a and lib$(CL_LIB).so.
CL_LIB=cl-crypt
21+
22+
# Directory put on the include path of OpenCL objects via CL_CFLAGS.
CL_HEADER_DIR:=opencl-platform
23+
1924
CUDA_HEADER_DIR:=cuda-headers
2025
CUDA_SHA256_DIR:=cuda-sha256
2126

27+
# Host C++ compiler used for every OpenCL object; only set when not already defined.
CXX ?= g++
2228
# Flags passed to $(NVCC) for the CUDA sources.
CFLAGS+=-DENDIAN_NEUTRAL -DLTC_NO_ASM -I$(CUDA_HEADER_DIR) -I$(CUDA_SHA256_DIR)
29+
#use -DUSE_RDTSC for Windows compilation
30+
# Flags for the OpenCL build: position-independent C++11 code with OPENCL_VARIANT defined
# (gpu_common.h uses that macro to compile out its CUDA_CHK / cuda_assert section).
CL_CFLAGS=-fPIC -std=c++11 -DENDIAN_NEUTRAL -DOPENCL_VARIANT -DLTC_NO_ASM -I$(CUDA_HEADER_DIR) -I$(CUDA_SHA256_DIR) -I$(CL_HEADER_DIR) -Icommon/
31+
32+
# CUDA install root; its lib64 directory is added to the link path of the OpenCL test binary.
CUDA_PATH ?= /usr/local/cuda-9.1
2333

2434
all: $V/$(CHACHA_TEST_BIN) $V/$(ECC_TEST_BIN) $(V)/lib$(LIB).so
2535

@@ -49,13 +59,33 @@ $V/gpu_ctx.o: $(addprefix $(ECC_DIR)/,gpu_ctx.cu gpu_ctx.h)
4959
@mkdir -p $(@D)
5060
$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
5161

62+
# Directory holding the OpenCL host sources for ed25519.
CL_ECC_DIR:=opencl-ecc-ed25519
63+
64+
# cl_sign.o: only the first prerequisite ($<, sign.cpp) is compiled; the remaining
# entries are listed so that editing them triggers a rebuild.
CL_SIGN_SRCS:=$(CL_ECC_DIR)/sign.cpp $(ECC_DIR)/fe.cu $(ECC_DIR)/ed25519.h
65+
$V/cl_sign.o: $(CL_SIGN_SRCS)
66+
@mkdir -p $(@D)
67+
$(CXX) $(CL_CFLAGS) -I$(ECC_DIR) -c $< -o $@
68+
69+
# cl_verify.o: same pattern, compiling verify.cpp.
CL_VERIFY_SRCS:=$(CL_ECC_DIR)/verify.cpp $(ECC_DIR)/seed.cu $(ECC_DIR)/ed25519.h
70+
$V/cl_verify.o: $(CL_VERIFY_SRCS)
71+
@mkdir -p $(@D)
72+
$(CXX) $(CL_CFLAGS) -I$(ECC_DIR) -c $< -o $@
73+
74+
# cl_gpu_ctx.o: compiles gpu_ctx.cpp; gpu_ctx.h is a rebuild dependency only.
$V/cl_gpu_ctx.o: $(addprefix $(CL_ECC_DIR)/,gpu_ctx.cpp gpu_ctx.h)
75+
@mkdir -p $(@D)
76+
$(CXX) $(CL_CFLAGS) -I$(ECC_DIR) -c $< -o $@
77+
5278
CHACHA_DIR:=cuda-crypt
5379
CHACHA_SRCS:=$(addprefix $(CHACHA_DIR)/,chacha_cbc.cu chacha.h common.cu)
5480

5581
$V/chacha_cbc.o: $(CHACHA_SRCS)
5682
@mkdir -p $(@D)
5783
$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
5884

85+
# Compiles the OpenCL platform initialization source. Unlike the ed25519 objects
# above, this rule does not add -I$(ECC_DIR) to the include path.
$V/cl_init_platform.o: opencl-platform/cl_init_platform.cpp
86+
@mkdir -p $(@D)
87+
$(CXX) $(CL_CFLAGS) -c $< -o $@
88+
5989
AES_SRCS:=$(addprefix $(CHACHA_DIR)/,aes_cbc.cu aes_core.cu modes_lcl.h common.cu)
6090

6191
$V/aes_cbc.o: $(AES_SRCS)
@@ -69,6 +99,24 @@ $V/poh_verify.o: $(POH_SRCS)
6999
@mkdir -p $(@D)
70100
$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
71101

102+
# Objects that make up the OpenCL library.
CL_CPU_GPU_OBJS=$(addprefix $V/,cl_init_platform.o cl_verify.o cl_gpu_ctx.o cl_sign.o)
103+
104+
# NOTE(review): despite the .o suffix this target is an `ar` archive of the objects
# above. It appears to mirror the CUDA crypt-dlink.o target; confirm whether the
# OpenCL build needs this intermediate at all.
$V/cl_crypt-dlink.o: $(CL_CPU_GPU_OBJS)
105+
ar rvs $@ $^
106+
107+
# Static library: archives the dlink archive together with the individual objects.
$V/lib$(CL_LIB).a: $V/cl_crypt-dlink.o $(CL_CPU_GPU_OBJS)
108+
ar rcs $@ $^
109+
110+
# Shared library built from the same prerequisites.
# NOTE(review): -shared and --shared look redundant; confirm one can be dropped.
$V/lib$(CL_LIB).so: $V/cl_crypt-dlink.o $(CL_CPU_GPU_OBJS)
111+
$(CXX) -shared --shared $^ -o $@
112+
113+
# Object for the test program's main.cpp.
$V/cl_ecc_main.o: $(CL_ECC_DIR)/main.cpp $(ECC_DIR)/ed25519.h
114+
@mkdir -p $(@D)
115+
$(CXX) $(CL_CFLAGS) -I$(ECC_DIR) -c $< -o $@
116+
117+
# Test binary: links cl_ecc_main.o against the shared library ($^ expands to both).
# NOTE(review): -lpthread is placed before the objects and -lOpenCL after them; with
# order-sensitive linking (e.g. --as-needed) the early -lpthread may have no effect; confirm.
$V/$(CL_ECC_TEST_BIN): $V/cl_ecc_main.o $V/lib$(CL_LIB).so
118+
$(CXX) $(CL_CFLAGS) -Wl,-v -L$(CUDA_PATH)/lib64 -L$V -lpthread $^ -lOpenCL -o $@
119+
72120
CPU_GPU_OBJS=$(addprefix $V/,chacha_cbc.o aes_cbc.o verify.o poh_verify.o gpu_ctx.o sign.o seed.o keypair.o)
73121

74122
$V/crypt-dlink.o: $(CPU_GPU_OBJS)

src/cuda-headers/gpu_common.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ extern bool g_verbose;
1010

1111
#define ROUND_UP_DIV(x, y) (((x) + (y) - 1) / (y))
1212

13+
#ifndef OPENCL_VARIANT
14+
1315
#define CUDA_CHK(ans) { cuda_assert((ans), __FILE__, __LINE__); }
1416

1517
inline void cuda_assert(cudaError_t err, const char *file, int line)
@@ -22,3 +24,5 @@ inline void cuda_assert(cudaError_t err, const char *file, int line)
2224
}
2325

2426
#endif
27+
28+
#endif

src/opencl-ecc-ed25519/gpu_ctx.cpp

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
#include "ed25519.h"
2+
#include "gpu_ctx.h"
3+
#include <pthread.h>
4+
#include "gpu_common.h"
5+
6+
// Global lock: held while running one-time initialization and while advancing
// the round-robin cursors below.
static pthread_mutex_t g_ctx_mutex = PTHREAD_MUTEX_INITIALIZER;
7+
8+
// Dimensions of the context table; both are currently fixed at 1.
#define MAX_NUM_GPUS 1
9+
#define MAX_QUEUE_SIZE 1
10+
11+
// One context (device buffers + mutex) per [gpu][queue] slot, zero-initialized.
static gpu_ctx_t g_gpu_ctx[MAX_NUM_GPUS][MAX_QUEUE_SIZE] = {0};
12+
// Index of the GPU the next get_gpu_ctx() call will pick.
static uint32_t g_cur_gpu = 0;
13+
// Per-GPU index of the queue the next get_gpu_ctx() call will pick.
static uint32_t g_cur_queue[MAX_NUM_GPUS] = {0};
14+
// -1 until initialization has run; afterwards MAX_NUM_GPUS, or 0 if a mutex failed to initialize.
static int32_t g_total_gpus = -1;
15+
16+
static bool cl_crypt_init_locked() {
17+
if (g_total_gpus == -1) {
18+
g_total_gpus = MAX_NUM_GPUS;
19+
LOG("total_gpus: %d\n", g_total_gpus);
20+
for (int gpu = 0; gpu < g_total_gpus; gpu++) {
21+
for (int queue = 0; queue < MAX_QUEUE_SIZE; queue++) {
22+
int err = pthread_mutex_init(&g_gpu_ctx[gpu][queue].mutex, NULL);
23+
if (err != 0) {
24+
fprintf(stderr, "pthread_mutex_init error %d gpu: %d queue: %d\n",
25+
err, gpu, queue);
26+
g_total_gpus = 0;
27+
return false;
28+
}
29+
}
30+
}
31+
}
32+
return g_total_gpus > 0;
33+
}
34+
35+
bool ed25519_init() {
36+
pthread_mutex_lock(&g_ctx_mutex);
37+
bool success = cl_crypt_init_locked();
38+
pthread_mutex_unlock(&g_ctx_mutex);
39+
return success;
40+
}
41+
42+
// Picks the next context in round-robin order and returns it with its mutex
// locked; the caller hands it back with release_gpu_ctx().
//
// Returns NULL when initialization reports no usable GPU. The global lock only
// protects slot selection; it is dropped before blocking on the per-context
// mutex.
gpu_ctx_t* get_gpu_ctx() {
    LOG("locking global mutex\n");
    pthread_mutex_lock(&g_ctx_mutex);

    const bool have_gpus = cl_crypt_init_locked();
    if (!have_gpus) {
        pthread_mutex_unlock(&g_ctx_mutex);
        LOG("No GPUs, exiting...\n");
        return NULL;
    }

    // Take the current GPU, then advance the GPU cursor (wrapping).
    const int32_t gpu_idx = g_cur_gpu;
    g_cur_gpu = (g_cur_gpu + 1) % g_total_gpus;

    // Take that GPU's current queue, then advance its queue cursor (wrapping).
    const int32_t queue_idx = g_cur_queue[gpu_idx];
    g_cur_queue[gpu_idx] = (g_cur_queue[gpu_idx] + 1) % MAX_QUEUE_SIZE;

    pthread_mutex_unlock(&g_ctx_mutex);

    gpu_ctx_t* const picked = &g_gpu_ctx[gpu_idx][queue_idx];
    LOG("locking contex mutex queue: %d gpu: %d\n", queue_idx, gpu_idx);
    pthread_mutex_lock(&picked->mutex);

    LOG("selecting gpu: %d queue: %d\n", gpu_idx, queue_idx);

    return picked;
}
68+
69+
void setup_gpu_ctx(verify_ctx_t* cur_ctx,
70+
const gpu_Elems* elems,
71+
uint32_t num_elems,
72+
uint32_t message_size,
73+
uint32_t total_packets,
74+
uint32_t total_packets_size,
75+
uint32_t total_signatures,
76+
const uint32_t* message_lens,
77+
const uint32_t* public_key_offsets,
78+
const uint32_t* signature_offsets,
79+
const uint32_t* message_start_offsets,
80+
size_t out_size
81+
) {
82+
int ret;
83+
size_t offsets_size = total_signatures * sizeof(uint32_t);
84+
85+
LOG("device allocate. packets: %d out: %d offsets_size: %zu\n",
86+
total_packets_size, (int)out_size, offsets_size);
87+
88+
if (cur_ctx->packets == NULL ||
89+
total_packets_size > cur_ctx->packets_size_bytes) {
90+
clReleaseMemObject(cur_ctx->packets);
91+
cur_ctx->packets = clCreateBuffer(context, CL_MEM_READ_WRITE, total_packets_size, NULL, &ret);
92+
CL_ERR( ret );
93+
94+
cur_ctx->packets_size_bytes = total_packets_size;
95+
}
96+
97+
if (cur_ctx->out == NULL || cur_ctx->out_size_bytes < out_size) {
98+
clReleaseMemObject(cur_ctx->out);
99+
cur_ctx->out = clCreateBuffer(context, CL_MEM_READ_WRITE, out_size, NULL, &ret);
100+
CL_ERR( ret );
101+
102+
cur_ctx->out_size_bytes = total_signatures;
103+
}
104+
105+
if (cur_ctx->public_key_offsets == NULL || cur_ctx->offsets_len < total_signatures) {
106+
107+
clReleaseMemObject(cur_ctx->public_key_offsets);
108+
cur_ctx->public_key_offsets = clCreateBuffer(context, CL_MEM_READ_WRITE, offsets_size, NULL, &ret);
109+
CL_ERR( ret );
110+
111+
clReleaseMemObject(cur_ctx->signature_offsets);
112+
cur_ctx->signature_offsets = clCreateBuffer(context, CL_MEM_READ_WRITE, offsets_size, NULL, &ret);
113+
CL_ERR( ret );
114+
115+
clReleaseMemObject(cur_ctx->message_start_offsets);
116+
cur_ctx->message_start_offsets = clCreateBuffer(context, CL_MEM_READ_WRITE, offsets_size, NULL, &ret);
117+
CL_ERR( ret );
118+
119+
clReleaseMemObject(cur_ctx->message_lens);
120+
cur_ctx->message_lens = clCreateBuffer(context, CL_MEM_READ_WRITE, offsets_size, NULL, &ret);
121+
CL_ERR( ret );
122+
123+
cur_ctx->offsets_len = total_signatures;
124+
}
125+
126+
CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->public_key_offsets, CL_TRUE, 0, offsets_size, public_key_offsets, 0, NULL, NULL));
127+
CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->signature_offsets, CL_TRUE, 0, offsets_size, signature_offsets, 0, NULL, NULL));
128+
CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->message_start_offsets, CL_TRUE, 0, offsets_size, message_start_offsets, 0, NULL, NULL));
129+
CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->message_lens, CL_TRUE, 0, offsets_size, message_lens, 0, NULL, NULL));
130+
131+
size_t cur = 0;
132+
for (size_t i = 0; i < num_elems; i++) {
133+
LOG("i: %zu size: %d\n", i, elems[i].num * message_size);
134+
CL_ERR( clEnqueueWriteBuffer(cmd_queue, cur_ctx->packets, CL_TRUE, cur * message_size, elems[i].num * message_size, elems[i].elems, 0, NULL, NULL));
135+
cur += elems[i].num;
136+
}
137+
}
138+
139+
140+
// Hands a context obtained from get_gpu_ctx() back by unlocking its mutex.
void release_gpu_ctx(gpu_ctx_t* cur_ctx) {
    pthread_mutex_t* const ctx_lock = &cur_ctx->mutex;
    pthread_mutex_unlock(ctx_lock);
}
143+
144+
void ed25519_free_gpu_mem() {
145+
for (size_t gpu = 0; gpu < MAX_NUM_GPUS; gpu++) {
146+
for (size_t queue = 0; queue < MAX_QUEUE_SIZE; queue++) {
147+
verify_ctx_t* verify_ctx = &g_gpu_ctx[gpu][queue].verify_ctx;
148+
149+
CL_ERR(clReleaseMemObject(verify_ctx->packets));
150+
CL_ERR(clReleaseMemObject(verify_ctx->out));
151+
CL_ERR(clReleaseMemObject(verify_ctx->message_lens));
152+
CL_ERR(clReleaseMemObject(verify_ctx->public_key_offsets));
153+
CL_ERR(clReleaseMemObject(verify_ctx->signature_offsets));
154+
CL_ERR(clReleaseMemObject(verify_ctx->message_start_offsets));
155+
}
156+
}
157+
}

src/opencl-ecc-ed25519/gpu_ctx.h

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#ifndef GPU_CTX_H
2+
#define GPU_CTX_H
3+
4+
#include "cl_common.h"
5+
6+
#include <inttypes.h>
7+
#include "ed25519.h"
8+
9+
#ifdef __cplusplus
10+
extern "C" {
11+
#endif
12+
13+
// Device-side buffers and bookkeeping for one verification context.
// setup_gpu_ctx() creates and fills the cl_mem buffers; ed25519_free_gpu_mem()
// releases them.
typedef struct {
14+
// Packet data uploaded from the caller's gpu_Elems array.
cl_mem packets;
15+
// Capacity bookkeeping for `packets`, in bytes.
uint32_t packets_size_bytes;
16+
17+
// Output buffer; setup_gpu_ctx() sizes it from its out_size argument.
cl_mem out;
18+
// Capacity bookkeeping for `out`; compared against the requested out_size.
size_t out_size_bytes;
19+
20+
// Per-signature uint32_t arrays, all four (re)allocated together.
cl_mem public_key_offsets;
21+
cl_mem message_start_offsets;
22+
cl_mem signature_offsets;
23+
cl_mem message_lens;
24+
// Number of uint32_t entries each of the four buffers above can hold.
size_t offsets_len;
25+
26+
// NOTE(review): the three fields below are not referenced by gpu_ctx.cpp;
// presumably they are maintained by the verify code; confirm.
size_t num;
27+
size_t num_signatures;
28+
uint32_t total_packets_len;
29+
} verify_ctx_t;
30+
31+
// One schedulable context slot: the device buffers plus the mutex that
// get_gpu_ctx() locks and release_gpu_ctx() unlocks.
typedef struct {
32+
verify_ctx_t verify_ctx;
33+
34+
// NOTE(review): pthread_mutex_t needs <pthread.h>, which this header does not
// include itself; confirm cl_common.h or ed25519.h provides it.
pthread_mutex_t mutex;
35+
} gpu_ctx_t;
36+
37+
// Returns the next context in round-robin order with its mutex locked, or NULL
// when no GPU is available. Pair every successful call with release_gpu_ctx().
extern gpu_ctx_t* get_gpu_ctx();
38+
// Unlocks the mutex of a context obtained from get_gpu_ctx().
extern void release_gpu_ctx(gpu_ctx_t*);
39+
40+
// Releases the device buffers of every context slot.
extern void ed25519_free_gpu_mem();
41+
42+
// Grows the context's device buffers as needed and uploads one batch:
//   elems / num_elems      packet groups; group i contributes elems[i].num * message_size bytes
//   message_size           size in bytes of one packet
//   total_packets          not used by the implementation in gpu_ctx.cpp
//   total_packets_size     required capacity of the packets buffer, in bytes
//   total_signatures       number of entries in each of the four uint32_t arrays
//   message_lens, public_key_offsets, signature_offsets, message_start_offsets
//                          host arrays copied to the matching device buffers
//   out_size               required capacity of the output buffer, in bytes
extern void setup_gpu_ctx(verify_ctx_t* cur_ctx,
43+
const gpu_Elems* elems,
44+
uint32_t num_elems,
45+
uint32_t message_size,
46+
uint32_t total_packets,
47+
uint32_t total_packets_size,
48+
uint32_t total_signatures,
49+
const uint32_t* message_lens,
50+
const uint32_t* public_key_offsets,
51+
const uint32_t* signature_offsets,
52+
const uint32_t* message_start_offsets,
53+
size_t out_size
54+
);
55+
56+
#ifdef __cplusplus
57+
}
58+
#endif
59+
60+
#endif

0 commit comments

Comments
 (0)