Skip to content
This repository was archived by the owner on Jan 13, 2025. It is now read-only.

Commit 943f51a

Browse files
committed
Build CL by default and include it in the release package.
Stub out unimplemented interfaces.
1 parent 539a67a commit 943f51a

File tree

13 files changed

+139
-46
lines changed

13 files changed

+139
-46
lines changed

Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ install:
1919
mkdir -p $(DESTDIR)
2020
ifneq ($(OS),Darwin)
2121
cp -f src/$(V)/libcuda-crypt.so $(DESTDIR)
22+
cp -f src/$(V)/libcl-crypt.so $(DESTDIR)
2223
endif
2324
ls -lh $(DESTDIR)
2425

src/Makefile

+38-18
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,33 @@ LIB=cuda-crypt
1919
CL_ECC_TEST_BIN=cl_ed25519_verify
2020
CL_LIB=cl-crypt
2121

22+
CHACHA_IF_DIR:=crypt-if
23+
2224
CL_HEADER_DIR:=opencl-platform
2325

2426
CUDA_HEADER_DIR:=cuda-headers
2527
CUDA_SHA256_DIR:=cuda-sha256
2628

29+
CUDA_DIR ?= /usr/local/cuda
30+
2731
CXX ?= g++
28-
CFLAGS+=-DENDIAN_NEUTRAL -DLTC_NO_ASM -I$(CUDA_HEADER_DIR) -I$(CUDA_SHA256_DIR)
29-
#use -DUSE_RDTSC for Windows compilation
30-
CL_CFLAGS=-fPIC -std=c++11 -DENDIAN_NEUTRAL -DOPENCL_VARIANT -DLTC_NO_ASM -I$(CUDA_HEADER_DIR) -I$(CUDA_SHA256_DIR) -I$(CL_HEADER_DIR) -Icommon/
3132

32-
CUDA_PATH ?= /usr/local/cuda-9.1
33+
CFLAGS_COMMON:=-DENDIAN_NEUTRAL -DLTC_NO_ASM -I$(CHACHA_IF_DIR)
34+
CFLAGS+=$(CFLAGS_COMMON) -I$(CUDA_HEADER_DIR) -I$(CUDA_SHA256_DIR)
3335

34-
all: $V/$(CHACHA_TEST_BIN) $V/$(ECC_TEST_BIN) $(V)/lib$(LIB).so
36+
#use -DUSE_RDTSC for Windows compilation
37+
# Compiler flags for the host-built OpenCL objects. The common set is extended
# with -O3 (release) or -O0 -g (debug); the value of $(V) selects which of the
# two variants becomes CL_CFLAGS.
# NOTE(review): the CUDA include path is hard-coded to targets/x86_64-linux --
# confirm this is intended when building on other host architectures.
CL_CFLAGS_common:=-fPIC -std=c++11 $(CFLAGS_COMMON) -DOPENCL_VARIANT \
38+
-I$(CL_HEADER_DIR) -Icommon/ \
39+
-I$(CUDA_DIR)/targets/x86_64-linux/include $(HOST_CFLAGS)
40+
CL_CFLAGS_release:=$(CL_CFLAGS_common) -O3
41+
CL_CFLAGS_debug:=$(CL_CFLAGS_common) -O0 -g
42+
CL_CFLAGS:=$(CL_CFLAGS_$V)
43+
44+
# Aggregate target: the chacha and ecc test binaries, the OpenCL ecc test
# binary, and both shared libraries (lib$(LIB).so and lib$(CL_LIB).so), all
# placed under the $(V)/ output directory.
all: $(V)/$(CHACHA_TEST_BIN) \
45+
$(V)/$(ECC_TEST_BIN) \
46+
$(V)/$(CL_ECC_TEST_BIN) \
47+
$(V)/lib$(LIB).so \
48+
$(V)/lib$(CL_LIB).so
3549

3650
ECC_DIR:=cuda-ecc-ed25519
3751

@@ -45,7 +59,7 @@ $V/seed.o: $(SEED_SRCS)
4559
@mkdir -p $(@D)
4660
$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
4761

48-
SIGN_SRCS:=$(addprefix $(ECC_DIR)/,sign.cu sha512.h ge.h sc.h fe.cu ../$(CUDA_HEADER_DIR)/gpu_common.h ed25519.h)
62+
SIGN_SRCS:=$(addprefix $(ECC_DIR)/,sign.cu sha512.h ge.h sc.h fe.cu ../common/gpu_common.h ed25519.h)
4963
$V/sign.o: $(SIGN_SRCS)
5064
@mkdir -p $(@D)
5165
$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
@@ -60,6 +74,18 @@ $V/gpu_ctx.o: $(addprefix $(ECC_DIR)/,gpu_ctx.cu gpu_ctx.h)
6074
$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
6175

6276
CL_ECC_DIR:=opencl-ecc-ed25519
77+
CL_CRYPT_DIR:=opencl-crypt
78+
CL_POH_VERIFY_DIR:=opencl-poh-verify
79+
80+
# Compile the OpenCL poh-verify source with $(CXX) and $(CL_CFLAGS) into
# $V/cl_poh_verify.o, creating the output directory first.
CL_POH_VERIFY_SRCS:=$(CL_POH_VERIFY_DIR)/cl_poh_verify.cpp
81+
$V/cl_poh_verify.o: $(CL_POH_VERIFY_SRCS)
82+
@mkdir -p $(@D)
83+
$(CXX) $(CL_CFLAGS) -I$(ECC_DIR) -c $< -o $@
84+
85+
# Compile the OpenCL chacha source with $(CXX) and $(CL_CFLAGS) into
# $V/cl_chacha.o, creating the output directory first.
CL_CHACHA_SRCS:=$(CL_CRYPT_DIR)/cl_chacha.cpp
86+
$V/cl_chacha.o: $(CL_CHACHA_SRCS)
87+
@mkdir -p $(@D)
88+
$(CXX) $(CL_CFLAGS) -I$(ECC_DIR) -c $< -o $@
6389

6490
CL_SIGN_SRCS:=$(CL_ECC_DIR)/sign.cpp $(ECC_DIR)/fe.cu $(ECC_DIR)/ed25519.h
6591
$V/cl_sign.o: $(CL_SIGN_SRCS)
@@ -76,7 +102,7 @@ $V/cl_gpu_ctx.o: $(addprefix $(CL_ECC_DIR)/,gpu_ctx.cpp gpu_ctx.h)
76102
$(CXX) $(CL_CFLAGS) -I$(ECC_DIR) -c $< -o $@
77103

78104
CHACHA_DIR:=cuda-crypt
79-
CHACHA_SRCS:=$(addprefix $(CHACHA_DIR)/,chacha_cbc.cu chacha.h common.cu)
105+
CHACHA_SRCS:=$(addprefix $(CHACHA_DIR)/,chacha_cbc.cu common.cu) $(CHACHA_IF_DIR)/chacha.h
80106

81107
$V/chacha_cbc.o: $(CHACHA_SRCS)
82108
@mkdir -p $(@D)
@@ -99,23 +125,17 @@ $V/poh_verify.o: $(POH_SRCS)
99125
@mkdir -p $(@D)
100126
$(NVCC) -rdc=true $(CFLAGS) -c $< -o $@
101127

102-
CL_CPU_GPU_OBJS=$(addprefix $V/,cl_init_platform.o cl_verify.o cl_gpu_ctx.o cl_sign.o)
128+
CL_CPU_GPU_OBJS=$(addprefix $V/,cl_init_platform.o cl_verify.o cl_gpu_ctx.o cl_sign.o cl_chacha.o cl_poh_verify.o)
103129

104-
$V/cl_crypt-dlink.o: $(CL_CPU_GPU_OBJS)
105-
ar rvs $@ $^
106-
107-
$V/lib$(CL_LIB).a: $V/cl_crypt-dlink.o $(CL_CPU_GPU_OBJS)
108-
ar rcs $@ $^
109-
110-
$V/lib$(CL_LIB).so: $V/cl_crypt-dlink.o $(CL_CPU_GPU_OBJS)
111-
$(CXX) -shared --shared $^ -o $@
130+
# Link every object listed in CL_CPU_GPU_OBJS into the shared library,
# linking against libOpenCL.
$V/lib$(CL_LIB).so: $(CL_CPU_GPU_OBJS)
131+
$(CXX) -shared $^ -lOpenCL -o $@
112132

113133
$V/cl_ecc_main.o: $(CL_ECC_DIR)/main.cpp $(ECC_DIR)/ed25519.h
114134
@mkdir -p $(@D)
115-
$(CXX) $(CL_CFLAGS) -I$(ECC_DIR) -c $< -o $@
135+
$(CXX) $(CL_CFLAGS) -pthread -I$(ECC_DIR) -c $< -o $@
116136

117137
$V/$(CL_ECC_TEST_BIN): $V/cl_ecc_main.o $V/lib$(CL_LIB).so
118-
$(CXX) $(CL_CFLAGS) -Wl,-v -L$(CUDA_PATH)/lib64 -L$V -lpthread $^ -lOpenCL -o $@
138+
$(CXX) $(CL_CFLAGS) -L$(CUDA_DIR)/lib64 -L$V -pthread $< -l$(CL_LIB) -lOpenCL -o $@
119139

120140
CPU_GPU_OBJS=$(addprefix $V/,chacha_cbc.o aes_cbc.o verify.o poh_verify.o gpu_ctx.o sign.o seed.o keypair.o)
121141

File renamed without changes.

src/cuda-crypt/chacha.h renamed to src/crypt-if/chacha.h

+4-3
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,10 @@ extern "C" {
1313
#define CHACHA_ROUNDS 500
1414
#define SAMPLE_SIZE 32
1515

16-
void __host__ __device__ chacha20_ctr_encrypt(const uint8_t *in, uint8_t *out, size_t in_len,
17-
const uint8_t key[CHACHA_KEY_SIZE], const uint8_t nonce[CHACHA_NONCE_SIZE],
18-
uint32_t counter);
16+
void chacha20_ctr_encrypt(const uint8_t *in, uint8_t *out, size_t in_len,
17+
const uint8_t key[CHACHA_KEY_SIZE],
18+
const uint8_t nonce[CHACHA_NONCE_SIZE],
19+
uint32_t counter);
1920

2021
void cuda_chacha20_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t in_len,
2122
const uint8_t key[CHACHA_KEY_SIZE], uint8_t* ivec);

src/cuda-crypt/chacha20_core.cu

+12-5
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,11 @@ static void __host__ __device__ chacha20_encrypt(const u32 input[16],
5757
}
5858
}
5959

60-
void __host__ __device__ chacha20_ctr_encrypt(const uint8_t *in, uint8_t *out, size_t in_len,
61-
const uint8_t key[CHACHA_KEY_SIZE],
62-
const uint8_t nonce[CHACHA_NONCE_SIZE],
63-
uint32_t counter)
60+
void __host__ __device__
61+
chacha20_ctr_encrypt_device(const uint8_t *in, uint8_t *out, size_t in_len,
62+
const uint8_t key[CHACHA_KEY_SIZE],
63+
const uint8_t nonce[CHACHA_NONCE_SIZE],
64+
uint32_t counter)
6465
{
6566
uint32_t input[16];
6667
uint8_t buf[64];
@@ -105,4 +106,10 @@ void __host__ __device__ chacha20_ctr_encrypt(const uint8_t *in, uint8_t *out, s
105106
}
106107
}
107108

108-
109+
/*
 * Entry point declared without the __host__ __device__ qualifiers: it passes
 * in, out, in_len, key, nonce and counter unchanged to
 * chacha20_ctr_encrypt_device(), which carries those qualifiers and does the
 * actual work.
 */
void chacha20_ctr_encrypt(const uint8_t *in, uint8_t *out, size_t in_len,
110+
const uint8_t key[CHACHA_KEY_SIZE],
111+
const uint8_t nonce[CHACHA_NONCE_SIZE],
112+
uint32_t counter)
113+
{
114+
chacha20_ctr_encrypt_device(in, out, in_len, key, nonce, counter);
115+
}

src/cuda-crypt/chacha_cbc.cu

+1-1
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ __global__ void chacha_ctr_encrypt_kernel(const unsigned char* input, unsigned c
142142
size_t i = (size_t)(blockIdx.x * blockDim.x + threadIdx.x);
143143

144144
if (i < num_keys) {
145-
chacha20_ctr_encrypt(input, &output[i * length], length, &keys[i * CHACHA_KEY_SIZE], &nonces[i * CHACHA_NONCE_SIZE], 0);
145+
chacha20_ctr_encrypt_device(input, &output[i * length], length, &keys[i * CHACHA_KEY_SIZE], &nonces[i * CHACHA_NONCE_SIZE], 0);
146146
}
147147
}
148148

src/gpu-common.mk

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
NVCC:=nvcc
22
GPU_PTX_ARCH:=compute_35
33
GPU_ARCHS?=sm_37,sm_50,sm_61,sm_70
4+
HOST_CFLAGS:=-Wall -Werror -fPIC -Wno-strict-aliasing
45
GPU_CFLAGS:=--gpu-code=$(GPU_ARCHS),$(GPU_PTX_ARCH) --gpu-architecture=$(GPU_PTX_ARCH)
5-
CFLAGS_release:=-Icommon --ptxas-options=-v $(GPU_CFLAGS) -O3 -Xcompiler "-Wall -Werror -fPIC -Wno-strict-aliasing"
6+
CFLAGS_release:=-Icommon --ptxas-options=-v $(GPU_CFLAGS) -O3 -Xcompiler "$(HOST_CFLAGS)"
67
CFLAGS_debug:=$(CFLAGS_release) -g
78
CFLAGS:=$(CFLAGS_$V)

src/opencl-crypt/cl_chacha.cpp

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#include <stdio.h>
2+
#include <stdint.h>
3+
#include <stdlib.h>
4+
5+
#include "chacha.h"
6+
7+
/*
 * Unimplemented stub. None of the arguments are read: any call prints a
 * "not implemented" message to stderr and terminates the process with exit
 * status 1, so the function never returns to its caller.
 */
void chacha_cbc_encrypt_many_sample(const uint8_t* in,
8+
void* sha_state_arg,
9+
size_t length,
10+
const uint8_t* keys,
11+
uint8_t* ivecs,
12+
uint32_t num_keys,
13+
const uint64_t* samples,
14+
uint32_t num_samples,
15+
uint64_t starting_block_offset,
16+
float* time_us)
17+
{
18+
fprintf(stderr, "chacha_cbc_encrypt_many_sample not implemented\n");
19+
exit(1);
20+
}
21+
22+
/*
 * Unimplemented stub. sha_state, out and num_keys are ignored: any call
 * prints a "not implemented" message to stderr and terminates the process
 * with exit status 1.
 */
void chacha_end_sha_state(const void* sha_state, uint8_t* out, uint32_t num_keys)
23+
{
24+
fprintf(stderr, "chacha_end_sha_state not implemented\n");
25+
exit(1);
26+
}
27+
28+
/*
 * Unimplemented stub. sha_state and num_keys are ignored: any call prints a
 * "not implemented" message to stderr and terminates the process with exit
 * status 1.
 */
void chacha_init_sha_state(void* sha_state, uint32_t num_keys)
29+
{
30+
fprintf(stderr, "chacha_init_sha_state not implemented\n");
31+
exit(1);
32+
}
33+
34+

src/opencl-ecc-ed25519/main.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
extern void ed25519_free_gpu_mem();
1919

20+
bool g_verbose = false;
21+
2022
typedef struct {
2123
size_t size;
2224
uint64_t num_retransmits;

src/opencl-ecc-ed25519/sign.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ void ed25519_sign_many(const gpu_Elems* elems,
9292

9393
size_t num_threads_per_block = 64;
9494
size_t num_blocks = ROUND_UP_DIV(total_signatures, num_threads_per_block) * num_threads_per_block;
95-
LOG("signing blocks: %d threads_per_block: %d\n", num_blocks, num_threads_per_block);
95+
LOG("signing blocks: %zu threads_per_block: %zu\n", num_blocks, num_threads_per_block);
9696

9797
/*
9898
__kernel void ed25519_sign_kernel(__global unsigned char* packets,

src/opencl-ecc-ed25519/verify.cpp

+12-2
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,6 @@ void ed25519_verify_many(const gpu_Elems* elems,
127127
num_elems, total_signatures, total_packets, message_size);
128128

129129
size_t out_size = total_signatures * sizeof(uint8_t);
130-
size_t offsets_size = total_signatures * sizeof(uint32_t);
131130

132131
uint32_t total_packets_size = total_packets * message_size;
133132

@@ -157,7 +156,7 @@ void ed25519_verify_many(const gpu_Elems* elems,
157156

158157
size_t num_threads_per_block = 64;
159158
size_t num_blocks = ROUND_UP_DIV(total_signatures, num_threads_per_block) * num_threads_per_block;
160-
LOG("num_blocks: %d threads_per_block: %d keys: %d out: %p\n",
159+
LOG("num_blocks: %zu threads_per_block: %zu keys: %d out: %p\n",
161160
num_blocks, num_threads_per_block, (int)total_packets, out);
162161

163162
CL_ERR( clSetKernelArg(ed25519_verify_kernel, 0, sizeof(cl_mem), (void *)&cur_ctx->packets) );
@@ -192,3 +191,14 @@ const char* ed25519_license() {
192191
"Licensed under the Apache License, Version 2.0 "
193192
"<http://www.apache.org/licenses/LICENSE-2.0>";
194193
}
194+
195+
// cuda_host_register/cuda_host_unregister are supported by the cuda lib; stub them here.
196+
/* No-op stub: ptr, size and flags are ignored and 0 is always returned. */
int cuda_host_register(void* ptr, size_t size, unsigned int flags)
197+
{
198+
return 0;
199+
}
200+
201+
/* No-op stub: ptr is ignored and 0 is always returned. */
int cuda_host_unregister(void* ptr)
202+
{
203+
return 0;
204+
}

src/opencl-platform/cl_init_platform.cpp

+15-15
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ bool cl_check_init(cl_uint sel_device_type) {
200200
*/
201201
bool cl_check_init(void) {
202202

203-
if(cl_is_init == true) {
203+
if (cl_is_init == true) {
204204
return true;
205205
} else {
206206
cout << "OpenCL platform query & init..." << endl;
@@ -212,11 +212,10 @@ bool cl_check_init(void) {
212212
string kernel_src;
213213

214214
cl_device_id device;
215-
cl_platform_id platform;
216215
cl_uint platform_num = 0;
217216
cl_platform_id* platform_list = NULL;
218217

219-
cl_uint device_num = 0;
218+
cl_uint num_devices = 0;
220219
cl_device_id* device_list = NULL;
221220

222221
size_t attr_size = 0;
@@ -232,8 +231,7 @@ bool cl_check_init(void) {
232231
cout << "Platforms found: " << platform_num << endl;
233232

234233
/* list all platforms and VENDOR/VERSION properties */
235-
for(int platf=0; platf<platform_num; platf++)
236-
{
234+
for (cl_uint platf = 0; platf < platform_num; platf++) {
237235
/* get attribute CL_PLATFORM_VENDOR */
238236
CL_ERR( clGetPlatformInfo(platform_list[platf],
239237
CL_PLATFORM_VENDOR, 0, NULL, &attr_size));
@@ -248,33 +246,36 @@ bool cl_check_init(void) {
248246

249247
/* get attribute size CL_PLATFORM_VERSION */
250248
CL_ERR( clGetPlatformInfo(platform_list[platf],
251-
CL_PLATFORM_VERSION, 0, NULL, &attr_size));
249+
CL_PLATFORM_VERSION,
250+
0, NULL, &attr_size));
252251
attr_data = new char[attr_size];
253252
DIE(attr_data == NULL, "alloc attr_data");
254253

255254
/* get data size CL_PLATFORM_VERSION */
256255
CL_ERR( clGetPlatformInfo(platform_list[platf],
257-
CL_PLATFORM_VERSION, attr_size, attr_data, NULL));
256+
CL_PLATFORM_VERSION,
257+
attr_size, attr_data, NULL));
258258
cout << attr_data << endl;
259259
delete[] attr_data;
260260

261261
/* get num of available OpenCL devices type ALL on the selected platform */
262-
if(clGetDeviceIDs(platform_list[platf],
263-
query_device_type, 0, NULL, &device_num) != CL_SUCCESS) {
264-
device_num = 0;
262+
if (clGetDeviceIDs(platform_list[platf],
263+
query_device_type, 0,
264+
NULL, &num_devices) != CL_SUCCESS) {
265+
num_devices = 0;
265266
continue;
266267
}
267268

268-
device_list = new cl_device_id[device_num];
269+
device_list = new cl_device_id[num_devices];
269270
DIE(device_list == NULL, "alloc devices");
270271

271272
/* get all available OpenCL devices type ALL on the selected platform */
272273
CL_ERR( clGetDeviceIDs(platform_list[platf], query_device_type,
273-
device_num, device_list, NULL));
274-
cout << "\tDevices found " << device_num << endl;
274+
num_devices, device_list, NULL));
275+
cout << "\tDevices found " << num_devices << endl;
275276

276277
/* list all devices and TYPE/VERSION properties */
277-
for(int dev=0; dev<device_num; dev++)
278+
for(cl_uint dev=0; dev < num_devices; dev++)
278279
{
279280
/* get attribute size */
280281
CL_ERR( clGetDeviceInfo(device_list[dev], CL_DEVICE_NAME,
@@ -291,7 +292,6 @@ bool cl_check_init(void) {
291292
string tmpAttrData = attr_data;
292293

293294
// always select last device of type GPU
294-
platform = platform_list[platf];
295295
device = device_list[dev];
296296

297297
delete[] attr_data;
+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#include <stdint.h>
2+
#include <stddef.h>
3+
#include <stdlib.h>
4+
#include <stdio.h>
5+
6+
/*
 * Unimplemented stub for poh_verify_many, given C linkage by the enclosing
 * extern "C" block. None of the arguments (hashes, num_hashes_arr, num_elems,
 * use_non_default_stream) are read: any call prints a "not implemented"
 * message to stderr and terminates the process with exit status 1.
 *
 * The message ends with a newline so the diagnostic is a complete line, in
 * line with the other "not implemented" stubs.
 */
extern "C" {
7+
int poh_verify_many(uint8_t* hashes,
8+
const uint64_t* num_hashes_arr,
9+
size_t num_elems,
10+
uint8_t use_non_default_stream)
11+
{
12+
fprintf(stderr, "poh_verify_many not implemented.\n");
13+
exit(1);
14+
return 0; /* not reached: exit() above ends the process */
15+
}
16+
}
17+

0 commit comments

Comments
 (0)