Note: the leading whitespace of context/removed lines below was reconstructed
and may not match the target files exactly; use `git apply --ignore-whitespace`.

diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index 18f9e8b7f10..e479caf52a4 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -711,7 +711,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
             if (train) l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
         }
 
-        l.output_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n);
+        l.output_gpu = cuda_make_array_init2zero(total_batch*out_h*out_w*n);
         if (train) l.delta_gpu = cuda_make_array(l.delta, total_batch*out_h*out_w*n);
 
         if(binary){
@@ -755,9 +755,9 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
             }
 
             if (train) {
-                l.x_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n);
+                l.x_gpu = cuda_make_array_init2zero(total_batch*out_h*out_w*n);
 #ifndef CUDNN
-                l.x_norm_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n);
+                l.x_norm_gpu = cuda_make_array_init2zero(total_batch*out_h*out_w*n);
 #endif // CUDNN
             }
         }
diff --git a/src/dark_cuda.c b/src/dark_cuda.c
index 74f067724db..84a4116095f 100644
--- a/src/dark_cuda.c
+++ b/src/dark_cuda.c
@@ -487,6 +487,28 @@ float *cuda_make_array(float *x, size_t n)
     return x_gpu;
 }
 
+/*
+ * Allocates a device buffer of n floats and fills it with zero bytes, so no
+ * host array has to be uploaded just to initialise it.
+ * Failures are reported through CHECK_CUDA() and error().
+ */
+float *cuda_make_array_init2zero(size_t n)
+{
+    float *x_gpu = NULL;
+    size_t size = sizeof(float) * n;
+    cudaError_t status = cudaMalloc((void **)&x_gpu, size);
+    if (status != cudaSuccess)
+        fprintf(stderr, " Try to set subdivisions=64 in your cfg-file. \n");
+    CHECK_CUDA(status);
+    // Reject a NULL buffer before it is handed to the memset below.
+    if (!x_gpu) error("Cuda malloc failed", DARKNET_LOC);
+    // NOTE(review): the memset is queued on the legacy default stream (0);
+    // confirm it is ordered before the first kernel that touches the buffer.
+    status = cudaMemsetAsync(x_gpu, 0, size, 0);
+    CHECK_CUDA(status);
+    return x_gpu;
+}
+
 void **cuda_make_array_pointers(void **x, size_t n)
 {
     void **x_gpu;
diff --git a/src/dark_cuda.h b/src/dark_cuda.h
index ffe3836b6eb..99457e1921a 100644
--- a/src/dark_cuda.h
+++ b/src/dark_cuda.h
@@ -78,6 +78,7 @@ extern "C" {
     float *cuda_make_array_pinned_preallocated(float *x, size_t n);
     float *cuda_make_array_pinned(float *x, size_t n);
     float *cuda_make_array(float *x, size_t n);
+    float *cuda_make_array_init2zero(size_t n);
     void **cuda_make_array_pointers(void **x, size_t n);
     int *cuda_make_int_array(size_t n);
     int *cuda_make_int_array_new_api(int *x, size_t n);
diff --git a/src/route_layer.c b/src/route_layer.c
index 23dfa0473b8..22f829180f5 100644
--- a/src/route_layer.c
+++ b/src/route_layer.c
@@ -35,8 +35,8 @@ route_layer make_route_layer(int batch, int n, int *input_layers, int *input_siz
     l.forward_gpu = forward_route_layer_gpu;
     l.backward_gpu = backward_route_layer_gpu;
 
-    l.delta_gpu = cuda_make_array(l.delta, outputs*batch);
-    l.output_gpu = cuda_make_array(l.output, outputs*batch);
+    l.delta_gpu = cuda_make_array_init2zero(outputs*batch);
+    l.output_gpu = cuda_make_array_init2zero(outputs*batch);
     #endif
     return l;
 }