Skip to content

Commit f3f5daf

Browse files
committed
Merge branch 'master' of https://github.com/ceccocats/tkDNN
2 parents c2d7362 + c32a0be commit f3f5daf

File tree

6 files changed

+17
-12
lines changed

6 files changed

+17
-12
lines changed

README.md

+4-3
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,10 @@ tkDNN is a Deep Neural Network library built with cuDNN primitives specifically
33
The main scope is to do high-performance inference on already trained models.
44

55
this branch actually works on every NVIDIA GPU that supports the dependencies:
6-
* CUDA 9
7-
* CUDNN 7.105
8-
* TENSORRT 4.02
6+
* CUDA 10.0
7+
* CUDNN 7.603
8+
* TENSORRT 6.01
9+
* OPENCV 4.1
910

1011
## Workflow
1112
The recommended workflow follows these steps:

demo/demo/demo.cpp

+4-3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include <opencv2/core/core.hpp>
99
#include <opencv2/highgui/highgui.hpp>
10+
#include <opencv2/videoio.hpp>
1011
#include <opencv2/imgproc/imgproc.hpp>
1112

1213
#include "Yolo3Detection.h"
@@ -46,9 +47,9 @@ int main(int argc, char *argv[]) {
4647

4748
cv::VideoWriter resultVideo;
4849
if(SAVE_RESULT) {
49-
int w = cap.get(CV_CAP_PROP_FRAME_WIDTH);
50-
int h = cap.get(CV_CAP_PROP_FRAME_HEIGHT);
51-
resultVideo.open("result.mp4", CV_FOURCC('M','P','4','V'), 30, cv::Size(w, h));
50+
int w = cap.get(cv::CAP_PROP_FRAME_WIDTH);
51+
int h = cap.get(cv::CAP_PROP_FRAME_HEIGHT);
52+
resultVideo.open("result.mp4", cv::VideoWriter::fourcc('M','P','4','V'), 30, cv::Size(w, h));
5253
}
5354

5455
cv::Mat frame;

include/tkDNN/Layer.h

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ enum layerType_t {
2424
LAYER_YOLO
2525
};
2626

27+
#define TKDNN_BN_MIN_EPSILON 1e-5
28+
2729
/**
2830
Simple layer Father class
2931
*/

src/Conv2d.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ dnnType* Conv2d::infer(dataDim_t &dim, dnnType* srcData) {
117117
dstTensorDesc, dstData, dstTensorDesc,
118118
dstData, biasTensorDesc, //same tensor descriptor as bias
119119
scales_d, bias_d, mean_d, variance_d,
120-
CUDNN_BN_MIN_EPSILON);
120+
TKDNN_BN_MIN_EPSILON);
121121
}
122122
//update data dimensions
123123
dim = output_dim;

src/LayerWgs.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ LayerWgs::LayerWgs(Network *net, int inputs, int outputs,
2929
seek += outputs;
3030
readBinaryFile(weights_path.c_str(), outputs, &variance_h, &variance_d, seek);
3131

32-
float eps = CUDNN_BN_MIN_EPSILON;
32+
float eps = TKDNN_BN_MIN_EPSILON;
3333

3434
power_h = new dnnType[outputs];
3535
for(int i=0; i<outputs; i++) power_h[i] = 1.0f;

src/Network.cpp

+5-4
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,6 @@ Network::Network(dataDim_t input_dim) {
1717
<<", CUDNN v"<<cu_ver<<")\n";
1818
dataType = CUDNN_DATA_FLOAT;
1919
tensorFormat = CUDNN_TENSOR_NCHW;
20-
21-
checkCUDNN( cudnnCreate(&cudnnHandle) );
22-
checkERROR( cublasCreate(&cublasHandle) );
23-
2420
num_layers = 0;
2521

2622
fp16 = false;
@@ -38,6 +34,11 @@ Network::Network(dataDim_t input_dim) {
3834
std::cout<<COL_REDB<<"!! FP16 INERENCE ENABLED !!"<<COL_END<<"\n";
3935
if(dla)
4036
std::cout<<COL_GREENB<<"!! DLA INERENCE ENABLED !!"<<COL_END<<"\n";
37+
38+
39+
checkCUDNN( cudnnCreate(&cudnnHandle) );
40+
checkERROR( cublasCreate(&cublasHandle) );
41+
4142
}
4243

4344
Network::~Network() {

0 commit comments

Comments
 (0)