Skip to content

Commit 62e4a3f

Browse files
committed
memory release
1 parent 0458f36 commit 62e4a3f

File tree

7 files changed

+20
-5
lines changed

7 files changed

+20
-5
lines changed

Diff for: include/tkDNN/Layer.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class Layer {
5050
}
5151
void setFinal() { this->final = true; }
5252
dataDim_t input_dim, output_dim;
53-
dnnType *dstData; //where results will be putted
53+
dnnType *dstData = nullptr; //where results will be put
5454

5555
int id = 0;
5656
bool final; //if the layer is the final one
@@ -122,7 +122,7 @@ class LayerWgs : public Layer {
122122
__half *data16_d = nullptr, *bias16_d = nullptr;
123123
__half *bias216_h = nullptr, *bias216_d = nullptr;
124124

125-
__half *power16_h = nullptr;
125+
__half *power16_h = nullptr, *power16_d = nullptr;
126126
__half *scales16_h = nullptr, *scales16_d = nullptr;
127127
__half *mean16_h = nullptr, *mean16_d = nullptr;
128128
__half *variance16_h = nullptr, *variance16_d = nullptr;
@@ -164,6 +164,7 @@ class LayerWgs : public Layer {
164164
if( scales16_d != nullptr) { cudaFree( scales16_d); scales16_d = nullptr; }
165165
if( mean16_d != nullptr) { cudaFree( mean16_d); mean16_d = nullptr; }
166166
if(variance16_d != nullptr) { cudaFree(variance16_d); variance16_d = nullptr; }
167+
if( power16_d != nullptr) { cudaFree( power16_d); power16_d = nullptr; }
167168
}
168169
}
169170
};

Diff for: include/tkDNN/utils.h

+1
Original file line numberDiff line numberDiff line change
@@ -116,5 +116,6 @@ void matrixMulAdd( cublasHandle_t handle, dnnType* srcData, dnnType* dstData,
116116
dnnType* add_vector, int dim, dnnType mul);
117117

118118
void getMemUsage(double& vm_usage_kb, double& resident_set_kb);
119+
void printCudaMemUsage();
119120
void removePathAndExtension(const std::string &full_string, std::string &name);
120121
#endif //UTILS_H

Diff for: src/Layer.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ Layer::~Layer() {
2424

2525
checkCUDNN( cudnnDestroyTensorDescriptor(srcTensorDesc) );
2626
checkCUDNN( cudnnDestroyTensorDescriptor(dstTensorDesc) );
27+
28+
if(dstData != nullptr) {
29+
cudaFree(dstData);
30+
dstData = nullptr;
31+
}
2732
}
2833

2934
}}

Diff for: src/LayerWgs.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ LayerWgs::LayerWgs(Network *net, int inputs, int outputs,
8080
variance16_h = new __half[b_size];
8181
scales16_h = new __half[b_size];
8282

83-
//cudaMalloc(&power16_d, b_size*sizeof(__half));
83+
cudaMalloc(&power16_d, b_size*sizeof(__half));
8484
cudaMalloc(&mean16_d, b_size*sizeof(__half));
8585
cudaMalloc(&variance16_d, b_size*sizeof(__half));
8686
cudaMalloc(&scales16_d, b_size*sizeof(__half));
@@ -91,8 +91,8 @@ LayerWgs::LayerWgs(Network *net, int inputs, int outputs,
9191

9292
//init power array of ones
9393
cudaMemcpy(tmp_d, power_h, b_size*sizeof(float), cudaMemcpyHostToDevice);
94-
//float2half(tmp_d, power16_d, b_size);
95-
//cudaMemcpy(power16_h, power16_d, b_size*sizeof(__half), cudaMemcpyDeviceToHost);
94+
float2half(tmp_d, power16_d, b_size);
95+
cudaMemcpy(power16_h, power16_d, b_size*sizeof(__half), cudaMemcpyDeviceToHost);
9696

9797
//mean array
9898
cudaMemcpy(tmp_d, mean_h, b_size*sizeof(float), cudaMemcpyHostToDevice);

Diff for: src/Network.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ void Network::print() {
128128
}
129129
printCenteredTitle("", '=', 60);
130130
std::cout<<"\n";
131+
printCudaMemUsage();
131132
}
132133
const char *Network::getNetworkRTName(const char *network_name){
133134
networkName = network_name;

Diff for: src/NetworkRT.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ NetworkRT::NetworkRT(Network *net, const char *name) {
134134
networkRT->markOutput(*input);
135135

136136
std::cout<<"Selected maxBatchSize: "<<builderRT->getMaxBatchSize()<<"\n";
137+
printCudaMemUsage();
137138
std::cout<<"Building tensorRT cuda engine...\n";
138139
#if NV_TENSORRT_MAJOR >= 6
139140
engineRT = builderRT->buildEngineWithConfig(*networkRT, *configRT);

Diff for: src/utils.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,12 @@ void getMemUsage(double& vm_usage_kb, double& resident_set_kb){
197197
resident_set_kb = rss * page_size_kb;
198198
}
199199

200+
void printCudaMemUsage() {
201+
size_t free, total;
202+
checkCuda( cudaMemGetInfo(&free, &total) );
203+
std::cout<<"GPU free memory: "<<double(free)/1e6<<" mb.\n";
204+
}
205+
200206
void removePathAndExtension(const std::string &full_string, std::string &name){
201207
name = full_string;
202208
std::string tmp_str = full_string;

0 commit comments

Comments
 (0)