1414#include < sstream>
1515#include < cstdlib>
1616#include < vector>
17- #include < cstdint>
1817#include < chrono>
1918#include < thread>
19+ #include < cstdio>
20+ #include < cstdint>
21+ #include < cstdlib>
2022
2123
2224
@@ -66,6 +68,8 @@ extern "C" void debug_cpu_only_mode(bool_t mode) {
6668 } else {
6769 best_device = get_best_device ();
6870 }
71+ std::cout << " Debug CPU only mode: " << (debug_cpu_only ? " ON" : " OFF" ) << std::endl;
72+ std::cout.flush ();
6973}
7074
7175extern " C" bool_t accelerator_available () {
@@ -82,38 +86,51 @@ torch::ScalarType get_best_dtype() {
8286 }
8387}
8488
85- int bridge_tensor_elements (bridge_tensor_t &bt) {
86- int size = 1 ;
87- for (int i = 0 ; i < bt.dim ; ++i) {
88- size *= bt.sizes [i];
89- }
90- return size;
91- }
92-
93- size_t bridge_tensor_size (bridge_tensor_t &bt) {
94- return sizeof (float32_t ) * bridge_tensor_elements (bt);
95- }
9689
9790void store_tensor (at::Tensor &input, float32_t * dest) {
98- float32_t * data = input.data_ptr <float32_t >();
99- size_t bytes_size = sizeof (float32_t ) * input.numel ();
91+ const float32_t * data = input.const_data_ptr <float32_t >();
92+ std:: size_t bytes_size = sizeof (float32_t ) * input.numel ();
10093 // std::memmove(dest,data,bytes_size);
10194 std::memcpy (dest,data,bytes_size);
10295}
10396
10497bridge_tensor_t torch_to_bridge (at::Tensor &tensor) {
10598 bridge_tensor_t result;
10699 result.created_by_c = true ;
100+ result.was_freed = false ;
101+
107102 result.dim = tensor.dim ();
108- result.sizes = new int32_t [result.dim ];
109- for (int i = 0 ; i < result.dim ; ++i) {
110- result.sizes [i] = tensor.size (i);
103+
104+ std::size_t sizes_bytes = sizeof (uint32_t ) * result.dim ;
105+ result.sizes = static_cast <uint32_t *>(malloc (sizes_bytes));
106+ for (uint32_t i = 0 ; i < result.dim ; ++i) {
107+ result.sizes [i] = static_cast <uint32_t >(tensor.size (i));
111108 }
112- result.data = new float32_t [bridge_tensor_elements (result)];
109+
110+ std::size_t data_bytes = sizeof (float32_t ) * tensor.numel ();
111+ result.data = static_cast <float32_t *>(malloc (data_bytes));
113112 store_tensor (tensor, result.data );
114113 return result;
115114}
116115
116+ extern " C" void free_bridge_tensor (bridge_tensor_t bt) {
117+ if (bt.created_by_c && !bt.was_freed ) {
118+ free (bt.sizes );
119+ free (bt.data );
120+ return ;
121+ } else if (!bt.created_by_c ) {
122+ std::cerr << " Warning: Attempting to free a tensor not created by C code." << std::endl;
123+ std::cerr.flush ();
124+ } else if (bt.was_freed ) {
125+ std::cerr << " Warning: Attempting to free a tensor that has already been freed." << std::endl;
126+ std::cerr.flush ();
127+ } else {
128+ std::cerr << " Warning: Attempting to free a tensor with an unknown state." << std::endl;
129+ std::cerr.flush ();
130+ }
131+ }
132+
133+
117134at::Tensor bridge_to_torch (bridge_tensor_t &bt) {
118135 std::vector<int64_t > sizes_vec (bt.sizes , bt.sizes + bt.dim );
119136 auto shape = torch::IntArrayRef (sizes_vec);
@@ -130,7 +147,6 @@ at::Tensor bridge_to_torch(bridge_tensor_t &bt,torch::Device device, bool copy,t
130147 return t.to (device, dtype, /* non_blocking=*/ false , /* copy=*/ true );
131148 else
132149 return t.to (device, dtype, /* non_blocking=*/ false , /* copy=*/ false );
133-
134150}
135151
136152extern " C" float32_t * unsafe (const float32_t * arr) {
@@ -230,8 +246,8 @@ extern "C" bridge_pt_model_t load_model(const uint8_t* model_path) {
230246
231247
232248
233- bridge_tensor_t model_forward (bridge_pt_model_t model, bridge_tensor_t input, bool is_vgg_based_model ) {
234- auto tn_mps = bridge_to_torch (input,best_device,true ,best_dtype);
249+ extern " C " bridge_tensor_t model_forward (bridge_pt_model_t model, bridge_tensor_t input) {
250+ auto tn_mps = bridge_to_torch (input,best_device,false ,best_dtype);
235251 // tn_mps = tn_mps.permute({2, 0, 1}).contiguous();
236252 // tn_mps.unsqueeze_(0);//.contiguous();
237253 auto tn = tn_mps.permute ({2 , 0 , 1 }).unsqueeze (0 ).contiguous ();
@@ -244,24 +260,12 @@ bridge_tensor_t model_forward(bridge_pt_model_t model, bridge_tensor_t input, bo
244260 // auto tn_out = o.squeeze(0).permute({1, 2, 0}).contiguous();
245261 auto tn_out = o.squeeze (0 ).contiguous ().permute ({1 , 2 , 0 }).contiguous ();
246262
247- if (is_vgg_based_model) {
248- tn_out.div_ (255.0 );
249- }
250-
251- auto tn_out_cpu = tn_out.to (torch::kCPU ,torch::kFloat32 ,false ,true );
263+ auto tn_out_cpu = tn_out.to (torch::kCPU ,torch::kFloat32 ,false ,false );
252264
253265 return torch_to_bridge (tn_out_cpu);
254266
255267}
256268
257- extern " C" bridge_tensor_t model_forward (bridge_pt_model_t model, bridge_tensor_t input) {
258- return model_forward (model, input, false );
259- }
260-
261- extern " C" bridge_tensor_t model_forward_style_transfer (bridge_pt_model_t model, bridge_tensor_t input) {
262- return model_forward (model, input, true );
263- }
264-
265269// std::tuple<uint64_t, uint64_t> get_cpu_frame_size(uint64_t width, uint64_t height, float32_t scale_factor) {
266270// // if (best_device == torch::kMPS || best_device == torch::kCUDA)
267271// if (accelerator_available())
0 commit comments