1414#include < sstream>
1515#include < cstdlib>
1616#include < vector>
17- #include < cstdint>
1817#include < chrono>
1918#include < thread>
19+ #include < cstdio>
20+ #include < cstdint>
21+ #include < cstdlib>
2022
2123
2224
@@ -65,6 +67,8 @@ extern "C" void debug_cpu_only_mode(bool_t mode) {
6567 } else {
6668 best_device = get_best_device ();
6769 }
70+ std::cout << " Debug CPU only mode: " << (debug_cpu_only ? " ON" : " OFF" ) << std::endl;
71+ std::cout.flush ();
6872}
6973
7074extern " C" bool_t accelerator_available () {
@@ -81,38 +85,51 @@ torch::ScalarType get_best_dtype() {
8185 }
8286}
8387
84- int bridge_tensor_elements (bridge_tensor_t &bt) {
85- int size = 1 ;
86- for (int i = 0 ; i < bt.dim ; ++i) {
87- size *= bt.sizes [i];
88- }
89- return size;
90- }
91-
92- size_t bridge_tensor_size (bridge_tensor_t &bt) {
93- return sizeof (float32_t ) * bridge_tensor_elements (bt);
94- }
9588
9689void store_tensor (at::Tensor &input, float32_t * dest) {
97- float32_t * data = input.data_ptr <float32_t >();
98- size_t bytes_size = sizeof (float32_t ) * input.numel ();
90+ const float32_t * data = input.const_data_ptr <float32_t >();
91+ std:: size_t bytes_size = sizeof (float32_t ) * input.numel ();
9992 // std::memmove(dest,data,bytes_size);
10093 std::memcpy (dest,data,bytes_size);
10194}
10295
10396bridge_tensor_t torch_to_bridge (at::Tensor &tensor) {
10497 bridge_tensor_t result;
10598 result.created_by_c = true ;
99+ result.was_freed = false ;
100+
106101 result.dim = tensor.dim ();
107- result.sizes = new int32_t [result.dim ];
108- for (int i = 0 ; i < result.dim ; ++i) {
109- result.sizes [i] = tensor.size (i);
102+
103+ std::size_t sizes_bytes = sizeof (uint32_t ) * result.dim ;
104+ result.sizes = static_cast <uint32_t *>(malloc (sizes_bytes));
105+ for (uint32_t i = 0 ; i < result.dim ; ++i) {
106+ result.sizes [i] = static_cast <uint32_t >(tensor.size (i));
110107 }
111- result.data = new float32_t [bridge_tensor_elements (result)];
108+
109+ std::size_t data_bytes = sizeof (float32_t ) * tensor.numel ();
110+ result.data = static_cast <float32_t *>(malloc (data_bytes));
112111 store_tensor (tensor, result.data );
113112 return result;
114113}
115114
115+ extern " C" void free_bridge_tensor (bridge_tensor_t bt) {
116+ if (bt.created_by_c && !bt.was_freed ) {
117+ free (bt.sizes );
118+ free (bt.data );
119+ return ;
120+ } else if (!bt.created_by_c ) {
121+ std::cerr << " Warning: Attempting to free a tensor not created by C code." << std::endl;
122+ std::cerr.flush ();
123+ } else if (bt.was_freed ) {
124+ std::cerr << " Warning: Attempting to free a tensor that has already been freed." << std::endl;
125+ std::cerr.flush ();
126+ } else {
127+ std::cerr << " Warning: Attempting to free a tensor with an unknown state." << std::endl;
128+ std::cerr.flush ();
129+ }
130+ }
131+
132+
116133at::Tensor bridge_to_torch (bridge_tensor_t &bt) {
117134 std::vector<int64_t > sizes_vec (bt.sizes , bt.sizes + bt.dim );
118135 auto shape = torch::IntArrayRef (sizes_vec);
@@ -129,7 +146,6 @@ at::Tensor bridge_to_torch(bridge_tensor_t &bt,torch::Device device, bool copy,t
129146 return t.to (device, dtype, /* non_blocking=*/ false , /* copy=*/ true );
130147 else
131148 return t.to (device, dtype, /* non_blocking=*/ false , /* copy=*/ false );
132-
133149}
134150
135151extern " C" float32_t * unsafe (const float32_t * arr) {
@@ -229,8 +245,8 @@ extern "C" bridge_pt_model_t load_model(const uint8_t* model_path) {
229245
230246
231247
232- bridge_tensor_t model_forward (bridge_pt_model_t model, bridge_tensor_t input, bool is_vgg_based_model ) {
233- auto tn_mps = bridge_to_torch (input,best_device,true ,best_dtype);
248+ extern " C " bridge_tensor_t model_forward (bridge_pt_model_t model, bridge_tensor_t input) {
249+ auto tn_mps = bridge_to_torch (input,best_device,false ,best_dtype);
234250 // tn_mps = tn_mps.permute({2, 0, 1}).contiguous();
235251 // tn_mps.unsqueeze_(0);//.contiguous();
236252 auto tn = tn_mps.permute ({2 , 0 , 1 }).unsqueeze (0 ).contiguous ();
@@ -243,24 +259,12 @@ bridge_tensor_t model_forward(bridge_pt_model_t model, bridge_tensor_t input, bo
243259 // auto tn_out = o.squeeze(0).permute({1, 2, 0}).contiguous();
244260 auto tn_out = o.squeeze (0 ).contiguous ().permute ({1 , 2 , 0 }).contiguous ();
245261
246- if (is_vgg_based_model) {
247- tn_out.div_ (255.0 );
248- }
249-
250- auto tn_out_cpu = tn_out.to (torch::kCPU ,torch::kFloat32 ,false ,true );
262+ auto tn_out_cpu = tn_out.to (torch::kCPU ,torch::kFloat32 ,false ,false );
251263
252264 return torch_to_bridge (tn_out_cpu);
253265
254266}
255267
256- extern " C" bridge_tensor_t model_forward (bridge_pt_model_t model, bridge_tensor_t input) {
257- return model_forward (model, input, false );
258- }
259-
260- extern " C" bridge_tensor_t model_forward_style_transfer (bridge_pt_model_t model, bridge_tensor_t input) {
261- return model_forward (model, input, true );
262- }
263-
264268// std::tuple<uint64_t, uint64_t> get_cpu_frame_size(uint64_t width, uint64_t height, float32_t scale_factor) {
265269// // if (best_device == torch::kMPS || best_device == torch::kCUDA)
266270// if (accelerator_available())
0 commit comments