77#include < ze_api.h>
88#include < ze_graph_ext.h>
99
10+ #include < chrono>
11+ #include < cstdio>
12+
1013#include " intel_npu/common/itt.hpp"
1114#include " intel_npu/config/options.hpp"
1215#include " intel_npu/utils/logger/logger.hpp"
@@ -110,6 +113,8 @@ Pipeline::Pipeline(const Config& config,
110113 }
111114 }
112115
116+ const auto t_cmdlist_start = std::chrono::steady_clock::now ();
117+
113118 _command_lists.reserve (_number_of_command_lists);
114119 for (size_t i = 0 ; i < _number_of_command_lists; i++) {
115120 _command_lists.emplace_back (
@@ -124,6 +129,10 @@ Pipeline::Pipeline(const Config& config,
124129 }
125130 }
126131
132+ const auto t_cmdlist_end = std::chrono::steady_clock::now ();
133+ const auto t_setargs_start = std::chrono::steady_clock::now ();
134+ size_t total_set_argument_calls = 0 ;
135+
127136 for (size_t i = 0 ; i < _number_of_command_lists; i++) {
128137 _logger.debug (" Pipeline - set args for command list number: %zu" , i);
129138 size_t io_index = 0 ;
@@ -146,6 +155,7 @@ Pipeline::Pipeline(const Config& config,
146155 tensor->data (),
147156 get_strides (tensor->get_strides (), tensor->get_element_type ().size ()));
148157 }
158+ ++total_set_argument_calls;
149159 ++io_index;
150160 continue ;
151161 }
@@ -162,6 +172,7 @@ Pipeline::Pipeline(const Config& config,
162172 get_strides (tensor->get_strides (), tensor->get_element_type ().size ()));
163173 }
164174
175+ ++total_set_argument_calls;
165176 ++io_index;
166177 }
167178
@@ -178,6 +189,7 @@ Pipeline::Pipeline(const Config& config,
178189 static_cast <unsigned char *>(tensor->data ()) + (i * tensor->get_strides ()[0 ]),
179190 get_strides (tensor->get_strides (), tensor->get_element_type ().size ()));
180191 }
192+ ++total_set_argument_calls;
181193 ++io_index;
182194 }
183195
@@ -217,6 +229,18 @@ Pipeline::Pipeline(const Config& config,
217229 _events.at (i)->AppendSignalEvent (*_command_lists.at (i));
218230 }
219231 }
232+
233+ const auto t_setargs_end = std::chrono::steady_clock::now ();
234+ const auto cmdlist_us =
235+ std::chrono::duration_cast<std::chrono::microseconds>(t_cmdlist_end - t_cmdlist_start).count ();
236+ const auto setargs_us =
237+ std::chrono::duration_cast<std::chrono::microseconds>(t_setargs_end - t_setargs_start).count ();
238+ fprintf (stderr, " [Pipeline] Pipeline() breakdown: command_list_create=%.3f ms, "
239+ " set_argument_values=%.3f ms (%zu calls)\n " ,
240+ cmdlist_us / 1000.0 ,
241+ setargs_us / 1000.0 ,
242+ total_set_argument_calls);
243+
220244 _logger.debug (" Pipeline - initialize completed" );
221245}
222246
@@ -249,6 +273,8 @@ void Pipeline::push() {
249273 _graph->set_last_submitted_id (_id);
250274 }
251275
276+ const auto t_push_start = std::chrono::steady_clock::now ();
277+
252278 for (size_t i = 0 ; i < _command_lists.size (); ++i) {
253279 _command_lists.at (i)->close ();
254280
@@ -260,13 +286,23 @@ void Pipeline::push() {
260286 }
261287 }
262288
289+ if (_infer_count < 2 ) {
290+ const auto t_push_end = std::chrono::steady_clock::now ();
291+ const auto push_us =
292+ std::chrono::duration_cast<std::chrono::microseconds>(t_push_end - t_push_start).count ();
293+ fprintf (stderr, " [Pipeline] push() #%zu: %.3f ms (close+execute)\n " ,
294+ _infer_count, push_us / 1000.0 );
295+ }
296+
263297 _logger.debug (" Pipeline - push() completed" );
264298};
265299
266300void Pipeline::pull () {
267301 _logger.debug (" Pipeline - pull() started" );
268302 OV_ITT_TASK_CHAIN (ZERO_PIPELINE_IP_PULL, itt::domains::LevelZeroBackend, " Pipeline" , " pull" );
269303
304+ const auto t_pull_start = std::chrono::steady_clock::now ();
305+
270306 for (size_t i = 0 ; i < _command_lists.size (); ++i) {
271307 if (_sync_output_with_fences) {
272308 _fences.at (i)->hostSynchronize ();
@@ -279,6 +315,16 @@ void Pipeline::pull() {
279315 }
280316 }
281317
318+ if (_infer_count < 2 ) {
319+ const auto t_pull_end = std::chrono::steady_clock::now ();
320+ const auto pull_us =
321+ std::chrono::duration_cast<std::chrono::microseconds>(t_pull_end - t_pull_start).count ();
322+ fprintf (stderr, " [Pipeline] pull() #%zu: %.3f ms (hostSynchronize)\n " ,
323+ _infer_count, pull_us / 1000.0 );
324+ }
325+
326+ ++_infer_count;
327+
282328 _logger.debug (" Pipeline - pull() completed" );
283329};
284330
0 commit comments