Skip to content

Commit 1159e35

Browse files
committed
keep same data
1 parent 1b79294 commit 1159e35

File tree

3 files changed

+63
-25
lines changed

3 files changed

+63
-25
lines changed

onnxruntime/core/common/profiler.cc

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,11 @@ profiling::Profiler::~Profiler() {}
2020
#endif
2121

2222
::onnxruntime::TimePoint profiling::Profiler::Start() {
23+
return Start(std::chrono::high_resolution_clock::now());
24+
}
25+
26+
::onnxruntime::TimePoint profiling::Profiler::Start(const TimePoint& start_time) {
2327
ORT_ENFORCE(enabled_);
24-
auto start_time = std::chrono::high_resolution_clock::now();
2528
auto ts = TimeDiffMicroSeconds(profiling_start_time_, start_time);
2629
for (const auto& ep_profiler : ep_profilers_) {
2730
ep_profiler->Start(ts);
@@ -75,8 +78,17 @@ void Profiler::EndTimeAndRecordEvent(EventCategory category,
7578
const std::string& event_name,
7679
const TimePoint& start_time,
7780
const std::initializer_list<std::pair<std::string, std::string>>& event_args,
81+
bool sync_gpu) {
82+
EndTimeAndRecordEvent(category, event_name, start_time, std::chrono::high_resolution_clock::now(), event_args, sync_gpu);
83+
}
84+
85+
void Profiler::EndTimeAndRecordEvent(EventCategory category,
86+
const std::string& event_name,
87+
const TimePoint& start_time,
88+
const TimePoint& end_time,
89+
const std::initializer_list<std::pair<std::string, std::string>>& event_args,
7890
bool /*sync_gpu*/) {
79-
long long dur = TimeDiffMicroSeconds(start_time);
91+
long long dur = TimeDiffMicroSeconds(start_time, end_time);
8092
long long ts = TimeDiffMicroSeconds(profiling_start_time_, start_time);
8193

8294
EventRecord event(category, logging::GetProcessId(),

onnxruntime/core/common/profiler.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ class Profiler {
5454
*/
5555
TimePoint Start();
5656

57+
/*
58+
Start profiling with a specific start time.
59+
*/
60+
TimePoint Start(const TimePoint& start_time);
61+
5762
/*
5863
Whether data collection and output from this profiler is enabled.
5964
*/
@@ -80,6 +85,13 @@ class Profiler {
8085
const std::initializer_list<std::pair<std::string, std::string>>& event_args = {},
8186
bool sync_gpu = false);
8287

88+
void EndTimeAndRecordEvent(EventCategory category,
89+
const std::string& event_name,
90+
const TimePoint& start_time,
91+
const TimePoint& end_time,
92+
const std::initializer_list<std::pair<std::string, std::string>>& event_args = {},
93+
bool sync_gpu = false);
94+
8395
/*
8496
Write profile data to the given stream in chrome format defined below.
8597
https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview#

onnxruntime/core/framework/sequential_executor.cc

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -173,16 +173,21 @@ class SessionScope {
173173
#endif
174174
#ifdef DEBUG_NODE_INPUTS_OUTPUTS
175175
,
176-
dump_context_{
177-
session_state_.GetGraphExecutionCounter(), 0}
176+
dump_context_{session_state_.GetGraphExecutionCounter(), 0}
178177
#endif
179178
{
180179
// 分别启动两个 Profiler 的计时
181-
if (session_state_.Profiler().IsEnabled()) {
182-
session_start_ = session_state_.Profiler().Start();
183-
}
184-
if (run_profiler_ && run_profiler_->IsEnabled()) {
185-
run_profiler_start_ = run_profiler_->Start();
180+
bool session_profiling_enabled = session_state_.Profiler().IsEnabled();
181+
bool run_profiling_enabled = run_profiler_ && run_profiler_->IsEnabled();
182+
183+
if (session_profiling_enabled || run_profiling_enabled) {
184+
auto now = std::chrono::high_resolution_clock::now();
185+
if (session_profiling_enabled) {
186+
session_start_ = session_state_.Profiler().Start(now);
187+
}
188+
if (run_profiling_enabled) {
189+
run_profiler_start_ = run_profiler_->Start(now);
190+
}
186191
}
187192

188193
auto& logger = session_state_.Logger();
@@ -230,11 +235,17 @@ class SessionScope {
230235
#endif
231236

232237
// 分别记录 Session 级别的结束事件
233-
if (session_state_.Profiler().IsEnabled()) {
234-
session_state_.Profiler().EndTimeAndRecordEvent(profiling::SESSION_EVENT, "SequentialExecutor::Execute", session_start_);
235-
}
236-
if (run_profiler_ && run_profiler_->IsEnabled()) {
237-
run_profiler_->EndTimeAndRecordEvent(profiling::SESSION_EVENT, "SequentialExecutor::Execute", run_profiler_start_);
238+
bool session_profiling_enabled = session_state_.Profiler().IsEnabled();
239+
bool run_profiling_enabled = run_profiler_ && run_profiler_->IsEnabled();
240+
241+
if (session_profiling_enabled || run_profiling_enabled) {
242+
auto now = std::chrono::high_resolution_clock::now();
243+
if (session_profiling_enabled) {
244+
session_state_.Profiler().EndTimeAndRecordEvent(profiling::SESSION_EVENT, "SequentialExecutor::Execute", session_start_, now);
245+
}
246+
if (run_profiling_enabled) {
247+
run_profiler_->EndTimeAndRecordEvent(profiling::SESSION_EVENT, "SequentialExecutor::Execute", run_profiler_start_, now);
248+
}
238249
}
239250

240251
#if !defined(ORT_MINIMAL_BUILD) && defined(ORT_MEMORY_PROFILE)
@@ -265,7 +276,7 @@ class SessionScope {
265276
const SessionState& session_state_;
266277
profiling::Profiler* run_profiler_;
267278
TimePoint session_start_;
268-
TimePoint run_profiler_start_; // 新增:记录 Run Profiler 的开始时间
279+
TimePoint run_profiler_start_; // 新增:记录 Run Profiler 的开始时间
269280
#if !defined(ORT_MINIMAL_BUILD) && defined(ORT_MEMORY_PROFILE)
270281
const ExecutionFrame& frame_;
271282
#endif
@@ -360,12 +371,12 @@ class KernelScope {
360371
concurrency::ThreadPool::StartProfiling(session_state_.GetThreadPool());
361372
VLOGS(session_state_.Logger(), 1) << "Computing kernel: " << node_name_;
362373

363-
// 分别记录开始时间
374+
auto now = std::chrono::high_resolution_clock::now();
364375
if (session_profiling_enabled) {
365-
kernel_begin_time_ = session_state_.Profiler().Start();
376+
kernel_begin_time_ = session_state_.Profiler().Start(now);
366377
}
367378
if (run_profiling_enabled) {
368-
run_kernel_begin_time_ = session_scope_.run_profiler_->Start();
379+
run_kernel_begin_time_ = session_scope_.run_profiler_->Start(now);
369380
}
370381

371382
CalculateTotalInputSizes(&kernel_context, &kernel_,
@@ -403,18 +414,21 @@ class KernelScope {
403414
};
404415

405416
// 分别记录结束事件
417+
auto now = std::chrono::high_resolution_clock::now();
406418
if (session_profiling_enabled) {
407419
session_state_.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
408-
node_name_ + "_kernel_time",
409-
kernel_begin_time_,
410-
event_args);
420+
node_name_ + "_kernel_time",
421+
kernel_begin_time_,
422+
now,
423+
event_args);
411424
}
412425

413426
if (run_profiling_enabled) {
414427
session_scope_.run_profiler_->EndTimeAndRecordEvent(profiling::NODE_EVENT,
415-
node_name_ + "_kernel_time",
416-
run_kernel_begin_time_,
417-
event_args);
428+
node_name_ + "_kernel_time",
429+
run_kernel_begin_time_,
430+
now,
431+
event_args);
418432
}
419433
}
420434

@@ -450,7 +464,7 @@ class KernelScope {
450464
std::string input_type_shape_;
451465

452466
TimePoint kernel_begin_time_;
453-
TimePoint run_kernel_begin_time_; // 新增:记录 Run Profiler 的 Kernel 开始时间
467+
TimePoint run_kernel_begin_time_; // 新增:记录 Run Profiler 的 Kernel 开始时间
454468

455469
#ifdef CONCURRENCY_VISUALIZER
456470
diagnostic::span span_;

0 commit comments

Comments
 (0)