Skip to content

Commit 60ed832

Browse files
author
Darshak Bhatti
committed
cp
1 parent 735e69a commit 60ed832

File tree

7 files changed

+228
-7
lines changed

7 files changed

+228
-7
lines changed

onnxruntime/core/platform/telemetry.cc

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,4 +157,34 @@ void Telemetry::LogProviderOptions(const std::string& provider_id,
157157
ORT_UNUSED_PARAMETER(captureState);
158158
}
159159

160+
void Telemetry::LogModelLoadStart(uint32_t session_id) const {
161+
ORT_UNUSED_PARAMETER(session_id);
162+
}
163+
164+
void Telemetry::LogModelLoadEnd(uint32_t session_id, const common::Status& status) const {
165+
ORT_UNUSED_PARAMETER(session_id);
166+
ORT_UNUSED_PARAMETER(status);
167+
}
168+
169+
void Telemetry::LogSessionCreationEnd(uint32_t session_id,
170+
const common::Status& status) const {
171+
ORT_UNUSED_PARAMETER(session_id);
172+
ORT_UNUSED_PARAMETER(status);
173+
}
174+
175+
void Telemetry::LogRunStart(uint32_t session_id, uint64_t run_id) const {
176+
ORT_UNUSED_PARAMETER(session_id);
177+
ORT_UNUSED_PARAMETER(run_id);
178+
}
179+
180+
void Telemetry::LogRegisterEpLibraryStart(const std::string& registration_name) const {
181+
ORT_UNUSED_PARAMETER(registration_name);
182+
}
183+
184+
void Telemetry::LogRegisterEpLibraryEnd(const std::string& registration_name,
185+
const common::Status& status) const {
186+
ORT_UNUSED_PARAMETER(registration_name);
187+
ORT_UNUSED_PARAMETER(status);
188+
}
189+
160190
} // namespace onnxruntime

onnxruntime/core/platform/telemetry.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,20 @@ class Telemetry {
101101
const std::string& provider_options_string,
102102
bool captureState) const;
103103

104+
virtual void LogModelLoadStart(uint32_t session_id) const;
105+
106+
virtual void LogModelLoadEnd(uint32_t session_id, const common::Status& status) const;
107+
108+
virtual void LogSessionCreationEnd(uint32_t session_id,
109+
const common::Status& status) const;
110+
111+
virtual void LogRunStart(uint32_t session_id, uint64_t run_id) const;
112+
113+
virtual void LogRegisterEpLibraryStart(const std::string& registration_name) const;
114+
115+
virtual void LogRegisterEpLibraryEnd(const std::string& registration_name,
116+
const common::Status& status) const;
117+
104118
private:
105119
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Telemetry);
106120
};

onnxruntime/core/platform/windows/telemetry.cc

Lines changed: 116 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,8 @@ void WindowsTelemetry::LogCompileModelStart(uint32_t session_id,
457457
TraceLoggingInt32(graph_optimization_level, "graphOptimizationLevel"),
458458
TraceLoggingBool(embed_ep_context, "embedEpContext"),
459459
TraceLoggingBool(has_external_initializers_file, "hasExternalInitializersFile"),
460-
TraceLoggingString(execution_provider_string.c_str(), "executionProviderIds"));
460+
TraceLoggingString(execution_provider_string.c_str(), "executionProviderIds"),
461+
TraceLoggingString(ORT_CALLER_FRAMEWORK, "frameworkName"));
461462
}
462463

463464
void WindowsTelemetry::LogCompileModelComplete(uint32_t session_id,
@@ -480,7 +481,8 @@ void WindowsTelemetry::LogCompileModelComplete(uint32_t session_id,
480481
TraceLoggingBool(success, "success"),
481482
TraceLoggingUInt32(error_code, "errorCode"),
482483
TraceLoggingUInt32(error_category, "errorCategory"),
483-
TraceLoggingString(error_message.c_str(), "errorMessage"));
484+
TraceLoggingString(error_message.c_str(), "errorMessage"),
485+
TraceLoggingString(ORT_CALLER_FRAMEWORK, "frameworkName"));
484486
}
485487

486488
void WindowsTelemetry::LogRuntimeError(uint32_t session_id, const common::Status& status, const char* file,
@@ -548,6 +550,7 @@ void WindowsTelemetry::LogRuntimePerf(uint32_t session_id, uint32_t total_runs_s
548550
TraceLoggingBool(true, "UTCReplace_AppSessionGuid"),
549551
TelemetryPrivacyDataTag(PDT_ProductAndServicePerformance),
550552
TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES),
553+
TraceLoggingLevel(WINEVENT_LEVEL_INFO),
551554
// Telemetry info
552555
TraceLoggingUInt8(0, "schemaVersion"),
553556
TraceLoggingUInt32(session_id, "sessionId"),
@@ -668,4 +671,115 @@ void WindowsTelemetry::LogProviderOptions(const std::string& provider_id, const
668671
}
669672
}
670673

674+
void WindowsTelemetry::LogModelLoadStart(uint32_t session_id) const {
675+
if (global_register_count_ == 0 || enabled_ == false)
676+
return;
677+
678+
TraceLoggingWrite(telemetry_provider_handle,
679+
"ModelLoadStart",
680+
TraceLoggingBool(true, "UTCReplace_AppSessionGuid"),
681+
TelemetryPrivacyDataTag(PDT_ProductAndServiceUsage),
682+
TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES),
683+
TraceLoggingLevel(WINEVENT_LEVEL_INFO),
684+
// Telemetry info
685+
TraceLoggingUInt8(0, "schemaVersion"),
686+
TraceLoggingUInt32(session_id, "sessionId"),
687+
TraceLoggingString(ORT_CALLER_FRAMEWORK, "frameworkName"));
688+
}
689+
690+
void WindowsTelemetry::LogModelLoadEnd(uint32_t session_id, const common::Status& status) const {
691+
if (global_register_count_ == 0 || enabled_ == false)
692+
return;
693+
694+
TraceLoggingWrite(telemetry_provider_handle,
695+
"ModelLoadEnd",
696+
TraceLoggingBool(true, "UTCReplace_AppSessionGuid"),
697+
TelemetryPrivacyDataTag(PDT_ProductAndServicePerformance),
698+
TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES),
699+
TraceLoggingLevel(WINEVENT_LEVEL_INFO),
700+
// Telemetry info
701+
TraceLoggingUInt8(0, "schemaVersion"),
702+
TraceLoggingUInt32(session_id, "sessionId"),
703+
TraceLoggingBool(status.IsOK(), "isSuccess"),
704+
TraceLoggingUInt32(status.Code(), "errorCode"),
705+
TraceLoggingUInt32(status.Category(), "errorCategory"),
706+
TraceLoggingString(status.IsOK() ? "" : status.ErrorMessage().c_str(), "errorMessage"),
707+
TraceLoggingString(ORT_CALLER_FRAMEWORK, "frameworkName"));
708+
}
709+
710+
void WindowsTelemetry::LogSessionCreationEnd(uint32_t session_id,
711+
const common::Status& status) const {
712+
if (global_register_count_ == 0 || enabled_ == false)
713+
return;
714+
715+
TraceLoggingWrite(telemetry_provider_handle,
716+
"SessionCreationEnd",
717+
TraceLoggingBool(true, "UTCReplace_AppSessionGuid"),
718+
TelemetryPrivacyDataTag(PDT_ProductAndServicePerformance),
719+
TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES),
720+
TraceLoggingLevel(WINEVENT_LEVEL_INFO),
721+
// Telemetry info
722+
TraceLoggingUInt8(0, "schemaVersion"),
723+
TraceLoggingUInt32(session_id, "sessionId"),
724+
TraceLoggingBool(status.IsOK(), "isSuccess"),
725+
TraceLoggingUInt32(status.Code(), "errorCode"),
726+
TraceLoggingUInt32(status.Category(), "errorCategory"),
727+
TraceLoggingString(status.IsOK() ? "" : status.ErrorMessage().c_str(), "errorMessage"),
728+
TraceLoggingString(ORT_CALLER_FRAMEWORK, "frameworkName"));
729+
}
730+
731+
void WindowsTelemetry::LogRunStart(uint32_t session_id, uint64_t run_id) const {
732+
if (global_register_count_ == 0 || enabled_ == false)
733+
return;
734+
735+
TraceLoggingWrite(telemetry_provider_handle,
736+
"RunStart",
737+
TraceLoggingBool(true, "UTCReplace_AppSessionGuid"),
738+
TelemetryPrivacyDataTag(PDT_ProductAndServiceUsage),
739+
TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES),
740+
TraceLoggingLevel(WINEVENT_LEVEL_INFO),
741+
// Telemetry info
742+
TraceLoggingUInt8(0, "schemaVersion"),
743+
TraceLoggingUInt32(session_id, "sessionId"),
744+
TraceLoggingUInt64(run_id, "runId"),
745+
TraceLoggingString(ORT_CALLER_FRAMEWORK, "frameworkName"));
746+
}
747+
748+
void WindowsTelemetry::LogRegisterEpLibraryStart(const std::string& registration_name) const {
749+
if (global_register_count_ == 0 || enabled_ == false)
750+
return;
751+
752+
TraceLoggingWrite(telemetry_provider_handle,
753+
"RegisterEpLibraryStart",
754+
TraceLoggingBool(true, "UTCReplace_AppSessionGuid"),
755+
TelemetryPrivacyDataTag(PDT_ProductAndServiceUsage),
756+
TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES),
757+
TraceLoggingLevel(WINEVENT_LEVEL_INFO),
758+
// Telemetry info
759+
TraceLoggingUInt8(0, "schemaVersion"),
760+
TraceLoggingString(registration_name.c_str(), "registrationName"),
761+
TraceLoggingString(ORT_CALLER_FRAMEWORK, "frameworkName"));
762+
}
763+
764+
void WindowsTelemetry::LogRegisterEpLibraryEnd(const std::string& registration_name,
765+
const common::Status& status) const {
766+
if (global_register_count_ == 0 || enabled_ == false)
767+
return;
768+
769+
TraceLoggingWrite(telemetry_provider_handle,
770+
"RegisterEpLibraryEnd",
771+
TraceLoggingBool(true, "UTCReplace_AppSessionGuid"),
772+
TelemetryPrivacyDataTag(PDT_ProductAndServicePerformance),
773+
TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES),
774+
TraceLoggingLevel(WINEVENT_LEVEL_INFO),
775+
// Telemetry info
776+
TraceLoggingUInt8(0, "schemaVersion"),
777+
TraceLoggingString(registration_name.c_str(), "registrationName"),
778+
TraceLoggingBool(status.IsOK(), "isSuccess"),
779+
TraceLoggingUInt32(status.Code(), "errorCode"),
780+
TraceLoggingUInt32(status.Category(), "errorCategory"),
781+
TraceLoggingString(status.IsOK() ? "" : status.ErrorMessage().c_str(), "errorMessage"),
782+
TraceLoggingString(ORT_CALLER_FRAMEWORK, "frameworkName"));
783+
}
784+
671785
} // namespace onnxruntime

onnxruntime/core/platform/windows/telemetry.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,20 @@ class WindowsTelemetry : public Telemetry {
9494
const std::string& provider_options_string,
9595
bool captureState) const override;
9696

97+
void LogModelLoadStart(uint32_t session_id) const override;
98+
99+
void LogModelLoadEnd(uint32_t session_id, const common::Status& status) const override;
100+
101+
void LogSessionCreationEnd(uint32_t session_id,
102+
const common::Status& status) const override;
103+
104+
void LogRunStart(uint32_t session_id, uint64_t run_id) const override;
105+
106+
void LogRegisterEpLibraryStart(const std::string& registration_name) const override;
107+
108+
void LogRegisterEpLibraryEnd(const std::string& registration_name,
109+
const common::Status& status) const override;
110+
97111
using EtwInternalCallback = std::function<void(LPCGUID SourceId, ULONG IsEnabled, UCHAR Level,
98112
ULONGLONG MatchAnyKeyword, ULONGLONG MatchAllKeyword,
99113
PEVENT_FILTER_DESCRIPTOR FilterData, PVOID CallbackContext)>;

onnxruntime/core/session/environment.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,8 +539,13 @@ bool AreVirtualDevicesAllowed(std::string_view lib_registration_name) {
539539
Status Environment::RegisterExecutionProviderLibrary(const std::string& registration_name,
540540
std::unique_ptr<EpLibrary> ep_library,
541541
const std::vector<EpFactoryInternal*>& internal_factories) {
542+
const Env& env = Env::Default();
543+
env.GetTelemetryProvider().LogRegisterEpLibraryStart(registration_name);
544+
542545
if (ep_libraries_.count(registration_name) > 0) {
543-
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "library is already registered under ", registration_name);
546+
auto status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "library is already registered under ", registration_name);
547+
env.GetTelemetryProvider().LogRegisterEpLibraryEnd(registration_name, status);
548+
return status;
544549
}
545550

546551
auto status = Status::OK();
@@ -592,6 +597,7 @@ Status Environment::RegisterExecutionProviderLibrary(const std::string& registra
592597
});
593598
}
594599

600+
env.GetTelemetryProvider().LogRegisterEpLibraryEnd(registration_name, status);
595601
return status;
596602
}
597603

onnxruntime/core/session/inference_session.cc

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -976,6 +976,9 @@ common::Status InferenceSession::LoadWithLoader(std::function<common::Status(std
976976
tp = session_profiler_.Start();
977977
}
978978
ORT_TRY {
979+
const Env& env = Env::Default();
980+
env.GetTelemetryProvider().LogModelLoadStart(session_id_);
981+
979982
std::lock_guard<std::mutex> l(session_mutex_);
980983
if (is_model_loaded_) { // already loaded
981984
LOGS(*session_logger_, ERROR) << "This session already contains a loaded model.";
@@ -1011,6 +1014,11 @@ common::Status InferenceSession::LoadWithLoader(std::function<common::Status(std
10111014
session_profiler_.EndTimeAndRecordEvent(profiling::SESSION_EVENT, event_name, tp);
10121015
}
10131016

1017+
{
1018+
const Env& env = Env::Default();
1019+
env.GetTelemetryProvider().LogModelLoadEnd(session_id_, status);
1020+
}
1021+
10141022
return status;
10151023
}
10161024

@@ -1647,6 +1655,9 @@ Status InferenceSession::LoadOrtModel(const void* model_data, int model_data_len
16471655
}
16481656

16491657
Status InferenceSession::LoadOrtModelWithLoader(std::function<Status()> load_ort_format_model_bytes) {
1658+
const Env& env = Env::Default();
1659+
env.GetTelemetryProvider().LogModelLoadStart(session_id_);
1660+
16501661
std::lock_guard<std::mutex> l(session_mutex_);
16511662

16521663
if (is_model_loaded_) { // already loaded
@@ -1767,6 +1778,8 @@ Status InferenceSession::LoadOrtModelWithLoader(std::function<Status()> load_ort
17671778

17681779
is_model_loaded_ = true;
17691780

1781+
env.GetTelemetryProvider().LogModelLoadEnd(session_id_, Status::OK());
1782+
17701783
return Status::OK();
17711784
}
17721785

@@ -2618,6 +2631,12 @@ common::Status InferenceSession::Initialize() {
26182631
}
26192632
}
26202633

2634+
// Log session creation end telemetry
2635+
{
2636+
const Env& init_env = Env::Default();
2637+
init_env.GetTelemetryProvider().LogSessionCreationEnd(session_id_, status);
2638+
}
2639+
26212640
return status;
26222641
}
26232642
#if defined(_MSC_VER) && !defined(__clang__)
@@ -3015,6 +3034,22 @@ Status InferenceSession::Run(const RunOptions& run_options,
30153034
Status retval = Status::OK();
30163035
const Env& env = Env::Default();
30173036

3037+
// Assign a unique run_id for telemetry correlation
3038+
const uint64_t run_id = run_id_counter_.fetch_add(1);
3039+
3040+
// Determine whether to emit Run telemetry (LogRunStart + LogRuntimePerf pair)
3041+
bool emit_run_telemetry = false;
3042+
{
3043+
std::lock_guard<std::mutex> telemetry_lock(telemetry_mutex_);
3044+
if (TimeDiffMicroSeconds(telemetry_.time_sent_last_) > telemetry_.runtime_perf_interval_) {
3045+
emit_run_telemetry = true;
3046+
}
3047+
}
3048+
3049+
if (emit_run_telemetry) {
3050+
env.GetTelemetryProvider().LogRunStart(session_id_, run_id);
3051+
}
3052+
30183053
int graph_annotation_id = 0;
30193054
const std::string& graph_annotation_str =
30203055
run_options.config_options.GetConfigOrDefault(kOrtRunOptionsConfigCudaGraphAnnotation, "");
@@ -3210,8 +3245,8 @@ Status InferenceSession::Run(const RunOptions& run_options,
32103245
telemetry_.total_run_duration_since_last_ += TimeDiffMicroSeconds(tp);
32113246
telemetry_.duration_per_batch_size_[batch_size] += TimeDiffMicroSeconds(tp);
32123247

3213-
if (TimeDiffMicroSeconds(telemetry_.time_sent_last_) > Telemetry::kDurationBetweenSending) {
3214-
// send the telemetry
3248+
// Emit RuntimePerf paired with the LogRunStart that fired at entry
3249+
if (emit_run_telemetry) {
32153250
env.GetTelemetryProvider().LogRuntimePerf(session_id_, telemetry_.total_runs_since_last_,
32163251
telemetry_.total_run_duration_since_last_,
32173252
telemetry_.duration_per_batch_size_);
@@ -3220,6 +3255,10 @@ Status InferenceSession::Run(const RunOptions& run_options,
32203255
telemetry_.total_runs_since_last_ = 0;
32213256
telemetry_.total_run_duration_since_last_ = 0;
32223257
telemetry_.duration_per_batch_size_.clear();
3258+
3259+
// Double the interval, capping at kRuntimePerfMaxInterval
3260+
telemetry_.runtime_perf_interval_ = std::min(telemetry_.runtime_perf_interval_ * 2,
3261+
Telemetry::kRuntimePerfMaxInterval);
32233262
}
32243263
}
32253264

onnxruntime/core/session/inference_session.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -976,10 +976,14 @@ class InferenceSession {
976976
std::unordered_map<int64_t, long long> duration_per_batch_size_; // the duration (us) of Run() calls per batch size since the last report
977977

978978
TimePoint time_sent_last_; // the TimePoint of the last report
979-
// Event Rate per provider < 20 peak events per second
980-
constexpr static long long kDurationBetweenSending = 1000 * 1000 * 60 * 10; // duration in (us). send a report every 10 mins
979+
// RuntimePerf backoff: starts at 2s, doubles each emission, caps at 10 min
980+
constexpr static long long kRuntimePerfInitialInterval = 2 * 1000 * 1000; // 2 seconds in (us)
981+
constexpr static long long kRuntimePerfMaxInterval = 1000 * 1000 * 60 * 10; // 10 minutes in (us)
982+
long long runtime_perf_interval_ = kRuntimePerfInitialInterval;
981983
} telemetry_;
982984

985+
std::atomic<uint64_t> run_id_counter_{0}; // monotonically increasing run id for correlation
986+
983987
mutable std::mutex telemetry_mutex_; // to ensure thread-safe access to telemetry data
984988

985989
#ifdef ONNXRUNTIME_ENABLE_INSTRUMENT

0 commit comments

Comments
 (0)