diff --git a/onnxruntime/core/platform/telemetry.cc b/onnxruntime/core/platform/telemetry.cc index bb9130add215a..3ee57e62e2672 100644 --- a/onnxruntime/core/platform/telemetry.cc +++ b/onnxruntime/core/platform/telemetry.cc @@ -157,4 +157,39 @@ void Telemetry::LogProviderOptions(const std::string& provider_id, ORT_UNUSED_PARAMETER(captureState); } +void Telemetry::LogModelLoadStart(uint32_t session_id) const { + ORT_UNUSED_PARAMETER(session_id); +} + +void Telemetry::LogModelLoadEnd(uint32_t session_id, const common::Status& status) const { + ORT_UNUSED_PARAMETER(session_id); + ORT_UNUSED_PARAMETER(status); +} + +void Telemetry::LogSessionCreationEnd(uint32_t session_id, + const common::Status& status) const { + ORT_UNUSED_PARAMETER(session_id); + ORT_UNUSED_PARAMETER(status); +} + +void Telemetry::LogRunStart(uint32_t session_id) const { + ORT_UNUSED_PARAMETER(session_id); +} + +void Telemetry::LogRegisterEpLibraryWithLibPath(const std::string& registration_name, + const std::string& lib_path) const { + ORT_UNUSED_PARAMETER(registration_name); + ORT_UNUSED_PARAMETER(lib_path); +} + +void Telemetry::LogRegisterEpLibraryStart(const std::string& registration_name) const { + ORT_UNUSED_PARAMETER(registration_name); +} + +void Telemetry::LogRegisterEpLibraryEnd(const std::string& registration_name, + const common::Status& status) const { + ORT_UNUSED_PARAMETER(registration_name); + ORT_UNUSED_PARAMETER(status); +} + } // namespace onnxruntime diff --git a/onnxruntime/core/platform/telemetry.h b/onnxruntime/core/platform/telemetry.h index e74d7ed0180fd..47fd2ca1a9e12 100644 --- a/onnxruntime/core/platform/telemetry.h +++ b/onnxruntime/core/platform/telemetry.h @@ -101,6 +101,23 @@ class Telemetry { const std::string& provider_options_string, bool captureState) const; + virtual void LogModelLoadStart(uint32_t session_id) const; + + virtual void LogModelLoadEnd(uint32_t session_id, const common::Status& status) const; + + virtual void LogSessionCreationEnd(uint32_t session_id, + const common::Status& status) const; + + virtual void LogRunStart(uint32_t session_id) const; + + virtual void LogRegisterEpLibraryWithLibPath(const std::string& registration_name, + const std::string& lib_path) const; + + virtual void LogRegisterEpLibraryStart(const std::string& registration_name) const; + + virtual void LogRegisterEpLibraryEnd(const std::string& registration_name, + const common::Status& status) const; + private: ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Telemetry); }; diff --git a/onnxruntime/core/platform/windows/telemetry.cc b/onnxruntime/core/platform/windows/telemetry.cc index 6d5a400be703b..bbae6daa1e9d7 100644 --- a/onnxruntime/core/platform/windows/telemetry.cc +++ b/onnxruntime/core/platform/windows/telemetry.cc @@ -457,7 +457,8 @@ void WindowsTelemetry::LogCompileModelStart(uint32_t session_id, TraceLoggingInt32(graph_optimization_level, "graphOptimizationLevel"), TraceLoggingBool(embed_ep_context, "embedEpContext"), TraceLoggingBool(has_external_initializers_file, "hasExternalInitializersFile"), - TraceLoggingString(execution_provider_string.c_str(), "executionProviderIds")); + TraceLoggingString(execution_provider_string.c_str(), "executionProviderIds"), + TraceLoggingString(ORT_CALLER_FRAMEWORK, "frameworkName")); } void WindowsTelemetry::LogCompileModelComplete(uint32_t session_id, @@ -480,7 +481,8 @@ void WindowsTelemetry::LogCompileModelComplete(uint32_t session_id, TraceLoggingBool(success, "success"), TraceLoggingUInt32(error_code, "errorCode"), TraceLoggingUInt32(error_category, "errorCategory"), - TraceLoggingString(error_message.c_str(), "errorMessage")); + TraceLoggingString(error_message.c_str(), "errorMessage"), + TraceLoggingString(ORT_CALLER_FRAMEWORK, "frameworkName")); } void WindowsTelemetry::LogRuntimeError(uint32_t session_id, const common::Status& status, const char* file, @@ -668,4 +670,132 @@ void WindowsTelemetry::LogProviderOptions(const std::string& provider_id, const } } +void WindowsTelemetry::LogModelLoadStart(uint32_t session_id) const { + if (global_register_count_ == 0 || enabled_ == false) + return; + + TraceLoggingWrite(telemetry_provider_handle, + "ModelLoadStart", + TraceLoggingBool(true, "UTCReplace_AppSessionGuid"), + TelemetryPrivacyDataTag(PDT_ProductAndServiceUsage), + TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES), + TraceLoggingLevel(WINEVENT_LEVEL_INFO), + // Telemetry info + TraceLoggingUInt8(0, "schemaVersion"), + TraceLoggingUInt32(session_id, "sessionId"), + TraceLoggingString(ORT_CALLER_FRAMEWORK, "frameworkName")); +} + +void WindowsTelemetry::LogModelLoadEnd(uint32_t session_id, const common::Status& status) const { + if (global_register_count_ == 0 || enabled_ == false) + return; + + TraceLoggingWrite(telemetry_provider_handle, + "ModelLoadEnd", + TraceLoggingBool(true, "UTCReplace_AppSessionGuid"), + TelemetryPrivacyDataTag(PDT_ProductAndServicePerformance), + TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES), + TraceLoggingLevel(WINEVENT_LEVEL_INFO), + // Telemetry info + TraceLoggingUInt8(0, "schemaVersion"), + TraceLoggingUInt32(session_id, "sessionId"), + TraceLoggingBool(status.IsOK(), "isSuccess"), + TraceLoggingUInt32(status.Code(), "errorCode"), + TraceLoggingUInt32(status.Category(), "errorCategory"), + TraceLoggingString(status.IsOK() ? "" : status.ErrorMessage().c_str(), "errorMessage"), + TraceLoggingString(ORT_CALLER_FRAMEWORK, "frameworkName")); +} + +void WindowsTelemetry::LogSessionCreationEnd(uint32_t session_id, + const common::Status& status) const { + if (global_register_count_ == 0 || enabled_ == false) + return; + + TraceLoggingWrite(telemetry_provider_handle, + "SessionCreationEnd", + TraceLoggingBool(true, "UTCReplace_AppSessionGuid"), + TelemetryPrivacyDataTag(PDT_ProductAndServicePerformance), + TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES), + TraceLoggingLevel(WINEVENT_LEVEL_INFO), + // Telemetry info + TraceLoggingUInt8(0, "schemaVersion"), + TraceLoggingUInt32(session_id, "sessionId"), + TraceLoggingBool(status.IsOK(), "isSuccess"), + TraceLoggingUInt32(status.Code(), "errorCode"), + TraceLoggingUInt32(status.Category(), "errorCategory"), + TraceLoggingString(status.IsOK() ? "" : status.ErrorMessage().c_str(), "errorMessage"), + TraceLoggingString(ORT_CALLER_FRAMEWORK, "frameworkName")); +} + +void WindowsTelemetry::LogRunStart(uint32_t session_id) const { + if (global_register_count_ == 0 || enabled_ == false) + return; + + TraceLoggingWrite(telemetry_provider_handle, + "RunStart", + TraceLoggingBool(true, "UTCReplace_AppSessionGuid"), + TelemetryPrivacyDataTag(PDT_ProductAndServiceUsage), + TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES), + TraceLoggingLevel(WINEVENT_LEVEL_INFO), + // Telemetry info + TraceLoggingUInt8(0, "schemaVersion"), + TraceLoggingUInt32(session_id, "sessionId"), + TraceLoggingString(ORT_CALLER_FRAMEWORK, "frameworkName")); +} + +void WindowsTelemetry::LogRegisterEpLibraryWithLibPath(const std::string& registration_name, + const std::string& lib_path) const { + if (global_register_count_ == 0 || enabled_ == false) + return; + + TraceLoggingWrite(telemetry_provider_handle, + "RegisterEpLibraryWithLibPath", + TraceLoggingBool(true, "UTCReplace_AppSessionGuid"), + TelemetryPrivacyDataTag(PDT_ProductAndServiceUsage), + TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES), + TraceLoggingLevel(WINEVENT_LEVEL_INFO), + // Telemetry info + TraceLoggingUInt8(0, "schemaVersion"), + TraceLoggingString(registration_name.c_str(), "registrationName"), + TraceLoggingString(lib_path.c_str(), "libPath"), + TraceLoggingString(ORT_CALLER_FRAMEWORK, "frameworkName")); +} + +void WindowsTelemetry::LogRegisterEpLibraryStart(const std::string& registration_name) const { + if (global_register_count_ == 0 || enabled_ == false) + return; + + TraceLoggingWrite(telemetry_provider_handle, + "RegisterEpLibraryStart", + TraceLoggingBool(true, "UTCReplace_AppSessionGuid"), + TelemetryPrivacyDataTag(PDT_ProductAndServiceUsage), + TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES), + TraceLoggingLevel(WINEVENT_LEVEL_INFO), + // Telemetry info + TraceLoggingUInt8(0, "schemaVersion"), + TraceLoggingString(registration_name.c_str(), "registrationName"), + TraceLoggingString(ORT_CALLER_FRAMEWORK, "frameworkName")); +} + +void WindowsTelemetry::LogRegisterEpLibraryEnd(const std::string& registration_name, + const common::Status& status) const { + if (global_register_count_ == 0 || enabled_ == false) + return; + + TraceLoggingWrite(telemetry_provider_handle, + "RegisterEpLibraryEnd", + TraceLoggingBool(true, "UTCReplace_AppSessionGuid"), + TelemetryPrivacyDataTag(PDT_ProductAndServicePerformance), + TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES), + TraceLoggingLevel(WINEVENT_LEVEL_INFO), + // Telemetry info + TraceLoggingUInt8(0, "schemaVersion"), + TraceLoggingString(registration_name.c_str(), "registrationName"), + TraceLoggingBool(status.IsOK(), "isSuccess"), + TraceLoggingUInt32(status.Code(), "errorCode"), + TraceLoggingUInt32(status.Category(), "errorCategory"), + TraceLoggingString(status.IsOK() ? "" : status.ErrorMessage().c_str(), "errorMessage"), + TraceLoggingString(ORT_CALLER_FRAMEWORK, "frameworkName")); +} + } // namespace onnxruntime diff --git a/onnxruntime/core/platform/windows/telemetry.h b/onnxruntime/core/platform/windows/telemetry.h index 591a248d70ab8..caedbfbd42ece 100644 --- a/onnxruntime/core/platform/windows/telemetry.h +++ b/onnxruntime/core/platform/windows/telemetry.h @@ -94,6 +94,23 @@ class WindowsTelemetry : public Telemetry { const std::string& provider_options_string, bool captureState) const override; + void LogModelLoadStart(uint32_t session_id) const override; + + void LogModelLoadEnd(uint32_t session_id, const common::Status& status) const override; + + void LogSessionCreationEnd(uint32_t session_id, + const common::Status& status) const override; + + void LogRunStart(uint32_t session_id) const override; + + void LogRegisterEpLibraryWithLibPath(const std::string& registration_name, + const std::string& lib_path) const override; + + void LogRegisterEpLibraryStart(const std::string& registration_name) const override; + + void LogRegisterEpLibraryEnd(const std::string& registration_name, + const common::Status& status) const override; + using EtwInternalCallback = std::function; diff --git a/onnxruntime/core/session/environment.cc b/onnxruntime/core/session/environment.cc index 523ff8eaf13b8..049d70209d878 100644 --- a/onnxruntime/core/session/environment.cc +++ b/onnxruntime/core/session/environment.cc @@ -539,8 +539,13 @@ bool AreVirtualDevicesAllowed(std::string_view lib_registration_name) { Status Environment::RegisterExecutionProviderLibrary(const std::string& registration_name, std::unique_ptr ep_library, const std::vector& internal_factories) { + const Env& env = Env::Default(); + env.GetTelemetryProvider().LogRegisterEpLibraryStart(registration_name); + if (ep_libraries_.count(registration_name) > 0) { - return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "library is already registered under ", registration_name); + auto status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "library is already registered under ", registration_name); + env.GetTelemetryProvider().LogRegisterEpLibraryEnd(registration_name, status); + return status; } auto status = Status::OK(); @@ -592,6 +597,7 @@ Status Environment::RegisterExecutionProviderLibrary(const std::string& registra }); } + env.GetTelemetryProvider().LogRegisterEpLibraryEnd(registration_name, status); return status; } @@ -611,6 +617,8 @@ Status Environment::CreateAndRegisterInternalEps() { Status Environment::RegisterExecutionProviderLibrary(const std::string& registration_name, const ORTCHAR_T* lib_path) { std::lock_guard lock{mutex_}; + Env::Default().GetTelemetryProvider().LogRegisterEpLibraryWithLibPath(registration_name, ToUTF8String(lib_path)); + std::vector internal_factories = {}; std::unique_ptr ep_library; diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index c00d63d0be8a2..ef2174179a186 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -976,6 +976,9 @@ common::Status InferenceSession::LoadWithLoader(std::function l(session_mutex_); if (is_model_loaded_) { // already loaded LOGS(*session_logger_, ERROR) << "This session already contains a loaded model."; @@ -1011,6 +1014,11 @@ common::Status InferenceSession::LoadWithLoader(std::function load_ort_format_model_bytes) { + const Env& env = Env::Default(); + env.GetTelemetryProvider().LogModelLoadStart(session_id_); + std::lock_guard l(session_mutex_); if (is_model_loaded_) { // already loaded @@ -1767,6 +1778,8 @@ Status InferenceSession::LoadOrtModelWithLoader(std::function load_ort is_model_loaded_ = true; + env.GetTelemetryProvider().LogModelLoadEnd(session_id_, Status::OK()); + return Status::OK(); } @@ -2618,6 +2631,12 @@ common::Status InferenceSession::Initialize() { } } + // Log session creation end telemetry + { + const Env& init_env = Env::Default(); + init_env.GetTelemetryProvider().LogSessionCreationEnd(session_id_, status); + } + return status; } #if defined(_MSC_VER) && !defined(__clang__) @@ -3015,6 +3034,19 @@ Status InferenceSession::Run(const RunOptions& run_options, Status retval = Status::OK(); const Env& env = Env::Default(); + // Determine whether to emit Run telemetry + bool emit_run_telemetry = false; + { + std::lock_guard telemetry_lock(telemetry_mutex_); + if (TimeDiffMicroSeconds(telemetry_.time_sent_last_) > telemetry_.runtime_perf_interval_) { + emit_run_telemetry = true; + } + } + + if (emit_run_telemetry) { + env.GetTelemetryProvider().LogRunStart(session_id_); + } + int graph_annotation_id = 0; const std::string& graph_annotation_str = run_options.config_options.GetConfigOrDefault(kOrtRunOptionsConfigCudaGraphAnnotation, ""); @@ -3210,8 +3242,8 @@ Status InferenceSession::Run(const RunOptions& run_options, telemetry_.total_run_duration_since_last_ += TimeDiffMicroSeconds(tp); telemetry_.duration_per_batch_size_[batch_size] += TimeDiffMicroSeconds(tp); - if (TimeDiffMicroSeconds(telemetry_.time_sent_last_) > Telemetry::kDurationBetweenSending) { - // send the telemetry + // Emit RuntimePerf + if (emit_run_telemetry) { env.GetTelemetryProvider().LogRuntimePerf(session_id_, telemetry_.total_runs_since_last_, telemetry_.total_run_duration_since_last_, telemetry_.duration_per_batch_size_); @@ -3220,6 +3252,10 @@ Status InferenceSession::Run(const RunOptions& run_options, telemetry_.total_runs_since_last_ = 0; telemetry_.total_run_duration_since_last_ = 0; telemetry_.duration_per_batch_size_.clear(); + + // Double the interval, capping at kRuntimePerfMaxInterval + telemetry_.runtime_perf_interval_ = std::min(telemetry_.runtime_perf_interval_ * 2, + Telemetry::kRuntimePerfMaxInterval); } } diff --git a/onnxruntime/core/session/inference_session.h b/onnxruntime/core/session/inference_session.h index 1dbf0318c988c..a29eba1cdacca 100644 --- a/onnxruntime/core/session/inference_session.h +++ b/onnxruntime/core/session/inference_session.h @@ -976,8 +976,10 @@ class InferenceSession { std::unordered_map duration_per_batch_size_; // the duration (us) of Run() calls per batch size since the last report TimePoint time_sent_last_; // the TimePoint of the last report - // Event Rate per provider < 20 peak events per second - constexpr static long long kDurationBetweenSending = 1000 * 1000 * 60 * 10; // duration in (us). send a report every 10 mins + // RuntimePerf backoff: starts at 2s, doubles each emission, caps at 10 min + constexpr static long long kRuntimePerfInitialInterval = 2 * 1000 * 1000; // 2 seconds in (us) + constexpr static long long kRuntimePerfMaxInterval = 1000 * 1000 * 60 * 10; // 10 minutes in (us) + long long runtime_perf_interval_ = kRuntimePerfInitialInterval; } telemetry_; mutable std::mutex telemetry_mutex_; // to ensure thread-safe access to telemetry data