diff --git a/onnxruntime/core/providers/webgpu/webgpu_context.cc b/onnxruntime/core/providers/webgpu/webgpu_context.cc index 11f67d342da0d..2f50fd8051b9c 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_context.cc +++ b/onnxruntime/core/providers/webgpu/webgpu_context.cc @@ -161,15 +161,6 @@ void WebGpuContext::Initialize(const WebGpuContextConfig& config) { } else { query_type_ = TimestampQueryType::None; } - if (config.enable_pix_capture) { -#if defined(ENABLE_PIX_FOR_WEBGPU_EP) - // set pix frame generator - pix_frame_generator_ = std::make_unique(instance_, - Device()); -#else - ORT_THROW("Support PIX capture requires extra build flags (--enable_pix_capture)"); -#endif // ENABLE_PIX_FOR_WEBGPU_EP - } }); } @@ -757,14 +748,6 @@ void WebGpuContext::Flush(const webgpu::BufferManager& buffer_mgr) { num_pending_dispatches_ = 0; } -void WebGpuContext::OnRunEnd() { -#if defined(ENABLE_PIX_FOR_WEBGPU_EP) - if (pix_frame_generator_) { - pix_frame_generator_->GeneratePIXFrame(); - } -#endif // ENABLE_PIX_FOR_WEBGPU_EP -} - void WebGpuContext::LaunchComputePipeline(const wgpu::ComputePassEncoder& compute_pass_encoder, const std::vector& bind_buffers, const std::vector& bind_buffers_segments, diff --git a/onnxruntime/core/providers/webgpu/webgpu_context.h b/onnxruntime/core/providers/webgpu/webgpu_context.h index 5a97ef662855e..8cc513680142d 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_context.h +++ b/onnxruntime/core/providers/webgpu/webgpu_context.h @@ -78,7 +78,6 @@ struct WebGpuContextConfig { 0 #endif }; - bool enable_pix_capture{false}; }; class WebGpuContextFactory { @@ -215,7 +214,13 @@ class WebGpuContext final { Status PopErrorScope(); Status Run(ComputeContextBase& context, const ProgramBase& program); - void OnRunEnd(); + +#if defined(ENABLE_PIX_FOR_WEBGPU_EP) + std::unique_ptr CreatePIXFrameGenerator() { + return std::make_unique(instance_, + Device()); + } +#endif // ENABLE_PIX_FOR_WEBGPU_EP private: enum class TimestampQueryType { @@ -334,10 +339,6 @@ class WebGpuContext final { // External vector to store captured commands, owned by EP std::vector* external_captured_commands_ = nullptr; - -#if defined(ENABLE_PIX_FOR_WEBGPU_EP) - std::unique_ptr pix_frame_generator_ = nullptr; -#endif // ENABLE_PIX_FOR_WEBGPU_EP }; } // namespace webgpu diff --git a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc index 9c163c4b532df..15263a87a17b6 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc +++ b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc @@ -810,6 +810,15 @@ WebGpuExecutionProvider::WebGpuExecutionProvider(int context_id, webgpu::BufferCacheMode::GraphSimple, webgpu::BufferCacheMode::Disabled); } + + if (config.enable_pix_capture) { +#if defined(ENABLE_PIX_FOR_WEBGPU_EP) + // set pix frame generator + pix_frame_generator_ = context_.CreatePIXFrameGenerator(); +#else + ORT_THROW("Support PIX capture requires extra build flags (--enable_pix_capture)"); +#endif // ENABLE_PIX_FOR_WEBGPU_EP + } } std::vector WebGpuExecutionProvider::CreatePreferredAllocators() { @@ -1008,7 +1017,11 @@ Status WebGpuExecutionProvider::OnRunEnd(bool /* sync_stream */, const onnxrunti context_.CollectProfilingData(profiler_->Events()); } - context_.OnRunEnd(); +#if defined(ENABLE_PIX_FOR_WEBGPU_EP) + if (pix_frame_generator_) { + pix_frame_generator_->GeneratePIXFrame(); + } +#endif // ENABLE_PIX_FOR_WEBGPU_EP if (context_.ValidationMode() >= ValidationMode::Basic) { return context_.PopErrorScope(); diff --git a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.h b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.h index 95796e8b2c500..bf0963f67cf1e 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.h +++ b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.h @@ -10,6 +10,10 @@ #include "core/providers/providers.h" #include "core/providers/webgpu/buffer_manager.h" +#if defined(ENABLE_PIX_FOR_WEBGPU_EP) +#include "core/providers/webgpu/webgpu_pix_frame_generator.h" +#endif // ENABLE_PIX_FOR_WEBGPU_EP + struct pthreadpool; namespace onnxruntime { namespace webgpu { @@ -29,6 +33,7 @@ struct CapturedCommandInfo; struct WebGpuExecutionProviderConfig { DataLayout data_layout{DataLayout::NHWC}; // preferred layout is NHWC by default bool enable_graph_capture{false}; // graph capture feature is disabled by default + bool enable_pix_capture{false}; // PIX capture is disabled by default std::vector force_cpu_node_names{}; }; @@ -92,6 +97,10 @@ class WebGpuExecutionProvider : public IExecutionProvider { const int min_num_runs_before_cuda_graph_capture_ = 1; // required min regular runs before graph capture for the necessary memory allocations. int m_current_graph_annotation_id = 0; +#if defined(ENABLE_PIX_FOR_WEBGPU_EP) + std::unique_ptr pix_frame_generator_ = nullptr; +#endif // ENABLE_PIX_FOR_WEBGPU_EP + // Buffer manager specifically for graph capture mode std::unique_ptr graph_buffer_mgr_ = nullptr; diff --git a/onnxruntime/core/providers/webgpu/webgpu_provider_factory.cc b/onnxruntime/core/providers/webgpu/webgpu_provider_factory.cc index c92c3624678ea..cd791e31dcc2f 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_provider_factory.cc +++ b/onnxruntime/core/providers/webgpu/webgpu_provider_factory.cc @@ -80,9 +80,22 @@ WebGpuExecutionProviderConfig ParseEpConfig(const ConfigOptions& config_options) } } + // enable pix capture + if (std::string enable_pix_capture_str; + config_options.TryGetConfigEntry(kEnablePIXCapture, enable_pix_capture_str)) { + if (enable_pix_capture_str == kEnablePIXCapture_ON) { + webgpu_ep_config.enable_pix_capture = true; + } else if (enable_pix_capture_str == kEnablePIXCapture_OFF) { + webgpu_ep_config.enable_pix_capture = false; + } else { + ORT_THROW("Invalid enable pix capture: ", enable_pix_capture_str); + } + } + LOGS_DEFAULT(VERBOSE) << "WebGPU EP preferred layout: " << int(webgpu_ep_config.data_layout); LOGS_DEFAULT(VERBOSE) << "WebGPU EP graph capture enable: " << webgpu_ep_config.enable_graph_capture; LOGS_DEFAULT(VERBOSE) << "WebGPU EP force CPU node count: " << webgpu_ep_config.force_cpu_node_names.size(); + LOGS_DEFAULT(VERBOSE) << "WebGPU EP pix capture enable: " << webgpu_ep_config.enable_pix_capture; return webgpu_ep_config; } @@ -216,19 +229,6 @@ WebGpuContextConfig ParseWebGpuContextConfig(const ConfigOptions& config_options } } - // enable pix capture - - if (std::string enable_pix_capture_str; - config_options.TryGetConfigEntry(kEnablePIXCapture, enable_pix_capture_str)) { - if (enable_pix_capture_str == kEnablePIXCapture_ON) { - config.enable_pix_capture = true; - } else if (enable_pix_capture_str == kEnablePIXCapture_OFF) { - config.enable_pix_capture = false; - } else { - ORT_THROW("Invalid enable pix capture: ", enable_pix_capture_str); - } - } - LOGS_DEFAULT(VERBOSE) << "WebGPU EP storage buffer cache mode: " << config.buffer_cache_config.storage.mode; LOGS_DEFAULT(VERBOSE) << "WebGPU EP uniform buffer cache mode: " << config.buffer_cache_config.uniform.mode; LOGS_DEFAULT(VERBOSE) << "WebGPU EP query resolve buffer cache mode: " << config.buffer_cache_config.query_resolve.mode; @@ -236,7 +236,6 @@ WebGpuContextConfig ParseWebGpuContextConfig(const ConfigOptions& config_options LOGS_DEFAULT(VERBOSE) << "WebGPU EP power preference: " << config.power_preference; LOGS_DEFAULT(VERBOSE) << "WebGPU EP Dawn backend type: " << config.backend_type; - LOGS_DEFAULT(VERBOSE) << "WebGPU EP pix capture enable: " << config.enable_pix_capture; return config; } diff --git a/setup.py b/setup.py index c095452fef768..df62fdb78622b 100644 --- a/setup.py +++ b/setup.py @@ -297,8 +297,12 @@ def run(self): else: pass + # qnn links libc++ rather than libstdc++ for its x86_64 dependencies which we currently do not + # support for many_linux. This is not the case for other platforms. + qnn_run_audit = environ.get("AUDITWHEEL_ARCH", "x86_64") != "x86_64" + _bdist_wheel.run(self) - if is_manylinux and not disable_auditwheel_repair and not is_openvino: + if is_manylinux and not disable_auditwheel_repair and not is_openvino and (not is_qnn or qnn_run_audit): assert self.dist_dir is not None file = glob(path.join(self.dist_dir, "*linux*.whl"))[0] logger.info("repairing %s for manylinux1", file)