55
66#include < memory>
77#include < mutex>
8+ #include < optional>
89
910#include " core/providers/webgpu/webgpu_external_header.h"
1011
@@ -25,13 +26,47 @@ class WebGpuContext;
2526class ComputeContextBase ;
2627class ProgramBase ;
2728
29+ // PendingKernelInfo stores profiling information for a kernel execution: a joined display name, the program cache key, and the input/output tensor shapes captured at construction time.
30+ struct PendingKernelInfo {
31+ PendingKernelInfo (std::string_view kernel_name,  // first component of the joined name
32+ std::string_view kernel_type,  // second component of the joined name
33+ std::string_view program_name,  // third component of the joined name
34+ std::string_view cache_key,  // copied into the owning cache_key member
35+ const std::vector<ProgramInput>& inputs,  // only the shape of each input is recorded
36+ const std::vector<ProgramOutput>& outputs)  // only the shape of each output is recorded
37+ : name{absl::StrJoin ({kernel_name, kernel_type, program_name}, " &" )}, cache_key{cache_key} {  // name = the three labels joined with the separator literal given here
38+ // Store shape information instead of tensor pointers to avoid accessing released tensors
39+ input_shapes.reserve (inputs.size ());  // exactly one shape is appended per input below
40+ for (const auto & input : inputs) {
41+ input_shapes.emplace_back (input.use_override_shape ? input.override_shape : input.tensor ->Shape ());  // an override shape, when flagged, takes precedence over the shape reported by the tensor
42+ }
43+ output_shapes.reserve (outputs.size ());  // exactly one shape is appended per output below
44+ for (const auto & output : outputs) {
45+ output_shapes.emplace_back (output.use_override_shape ? output.override_shape : output.tensor ->Shape ());  // same selection rule as for inputs
46+ }
47+ }
48+
49+ PendingKernelInfo (const PendingKernelInfo&) = default ;  // copy and move operations are all explicitly defaulted
50+ PendingKernelInfo& operator =(const PendingKernelInfo&) = default ;
51+ PendingKernelInfo (PendingKernelInfo&&) = default ;
52+ PendingKernelInfo& operator =(PendingKernelInfo&&) = default ;
53+
54+ std::string name;  // kernel_name, kernel_type and program_name joined in the constructor
55+ std::string cache_key;  // owning copy of the cache_key constructor argument
56+ std::vector<TensorShape> input_shapes;  // one entry per element of inputs, in the same order
57+ std::vector<TensorShape> output_shapes;  // one entry per element of outputs, in the same order
58+ };
59+
2860// Definition for CapturedCommandInfo in the webgpu namespace: groups a compute pipeline, its bind group and layout, the dispatch parameters, and optional profiling data (presumably one record per captured dispatch - confirm against the capture/replay code).
2961struct CapturedCommandInfo {
3062 wgpu::ComputePipeline compute_pipeline;
3163 WGPUBindGroup bind_group;  // NOTE(review): raw C handle type; who releases it is not visible in this struct
3264 WGPUBindGroupLayout bind_group_layout;  // NOTE(review): raw C handle type; who releases it is not visible in this struct
3365 std::array<uint32_t , 3 > dispatch_group;  // three uint32_t values; presumably the x/y/z workgroup counts - TODO confirm
34- WGPUBuffer indirect_buffer; // WGPUBuffer for indirect dispatch, nullptr if not using indirect dispatch
66+ // WGPUBuffer for indirect dispatch, nullptr if not using indirect dispatch
67+ WGPUBuffer indirect_buffer;
68+ // Optional profiling data; holds no value when no PendingKernelInfo was stored for this command
69+ std::optional<PendingKernelInfo> pending_kernel_info;
3570};
3671
3772struct WebGpuBufferCacheConfig {
@@ -145,7 +180,7 @@ class WebGpuContext final {
145180
146181 wgpu::ComputePassDescriptor compute_pass_desc{};
147182
148- if (is_profiling_ && query_type_ == TimestampQueryType::AtPasses) {
183+ if (is_profiling_ && query_type_ == TimestampQueryType::AtPasses && graph_capture_state_ != GraphCaptureState::Capturing ) {
149184 wgpu::PassTimestampWrites timestampWrites = {
150185 nullptr ,
151186 query_set_,
@@ -261,35 +296,6 @@ class WebGpuContext final {
261296 wgpu::Limits GetRequiredLimits (const wgpu::Adapter& adapter) const ;
262297 void WriteTimestamp (uint32_t query_index);
263298
264- struct PendingKernelInfo {  // profiling record for one kernel: joined name, cache key and input/output shapes
265- PendingKernelInfo (std::string_view kernel_name,
266- std::string_view kernel_type,
267- std::string_view program_name,
268- std::string_view cache_key,
269- const std::vector<ProgramInput>& inputs,
270- const std::vector<ProgramOutput>& outputs)
271- : name{absl::StrJoin ({kernel_name, kernel_type, program_name}, " &" )}, cache_key{cache_key} {  // name = the three labels joined with the separator literal given here
272- // Store shape information instead of tensor pointers to avoid accessing released tensors
273- input_shapes.reserve (inputs.size ());
274- for (const auto & input : inputs) {
275- input_shapes.emplace_back (input.use_override_shape ? input.override_shape : input.tensor ->Shape ());  // an override shape, when flagged, takes precedence over the shape reported by the tensor
276- }
277- output_shapes.reserve (outputs.size ());
278- for (const auto & output : outputs) {
279- output_shapes.emplace_back (output.use_override_shape ? output.override_shape : output.tensor ->Shape ());  // same selection rule as for inputs
280- }
281- }
282-
283- PendingKernelInfo (PendingKernelInfo&&) = default ;  // moves are defaulted in this version
284- PendingKernelInfo& operator =(PendingKernelInfo&&) = default ;
285- ORT_DISALLOW_COPY_AND_ASSIGNMENT (PendingKernelInfo);  // presumably deletes the copy constructor and copy assignment, making the type move-only - confirm against the macro definition
286-
287- std::string name;  // kernel_name, kernel_type and program_name joined in the constructor
288- std::string cache_key;  // owning copy of the cache_key constructor argument
289- std::vector<TensorShape> input_shapes;  // one entry per element of inputs, in the same order
290- std::vector<TensorShape> output_shapes;  // one entry per element of outputs, in the same order
291- };
292-
293299 struct PendingQueryInfo {
294300 PendingQueryInfo (std::vector<PendingKernelInfo>&& kernels, wgpu::Buffer query_buffer)
295301 : kernels{std::move (kernels)}, query_buffer{query_buffer} {}
0 commit comments