microsoft
diff --git a/cmake/onnxruntime_providers_webgpu.cmake b/cmake/onnxruntime_providers_webgpu.cmake
Lines changed: 6 additions & 0 deletions
diff --git a/onnxruntime/core/providers/cpu/tensor/padbase.h b/onnxruntime/core/providers/cpu/tensor/padbase.h
Lines changed: 12 additions & 6 deletions
diff --git a/onnxruntime/core/providers/cpu/tensor/upsamplebase.h b/onnxruntime/core/providers/cpu/tensor/upsamplebase.h
Lines changed: 16 additions & 2 deletions
diff --git a/onnxruntime/core/providers/webgpu/compute_context.h b/onnxruntime/core/providers/webgpu/compute_context.h
Lines changed: 4 additions & 0 deletions
diff --git a/onnxruntime/core/providers/webgpu/controlflow/if.cc b/onnxruntime/core/providers/webgpu/controlflow/if.cc
Lines changed: 6 additions & 1 deletion
diff --git a/onnxruntime/core/providers/webgpu/controlflow/if.h b/onnxruntime/core/providers/webgpu/controlflow/if.h
Lines changed: 13 additions & 1 deletion
diff --git a/onnxruntime/core/providers/webgpu/ep/api.cc b/onnxruntime/core/providers/webgpu/ep/api.cc
Lines changed: 78 additions & 0 deletions
@@ -122,6 +122,12 @@
     if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
       message(FATAL_ERROR "WebGPU EP shared library build is not supported on Emscripten. Please use static library build.")
     endif()
+
+    # Configure precompiled headers for shared library build
+    # PCH ensures ep/_pch.h is included first and improves compilation speed
+    target_precompile_headers(onnxruntime_providers_webgpu PRIVATE
+      "${REPO_ROOT}/include/onnxruntime/ep/_pch.h"
+    )
   endif()
 
   set_target_properties(onnxruntime_providers_webgpu PROPERTIES CXX_STANDARD_REQUIRED ON)
 
@@ -158,14 +158,20 @@ class PadBase {
         ORT_THROW("Invalid 'mode' attribute value");
     }
 
-    const auto& kernel_def = info.GetKernelDef();
+    if constexpr (std::is_same_v<KernelInfoType, onnxruntime::OpKernelInfo>) {
+      const auto& kernel_def = info.GetKernelDef();
 
-    int start_ver, end_ver;
-    kernel_def.SinceVersion(&start_ver, &end_ver);
+      int start_ver, end_ver;
+      kernel_def.SinceVersion(&start_ver, &end_ver);
 
-    // kMSDomain contrib kernel AND OnnxDomain start version >= 11 => DynamicPad
-    if (start_ver >= 11 || kernel_def.Domain() == kMSDomain) {
-      is_dynamic_ = true;
+      // A kMSDomain contrib kernel, or an OnnxDomain kernel whose start version is >= 11 => DynamicPad
+      if (start_ver >= 11 || kernel_def.Domain() == kMSDomain) {
+        is_dynamic_ = true;
+      }
+    } else {
+      if (info.node().SinceVersion() >= 11) {  // TODO(fs-eire): support contrib domain check
+        is_dynamic_ = true;
+      }
     }
 
     if (!is_dynamic_) {
 
@@ -219,8 +219,22 @@ class UpsampleBase {
     if (scales_input_idx_ > 0) {
       const Tensor* scale;
       bool get_scale = info.TryGetConstantInput(scales_input_idx_, &scale);
-      auto x_shape = node.InputDefs()[0]->Shape();
-      int64_t rank = x_shape ? x_shape->dim_size() : -1;
+      int64_t rank = -1;
+      if constexpr (std::is_same_v<KernelInfoType, onnxruntime::OpKernelInfo>) {
+        auto x_shape = node.InputDefs()[0]->Shape();
+        if (x_shape != nullptr) {
+          rank = x_shape->dim_size();
+        }
+      } else {
+        int is_const;
+        auto tensor = info.GetKernelInfo().GetTensorConstantInput(0, &is_const);
+        if (is_const) {
+          auto type_and_shape_info = tensor.GetTensorTypeAndShapeInfo();
+          if (type_and_shape_info.HasShape()) {
+            rank = static_cast<int64_t>(type_and_shape_info.GetShape().size());
+          }
+        }
+      }
       if (get_scale && scale->Shape().Size() > 0 && ((opset < 18) || (rank > 0 && opset >= 18))) {
         ORT_THROW_IF_ERROR(ParseScalesData(scale, scales_, rank));
         scales_cached_ = true;
 
@@ -100,7 +100,11 @@ class ComputeContextBase {
   // Get the logger.
   //
   inline const logging::Logger& Logger() const {
+#if defined(BUILD_WEBGPU_EP_STATIC_LIB)  // static-library build: the GetLogger() result is dereferenced
     return *ep_.GetLogger();
+#else  // any other build: the GetEpLogger() result is returned as-is
+    return ep_.GetEpLogger();
+#endif  // BUILD_WEBGPU_EP_STATIC_LIB
   }
 
   //
 
@@ -69,9 +69,14 @@ ONNX_OPERATOR_KERNEL_EX(If,
                         If);
 
 Status If::Compute(OpKernelContext* ctx) const {
+#if defined(BUILD_WEBGPU_EP_STATIC_LIB)  // static-library build: delegate to onnxruntime::If
   // call the base CPU version.
   return onnxruntime::If::Compute(ctx);
+#else  // any other build: no If implementation is available yet
+  // Every call returns a NOT_IMPLEMENTED status for now. TODO(fs-eire): implement WebGPU If kernel
+  return ORT_MAKE_STATUS(ONNXRUNTIME, NOT_IMPLEMENTED, "If operator is not implemented for WebGPU EP yet.");
+#endif  // BUILD_WEBGPU_EP_STATIC_LIB
 }
 
 }  // namespace webgpu
-}  // namespace onnxruntime
+}  // namespace onnxruntime
@@ -10,6 +10,8 @@
 namespace onnxruntime {
 namespace webgpu {
 
+#if defined(BUILD_WEBGPU_EP_STATIC_LIB)
+
 // Use the CPU implementation for the logic
 class If final : public onnxruntime::If {
  public:
@@ -18,5 +20,15 @@ class If final : public onnxruntime::If {
   Status Compute(OpKernelContext* ctx) const override;
 };
 
+#else
+
+class If final : public OpKernel {  // variant compiled when BUILD_WEBGPU_EP_STATIC_LIB is not defined
+ public:
+  explicit If(const OpKernelInfo& info) : OpKernel(info) {}  // explicit: no implicit OpKernelInfo -> If conversion
+
+  Status Compute(OpKernelContext* ctx) const override;  // declared only; the definition is out of line
+};
+#endif
+
 }  // namespace webgpu
-}  // namespace onnxruntime
+}  // namespace onnxruntime
@@ -0,0 +1,78 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#define ORT_API_MANUAL_INIT
+#include "onnxruntime_cxx_api.h"
+#undef ORT_API_MANUAL_INIT
+
+#include <memory>
+
+#include "core/providers/webgpu/ep/factory.h"
+
+// EXPORT_SYMBOL decorates the extern "C" entry points below: default visibility on Apple platforms (macOS/iOS), empty elsewhere
+#ifdef __APPLE__
+#define EXPORT_SYMBOL __attribute__((visibility("default")))
+#else
+#define EXPORT_SYMBOL
+#endif  // __APPLE__
+
+namespace onnxruntime {
+namespace webgpu {
+void CleanupWebGpuContexts();    // declaration only (no body in this file); called from ReleaseEpFactory
+void CleanupKernelRegistries();  // declaration only (no body in this file); called from ReleaseEpFactory
+}  // namespace webgpu
+}  // namespace onnxruntime
+
+namespace google {
+namespace protobuf {
+void ShutdownProtobufLibrary();  // declared by hand; this file includes no protobuf header. Called from ReleaseEpFactory
+}  // namespace protobuf
+}  // namespace google
+
+extern "C" {
+//
+// Public symbols
+//
+// Exported entry point that hands the caller the EP factory provided by this library.
+//
+// Initializes the manually-initialized C++ API from `ort_api_base`, creates the global default
+// logger from `default_logger`, then stores one heap-allocated WebGPU factory in `factories[0]`
+// and sets `*num_factories` to 1. `registration_name` is accepted but not used.
+//
+// Returns nullptr on success, or an ORT_INVALID_ARGUMENT status when `factories` or
+// `num_factories` is null or when `max_factories` is 0. The returned factory is destroyed by
+// passing it to ReleaseEpFactory.
+EXPORT_SYMBOL OrtStatus* CreateEpFactories(const char* registration_name, const OrtApiBase* ort_api_base,
+                                           const OrtLogger* default_logger,
+                                           OrtEpFactory** factories, size_t max_factories, size_t* num_factories) {
+  // The factory below is constructed without the registration name, so the parameter is unused.
+  static_cast<void>(registration_name);
+
+  // Manual init for the C++ API. Done first because the error statuses below are created through it.
+  onnxruntime::ep::ApiInit(ort_api_base);
+
+  // Reject null output pointers before anything is written through them.
+  if (factories == nullptr || num_factories == nullptr) {
+    return onnxruntime::ep::Api().ort.CreateStatus(ORT_INVALID_ARGUMENT,
+                                                   "Output arguments 'factories' and 'num_factories' must not be null.");
+  }
+
+  if (max_factories < 1) {
+    return onnxruntime::ep::Api().ort.CreateStatus(ORT_INVALID_ARGUMENT,
+                                                   "Not enough space to return EP factory. Need at least one.");
+  }
+
+  // Initialize the global default logger
+  ::onnxruntime::ep::adapter::Logger::CreateDefaultLogger(default_logger);
+
+  // Factory could use registration_name or define its own EP name.
+  std::unique_ptr<OrtEpFactory> factory = std::make_unique<onnxruntime::webgpu::ep::Factory>();
+
+  // Ownership moves to the caller; ReleaseEpFactory deletes the object.
+  factories[0] = factory.release();
+  *num_factories = 1;
+
+  return nullptr;
+}
+
+EXPORT_SYMBOL OrtStatus* ReleaseEpFactory(OrtEpFactory* factory) {
+  // STEP.1 - Release the factory (cast back to the concrete Factory type so the delete runs on that type)
+  delete static_cast<onnxruntime::webgpu::ep::Factory*>(factory);
+
+  // STEP.2 - Clean up cached kernel registries. NOTE(review): STEP.2-5 take no argument tied to `factory` and run on every call; presumably only one factory is live at a time - confirm
+  onnxruntime::webgpu::CleanupKernelRegistries();
+
+  // STEP.3 - Clean up WebGPU contexts
+  onnxruntime::webgpu::CleanupWebGpuContexts();
+
+  // STEP.4 - Destroy the global default logger wrapper (counterpart of the CreateDefaultLogger call in CreateEpFactories)
+  ::onnxruntime::ep::adapter::Logger::DestroyDefaultLogger();
+
+  // STEP.5 - Shutdown protobuf library
+  google::protobuf::ShutdownProtobufLibrary();
+
+  return nullptr;  // no failure path: this function always returns a null status
+}
+
+}  // extern "C"