diff --git a/cmake/external/onnx b/cmake/external/onnx
index b8baa84466864..595228d99e397 160000
--- a/cmake/external/onnx
+++ b/cmake/external/onnx
@@ -1 +1 @@
-Subproject commit b8baa8446686496da4cc8fda09f2b6fe65c2a02c
+Subproject commit 595228d99e3977ac27cb79d5963adda262af99ad
diff --git a/js/web/docs/webgl-operators.md b/js/web/docs/webgl-operators.md
index 3aec0aa3d7cf3..cd25819a2069e 100644
--- a/js/web/docs/webgl-operators.md
+++ b/js/web/docs/webgl-operators.md
@@ -9,18 +9,18 @@ See [Compatibility](../README.md#Compatibility) for a list of the supported plat
 | Operator | WebGl Backend |
 |:--------:|:-------------:|
 | [Abs](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Abs) | [6-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Abs-6), [13+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Abs-13) |
-| [Acos](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Acos) | [7-21](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Acos-7), [22+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Acos-22) |
+| [Acos](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Acos) | [7+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Acos-7) |
 | [Acosh](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Acosh) |  |
 | [Add](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Add) | [7-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Add-7), [13](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Add-13), [14+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Add-14) |
 | [AffineGrid](https://github.com/onnx/onnx/blob/main/docs/Operators.md#AffineGrid) |  |
 | [And](https://github.com/onnx/onnx/blob/main/docs/Operators.md#And) | [7+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#And-7) |
 | [ArgMax](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ArgMax) |  |
 | [ArgMin](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ArgMin) |  |
-| [Asin](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Asin) | [7-21](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Asin-7), [22+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Asin-22) |
+| [Asin](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Asin) | [7+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Asin-7) |
 | [Asinh](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Asinh) |  |
-| [Atan](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Atan) | [7-21](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Atan-7), [22+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Atan-22) |
+| [Atan](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Atan) | [7+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Atan-7) |
 | [Atanh](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Atanh) |  |
-| [AveragePool](https://github.com/onnx/onnx/blob/main/docs/Operators.md#AveragePool) | [7-9](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#AveragePool-7), [10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#AveragePool-10), [11-18](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#AveragePool-11), [19-21](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#AveragePool-19), [22+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#AveragePool-22) |
+| [AveragePool](https://github.com/onnx/onnx/blob/main/docs/Operators.md#AveragePool) | [7-9](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#AveragePool-7), [10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#AveragePool-10), [11-18](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#AveragePool-11), [19+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#AveragePool-19) |
 | [BatchNormalization](https://github.com/onnx/onnx/blob/main/docs/Operators.md#BatchNormalization) | [7-8](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#BatchNormalization-7), [9-13](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#BatchNormalization-9), [14](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#BatchNormalization-14), [15+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#BatchNormalization-15) |
 | [Bernoulli](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Bernoulli) |  |
 | [BitShift](https://github.com/onnx/onnx/blob/main/docs/Operators.md#BitShift) |  |
@@ -41,10 +41,10 @@ See [Compatibility](../README.md#Compatibility) for a list of the supported plat
 | [ConcatFromSequence](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ConcatFromSequence) |  |
 | [Constant](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Constant) |  |
 | [ConstantOfShape](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ConstantOfShape) |  |
-| [Conv](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Conv) | [1-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Conv-1), [11-21](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Conv-11), [22+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Conv-22) |
+| [Conv](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Conv) | [1-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Conv-1), [11+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Conv-11) |
 | [ConvInteger](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ConvInteger) |  |
-| [ConvTranspose](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ConvTranspose) | [1-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ConvTranspose-1), [11-21](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ConvTranspose-11), [22+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ConvTranspose-22) |
-| [Cos](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Cos) | [7-21](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Cos-7), [22+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Cos-22) |
+| [ConvTranspose](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ConvTranspose) | [1-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ConvTranspose-1), [11+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ConvTranspose-11) |
+| [Cos](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Cos) | [7+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Cos-7) |
 | [Cosh](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Cosh) |  |
 | [CumSum](https://github.com/onnx/onnx/blob/main/docs/Operators.md#CumSum) |  |
 | [DFT](https://github.com/onnx/onnx/blob/main/docs/Operators.md#DFT) |  |
@@ -53,10 +53,10 @@ See [Compatibility](../README.md#Compatibility) for a list of the supported plat
 | [DequantizeLinear](https://github.com/onnx/onnx/blob/main/docs/Operators.md#DequantizeLinear) |  |
 | [Det](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Det) |  |
 | [Div](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Div) | [7-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Div-7), [13](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Div-13), [14+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Div-14) |
-| [Dropout](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Dropout) | [7-9](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Dropout-7), [10-11](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Dropout-10), [12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Dropout-12), [13-21](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Dropout-13), [22+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Dropout-22) |
+| [Dropout](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Dropout) | [7-9](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Dropout-7), [10-11](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Dropout-10), [12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Dropout-12), [13+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Dropout-13) |
 | [DynamicQuantizeLinear](https://github.com/onnx/onnx/blob/main/docs/Operators.md#DynamicQuantizeLinear) |  |
 | [Einsum](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Einsum) |  |
-| [Elu](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Elu) | [6-21](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Elu-6), [22+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Elu-22) |
+| [Elu](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Elu) | [6+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Elu-6) |
 | [Equal](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Equal) | [7-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Equal-7), [11-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Equal-11), [13-18](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Equal-13), [19+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Equal-19) |
 | [Erf](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Erf) |  |
 | [Exp](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Exp) | [6-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Exp-6), [13+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Exp-13) |
@@ -70,9 +70,9 @@ See [Compatibility](../README.md#Compatibility) for a list of the supported plat
 | [GatherND](https://github.com/onnx/onnx/blob/main/docs/Operators.md#GatherND) |  |
 | [Gelu](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Gelu) |  |
 | [Gemm](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Gemm) | [7-8](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Gemm-7), [9-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Gemm-9), [11-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Gemm-11), [13+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Gemm-13) |
-| [GlobalAveragePool](https://github.com/onnx/onnx/blob/main/docs/Operators.md#GlobalAveragePool) | [1-21](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#GlobalAveragePool-1), [22+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#GlobalAveragePool-22) |
+| [GlobalAveragePool](https://github.com/onnx/onnx/blob/main/docs/Operators.md#GlobalAveragePool) | [1+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#GlobalAveragePool-1) |
 | [GlobalLpPool](https://github.com/onnx/onnx/blob/main/docs/Operators.md#GlobalLpPool) |  |
-| [GlobalMaxPool](https://github.com/onnx/onnx/blob/main/docs/Operators.md#GlobalMaxPool) | [1-21](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#GlobalMaxPool-1), [22+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#GlobalMaxPool-22) |
+| [GlobalMaxPool](https://github.com/onnx/onnx/blob/main/docs/Operators.md#GlobalMaxPool) | [1+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#GlobalMaxPool-1) |
 | [Greater](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Greater) | [7-8](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Greater-7), [9-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Greater-9), [13+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Greater-13) |
 | [GreaterOrEqual](https://github.com/onnx/onnx/blob/main/docs/Operators.md#GreaterOrEqual) |  |
 | [GridSample](https://github.com/onnx/onnx/blob/main/docs/Operators.md#GridSample) |  |
@@ -85,7 +85,7 @@ See [Compatibility](../README.md#Compatibility) for a list of the supported plat
 | [Identity](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Identity) | [1-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Identity-1), [13](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Identity-13), [14-15](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Identity-14), [16-18](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Identity-16), [19-20](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Identity-19), [21+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Identity-21) |
 | [If](https://github.com/onnx/onnx/blob/main/docs/Operators.md#If) |  |
 | [ImageDecoder](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ImageDecoder) |  |
-| [InstanceNormalization](https://github.com/onnx/onnx/blob/main/docs/Operators.md#InstanceNormalization) | [6-21](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#InstanceNormalization-6), [22+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#InstanceNormalization-22) |
+| [InstanceNormalization](https://github.com/onnx/onnx/blob/main/docs/Operators.md#InstanceNormalization) | [6+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#InstanceNormalization-6) |
 | [IsInf](https://github.com/onnx/onnx/blob/main/docs/Operators.md#IsInf) |  |
 | [IsNaN](https://github.com/onnx/onnx/blob/main/docs/Operators.md#IsNaN) |  |
 | [LRN](https://github.com/onnx/onnx/blob/main/docs/Operators.md#LRN) | [1-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#LRN-1), [13+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#LRN-13) |
@@ -102,7 +102,7 @@ See [Compatibility](../README.md#Compatibility) for a list of the supported plat
 | [MatMul](https://github.com/onnx/onnx/blob/main/docs/Operators.md#MatMul) | [1-8](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#MatMul-1), [9-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#MatMul-9), [13+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#MatMul-13) |
 | [MatMulInteger](https://github.com/onnx/onnx/blob/main/docs/Operators.md#MatMulInteger) |  |
 | [Max](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Max) |  |
-| [MaxPool](https://github.com/onnx/onnx/blob/main/docs/Operators.md#MaxPool) | [1-7](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#MaxPool-1), [8-9](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#MaxPool-8), [10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#MaxPool-10), [11](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#MaxPool-11), [12-21](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#MaxPool-12), [22+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#MaxPool-22) |
+| [MaxPool](https://github.com/onnx/onnx/blob/main/docs/Operators.md#MaxPool) | [1-7](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#MaxPool-1), [8-9](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#MaxPool-8), [10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#MaxPool-10), [11](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#MaxPool-11), [12+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#MaxPool-12) |
 | [MaxRoiPool](https://github.com/onnx/onnx/blob/main/docs/Operators.md#MaxRoiPool) |  |
 | [MaxUnpool](https://github.com/onnx/onnx/blob/main/docs/Operators.md#MaxUnpool) |  |
 | [Mean](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Mean) |  |
@@ -170,7 +170,7 @@ See [Compatibility](../README.md#Compatibility) for a list of the supported plat
 | [Shrink](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Shrink) |  |
 | [Sigmoid](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Sigmoid) | [6-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sigmoid-6), [13+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sigmoid-13) |
 | [Sign](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Sign) |  |
-| [Sin](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Sin) | [7-21](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sin-7), [22+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sin-22) |
+| [Sin](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Sin) | [7+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sin-7) |
 | [Sinh](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Sinh) |  |
 | [Size](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Size) |  |
 | [Slice](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Slice) | [1-9](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Slice-1), [10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Slice-10), [11-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Slice-11), [13+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Slice-13) |
@@ -188,7 +188,7 @@ See [Compatibility](../README.md#Compatibility) for a list of the supported plat
 | [StringSplit](https://github.com/onnx/onnx/blob/main/docs/Operators.md#StringSplit) |  |
 | [Sub](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Sub) | [7-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sub-7), [13](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sub-13), [14+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sub-14) |
 | [Sum](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Sum) | [6-7](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sum-6), [8-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sum-8), [13+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Sum-13) |
-| [Tan](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Tan) | [7-21](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Tan-7), [22+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Tan-22) |
+| [Tan](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Tan) | [7+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Tan-7) |
 | [Tanh](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Tanh) | [6-12](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Tanh-6), [13+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Tanh-13) |
 | [TfIdfVectorizer](https://github.com/onnx/onnx/blob/main/docs/Operators.md#TfIdfVectorizer) |  |
 | [ThresholdedRelu](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ThresholdedRelu) |  |
diff --git a/onnxruntime/core/providers/webgpu/tensor/slice.cc b/onnxruntime/core/providers/webgpu/tensor/slice.cc
new file mode 100644
index 0000000000000..752c028e4f5e5
--- /dev/null
+++ b/onnxruntime/core/providers/webgpu/tensor/slice.cc
@@ -0,0 +1,205 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "core/common/inlined_containers.h"
+#include "core/providers/webgpu/tensor/slice.h"
+#include "core/providers/cpu/tensor/utils.h"
+#include "core/providers/webgpu/shader_helper.h"
+#include "core/providers/webgpu/webgpu_supported_types.h"
+
+namespace onnxruntime {
+namespace webgpu {
+
+// Opset 1-9: no input is pinned to CPU memory; the kernel reads starts/ends/axes as attributes.
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(
+    Slice,
+    kOnnxDomain,
+    1, 9,
+    kWebGpuExecutionProvider,
+    (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()),
+    Slice);
+
+// Opset 10 and later: inputs 1-4 (starts, ends, axes, steps) are read on the host by
+// ComputeInternal, so they are pinned to CPU memory.
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(
+    Slice,
+    kOnnxDomain,
+    10, 10,
+    kWebGpuExecutionProvider,
+    (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()).InputMemoryType(OrtMemTypeCPU, 1).InputMemoryType(OrtMemTypeCPU, 2).InputMemoryType(OrtMemTypeCPU, 3).InputMemoryType(OrtMemTypeCPU, 4),
+    Slice);
+
+ONNX_OPERATOR_VERSIONED_KERNEL_EX(
+    Slice,
+    kOnnxDomain,
+    11, 12,
+    kWebGpuExecutionProvider,
+    (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()).InputMemoryType(OrtMemTypeCPU, 1).InputMemoryType(OrtMemTypeCPU, 2).InputMemoryType(OrtMemTypeCPU, 3).InputMemoryType(OrtMemTypeCPU, 4),
+    Slice);
+
+ONNX_OPERATOR_KERNEL_EX(
+    Slice,
+    kOnnxDomain,
+    13,
+    kWebGpuExecutionProvider,
+    (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()).InputMemoryType(OrtMemTypeCPU, 1).InputMemoryType(OrtMemTypeCPU, 2).InputMemoryType(OrtMemTypeCPU, 3).InputMemoryType(OrtMemTypeCPU, 4),
+    Slice);
+
+// Generates WGSL that copies one input element per output element.
+// For each axis i the input index is starts[i] + out[i] * steps[i] when signs[i] >= 0 and
+// starts[i] - out[i] * steps[i] when signs[i] < 0; `steps` therefore holds step magnitudes.
+Status SliceProgram::GenerateShaderCode(ShaderHelper& shader) const {
+  const ShaderVariableHelper& input = shader.AddInput("input", ShaderUsage::UseUniform | ShaderUsage::UseIndicesTypeAlias);
+  const ShaderVariableHelper& output = shader.AddOutput("output", ShaderUsage::UseUniform | ShaderUsage::UseIndicesTypeAlias);
+
+  shader.MainFunctionBody() << shader.GuardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")
+                            << "let output_indices = " << output.OffsetToIndices("global_idx") << ";\n"
+                            << "var input_indices: input_indices_t;\n";
+
+  // Signed counter: the loop ends once i drops below zero.
+  for (int i = static_cast<int>(input.Rank()) - 1; i >= 0; i--) {
+    const std::string steps_i = absl::StrCat("steps_", i);
+    const std::string starts_i = absl::StrCat("starts_", i);
+    const std::string offset_i = absl::StrCat("offset_", i);
+    const std::string input_index_i = absl::StrCat("input_index_", i);
+
+    shader.MainFunctionBody() << "let " << steps_i << " = " << input.IndicesGet("uniforms.steps", i) << ";\n"
+                              << "let " << starts_i << " = " << input.IndicesGet("uniforms.starts", i) << ";\n"
+                              << "let " << offset_i << " = " << output.IndicesGet("output_indices", i) << " * " << steps_i << ";\n"
+                              << "var " << input_index_i << " = " << starts_i << " + " << offset_i << ";\n"
+                              << "if (" << input.IndicesGet("uniforms.signs", i) << " < 0) {\n"
+                              << "  " << input_index_i << " = " << starts_i << " - " << offset_i << ";\n"
+                              << "}\n"
+                              << input.IndicesSet("input_indices", i, input_index_i) << ";\n";
+  }
+
+  shader.MainFunctionBody() << output.SetByOffset("global_idx", input.GetByIndices("input_indices"));
+
+  return Status::OK();
+}
+
+// Resolves starts/ends/axes/steps on the host, allocates the output and dispatches SliceProgram.
+//
+// starts and ends come from attributes when present, otherwise from inputs 1 and 2. axes defaults to
+// [0, ..., len(starts) - 1] and steps to all ones. Negative starts/ends/axes count from the back.
+// Following the ONNX Slice specification, starts are clamped to [0, dim] (positive step) or
+// [0, dim - 1] (negative step) and ends to [0, dim] or [-1, dim - 1] respectively.
+Status Slice::ComputeInternal(ComputeContext& context) const {
+  const Tensor* input_tensor = context.Input(0);
+  const TensorShape& input_shape = input_tensor->Shape();
+  const size_t rank = input_shape.NumDimensions();
+  const int64_t input_rank = static_cast<int64_t>(rank);
+
+  auto starts_raw = hasStartsAttr ? gsl::make_span(attr_starts_) : context.Input(1)->DataAsSpan<int64_t>();
+  auto ends_raw = hasEndsAttr ? gsl::make_span(attr_ends_) : context.Input(2)->DataAsSpan<int64_t>();
+
+  ORT_ENFORCE(starts_raw.size() == ends_raw.size(), "starts and ends must have the same size");
+
+  // axes (input 3) and steps (input 4) are optional.
+  const int input_count = context.InputCount();
+  const Tensor* axes_tensor = input_count >= 4 ? context.Input(3) : nullptr;
+  const Tensor* steps_tensor = input_count >= 5 ? context.Input(4) : nullptr;
+
+  // axes: attribute first, then input, then the default [0, ..., len(starts) - 1].
+  std::vector<int64_t> axes_default;
+  if (!hasAxesAttr && axes_tensor == nullptr) {
+    axes_default.resize(starts_raw.size());
+    for (size_t i = 0; i < axes_default.size(); i++) {
+      axes_default[i] = static_cast<int64_t>(i);
+    }
+  }
+  auto axes_raw = hasAxesAttr ? gsl::make_span(attr_axes_) : (axes_tensor == nullptr ? gsl::make_span(axes_default) : axes_tensor->DataAsSpan<int64_t>());
+
+  // steps: input if present, otherwise all ones.
+  std::vector<int64_t> steps_default;
+  if (steps_tensor == nullptr) {
+    steps_default.assign(starts_raw.size(), 1);
+  }
+  auto steps_raw = steps_tensor == nullptr ? gsl::make_span(steps_default) : steps_tensor->DataAsSpan<int64_t>();
+
+  ORT_ENFORCE(axes_raw.size() == starts_raw.size(), "axes and starts must have the same size");
+  ORT_ENFORCE(steps_raw.size() == starts_raw.size(), "steps and starts must have the same size");
+
+  // Per-input-axis slice parameters. Axes that are not listed keep their full range with step 1.
+  std::vector<uint32_t> starts(rank, 0);
+  std::vector<uint32_t> steps(rank, 1);
+  std::vector<int32_t> signs(rank, 1);
+  std::vector<int64_t> output_dims(rank);
+  for (size_t d = 0; d < rank; d++) {
+    output_dims[d] = input_shape[d];
+  }
+  std::vector<bool> axis_seen(rank, false);
+
+  for (size_t i = 0; i < starts_raw.size(); i++) {
+    int64_t axis = axes_raw[i];
+    if (axis < 0) {
+      axis += input_rank;
+    }
+    ORT_ENFORCE(axis >= 0 && axis < input_rank, "axes has an out-of-range value: ", axes_raw[i]);
+    const size_t a = static_cast<size_t>(axis);
+    ORT_ENFORCE(!axis_seen[a], "axes has a repeated value: ", axes_raw[i]);
+    axis_seen[a] = true;
+
+    const int64_t step = steps_raw[i];
+    ORT_ENFORCE(step != 0, "steps must not contain 0");
+    const bool backward = step < 0;
+    const int64_t dim = input_shape[a];
+
+    int64_t start = starts_raw[i];
+    if (start < 0) {
+      start += dim;
+    }
+    int64_t end = ends_raw[i];
+    if (end < 0) {
+      end += dim;
+    }
+
+    // min(max(...)) rather than std::clamp: for an empty axis and a negative step the upper bound
+    // (dim - 1) is below the lower bound, and the result must then be the upper bound.
+    if (backward) {
+      start = std::min(std::max<int64_t>(start, 0), dim - 1);
+      end = std::min(std::max<int64_t>(end, -1), dim - 1);
+    } else {
+      start = std::min(std::max<int64_t>(start, 0), dim);
+      end = std::min(std::max<int64_t>(end, 0), dim);
+    }
+
+    // A step whose magnitude exceeds the axis length selects the same elements as one equal to
+    // the axis length, so cap it; this keeps the arithmetic below and the u32 uniform in range.
+    const int64_t max_stride = std::max<int64_t>(dim, 1);
+    const int64_t stride = backward ? (step < -max_stride ? max_stride : -step) : std::min(step, max_stride);
+
+    // Integer ceil(span / stride); a non-positive span yields an empty axis.
+    const int64_t span = backward ? start - end : end - start;
+    const int64_t count = span > 0 ? (span + stride - 1) / stride : 0;
+
+    starts[a] = static_cast<uint32_t>(std::max<int64_t>(start, 0));
+    steps[a] = static_cast<uint32_t>(stride);
+    signs[a] = backward ? -1 : 1;
+    output_dims[a] = count;
+  }
+
+  TensorShape output_shape(output_dims);
+
+  auto* output_tensor = context.Output(0, output_shape);
+  uint32_t output_size = static_cast<uint32_t>(output_shape.Size());
+
+  if (output_size == 0) {
+    return Status::OK();
+  }
+
+  SliceProgram program{};
+  program
+      .AddInputs({{input_tensor, ProgramTensorMetadataDependency::TypeAndRank}})
+      .AddOutputs({output_tensor})
+      .SetDispatchGroupSize((output_size + WORKGROUP_SIZE - 1) / WORKGROUP_SIZE)
+      .AddUniformVariables({{output_size}, {starts}, {steps}, {signs}});
+  return context.RunProgram(program);
+}
+
+}  // namespace webgpu
+}  // namespace onnxruntime
\ No newline at end of file
diff --git a/onnxruntime/core/providers/webgpu/tensor/slice.h b/onnxruntime/core/providers/webgpu/tensor/slice.h
new file mode 100644
index 0000000000000..e349218aac7be
--- /dev/null
+++ b/onnxruntime/core/providers/webgpu/tensor/slice.h
@@ -0,0 +1,54 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/providers/webgpu/webgpu_kernel.h"
+#include "core/providers/webgpu/program.h"
+// NOTE(review): nothing declared in this header uses <iostream>; presumably it is only needed for
+// the debug output in slice.cc -- confirm before removing.
+#include <iostream>
+
+namespace onnxruntime {
+namespace webgpu {
+
+// Program description for the WebGPU Slice shader.
+class SliceProgram final : public Program<SliceProgram> {
+ public:
+  SliceProgram() : Program{"Slice"} {}
+
+  // Shader generation hook; the definition is not part of this header.
+  Status GenerateShaderCode(ShaderHelper& sh) const override;
+
+  // Uniforms declared for the shader. Slice::ComputeInternal passes values to AddUniformVariables in
+  // this same order: output_size, starts, steps (all Uint32) and signs (Int32).
+  WEBGPU_PROGRAM_DEFINE_UNIFORM_VARIABLES({"output_size", ProgramUniformVariableDataType::Uint32},
+                                          {"starts", ProgramUniformVariableDataType::Uint32},
+                                          {"steps", ProgramUniformVariableDataType::Uint32},
+                                          {"signs", ProgramUniformVariableDataType::Int32});
+};
+
+// WebGPU kernel class for the Slice operator.
+class Slice final : public WebGpuKernel {
+ public:
+  // Attempts to read the "starts", "ends" and "axes" attributes. Each has*Attr flag records whether
+  // the matching GetAttrs call returned OK. Marked explicit so that an OpKernelInfo is never
+  // implicitly converted into a kernel object.
+  explicit Slice(const OpKernelInfo& info) : WebGpuKernel(info) {
+    hasStartsAttr = info.GetAttrs("starts", attr_starts_).IsOK();
+    hasEndsAttr = info.GetAttrs("ends", attr_ends_).IsOK();
+    hasAxesAttr = info.GetAttrs("axes", attr_axes_).IsOK();
+  }
+
+  // Kernel entry point; defined outside this header.
+  Status ComputeInternal(ComputeContext& context) const override;
+
+ private:
+  // Attribute values read by the constructor.
+  std::vector<int64_t> attr_starts_, attr_ends_, attr_axes_;
+  // Whether the corresponding attribute was read successfully in the constructor.
+  bool hasStartsAttr, hasEndsAttr, hasAxesAttr;
+};
+
+}  // namespace webgpu
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc
index 76a55b7ce4f2e..9a6301e09f22c 100644
--- a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc
+++ b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc
@@ -663,10 +663,10 @@ std::unique_ptr<KernelRegistry> RegisterKernels() {
       // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kMSInternalNHWCDomain, 18, 18, Resize)>,
       // BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kMSInternalNHWCDomain, 19, Resize)>,
 
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 1, 9, Slice)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 10, 10, Slice)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 11, 12, Slice)>,
-      // BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 13, Slice)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 1, 9, Slice)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 10, 10, Slice)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 11, 12, Slice)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 13, Slice)>,
 
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 1, 8, Flatten)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 9, 10, Flatten)>,
diff --git a/onnxruntime/test/providers/cpu/tensor/slice_op.test.cc b/onnxruntime/test/providers/cpu/tensor/slice_op.test.cc
index 2169436255727..b173d959ba47b 100644
--- a/onnxruntime/test/providers/cpu/tensor/slice_op.test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/slice_op.test.cc
@@ -352,6 +352,9 @@ TEST(SliceTest, Slice1D_WithNegativeSteps_EndOutOfBounds_1) {
 }
 
 TEST(SliceTest, Slice1D_WithNegativeSteps_EndOutOfBounds_2) {
+  if (DefaultWebGpuExecutionProvider().get() != nullptr) {
+    GTEST_SKIP() << "Not covered by WebGPU test suite";
+  }
   RunSliceTest<float>({6},
                       {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f},
                       {0},
@@ -536,6 +539,9 @@ TEST(SliceTest, Slice1D_ReverseAllAxes_1) {
   if (DefaultVSINPUExecutionProvider().get() != nullptr) {
     GTEST_SKIP() << "Skipping because of the following error: Expected output shape [{4}] did not match run output shape [{0}] for output";
   }
+  if (DefaultWebGpuExecutionProvider().get() != nullptr) {
+    GTEST_SKIP() << "Not covered by WebGPU test suite";
+  }
 
   RunSliceTest<float>({4},
                       {1.0f, 2.0f, 3.0f, 4.0f},
@@ -550,6 +556,9 @@ TEST(SliceTest, Slice1D_ReverseAllAxes_1) {
 
 // With numeric_limit_min, the end value should be clamped to -1
 TEST(SliceTest, Slice1D_ReverseAllAxes_2) {
+  if (DefaultWebGpuExecutionProvider().get() != nullptr) {
+    GTEST_SKIP() << "Not covered by WebGPU test suite";
+  }
   RunSliceTest<float>({4},
                       {1.0f, 2.0f, 3.0f, 4.0f},
                       {-1},
@@ -563,6 +572,9 @@ TEST(SliceTest, Slice1D_ReverseAllAxes_2) {
 
 // giving an end value < -{dim_value} should also clamp it to -1
 TEST(SliceTest, Slice1D_ReverseAllAxes_3) {
+  if (DefaultWebGpuExecutionProvider().get() != nullptr) {
+    GTEST_SKIP() << "Not covered by WebGPU test suite";
+  }
   RunSliceTest<float>({4},
                       {1.0f, 2.0f, 3.0f, 4.0f},
                       {-1},
@@ -579,6 +591,9 @@ TEST(SliceTest, Slice2D_ReverseAllAxes) {
   if (DefaultDmlExecutionProvider().get() != nullptr) {
     GTEST_SKIP() << "Skipping because of the following error: Expected output shape [{4}] did not match run output shape [{0}] for output";
   }
+  if (DefaultWebGpuExecutionProvider().get() != nullptr) {
+    GTEST_SKIP() << "Not covered by WebGPU test suite";
+  }
 
   RunSliceTest<float>({2, 2},
                       {1.0f, 2.0f, 3.0f, 4.0f},
@@ -596,6 +611,9 @@ TEST(SliceTest, Slice2D_ReverseSubsetOfAxes_1) {
   if (DefaultDmlExecutionProvider().get() != nullptr) {
     GTEST_SKIP() << "Skipping because of the following error: MLOperatorAuthorImpl.cpp(2100): The parameter is incorrect.";
   }
+  if (DefaultWebGpuExecutionProvider().get() != nullptr) {
+    GTEST_SKIP() << "Not covered by WebGPU test suite";
+  }
 
   RunSliceTest<float>({2, 2},
                       {1.0f, 2.0f, 3.0f, 4.0f},
@@ -613,6 +631,9 @@ TEST(SliceTest, Slice2D_ReverseSubsetOfAxes_2) {
   if (DefaultDmlExecutionProvider().get() != nullptr) {
     GTEST_SKIP() << "Skipping because of the following error: Expected output shape [{2,2}] did not match run output shape [{0,2}] for output";
   }
+  if (DefaultWebGpuExecutionProvider().get() != nullptr) {
+    GTEST_SKIP() << "Not covered by WebGPU test suite";
+  }
 
   RunSliceTest<float>({2, 2},
                       {1.0f, 2.0f, 3.0f, 4.0f},
@@ -667,6 +688,9 @@ TEST(SliceTest, Slice2D_ReverseSubsetOfNegAxes_1) {
   if (DefaultDmlExecutionProvider().get() != nullptr) {
     GTEST_SKIP() << "Skipping because of the following error: Expected output shape [{2,2}] did not match run output shape [{2,0}] for output";
   }
+  if (DefaultWebGpuExecutionProvider().get() != nullptr) {
+    GTEST_SKIP() << "Not covered by WebGPU test suite";
+  }
 
   RunSliceTest<float>({2, 2},
                       {1.0f, 2.0f, 3.0f, 4.0f},
@@ -782,5 +806,38 @@ TEST(SliceTest, CoalesceDims) {
   RunSliceTest<float>({1, 1, 1}, {1.f}, {0}, {std::numeric_limits<int64_t>::max()}, {1}, {}, {1, 1, 1}, {1.f}, true);
 }
 
+TEST(SliceTest, SliceWebGPU_float32) {  // 1-D float case: expects only element 3 of 5 (argument roles below are inferred from the data)
+  RunSliceTest<float>({5},  // input shape
+                      {0.3964604139328003f, -0.8916832804679871f, -1.6578896045684814f, 1.960708737373352f, 1.181204915046692f},  // float literals, matching the rest of this file
+                      {3},   // presumably starts
+                      {4},   // presumably ends
+                      {},    // presumably axes (left empty)
+                      {},    // presumably steps (left empty)
+                      {1},   // expected output shape
+                      {1.960708737373352f});  // expected output: the input element at index 3
+}
+
+TEST(SliceTest, SliceWebGPU_float32_large_dims) {  // rank-5 float case: slices the last axis (argument roles below are inferred from the data)
+  RunSliceTest<float>({1, 1, 1, 1, 5},  // input shape
+                      {0.3964604139328003f, -0.8916832804679871f, -1.6578896045684814f, 1.960708737373352f, 1.181204915046692f},  // float literals, matching the rest of this file
+                      {3},   // presumably starts
+                      {4},   // presumably ends
+                      {4},   // presumably axes: the last of the five dimensions
+                      {},    // presumably steps (left empty)
+                      {1, 1, 1, 1, 1},  // expected output shape
+                      {1.960708737373352f});  // expected output: the input element at index 3 along the last axis
+}
+
+TEST(SliceTest, SliceWebGPU_int32) {  // 1-D int32 case: expects only element 3 of 5 (argument roles below are inferred from the data)
+  RunSliceTest<int32_t>({5},  // input shape; instantiated with int32_t so the test covers the type its name promises
+                        {0, 0, -1, 1, 0},  // input data
+                        {3},   // presumably starts
+                        {4},   // presumably ends
+                        {},    // presumably axes (left empty)
+                        {},    // presumably steps (left empty)
+                        {1},   // expected output shape
+                        {1});  // expected output: the input element at index 3
+}
+
 }  // namespace test
 }  // namespace onnxruntime
diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc
index 0540fb3912e81..b74b822a197ea 100644
--- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc
+++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc
@@ -739,7 +739,9 @@
         "^test_layer_normalization_default_axis_cpu",
         "^test_gelu_tanh_1_expanded_cpu",
         "^test_gelu_tanh_2_expanded_cpu",
-        "^test_dynamicquantizelinear_expanded_cpu"
+        "^test_dynamicquantizelinear_expanded_cpu",
+        "^test_center_crop_pad_crop_negative_axes_hwc*", // failed due to new types or shape infer with negative axis for CenterCropPad.
+        "^test_center_crop_pad_crop_negative_axes_hwc_expanded*" // failed due to new types or shape infer with negative axis for CenterCropPad.
     ],
     "current_failing_tests_pure_DML": [
         "^test_negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_cpu",
diff --git a/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packaging-steps.yml b/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packaging-steps.yml
index 84f517a81686d..35d2caef132b6 100644
--- a/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packaging-steps.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packaging-steps.yml
@@ -57,7 +57,7 @@ steps:
 
 - ${{ if eq(parameters.MacosArch, 'x86_64') }}:
   - script: |
-      python3 $(Build.SourcesDirectory)/tools/ci_build/build.py --test  ${{ parameters.AdditionalBuildFlags }} --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --config Release
+      python3 $(Build.SourcesDirectory)/tools/ci_build/build.py --test  ${{ parameters.AdditionalBuildFlags }} --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --use_binskim_compliant_compile_flags --build_shared_lib --config Release
     displayName: 'Running Tests'
 
 - task: ShellScript@2