diff --git a/.github/actions/setup_base/action.yml b/.github/actions/setup_base/action.yml
index 72883e18..4176fbf7 100644
--- a/.github/actions/setup_base/action.yml
+++ b/.github/actions/setup_base/action.yml
@@ -31,7 +31,11 @@ inputs:
   python-version:
     required: false
     description: ''
-    default: '3.12'
+    default: '3.13'
+  emscripten-version:
+    required: false
+    description: ''
+    default: ''
 
 outputs:
   cache-dir:
@@ -244,8 +248,12 @@ runs:
       shell: bash
       run: |
         
-        pip install pyodide-build>=0.28.0
-        echo "EMSCRIPTEN_VERSION=$(pyodide config get emscripten_version)" >> $GITHUB_ENV
+        pip install pyodide-build==0.30.9
+        if [[ -z "${{ inputs.emscripten-version }}" ]]; then  # no override given: ask pyodide-build
+          echo "EMSCRIPTEN_VERSION=$(pyodide config get emscripten_version)" >> "$GITHUB_ENV"
+        else  # the caller pinned an emscripten version explicitly
+          echo "EMSCRIPTEN_VERSION=${{ inputs.emscripten-version }}" >> "$GITHUB_ENV"
+        fi
 
     - name: Setup Emscripten
       if: inputs.target-arch == 'wasm32'
diff --git a/.github/workflows/build_llvm.yml b/.github/workflows/build_llvm.yml
index f864a5eb..4a9a3713 100644
--- a/.github/workflows/build_llvm.yml
+++ b/.github/workflows/build_llvm.yml
@@ -314,7 +314,7 @@ jobs:
           ###############################
           
           $CCACHE -z
-          pyodide build scripts/llvm_wasm -o wheelhouse --compression-level 10
+          PYODIDE_BUILD_EXPORTS=whole_archive pyodide build scripts/llvm_wasm -o wheelhouse --compression-level 10
           $CCACHE -s
 
       - name: Upload ccache log
diff --git a/.github/workflows/build_mlir_python_bindings_wheel.yml b/.github/workflows/build_mlir_python_bindings_wheel.yml
index 25bd84a8..1d6c444a 100644
--- a/.github/workflows/build_mlir_python_bindings_wheel.yml
+++ b/.github/workflows/build_mlir_python_bindings_wheel.yml
@@ -124,7 +124,7 @@ jobs:
         run: |
           
           if [[ "${{ matrix.target-arch }}" == "wasm32" ]]; then
-            pip download mlir-wheel --plat pyodide_2024_0_wasm32 --no-deps --python-version 3.12 -f https://llvm.github.io/eudsl
+            pip download mlir-wheel --plat pyodide_2025_0_wasm32 --no-deps --python-version 3.13 -f https://llvm.github.io/eudsl
           else
             pip download mlir-wheel -f https://llvm.github.io/eudsl
           fi
@@ -214,7 +214,7 @@ jobs:
         run: |
           
           if [[ "${{ matrix.target-arch }}" == "wasm32" ]]; then
-            pyodide build "$PWD/projects/mlir-python-bindings-wasm" -o wheelhouse --compression-level 10
+            PYODIDE_BUILD_EXPORTS=whole_archive pyodide build "$PWD/projects/mlir-python-bindings-wasm" -o wheelhouse --compression-level 10
           else
             $python3_command -m cibuildwheel "$PWD/projects/mlir-python-bindings" --output-dir wheelhouse
           fi
diff --git a/.github/workflows/deploy_pip_page.yml b/.github/workflows/deploy_pip_page.yml
index 70cf89a9..d74b4dd3 100644
--- a/.github/workflows/deploy_pip_page.yml
+++ b/.github/workflows/deploy_pip_page.yml
@@ -49,11 +49,11 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v5
         with:
-          python-version: '3.12'
+          python-version: '3.13'
 
       - name: Fetch latest WASM wheel
         run: |
-          pip download mlir-python-bindings --plat pyodide_2024_0_wasm32 --no-deps --python-version 3.12 -f https://llvm.github.io/eudsl
+          pip download mlir-python-bindings --plat pyodide_2025_0_wasm32 --no-deps --python-version 3.13 -f https://llvm.github.io/eudsl
           echo "MLIR_PYTHON_WHEEL_NAME=$(ls mlir_python_bindings*)" >> $GITHUB_ENV
           pip wheel eudsl-python-extras -f https://llvm.github.io/eudsl --no-deps -w .
           echo "EUDSL_PYTHON_EXTRAS_WHEEL_NAME=$(ls eudsl_python_extras*)" >> $GITHUB_ENV
diff --git a/.gitmodules b/.gitmodules
index 1d492ef8..190dd517 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,3 +4,14 @@
 	ignore = dirty
 	shallow = true
 	branch = main
+[submodule "third_party/dawn"]
+	path = third_party/dawn
+	url = https://github.com/google/dawn.git
+	fetchRecurseSubmodules = false
+	shallow = true
+[submodule "third_party/SPIRV-Headers"]
+	path = third_party/SPIRV-Headers
+	url = https://github.com/KhronosGroup/SPIRV-Headers.git
+[submodule "third_party/SPIRV-Tools"]
+	path = third_party/SPIRV-Tools
+	url = https://github.com/KhronosGroup/SPIRV-Tools.git
diff --git a/projects/mlir-python-bindings-wasm/build.sh b/projects/mlir-python-bindings-wasm/build.sh
index 407c446a..56b7c233 100755
--- a/projects/mlir-python-bindings-wasm/build.sh
+++ b/projects/mlir-python-bindings-wasm/build.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
 
+TD="$(cd "$(dirname "$0")" && pwd)"  # absolute path of the directory containing this script
+REPO_ROOT="$(cd "$TD/../.." && pwd)"  # two levels up from the script directory
 
 if ! command -v pyodide >/dev/null 2>&1
 then
@@ -33,4 +35,4 @@ if [[ "$OSTYPE" == "darwin"* ]]; then
   # the above doesn't work so you need to run in docker
 fi
 
-pyodide build . -o wheelhouse --compression-level 10
+PYODIDE_SOURCEMAPS=1 PYODIDE_BUILD_EXPORTS=whole_archive pyodide build "$TD" -o "$REPO_ROOT/wheelhouse" --compression-level 10
diff --git a/projects/mlir-python-bindings-wasm/pyproject.toml b/projects/mlir-python-bindings-wasm/pyproject.toml
index 7bea72f2..f66e0883 100644
--- a/projects/mlir-python-bindings-wasm/pyproject.toml
+++ b/projects/mlir-python-bindings-wasm/pyproject.toml
@@ -23,7 +23,9 @@ Discussions = "https://discourse.llvm.org/"
 requires = [
     "scikit-build-core==0.10.7",
     "typing_extensions==4.12.2",
-    "nanobind>=2.4, <3.0",
+    # https://github.com/wjakob/nanobind/commit/dd350fe81931a1b362196cb415d188c36422766e#diff-8599263e788c107944d356ce118965942735cfbe16289ccf98ee5f8a33f0e808
+    # error: static assertion failed due to requirement 'pyobj_name::total_count * sizeof(_object *) == 96'
+    "nanobind>=2.4, <=2.9.2",
     "pybind11>=2.10.0, <=2.13.6",
 ]
 build-backend = "scikit_build_core.build"
@@ -52,14 +54,16 @@ CMAKE_CXX_COMPILER_LAUNCHER = { env = "CMAKE_CXX_COMPILER_LAUNCHER", default = "
 # (and setting CMAKE_BUILD_WITH_INSTALL_RPATH does nothing)
 # CMAKE_GENERATOR = { env = "CMAKE_GENERATOR", default = "Ninja" }
 CMAKE_PLATFORM_NO_VERSIONED_SONAME = "ON"
-CMAKE_CXX_FLAGS = "-sNO_DISABLE_EXCEPTION_CATCHING"
 CMAKE_VISIBILITY_INLINES_HIDDEN = "ON"
 CMAKE_C_VISIBILITY_PRESET = "hidden"
 CMAKE_CXX_VISIBILITY_PRESET = "hidden"
-CMAKE_EXE_LINKER_FLAGS = "-sALLOW_TABLE_GROWTH -sASSERTIONS -sNO_DISABLE_EXCEPTION_CATCHING -sWASM_BIGINT"
-CMAKE_SHARED_LINKER_FLAGS = "-sALLOW_TABLE_GROWTH -sASSERTIONS -sNO_DISABLE_EXCEPTION_CATCHING -sWASM_BIGINT"
-CMAKE_MODULE_LINKER_FLAGS = "-sALLOW_TABLE_GROWTH -sASSERTIONS -sNO_DISABLE_EXCEPTION_CATCHING -sWASM_BIGINT"
+CMAKE_EXE_LINKER_FLAGS = "-sALLOW_TABLE_GROWTH -sASSERTIONS -sWASM_BIGINT"
+CMAKE_SHARED_LINKER_FLAGS = "-sALLOW_TABLE_GROWTH -sASSERTIONS -sWASM_BIGINT"
+CMAKE_MODULE_LINKER_FLAGS = "-sALLOW_TABLE_GROWTH -sASSERTIONS -sWASM_BIGINT"
 CMAKE_VERBOSE_MAKEFILE = "ON"
+# De-duplicate libraries on link lines based on linker capabilities.
+# minimum cmake version is 3.29
+CMAKE_POLICY_DEFAULT_CMP0156 = "NEW"
 
 # so that NATIVE doesn't try to get built
 LLVM_NATIVE_TOOL_DIR = { env = "LLVM_NATIVE_TOOL_DIR", default = "" }
@@ -68,5 +72,3 @@ MLIR_LINALG_ODS_YAML_GEN = { env = "MLIR_LINALG_ODS_YAML_GEN", default = "" }
 MLIR_TABLEGEN = { env = "MLIR_TABLEGEN", default = "" }
 
 MLIR_ENABLE_BINDINGS_PYTHON = "ON"
-MLIR_ENABLE_EXECUTION_ENGINE = "ON"
-MLIR_ENABLE_SPIRV_CPU_RUNNER = "ON"
diff --git a/projects/mlir-wgpu/CMakeLists.txt b/projects/mlir-wgpu/CMakeLists.txt
new file mode 100644
index 00000000..fed73e61
--- /dev/null
+++ b/projects/mlir-wgpu/CMakeLists.txt
@@ -0,0 +1,89 @@
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# Copyright (c) 2025.
+
+cmake_minimum_required(VERSION 3.29)
+project(mlir-webgpu LANGUAGES CXX C)
+
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+if(POLICY CMP0068)
+  cmake_policy(SET CMP0068 NEW)
+  set(CMAKE_BUILD_WITH_INSTALL_NAME_DIR ON)
+endif()
+
+if(POLICY CMP0075)
+  cmake_policy(SET CMP0075 NEW)
+endif()
+
+if(POLICY CMP0077)
+  cmake_policy(SET CMP0077 NEW)
+endif()
+
+if(POLICY CMP0091)
+  cmake_policy(SET CMP0091 NEW)
+endif()
+
+if(POLICY CMP0116)
+  cmake_policy(SET CMP0116 NEW)
+endif()
+
+if(POLICY CMP0135)  # set the policy the guard tests (was resetting CMP0116 to OLD)
+  cmake_policy(SET CMP0135 NEW)
+endif()
+
+set(DAWN_FETCH_DEPENDENCIES ON)
+set(TINT_BUILD_SAMPLES OFF CACHE BOOL "" FORCE)
+set(TINT_BUILD_DOCS  OFF CACHE BOOL "" FORCE)
+set(TINT_BUILD_TESTS OFF CACHE BOOL "" FORCE)
+set(TINT_BUILD_SPV_READER ON CACHE BOOL "" FORCE)
+set(TINT_BUILD_WGSL_READER OFF CACHE BOOL "" FORCE)
+set(TINT_BUILD_GLSL_WRITER OFF CACHE BOOL "" FORCE)
+set(TINT_BUILD_HLSL_WRITER OFF CACHE BOOL "" FORCE)
+set(TINT_BUILD_MSL_WRITER OFF CACHE BOOL "" FORCE)
+set(TINT_BUILD_SPV_WRITER OFF CACHE BOOL "" FORCE)
+set(TINT_BUILD_WGSL_WRITER ON CACHE BOOL "" FORCE)
+
+add_subdirectory(
+  "${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/dawn"
+  EXCLUDE_FROM_ALL
+  ${CMAKE_CURRENT_BINARY_DIR}/dawn
+)
+if(NOT Dawn_SOURCE_DIR OR NOT EXISTS ${Dawn_SOURCE_DIR})
+  message(FATAL_ERROR "failed to configure Dawn dependency")
+endif()
+set(DAWN_SOURCE_DIR ${Dawn_SOURCE_DIR})
+
+set(SPIRV_HEADERS_SKIP_EXAMPLES ON CACHE BOOL "" FORCE)
+set(SPIRV_HEADERS_SKIP_INSTALL ON CACHE BOOL "" FORCE)
+add_subdirectory(
+  "${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/SPIRV-Headers"
+  EXCLUDE_FROM_ALL
+  ${CMAKE_CURRENT_BINARY_DIR}/SPIRV-Headers
+)
+
+add_subdirectory(
+  "${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/SPIRV-Tools"
+  EXCLUDE_FROM_ALL
+  ${CMAKE_CURRENT_BINARY_DIR}/SPIRV-Tools
+)
+
+# https://github.com/kainino0x/webgpu-cross-platform-demo
+add_executable(dawn_app shader_test.cpp)
+target_compile_definitions(dawn_app PRIVATE DEMO_USE_ASYNCIFY=1)
+
+if(EMSCRIPTEN)
+  set_target_properties(dawn_app PROPERTIES SUFFIX ".html")
+  target_link_libraries(dawn_app PRIVATE emdawnwebgpu_cpp webgpu_glfw)
+  target_link_options(dawn_app PRIVATE
+    "-sASYNCIFY=1"
+    "-sUSE_GLFW=3"
+    "-sASYNCIFY_STACK_SIZE=65536"
+    "-sEXPORTED_RUNTIME_METHODS=ccall"
+  )
+else()
+  target_link_libraries(dawn_app PRIVATE webgpu_dawn webgpu_glfw glfw)
+endif()
diff --git a/projects/mlir-wgpu/dawn_main.cpp b/projects/mlir-wgpu/dawn_main.cpp
new file mode 100644
index 00000000..bc801a7e
--- /dev/null
+++ b/projects/mlir-wgpu/dawn_main.cpp
@@ -0,0 +1,152 @@
+#include <iostream>
+
+#include <GLFW/glfw3.h>
+#if defined(__EMSCRIPTEN__)
+#include <emscripten/emscripten.h>
+#endif
+#include <dawn/webgpu_cpp_print.h>
+#include <webgpu/webgpu_cpp.h>
+#include <webgpu/webgpu_glfw.h>
+
+wgpu::Instance instance;
+wgpu::Adapter adapter;
+wgpu::Device device;
+wgpu::RenderPipeline pipeline;
+
+wgpu::Surface surface;
+wgpu::TextureFormat format;
+const uint32_t kWidth = 512;
+const uint32_t kHeight = 512;
+
+void ConfigureSurface() {  // picks a texture format and configures `surface`
+  wgpu::SurfaceCapabilities capabilities;
+  surface.GetCapabilities(adapter, &capabilities);
+  format = capabilities.formats[0];
+  // NOTE(review): formats[0] is read without checking that any format exists.
+  wgpu::SurfaceConfiguration config{.device = device,
+                                    .format = format,
+                                    .width = kWidth,
+                                    .height = kHeight};
+  surface.Configure(&config);
+}
+
+void Init() {  // fills the global instance, adapter and device
+  static const auto kTimedWaitAny = wgpu::InstanceFeatureName::TimedWaitAny;
+  wgpu::InstanceDescriptor instanceDesc{.requiredFeatureCount = 1,
+                                        .requiredFeatures = &kTimedWaitAny};
+  instance = wgpu::CreateInstance(&instanceDesc);
+  // Request an adapter, then wait on the returned future (no timeout).
+  wgpu::Future f1 = instance.RequestAdapter(
+      nullptr, wgpu::CallbackMode::WaitAnyOnly,
+      [](wgpu::RequestAdapterStatus status, wgpu::Adapter a,
+         wgpu::StringView message) {
+        if (status != wgpu::RequestAdapterStatus::Success) {
+          std::cout << "RequestAdapter: " << message << "\n";
+          exit(1);  // non-zero so callers can tell setup failed
+        }
+        adapter = std::move(a);
+      });
+  instance.WaitAny(f1, UINT64_MAX);
+  // Uncaptured device errors are only printed; they do not stop the program.
+  wgpu::DeviceDescriptor desc{};
+  desc.SetUncapturedErrorCallback([](const wgpu::Device&,
+                                     wgpu::ErrorType errorType,
+                                     wgpu::StringView message) {
+    std::cout << "Error: " << errorType << " - message: " << message << "\n";
+  });
+  // Request the device from the adapter and wait for it the same way.
+  wgpu::Future f2 = adapter.RequestDevice(
+      &desc, wgpu::CallbackMode::WaitAnyOnly,
+      [](wgpu::RequestDeviceStatus status, wgpu::Device d,
+         wgpu::StringView message) {
+        if (status != wgpu::RequestDeviceStatus::Success) {
+          std::cout << "RequestDevice: " << message << "\n";
+          exit(1);  // non-zero so callers can tell setup failed
+        }
+        device = std::move(d);
+      });
+  instance.WaitAny(f2, UINT64_MAX);
+}
+
+const char shaderCode[] = R"(
+    @vertex fn vertexMain(@builtin(vertex_index) i : u32) ->
+      @builtin(position) vec4f {
+        const pos = array(vec2f(0, 1), vec2f(-1, -1), vec2f(1, -1));
+        return vec4f(pos[i], 0, 1);
+    }
+    @fragment fn fragmentMain() -> @location(0) vec4f {
+        return vec4f(1, 0, 0, 1);
+    }
+)";
+
+void CreateRenderPipeline() {  // builds the global `pipeline` from shaderCode
+  wgpu::ShaderSourceWGSL wgsl{{.code = shaderCode}};
+  // The WGSL source is chained onto the shader module descriptor.
+  wgpu::ShaderModuleDescriptor shaderModuleDescriptor{.nextInChain = &wgsl};
+  wgpu::ShaderModule shaderModule =
+      device.CreateShaderModule(&shaderModuleDescriptor);
+  // The single color target uses the global `format`.
+  wgpu::ColorTargetState colorTargetState{.format = format};
+
+  wgpu::FragmentState fragmentState{
+      .module = shaderModule, .targetCount = 1, .targets = &colorTargetState};
+  // The same module serves both stages; no entry point names are set here.
+  wgpu::RenderPipelineDescriptor descriptor{.vertex = {.module = shaderModule},
+                                            .fragment = &fragmentState};
+  pipeline = device.CreateRenderPipeline(&descriptor);
+}
+
+void Render() {  // records and submits one frame: clear, then draw 3 vertices
+  wgpu::SurfaceTexture surfaceTexture;
+  surface.GetCurrentTexture(&surfaceTexture);
+  // Render into a view of the surface's current texture, clearing it first.
+  wgpu::RenderPassColorAttachment attachment{
+      .view = surfaceTexture.texture.CreateView(),
+      .loadOp = wgpu::LoadOp::Clear,
+      .storeOp = wgpu::StoreOp::Store};
+
+  wgpu::RenderPassDescriptor renderpass{.colorAttachmentCount = 1,
+                                        .colorAttachments = &attachment};
+  // Encode a single render pass using the global `pipeline`.
+  wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+  wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderpass);
+  pass.SetPipeline(pipeline);
+  pass.Draw(3);
+  pass.End();
+  wgpu::CommandBuffer commands = encoder.Finish();
+  device.GetQueue().Submit(1, &commands);
+}
+
+void InitGraphics() {  // surface first: it sets `format`, which the pipeline reads
+  ConfigureSurface();
+  CreateRenderPipeline();
+}
+
+void Start() {
+  // Opens the window, attaches a WebGPU surface to it and drives rendering.
+  if (!glfwInit()) return;
+  glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
+  GLFWwindow* window =
+      glfwCreateWindow(kWidth, kHeight, "WebGPU window", nullptr, nullptr);
+  if (!window) {  // window creation failed: do not hand a null window to Dawn
+    glfwTerminate();
+    return;
+  }
+  surface = wgpu::glfw::CreateSurfaceForWindow(instance, window);
+  InitGraphics();
+
+#if defined(__EMSCRIPTEN__)
+  emscripten_set_main_loop(Render, 0, false);
+#else
+  while (!glfwWindowShouldClose(window)) {
+    glfwPollEvents();
+    Render();
+    surface.Present();
+    instance.ProcessEvents();
+  }
+#endif
+}
+
+int main() {  // Init() must run first: Start() uses the global instance/device
+  Init();
+  Start();
+}
diff --git a/projects/mlir-wgpu/shader_test.cpp b/projects/mlir-wgpu/shader_test.cpp
new file mode 100644
index 00000000..aa49c741
--- /dev/null
+++ b/projects/mlir-wgpu/shader_test.cpp
@@ -0,0 +1,614 @@
+// Copyright 2019 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <emscripten.h>
+#include <emscripten/html5.h>
+#include <iostream>
+#include <memory>
+#include <webgpu/webgpu_cpp.h>
+
+static wgpu::Instance instance;
+static wgpu::Device device;
+static wgpu::Queue queue;
+static wgpu::RenderPipeline pipeline;
+static constexpr uint32_t kBindGroupOffset = 256;
+static wgpu::BindGroup bindgroup;
+static int testsStarted = 0;
+static int testsCompleted = 0;
+
+wgpu::Surface surface;
+wgpu::TextureView canvasDepthStencilView;
+const uint32_t kWidth = 300;
+const uint32_t kHeight = 150;
+
+wgpu::Device GetDevice(wgpu::DeviceDescriptor *descriptor) {
+  wgpu::RequestAdapterWebXROptions xrOptions = {};
+  wgpu::RequestAdapterOptions options = {};
+  options.nextInChain = &xrOptions;
+  // Step 1: request an adapter via the global `instance` and wait for it.
+  wgpu::Adapter adapter;
+  wgpu::Future f1 = instance.RequestAdapter(
+      &options, wgpu::CallbackMode::WaitAnyOnly,
+      [&](wgpu::RequestAdapterStatus status, wgpu::Adapter ad,
+          wgpu::StringView message) {
+        if (message.length) {
+          printf("RequestAdapter: %.*s\n", (int)message.length, message.data);
+        }
+        if (status == wgpu::RequestAdapterStatus::Unavailable) {
+          printf("WebGPU unavailable; exiting cleanly\n");
+          // exit(0) (rather than emscripten_force_exit(0)) ensures there is no
+          // dangling keepalive.
+          exit(0);
+        }
+        assert(status == wgpu::RequestAdapterStatus::Success);
+        // Hand the adapter to the enclosing scope (captured by reference).
+        adapter = std::move(ad);
+      });
+  instance.WaitAny(f1, UINT64_MAX);
+  assert(adapter);
+  // Step 2: request a device using the caller's descriptor. This local
+  wgpu::Device device;  // shadows the file-level `device` inside this function.
+  wgpu::Future f2 = adapter.RequestDevice(
+      descriptor, wgpu::CallbackMode::WaitAnyOnly,
+      [&](wgpu::RequestDeviceStatus status, wgpu::Device dev,
+          wgpu::StringView message) {
+        if (message.length) {
+          printf("RequestDevice: %.*s\n", (int)message.length, message.data);
+        }
+        assert(status == wgpu::RequestDeviceStatus::Success);
+        // Hand the device to the enclosing scope (captured by reference).
+        device = std::move(dev);
+      });
+  instance.WaitAny(f2, UINT64_MAX);
+  assert(device);
+
+  return device;
+}
+
+static const char shaderCode[] = R"(
+    @binding(0) @group(0) var<uniform> uColor : vec4f;
+
+    @vertex
+    fn main_v(@builtin(vertex_index) idx: u32) -> @builtin(position) vec4<f32> {
+        var pos = array<vec2<f32>, 3>(
+            vec2<f32>(0.0, 0.5), vec2<f32>(-0.5, -0.5), vec2<f32>(0.5, -0.5));
+        return vec4<f32>(pos[idx], 0.0, 1.0);
+    }
+    @fragment
+    fn main_f() -> @location(0) vec4<f32> {
+        return uColor;
+    }
+)";
+
+void init() {  // sets the globals `queue`, `bindgroup` and `pipeline`
+  queue = device.GetQueue();
+
+  // Test of OOM with mappedAtCreation.
+  {
+    wgpu::BufferDescriptor descriptor{};
+    descriptor.usage = wgpu::BufferUsage::CopyDst;
+    descriptor.size = 0x10'0000'0000'0000ULL;
+    descriptor.mappedAtCreation = true;
+    wgpu::Buffer bufferTooLarge = device.CreateBuffer(&descriptor);
+    assert(bufferTooLarge == nullptr);
+  }
+  // Build the shader module from the WGSL source in `shaderCode`.
+  wgpu::ShaderModule shaderModule{};
+  {
+    wgpu::ShaderSourceWGSL wgslDesc{};
+    wgslDesc.code = shaderCode;
+
+    wgpu::ShaderModuleDescriptor descriptor{};
+    descriptor.nextInChain = &wgslDesc;
+    shaderModule = device.CreateShaderModule(&descriptor);
+  }
+  // Layout: one fragment-visible uniform buffer with a dynamic offset.
+  wgpu::BindGroupLayout bgl;
+  {
+    wgpu::BindGroupLayoutEntry bglEntry{
+        .binding = 0,
+        .visibility = wgpu::ShaderStage::Fragment,
+        //.bindingArraySize = 1,
+        .buffer =
+            {
+                .type = wgpu::BufferBindingType::Uniform,
+                .hasDynamicOffset = true,
+            },
+    };
+    wgpu::BindGroupLayoutDescriptor bglDesc{
+        .entryCount = 1,
+        .entries = &bglEntry,
+    };
+    bgl = device.CreateBindGroupLayout(&bglDesc);
+    // The color is stored kBindGroupOffset bytes into the uniform buffer.
+    static constexpr std::array<float, 4> kColor{0.0, 0.502, 1.0,
+                                                 1.0}; // 0x80/0xff ~= 0.502
+    wgpu::BufferDescriptor uniformBufferDesc{
+        .usage = wgpu::BufferUsage::Uniform,
+        .size = kBindGroupOffset + sizeof(kColor),
+        .mappedAtCreation = true,
+    };
+    wgpu::Buffer uniformBuffer = device.CreateBuffer(&uniformBufferDesc);
+    {
+      float *mapped = reinterpret_cast<float *>(
+          uniformBuffer.GetMappedRange(kBindGroupOffset));
+      memcpy(mapped, kColor.data(), sizeof(kColor));
+      uniformBuffer.Unmap();
+    }
+    // No offset is set here; render() passes kBindGroupOffset dynamically.
+    wgpu::BindGroupEntry bgEntry{
+        .binding = 0,
+        .buffer = uniformBuffer,
+        .size = sizeof(kColor),
+    };
+    wgpu::BindGroupDescriptor bgDesc{
+        .layout = bgl,
+        .entryCount = 1,
+        .entries = &bgEntry,
+    };
+    bindgroup = device.CreateBindGroup(&bgDesc);
+  }
+  // Pipeline: BGRA8Unorm color target plus a Depth32Float depth attachment.
+  {
+    wgpu::PipelineLayoutDescriptor pl{};
+    pl.bindGroupLayoutCount = 1;
+    pl.bindGroupLayouts = &bgl;
+
+    wgpu::ColorTargetState colorTargetState{};
+    colorTargetState.format = wgpu::TextureFormat::BGRA8Unorm;
+
+    wgpu::FragmentState fragmentState{};
+    fragmentState.module = shaderModule;
+    fragmentState.entryPoint = "main_f";
+    fragmentState.targetCount = 1;
+    fragmentState.targets = &colorTargetState;
+
+    wgpu::DepthStencilState depthStencilState{};
+    depthStencilState.format = wgpu::TextureFormat::Depth32Float;
+    depthStencilState.depthWriteEnabled = true;
+    depthStencilState.depthCompare = wgpu::CompareFunction::Always;
+
+    wgpu::RenderPipelineDescriptor descriptor{};
+    descriptor.layout = device.CreatePipelineLayout(&pl);
+    descriptor.vertex.module = shaderModule;
+    descriptor.vertex.entryPoint = "main_v";
+    descriptor.fragment = &fragmentState;
+    descriptor.primitive.topology = wgpu::PrimitiveTopology::TriangleList;
+    descriptor.depthStencil = &depthStencilState;
+
+    // Just test the bindings; we are only going to actually use the async one
+    // below.
+    wgpu::RenderPipeline unused = device.CreateRenderPipeline(&descriptor);
+    assert(unused);
+    // The async result is what gets stored in the global `pipeline`.
+    wgpu::Future f = device.CreateRenderPipelineAsync(
+        &descriptor, wgpu::CallbackMode::WaitAnyOnly,
+        [](wgpu::CreatePipelineAsyncStatus status, wgpu::RenderPipeline pl,
+           wgpu::StringView message) {
+          if (message.length) {
+            printf("CreateRenderPipelineAsync: %.*s\n", (int)message.length,
+                   message.data);
+          }
+          assert(status == wgpu::CreatePipelineAsyncStatus::Success);
+          pipeline = std::move(pl);
+        });
+    instance.WaitAny(f, UINT64_MAX);
+    assert(pipeline);
+  }
+}
+
+// The depth stencil attachment isn't really needed to draw the triangle
+// and doesn't really affect the render result.
+// But having one should give us a slightly better test coverage for the compile
+// of the depth stencil descriptor.
+void render(wgpu::TextureView view, wgpu::TextureView depthStencilView) {
+  wgpu::RenderPassColorAttachment attachment{};
+  attachment.view = view;
+  attachment.loadOp = wgpu::LoadOp::Clear;
+  attachment.storeOp = wgpu::StoreOp::Store;
+  attachment.clearValue = {0, 1, 0, 0.5};
+  // One color attachment (`view`), cleared to the value set above.
+  wgpu::RenderPassDescriptor renderpass{};
+  renderpass.colorAttachmentCount = 1;
+  renderpass.colorAttachments = &attachment;
+  // Depth attachment on `depthStencilView`: cleared to 0 and stored.
+  wgpu::RenderPassDepthStencilAttachment depthStencilAttachment = {};
+  depthStencilAttachment.view = depthStencilView;
+  depthStencilAttachment.depthClearValue = 0;
+  depthStencilAttachment.depthLoadOp = wgpu::LoadOp::Clear;
+  depthStencilAttachment.depthStoreOp = wgpu::StoreOp::Store;
+
+  renderpass.depthStencilAttachment = &depthStencilAttachment;
+  // Record the pass in nested scopes, keeping only the finished buffer.
+  wgpu::CommandBuffer commands;
+  {
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    {
+      wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderpass);
+      pass.SetPipeline(pipeline);
+      pass.SetBindGroup(0, bindgroup, 1, &kBindGroupOffset);  // dynamic offset
+      pass.Draw(3);
+      pass.End();
+    }
+    commands = encoder.Finish();
+  }
+
+  queue.Submit(1, &commands);
+}
+
+void issueContentsCheck(const char *functionName, wgpu::Buffer readbackBuffer,
+                        uint32_t expectData) {
+  testsStarted++;  // paired with testsCompleted++ at the end of the callback
+  wgpu::Future f = readbackBuffer.MapAsync(
+      wgpu::MapMode::Read, 0, 4, wgpu::CallbackMode::WaitAnyOnly,
+      [=](wgpu::MapAsyncStatus status, wgpu::StringView message) {
+        if (message.length) {
+          printf("issueContentsCheck MapAsync: %.*s\n", (int)message.length,
+                 message.data);
+        }
+        assert(status == wgpu::MapAsyncStatus::Success);
+        // Compile-time switch between the two ways of reading the mapping.
+        static constexpr bool kUseReadMappedRange = true;
+        std::vector<char> ptrData;
+        const void *ptr;
+        if constexpr (kUseReadMappedRange) {
+          ptrData.resize(4);
+          ptr = ptrData.data();
+          wgpu::Status status =
+              readbackBuffer.ReadMappedRange(0, ptrData.data(), 4);
+          printf("%s: ReadMappedRange -> %u%s\n", functionName,
+                 static_cast<unsigned>(status),  // scoped enum: cast for %u
+                 status == wgpu::Status::Success ? "" : " <------- FAILED");
+          assert(status == wgpu::Status::Success);
+        } else {
+          ptr = readbackBuffer.GetConstMappedRange();
+          printf("%s: GetConstMappedRange -> %p%s\n", functionName, ptr,
+                 ptr ? "" : " <------- FAILED");
+          assert(ptr != nullptr);
+        }
+        // Copy the bytes out instead of reading chars through a uint32_t*.
+        uint32_t readback = 0;
+        memcpy(&readback, ptr, sizeof(readback));
+        printf("  got %08x, expected %08x%s\n", readback, expectData,
+               readback == expectData ? "" : " <------- FAILED");
+        readbackBuffer.Unmap();
+        testsCompleted++;
+      });
+  instance.WaitAny(f, UINT64_MAX);
+}
+
+void doCopyTestMappedAtCreation(bool useRange) {  // write, copy, read back
+  static constexpr uint32_t kValue = 0x05060708;
+  size_t offset = useRange ? 8 : 0;
+  size_t size = useRange ? 12 : 4;
+  wgpu::Buffer src;
+  {
+    wgpu::BufferDescriptor descriptor{};
+    descriptor.size = size;
+    descriptor.usage = wgpu::BufferUsage::CopySrc;
+    // descriptor.usage = static_cast<wgpu::BufferUsage>(0xffff'ffff); //
+    // Uncomment to make createBuffer fail
+    descriptor.mappedAtCreation = true;
+    src = device.CreateBuffer(&descriptor);
+    // Calls just to check they work
+    src.GetSize();
+    src.GetUsage();
+  }
+  // Compile-time switch between the two ways of writing kValue at `offset`.
+  static constexpr bool kUseWriteMappedRange = true;
+  if constexpr (kUseWriteMappedRange) {
+    wgpu::Status status = src.WriteMappedRange(offset, &kValue, 4);
+    printf("%s: WriteMappedRange -> %u%s\n", __FUNCTION__,
+           static_cast<unsigned>(status),  // scoped enum: cast for %u
+           status == wgpu::Status::Success ? "" : " <------- FAILED");
+    assert(status == wgpu::Status::Success);
+  } else {
+    uint32_t *ptr = static_cast<uint32_t *>(
+        useRange ? src.GetMappedRange(offset, 4) : src.GetMappedRange());
+    printf("%s: GetMappedRange -> %p%s\n", __FUNCTION__, ptr,
+           ptr ? "" : " <------- FAILED");
+    assert(ptr != nullptr);
+    *ptr = kValue;
+  }
+  src.Unmap();
+  // Destination buffer that can be mapped for reading after the copy.
+  wgpu::Buffer dst;
+  {
+    wgpu::BufferDescriptor descriptor{};
+    descriptor.size = 4;
+    descriptor.usage = wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::MapRead;
+    dst = device.CreateBuffer(&descriptor);
+  }
+  // Copy the 4 bytes at `offset` in src to the start of dst.
+  wgpu::CommandBuffer commands;
+  {
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    encoder.CopyBufferToBuffer(src, offset, dst, 0, 4);
+    commands = encoder.Finish();
+  }
+  queue.Submit(1, &commands);
+  issueContentsCheck(__FUNCTION__, dst, kValue);
+}
+
+void doCopyTestMapAsync(bool useRange) {  // write via MapAsync, copy, read back
+  static constexpr uint32_t kValue = 0x01020304;
+  size_t size = useRange ? 12 : 4;
+  wgpu::Buffer src;
+  {
+    wgpu::BufferDescriptor descriptor{};
+    descriptor.size = size;
+    descriptor.usage = wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc;
+    src = device.CreateBuffer(&descriptor);
+  }
+  size_t offset = useRange ? 8 : 0;
+  // __FUNCTION__ is saved here so the lambda prints this function's name.
+  const char *functionName = __FUNCTION__;
+  wgpu::Future f = src.MapAsync(
+      wgpu::MapMode::Write, offset, 4, wgpu::CallbackMode::AllowSpontaneous,
+      [=](wgpu::MapAsyncStatus status, wgpu::StringView message) {
+        if (message.length) {
+          printf("doCopyTestMapAsync MapAsync: %.*s\n", (int)message.length,
+                 message.data);
+        }
+        assert(status == wgpu::MapAsyncStatus::Success);
+        // Write kValue into the mapped bytes, then unmap so src can be copied.
+        uint32_t *ptr = static_cast<uint32_t *>(
+            useRange ? src.GetMappedRange(offset, 4) : src.GetMappedRange());
+        printf("%s: getMappedRange -> %p%s\n", functionName, ptr,
+               ptr ? "" : " <------- FAILED");
+        assert(ptr != nullptr);
+        *ptr = kValue;
+        src.Unmap();
+      });
+  instance.WaitAny(f, UINT64_MAX);
+
+  // TODO: Doesn't work if this is inside the MapAsync callback because it
+  // causes nested WaitAny to happen and crashes Emscripten.
+  wgpu::Buffer dst;
+  {
+    wgpu::BufferDescriptor descriptor{};
+    descriptor.size = 4;
+    descriptor.usage = wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::MapRead;
+    dst = device.CreateBuffer(&descriptor);
+  }
+  // Copy the 4 bytes at `offset` in src to the start of dst.
+  wgpu::CommandBuffer commands;
+  {
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    encoder.CopyBufferToBuffer(src, offset, dst, 0, 4);
+    commands = encoder.Finish();
+  }
+  queue.Submit(1, &commands);
+
+  issueContentsCheck(functionName, dst, kValue);
+}
+
+void doRenderTest() {  // renders into a 1x1 texture and checks the texel
+  wgpu::Texture readbackTexture;
+  {
+    wgpu::TextureDescriptor descriptor{};
+    descriptor.usage =
+        wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc;
+    descriptor.size = {1, 1, 1};
+    descriptor.format = wgpu::TextureFormat::BGRA8Unorm;
+    readbackTexture = device.CreateTexture(&descriptor);
+    // Calls just to check they work
+    readbackTexture.GetWidth();
+    readbackTexture.GetHeight();
+    readbackTexture.GetDepthOrArrayLayers();
+    readbackTexture.GetDimension();
+    readbackTexture.GetFormat();
+    readbackTexture.GetMipLevelCount();
+    readbackTexture.GetSampleCount();
+    readbackTexture.GetUsage();
+  }
+  wgpu::Texture depthTexture;
+  {
+    wgpu::TextureDescriptor descriptor{};
+    descriptor.usage = wgpu::TextureUsage::RenderAttachment;
+    descriptor.size = {1, 1, 1};
+    descriptor.format = wgpu::TextureFormat::Depth32Float;
+    depthTexture = device.CreateTexture(&descriptor);
+  }
+  render(readbackTexture.CreateView(), depthTexture.CreateView());
+  // 4-byte buffer that receives the single rendered texel.
+  wgpu::Buffer readbackBuffer;
+  {
+    wgpu::BufferDescriptor descriptor{};
+    descriptor.size = 4;
+    descriptor.usage = wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::MapRead;
+
+    readbackBuffer = device.CreateBuffer(&descriptor);
+  }
+  // Copy the texel at origin (0, 0, 0) from the texture into the buffer.
+  wgpu::CommandBuffer commands;
+  {
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::TexelCopyTextureInfo src{};
+    src.texture = readbackTexture;
+    src.origin = {0, 0, 0};
+    wgpu::TexelCopyBufferInfo dst{};
+    dst.buffer = readbackBuffer;
+    dst.layout.bytesPerRow = 256;
+    wgpu::Extent3D extent = {1, 1, 1};
+    encoder.CopyTextureToBuffer(&src, &dst, &extent);
+    commands = encoder.Finish();
+  }
+  queue.Submit(1, &commands);
+
+  // Check the color value encoded in the shader makes it out correctly.
+  static const uint32_t expectData = 0xff0080ff;
+  issueContentsCheck(__FUNCTION__, readbackBuffer, expectData);
+}
+
+extern "C" {
+EMSCRIPTEN_KEEPALIVE bool frame();
+}
+
+static int frameNum = 0;
+bool frame() {  // one frame of the loop; the return value says whether to go on
+  frameNum++;
+  if (frameNum == 1) {
+    printf("Running frame-1 tests...\n");
+    // Another copy of doRenderTest to make sure it works in the frame loop.
+    // Note this function is async (via Asyncify/JSPI) so the SurfaceTexture
+    // lifetime must not span it! (it may expire while waiting for mapAsync to
+    // complete)
+    doRenderTest();
+    return true;
+  }
+  // From frame 2 on, draw into the surface's current texture.
+  wgpu::SurfaceTexture surfaceTexture;
+  surface.GetCurrentTexture(&surfaceTexture);
+  assert(surfaceTexture.status ==
+         wgpu::SurfaceGetCurrentTextureStatus::SuccessOptimal);
+  wgpu::TextureView backbuffer = surfaceTexture.texture.CreateView();
+
+  if (frameNum == 2) {
+    printf("Running frame 2 and continuing!\n");
+  }
+  render(backbuffer, canvasDepthStencilView);
+  // TODO: On frame 1, read back from the canvas with drawImage() (or something)
+  // and check the result.
+
+#if defined(__EMSCRIPTEN__)
+  // Stop running after a few frames in Emscripten.
+  if (frameNum >= 10 && testsCompleted == testsStarted) {
+    printf("Several frames rendered and no pending tests remaining!\n");
+    printf("Stopping main loop and destroying device to clean up.\n");
+    device.Destroy();
+    return false; // Stop the requestAnimationFrame loop
+  }
+#endif
+
+  return true; // Continue the requestAnimationFrame loop (Wasm) or main loop
+               // (native)
+}
+
+// Starts the tests, then sets up rendering and (on Emscripten) the frame loop.
+void run() {
+  init();
+
+  printf("Running startup tests...\n");
+
+  // Kick off all of the tests before setting up to render a frame. We don't
+  // wait for them, so they may complete before or after the frame.
+  doCopyTestMappedAtCreation(false);
+  doCopyTestMappedAtCreation(true);
+  doCopyTestMapAsync(false);
+  doCopyTestMapAsync(true);
+  doRenderTest();
+
+  {
+    wgpu::TextureDescriptor descriptor{};
+    descriptor.usage = wgpu::TextureUsage::RenderAttachment;
+    descriptor.size = {kWidth, kHeight, 1};
+    descriptor.format = wgpu::TextureFormat::Depth32Float;
+    canvasDepthStencilView = device.CreateTexture(&descriptor).CreateView();
+  }
+
+  printf("Starting main loop...\n");
+#if defined(__EMSCRIPTEN__)
+  {
+    wgpu::EmscriptenSurfaceSourceCanvasHTMLSelector canvasDesc{};
+    canvasDesc.selector = "#canvas";
+
+    wgpu::SurfaceDescriptor surfDesc{};
+    surfDesc.nextInChain = &canvasDesc;
+    surface = instance.CreateSurface(&surfDesc);
+
+    wgpu::SurfaceColorManagement colorManagement{};
+    wgpu::SurfaceConfiguration configuration{};
+    configuration.nextInChain = &colorManagement;
+    configuration.device = device;
+    configuration.usage = wgpu::TextureUsage::RenderAttachment;
+    configuration.format = wgpu::TextureFormat::BGRA8Unorm;
+    configuration.width = kWidth;
+    configuration.height = kHeight;
+    configuration.alphaMode = wgpu::CompositeAlphaMode::Premultiplied;
+    configuration.presentMode = wgpu::PresentMode::Fifo;
+    surface.Configure(&configuration);
+  }
+
+  // JSPI doesn't work in emscripten_set_main_loop; this is loosely based on:
+  // https://github.com/emscripten-core/emscripten/issues/22493#issuecomment-2330275282
+  // Note the following link args are required:
+  // - JSPI: -sDEFAULT_LIBRARY_FUNCS_TO_INCLUDE=$getWasmTableEntry
+  // - Asyncify: -sEXPORTED_RUNTIME_METHODS=ccall
+  // clang-format off
+    EM_ASM({
+#    if DEMO_USE_JSPI // -sJSPI=1 (aka -sASYNCIFY=2)
+        var callback = WebAssembly.promising(getWasmTableEntry($0));
+#    else // -sASYNCIFY=1
+        // ccall seems to be the only thing in Emscripten which lets us turn an
+        // Asyncified Wasm function into a JS function returning a Promise.
+        // It can only call exported functions.
+        var callback = () => globalThis['Module']['ccall']("frame", "boolean", [], [], {async: true});
+#    endif // DEMO_USE_JSPI
+        async function tick() {
+            // Start the frame callback. 'await' means we won't call
+            // requestAnimationFrame again until it completes.
+            var keepLooping = await callback();
+            if (keepLooping) requestAnimationFrame(tick);
+        }
+        requestAnimationFrame(tick);
+    }, frame);
+    // clang-format on
+#endif
+    // NOTE(review): printed immediately, though nothing is stopped or destroyed here; confirm.
+    printf("Stopping main loop and destroying device to clean up.\n");
+}
+
+int main() {
+    printf("Initializing...\n");
+    wgpu::InstanceDescriptor desc;
+    static constexpr auto kTimedWaitAny = wgpu::InstanceFeatureName::TimedWaitAny;
+    desc.requiredFeatureCount = 1;
+    desc.requiredFeatures = &kTimedWaitAny;
+    instance = wgpu::CreateInstance(&desc);
+
+    {
+        wgpu::Limits limits;
+        //limits.maxBufferSize = 0xffff'ffff'ffffLLU; // Uncomment to make requestDevice fail
+        //limits.maxImmediateSize = 1; // Uncomment to test maxImmediateSize passthrough
+        wgpu::DeviceDescriptor desc;
+        desc.requiredLimits = &limits;
+        desc.SetUncapturedErrorCallback(
+            [](const wgpu::Device&, wgpu::ErrorType errorType, wgpu::StringView message) {
+                printf("UncapturedError (errorType=%d): %.*s\n", errorType, (int)message.length, message.data);
+                assert(false);
+            });
+        desc.SetDeviceLostCallback(wgpu::CallbackMode::AllowSpontaneous,
+            [](const wgpu::Device&, wgpu::DeviceLostReason reason, wgpu::StringView message) {
+                printf("DeviceLost (reason=%d): %.*s\n", reason, (int)message.length, message.data);
+            });
+        device = GetDevice(&desc);
+    }
+
+    run();
+
+    // The test result will be reported when the main_loop completes.
+    // emscripten_exit_with_live_runtime() shouldn't be needed, because the async stuff we do keeps
+    // the runtime alive automatically. (Note the tests may complete before or after the frame.)
+    // - The WebGPU callbacks keep the runtime alive until they complete.
+    // - NOTE(review): run() loops via requestAnimationFrame, not emscripten_set_main_loop;
+    //   confirm that this loop also keeps the runtime alive until frame() returns false.
+    // This code is returned when the runtime exits unless something else sets it, like exit(0).
+    return 99;
+}
diff --git a/scripts/llvm_wasm/Dockerfile b/scripts/llvm_wasm/Dockerfile
index 14468b8c..160209e2 100644
--- a/scripts/llvm_wasm/Dockerfile
+++ b/scripts/llvm_wasm/Dockerfile
@@ -2,16 +2,32 @@ FROM ubuntu
 
 ENV DEBIAN_FRONTEND=noninteractive
 
-RUN apt-get update && apt-get -y install python-is-python3 cmake ninja-build python3.12-venv python3-pip vim git unzip
+RUN apt-get update && apt-get -y install cmake python-is-python3 ninja-build python3-pip vim git unzip software-properties-common wget ripgrep pkg-config autoconf libtool curl
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+RUN echo 'source "/root/.cargo/env"' >> $HOME/.bashrc
+
+RUN add-apt-repository -y ppa:deadsnakes/ppa && apt update && apt -y install python3.13 python3.13-venv
+RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.13 99
 
 RUN python -m venv venv
-RUN /venv/bin/python -m pip install pip-tools pyodide-build
 RUN echo 'source "/venv/bin/activate"' >> $HOME/.bashrc
 
-RUN git clone https://github.com/emscripten-core/emsdk.git
-# EMSCRIPTEN_VERSION=$(pyodide config get emscripten_version)
-RUN cd emsdk && \
-    ./emsdk install 3.1.58 ccache-git-emscripten-64bit && \
-    ./emsdk activate 3.1.58 ccache-git-emscripten-64bit && \
+RUN git clone --recursive https://github.com/pyodide/pyodide && \
+    cd pyodide && \
+    git checkout 0.29.0 && \
+    git submodule update --init && \
+    pip install -r requirements.txt && \
+    touch src/core/jsproxy.h && \
+    EXTRA_CFLAGS="-D DEBUG_F" EXTRA_LDFLAGS="-s ASSERTIONS=2" PYODIDE_SOURCEMAPS=1 make && \
+    EXTRA_CFLAGS="-D DEBUG_F" EXTRA_LDFLAGS="-s ASSERTIONS=2" PYODIDE_SOURCEMAPS=1 make -C cpython rebuild-all && \
+    EXTRA_CFLAGS="-D DEBUG_F" EXTRA_LDFLAGS="-s ASSERTIONS=2" PYODIDE_SOURCEMAPS=1 make all-but-packages && \
     cd ..
-RUN echo 'source "/emsdk/emsdk_env.sh"' >> $HOME/.bashrc
+
+RUN . "/venv/bin/activate" && pip install /pyodide/pyodide-build
+
+RUN /pyodide/emsdk/emsdk/emsdk install ccache-git-emscripten-64bit && \
+    /pyodide/emsdk/emsdk/emsdk activate ccache-git-emscripten-64bit
+RUN echo 'source "/pyodide/emsdk/emsdk/emsdk_env.sh"' >> $HOME/.bashrc
+
+# Must run after the emsdk ccache install (that build fails with CMake 4.2.0 for some reason). The installer is chosen by `uname -m` (aarch64 or x86_64) so the image builds on either host.
+RUN CMAKE_SH="cmake-4.2.0-linux-$(uname -m).sh" && wget "https://github.com/Kitware/CMake/releases/download/v4.2.0/${CMAKE_SH}" && bash "${CMAKE_SH}" --skip-license --prefix=/usr
diff --git a/scripts/llvm_wasm/build_llvm_wasm.sh b/scripts/llvm_wasm/build_llvm_wasm.sh
index 3ec8a9fd..d85f1849 100755
--- a/scripts/llvm_wasm/build_llvm_wasm.sh
+++ b/scripts/llvm_wasm/build_llvm_wasm.sh
@@ -40,4 +40,4 @@ if [ -x "$(command -v $EMSDK/ccache/git-emscripten_64bit/bin/ccache)" ]; then
   export CMAKE_CXX_COMPILER_LAUNCHER="$CCACHE"
 fi
 
-pyodide build $TD -o wheelhouse --compression-level 10
+PYODIDE_SOURCEMAPS=1 PYODIDE_BUILD_EXPORTS=whole_archive pyodide build $TD -o wheelhouse --compression-level 10
diff --git a/scripts/llvm_wasm/llvm_wasm_cache.cmake b/scripts/llvm_wasm/llvm_wasm_cache.cmake
index c689989c..0a8639bb 100644
--- a/scripts/llvm_wasm/llvm_wasm_cache.cmake
+++ b/scripts/llvm_wasm/llvm_wasm_cache.cmake
@@ -7,17 +7,15 @@
 
 set(LLVM_ENABLE_PROJECTS "mlir;llvm;lld" CACHE STRING "")
 
-set(LLVM_TARGETS_TO_BUILD "WebAssembly" CACHE STRING "")
-set(LLVM_TARGET_ARCH "wasm32" CACHE STRING "")
 set(LLVM_DEFAULT_TARGET_TRIPLE "wasm32-unknown-emscripten" CACHE STRING "")
-set(LLVM_HOST_TRIPLE "wasm32-unknown-emscripten" CACHE STRING "")
-set(LLVM_BUILD_STATIC ON CACHE BOOL "")
 set(LLVM_ENABLE_RTTI ON CACHE BOOL "")
-set(LLVM_ENABLE_EH ON CACHE BOOL "")
+set(LLVM_HOST_TRIPLE "wasm32-unknown-emscripten" CACHE STRING "")
+set(LLVM_TARGETS_TO_BUILD "WebAssembly" CACHE STRING "")
+set(LLVM_TARGET_ARCH "wasm32" CACHE STRING "")
+# for ExecutionEngine
+set(LLVM_ENABLE_PIC ON CACHE BOOL "")
 
 set(MLIR_ENABLE_BINDINGS_PYTHON ON CACHE BOOL "")
-set(MLIR_ENABLE_EXECUTION_ENGINE ON CACHE BOOL "")
-set(MLIR_ENABLE_SPIRV_CPU_RUNNER ON CACHE BOOL "")
 
 set(LLVM_BUILD_DOCS OFF CACHE BOOL "")
 set(LLVM_ENABLE_BACKTRACES OFF CACHE BOOL "")
@@ -27,7 +25,6 @@ set(LLVM_ENABLE_LIBEDIT OFF CACHE BOOL "")
 set(LLVM_ENABLE_LIBPFM OFF CACHE BOOL "")
 set(LLVM_ENABLE_LIBXML2 OFF CACHE BOOL "")
 set(LLVM_ENABLE_OCAMLDOC OFF CACHE BOOL "")
-set(LLVM_ENABLE_PIC OFF CACHE BOOL "")
 set(LLVM_ENABLE_THREADS OFF CACHE BOOL "")
 set(LLVM_ENABLE_UNWIND_TABLES OFF CACHE BOOL "")
 set(LLVM_ENABLE_ZLIB OFF CACHE BOOL "")
@@ -59,7 +56,6 @@ set(LLVM_MlirDevelopment_DISTRIBUTION_COMPONENTS
     lld-cmake-exports
     lld-mlirdevelopment-cmake-exports
 
-    llvm-config
     llvm-headers
     llvm-libraries
 
diff --git a/scripts/llvm_wasm/pyproject.toml b/scripts/llvm_wasm/pyproject.toml
index 7cc4ae58..cc62ce8f 100644
--- a/scripts/llvm_wasm/pyproject.toml
+++ b/scripts/llvm_wasm/pyproject.toml
@@ -23,7 +23,9 @@ Discussions = "https://discourse.llvm.org/"
 requires = [
     "scikit-build-core==0.10.7",
     "typing_extensions==4.12.2",
-    "nanobind>=2.4, <3.0",
+    # https://github.com/wjakob/nanobind/commit/dd350fe81931a1b362196cb415d188c36422766e#diff-8599263e788c107944d356ce118965942735cfbe16289ccf98ee5f8a33f0e808
+    # error: static assertion failed due to requirement 'pyobj_name::total_count * sizeof(_object *) == 96'
+    "nanobind>=2.4, <=2.9.2",
     "pybind11>=2.10.0, <=2.13.6",
 ]
 build-backend = "scikit_build_core.build"
@@ -45,34 +47,18 @@ cmake.args = ["-C", "llvm_wasm_cache.cmake"]
 CMAKE_BUILD_TYPE = { env = "CMAKE_BUILD_TYPE", default = "Release" }
 CMAKE_C_COMPILER_LAUNCHER = { env = "CMAKE_C_COMPILER_LAUNCHER", default = "" }
 CMAKE_CXX_COMPILER_LAUNCHER = { env = "CMAKE_CXX_COMPILER_LAUNCHER", default = "" }
-CMAKE_CXX_FLAGS = "-sNO_DISABLE_EXCEPTION_CATCHING"
-CMAKE_EXE_LINKER_FLAGS = "-sALLOW_TABLE_GROWTH -sASSERTIONS -sNO_DISABLE_EXCEPTION_CATCHING -sWASM_BIGINT"
-CMAKE_SHARED_LINKER_FLAGS = "-sALLOW_TABLE_GROWTH -sASSERTIONS -sNO_DISABLE_EXCEPTION_CATCHING -sWASM_BIGINT"
-CMAKE_MODULE_LINKER_FLAGS = "-sALLOW_TABLE_GROWTH -sASSERTIONS -sNO_DISABLE_EXCEPTION_CATCHING -sWASM_BIGINT"
+#CMAKE_C_FLAGS = "-sLINKABLE"
+#CMAKE_CXX_FLAGS = "-sLINKABLE"
+CMAKE_EXE_LINKER_FLAGS = "-sALLOW_TABLE_GROWTH -sASSERTIONS -sWASM_BIGINT "
+CMAKE_SHARED_LINKER_FLAGS = "-sALLOW_TABLE_GROWTH -sASSERTIONS -sWASM_BIGINT"
+CMAKE_MODULE_LINKER_FLAGS = "-sALLOW_TABLE_GROWTH -sASSERTIONS -sWASM_BIGINT"
 CMAKE_VERBOSE_MAKEFILE = "ON"
-
-LLVM_ENABLE_EH = "ON"
-LLVM_ENABLE_PROJECTS = "mlir;lld"
-LLVM_ENABLE_RTTI = "ON"
-LLVM_ENABLE_THREADS = "OFF"
-# requires threads
-LLVM_INCLUDE_BENCHMARKS = "OFF"
-LLVM_INCLUDE_TESTS = "OFF"
-
-# for ExecutionEngine
-LLVM_ENABLE_PIC = "ON"
-LLVM_TARGET_ARCH = "wasm32"
-LLVM_HOST_TRIPLE = "wasm32-unknown-emscripten"
-LLVM_TARGETS_TO_BUILD = "WebAssembly"
-LLVM_DEFAULT_TARGET_TRIPLE = "wasm32-unknown-emscripten"
+# De-duplicate libraries on link lines based on linker capabilities.
+# minimum cmake version is 3.29
+CMAKE_POLICY_DEFAULT_CMP0156 = "NEW"
 
 # so that NATIVE doesn't try to get built
 LLVM_NATIVE_TOOL_DIR = { env = "LLVM_NATIVE_TOOL_DIR", default = "" }
 LLVM_TABLEGEN = { env = "LLVM_TABLEGEN", default = "" }
 MLIR_LINALG_ODS_YAML_GEN = { env = "MLIR_LINALG_ODS_YAML_GEN", default = "" }
 MLIR_TABLEGEN = { env = "MLIR_TABLEGEN", default = "" }
-
-MLIR_BINDINGS_PYTHON_INSTALL_PREFIX = "mlir"
-MLIR_ENABLE_BINDINGS_PYTHON = "ON"
-MLIR_ENABLE_EXECUTION_ENGINE = "ON"
-MLIR_ENABLE_SPIRV_CPU_RUNNER = "ON"
diff --git a/third_party/SPIRV-Headers b/third_party/SPIRV-Headers
new file mode 160000
index 00000000..6146b3d9
--- /dev/null
+++ b/third_party/SPIRV-Headers
@@ -0,0 +1 @@
+Subproject commit 6146b3d9ad4fcc5fb512209d348e97ce03749169
diff --git a/third_party/SPIRV-Tools b/third_party/SPIRV-Tools
new file mode 160000
index 00000000..8c1e6ca9
--- /dev/null
+++ b/third_party/SPIRV-Tools
@@ -0,0 +1 @@
+Subproject commit 8c1e6ca9b896a5a82ea39973a2f677f515f1f45d
diff --git a/third_party/dawn b/third_party/dawn
new file mode 160000
index 00000000..5ae3d897
--- /dev/null
+++ b/third_party/dawn
@@ -0,0 +1 @@
+Subproject commit 5ae3d897f6125617f752334aa7d9830774c8a1c0