diff --git a/MODULE.bazel b/MODULE.bazel
index 1144af2..ace6fd6 100644
--- a/MODULE.bazel
+++ b/MODULE.bazel
@@ -44,6 +44,7 @@ use_repo(
     "cuda_nvtx",
     "cuda_nvvm",
     "cuda_profiler_api",
+    "cuda_nvptxcompiler",
 )
 
 ##############################################################
diff --git a/common/BUILD b/common/BUILD
index 62393ea..217cb57 100644
--- a/common/BUILD
+++ b/common/BUILD
@@ -96,6 +96,46 @@ config_setting(
     flag_values = {":enable_cuda": "False"},
 )
 
+# Opt-in flag (default: False) for linking static CUDA libs; the config_setting below matches when it is True.
+bool_flag(
+    name = "link_cuda_static_libs",
+    build_setting_default = False,
+)
+
+config_setting(
+    name = "is_cuda_static_linking_enabled",
+    flag_values = {
+        ":link_cuda_static_libs": "True",
+    },
+)
+
+
+# Opt-in flag (default: False) for linking static NVRTC libs; the config_setting below matches when it is True.
+bool_flag(
+    name = "link_nvrtc_static_libs",
+    build_setting_default = False,
+)
+
+config_setting(
+    name = "is_nvrtc_static_linking_enabled",
+    flag_values = {
+        ":link_nvrtc_static_libs": "True",
+    },
+)
+
+# Opt-in flag (default: False) for linking static cuDNN libs; the config_setting below matches when it is True.
+bool_flag(
+    name = "link_cudnn_static_libs",
+    build_setting_default = False,
+)
+
+config_setting(
+    name = "is_cudnn_static_linking_enabled",
+    flag_values = {
+        ":link_cudnn_static_libs": "True",
+    },
+)
+
 #######################################################
 # Enable SYCL support flags
 
diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl
index 2cfa7cd..0f225e1 100755
--- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl
+++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl
@@ -224,17 +224,17 @@ def InvokeNvcc(argv, log=False):
   # Unfortunately, there are other options that have -c prefix too.
   # So allowing only those look like C/C++ files.
   src_files = [f for f in src_files if
-               re.search(r'\.cpp$|\.cc$|\.c$|\.cxx$|\.C$|\.cu$', f)]
+	       re.search(r'\.cpp$|\.cc$|\.c$|\.cxx$|\.C$|\.cu$', f)]
   srcs = ' '.join(src_files)
   out = ' -o ' + out_file[0]
 
   nvccopts = '-D_FORCE_INLINES '
-  capabilities_sm = set(get_option_value(argv, '--cuda-gpu-arch')) - set(
-      get_option_value(argv, '--no-cuda-gpu-arch')
+  capabilities_sm = set(GetOptionValue(argv, '--cuda-gpu-arch')) - set(
+      GetOptionValue(argv, '--no-cuda-gpu-arch')
   )
   capabilities_compute = set(
-      get_option_value(argv, '--cuda-include-ptx')
-  ) - set(get_option_value(argv, '--no-cuda-include-ptx'))
+      GetOptionValue(argv, '--cuda-include-ptx')
+  ) - set(GetOptionValue(argv, '--no-cuda-include-ptx'))
   # When both "code=sm_xy" and "code=compute_xy" are requested for a single
   # arch, they can be combined using "code=xy,compute_xy" which avoids a
   # redundant PTX generation during compilation.
@@ -257,6 +257,7 @@ def InvokeNvcc(argv, log=False):
   nvccopts += std_options
   nvccopts += m_options
   nvccopts += warning_options
+  # nvccopts += ' -rdc=true '
   # Force C++17 dialect (note, everything in just one string!)
   nvccopts += ' --std c++17 '
   nvccopts += fatbin_options
diff --git a/third_party/gpus/cuda/build_defs.bzl.tpl b/third_party/gpus/cuda/build_defs.bzl.tpl
index 09c3c59..c2dc2a9 100644
--- a/third_party/gpus/cuda/build_defs.bzl.tpl
+++ b/third_party/gpus/cuda/build_defs.bzl.tpl
@@ -20,10 +20,47 @@ def if_cuda(if_true, if_false = []):
     with CUDA enabled.  Otherwise, the select statement evaluates to if_false.
     """
     return select({
-        "@local_config_cuda//:is_cuda_enabled": if_true,
+        "@rules_ml_toolchain//common:is_cuda_enabled": if_true,
         "//conditions:default": if_false,
     })
 
+# Macro for choosing between static and shared CUDA library linkage.
+def if_static_cuda(if_true, if_false = []):
+    """Shorthand for select()'ing on whether static CUDA linking is enabled.
+
+    Returns a select statement which evaluates to if_true when the
+    is_cuda_static_linking_enabled config setting matches, else to if_false.
+    """
+    return select({
+        "@rules_ml_toolchain//common:is_cuda_static_linking_enabled": if_true,
+        "//conditions:default": if_false,
+    })
+
+# Macro for choosing between static and shared NVRTC library linkage.
+def if_static_nvrtc(if_true, if_false = []):
+    """Shorthand for select()'ing on whether static NVRTC linking is enabled.
+
+    Returns a select statement which evaluates to if_true when the
+    is_nvrtc_static_linking_enabled config setting matches, else to if_false.
+    """
+    return select({
+        "@rules_ml_toolchain//common:is_nvrtc_static_linking_enabled": if_true,
+        "//conditions:default": if_false,
+    })
+
+# Macro for choosing between static and shared cuDNN library linkage.
+def if_static_cudnn(if_true, if_false = []):
+    """Shorthand for select()'ing on whether static cuDNN linking is enabled.
+
+    Returns a select statement which evaluates to if_true when the
+    is_cudnn_static_linking_enabled config setting matches, else to if_false.
+    """
+    return select({
+        "@rules_ml_toolchain//common:is_cudnn_static_linking_enabled": if_true,
+        "//conditions:default": if_false,
+    })
+
+
 def if_cuda_clang(if_true, if_false = []):
    """Shorthand for select()'ing on wheteher we're building with cuda-clang.
 
diff --git a/third_party/gpus/cuda/hermetic/BUILD.tpl b/third_party/gpus/cuda/hermetic/BUILD.tpl
index 103465b..8ff4d66 100644
--- a/third_party/gpus/cuda/hermetic/BUILD.tpl
+++ b/third_party/gpus/cuda/hermetic/BUILD.tpl
@@ -326,10 +326,9 @@ selects.config_setting_group(
     ],
 )
 
-cc_library(
-    # This is not yet fully supported, but we need the rule
-    # to make bazel query happy.
+alias(
     name = "nvptxcompiler",
+    actual = "@cuda_nvcc//:nvptxcompiler",
 )
 
 alias(
diff --git a/third_party/gpus/cuda/hermetic/cuda_configure.bzl b/third_party/gpus/cuda/hermetic/cuda_configure.bzl
index 4bd735c..f91c263 100644
--- a/third_party/gpus/cuda/hermetic/cuda_configure.bzl
+++ b/third_party/gpus/cuda/hermetic/cuda_configure.bzl
@@ -54,6 +54,7 @@ load("@cuda_nvml//:version.bzl", _nvml_version = "VERSION")
 load("@cuda_nvtx//:version.bzl", _nvtx_version = "VERSION")
 load("@cuda_nvvm//:version.bzl", _nvvm_version = "VERSION")
 load("@cuda_profiler_api//:version.bzl", _cuda_profiler_api_version = "VERSION")
+load("@cuda_nvptxcompiler//:version.bzl", _cuda_nvptxcompiler_version = "VERSION")
 load("@llvm_linux_aarch64//:version.bzl", _llvm_aarch64_hermetic_version = "VERSION")
 load("@llvm_linux_x86_64//:version.bzl", _llvm_x86_64_hermetic_version = "VERSION")
 load(
@@ -366,6 +367,7 @@ def _get_cuda_config(repository_ctx):
         cupti_version = _cupti_version,
         cudart_version = _cudart_version,
         cuda_profiler_api_version = _cuda_profiler_api_version,
+        cuda_nvptxcompiler_version = _cuda_nvptxcompiler_version,
         cublas_version = _cublas_version,
         cusolver_version = _cusolver_version,
         curand_version = _curand_version,
diff --git a/third_party/gpus/cuda/hermetic/cuda_cublas.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_cublas.BUILD.tpl
index 7f2c111..45fafff 100644
--- a/third_party/gpus/cuda/hermetic/cuda_cublas.BUILD.tpl
+++ b/third_party/gpus/cuda/hermetic/cuda_cublas.BUILD.tpl
@@ -2,6 +2,7 @@ licenses(["restricted"])  # NVIDIA proprietary license
 load(
      "@local_config_cuda//cuda:build_defs.bzl",
      "if_cuda_newer_than",
+     "if_static_cuda",
 )
 load(
     "@rules_ml_toolchain//third_party/gpus:nvidia_common_rules.bzl",
@@ -13,7 +14,6 @@ cc_import(
     name = "cublas_shared_library",
     hdrs = [":headers"],
     shared_library = "lib/libcublas.so.%{libcublas_version}",
-    deps = [":cublasLt"],
 )
 
 cc_import(
@@ -21,11 +21,26 @@ cc_import(
     hdrs = [":headers"],
     shared_library = "lib/libcublasLt.so.%{libcublaslt_version}",
 )
+
+cc_import(
+    name = "cublasLt_static_library",
+    hdrs = [":headers"],
+    static_library = "lib/libcublasLt_static.a",
+)
+
+cc_import(
+    name = "cublas_static_library",
+    hdrs = [":headers"],
+    static_library = "lib/libcublas_static.a",
+)
 %{multiline_comment}
 cc_library(
     name = "cublas",
     visibility = ["//visibility:public"],
-    %{comment}deps = [":cublas_shared_library"],
+    %{comment}deps = if_static_cuda(
+        %{comment}[":cublas_static_library"],
+        %{comment}[":cublas_shared_library"],
+    %{comment}) + [":cublasLt"],
     %{comment}linkopts = if_cuda_newer_than(
         %{comment}"13_0",
         %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"),
@@ -36,7 +51,10 @@ cc_library(
 cc_library(
     name = "cublasLt",
     visibility = ["//visibility:public"],
-    %{comment}deps = [":cublasLt_shared_library"],
+    %{comment}deps = if_static_cuda(
+        %{comment}[":cublasLt_static_library"],
+        %{comment}[":cublasLt_shared_library"],
+    %{comment}),
     %{comment}linkopts = if_cuda_newer_than(
         %{comment}"13_0",
         %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"),
diff --git a/third_party/gpus/cuda/hermetic/cuda_cudart.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_cudart.BUILD.tpl
index 2668c3b..f09d1d3 100644
--- a/third_party/gpus/cuda/hermetic/cuda_cudart.BUILD.tpl
+++ b/third_party/gpus/cuda/hermetic/cuda_cudart.BUILD.tpl
@@ -2,6 +2,7 @@ licenses(["restricted"])  # NVIDIA proprietary license
 load(
      "@local_config_cuda//cuda:build_defs.bzl",
      "if_cuda_newer_than",
+     "if_static_cuda",
 )
 load(
     "@rules_ml_toolchain//third_party/gpus:nvidia_common_rules.bzl",
@@ -29,6 +30,24 @@ cc_import(
     hdrs = [":headers"],
     shared_library = "lib/libcudart.so.%{libcudart_version}",
 )
+
+cc_import(
+    name = "cudart_static_library",
+    hdrs = [":headers"],
+    static_library = "lib/libcudart_static.a",
+)
+
+cc_import(
+    name = "culibos_static_library",
+    hdrs = [":headers"],
+    static_library = if_cuda_newer_than("13_0", None, "lib/libculibos.a"),
+)
+
+cc_import(
+    name = "cudadevrt_static_library",
+    hdrs = [":headers"],
+    static_library = "lib/libcudadevrt.a",
+)
 %{multiline_comment}
 cc_library(
     name = "cuda_driver",
@@ -44,9 +63,10 @@ cc_library(
             %{comment}"@cuda_driver//:nvidia_ptxjitcompiler",
         %{comment}],
         %{comment}"//conditions:default": [":cuda_driver"],
-    %{comment}}) + [
-        %{comment}":cudart_shared_library",
-    %{comment}],
+    %{comment}}) + if_static_cuda(
+        %{comment}[":cudart_static_library", ":cudadevrt_static_library"] + if_cuda_newer_than("13_0", ["@cuda_culibos//:culibos_static_library"], [":culibos_static_library"]),
+        %{comment}[":cudart_shared_library"],
+    %{comment}),
     %{comment}linkopts = if_cuda_newer_than(
         %{comment}"13_0",
         %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"),
diff --git a/third_party/gpus/cuda/hermetic/cuda_cudnn9.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_cudnn9.BUILD.tpl
index e8f571f..ebcaf3b 100644
--- a/third_party/gpus/cuda/hermetic/cuda_cudnn9.BUILD.tpl
+++ b/third_party/gpus/cuda/hermetic/cuda_cudnn9.BUILD.tpl
@@ -1,4 +1,8 @@
 licenses(["restricted"])  # NVIDIA proprietary license
+load(
+     "@local_config_cuda//cuda:build_defs.bzl",
+     "if_static_cudnn",
+)
 load(
     "@rules_ml_toolchain//third_party/gpus:nvidia_common_rules.bzl",
     "cuda_rpath_flags",
@@ -52,20 +56,72 @@ cc_import(
     hdrs = [":headers"],
     shared_library = "lib/libcudnn.so.%{libcudnn_version}",
 )
+
+cc_import(
+    name = "cudnn_graph_static",
+    hdrs = [":headers"],
+    static_library = "lib/libcudnn_graph_static_v9.a",
+)
+
+cc_import(
+    name = "cudnn_adv_static",
+    hdrs = [":headers"],
+    static_library = "lib/libcudnn_adv_static_v9.a",
+)
+
+cc_import(
+    name = "cudnn_engines_runtime_compiled_static",
+    hdrs = [":headers"],
+    static_library = "lib/libcudnn_engines_runtime_compiled_static_v9.a",
+)
+
+cc_import(
+    name = "cudnn_engines_precompiled_static",
+    hdrs = [":headers"],
+    static_library = "lib/libcudnn_engines_precompiled_static_v9.a",
+)
+
+cc_import(
+    name = "cudnn_ops_static",
+    hdrs = [":headers"],
+    static_library = "lib/libcudnn_ops_static_v9.a",
+)
+
+cc_import(
+    name = "cudnn_heuristic_static",
+    hdrs = [":headers"],
+    static_library = "lib/libcudnn_heuristic_static_v9.a",
+)
+
+cc_import(
+    name = "cudnn_cnn_static",
+    hdrs = [":headers"],
+    static_library = "lib/libcudnn_cnn_static_v9.a",
+)
 %{multiline_comment}
 cc_library(
     name = "cudnn",
-    %{comment}deps = [
-      %{comment}":cudnn_engines_precompiled",
+    %{comment}alwayslink = if_static_cudnn(True, False),
+    %{comment}srcs = if_static_cudnn(
+      %{comment}[":lib/libcudnn_engines_precompiled_static_v9.a",
+      %{comment} ":lib/libcudnn_ops_static_v9.a",
+      %{comment} ":lib/libcudnn_cnn_static_v9.a",
+      %{comment} ":lib/libcudnn_adv_static_v9.a",
+      %{comment} ":lib/libcudnn_heuristic_static_v9.a",
+      %{comment} ":lib/libcudnn_graph_static_v9.a",
+      %{comment} ":lib/libcudnn_engines_runtime_compiled_static_v9.a",
+      %{comment}], []),
+    %{comment}deps = if_static_cudnn(
+      %{comment}[],
+      %{comment}[":cudnn_engines_precompiled",
       %{comment}":cudnn_ops",
       %{comment}":cudnn_graph",
       %{comment}":cudnn_cnn",
       %{comment}":cudnn_adv",
       %{comment}":cudnn_engines_runtime_compiled",
       %{comment}":cudnn_heuristic",
-      %{comment}"@cuda_nvrtc//:nvrtc",
       %{comment}":cudnn_main",
-    %{comment}],
+    %{comment}]) + ["@cuda_nvrtc//:nvrtc"],
     %{comment}linkopts = cuda_rpath_flags("nvidia/cudnn/lib"),
     visibility = ["//visibility:public"],
 )
diff --git a/third_party/gpus/cuda/hermetic/cuda_cufft.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_cufft.BUILD.tpl
index 3fdd402..305451b 100644
--- a/third_party/gpus/cuda/hermetic/cuda_cufft.BUILD.tpl
+++ b/third_party/gpus/cuda/hermetic/cuda_cufft.BUILD.tpl
@@ -2,6 +2,7 @@ licenses(["restricted"])  # NVIDIA proprietary license
 load(
      "@local_config_cuda//cuda:build_defs.bzl",
      "if_cuda_newer_than",
+     "if_static_cuda",
 )
 load(
     "@rules_ml_toolchain//third_party/gpus:nvidia_common_rules.bzl",
@@ -14,15 +15,33 @@ cc_import(
     hdrs = [":headers"],
     shared_library = "lib/libcufft.so.%{libcufft_version}",
 )
+
+cc_import(
+    name = "cufft_static_library",
+    hdrs = [":headers"],
+    static_library = "lib/libcufft_static.a",
+)
+
+cc_import(
+    name = "cufftw_static_library",
+    hdrs = [":headers"],
+    static_library = "lib/libcufftw_static.a",
+)
+
+cc_import(
+    name = "cufft_static_nocallback_library",
+    hdrs = [":headers"],
+    static_library = if_cuda_newer_than("13_0", None, "lib/libcufft_static_nocallback.a"),
+)
 %{multiline_comment}
 cc_library(
     name = "cufft",
-    %{comment}deps = [":cufft_shared_library"],
+    %{comment}deps = if_static_cuda(if_cuda_newer_than("13_0", [":cufft_static_library"], [":cufft_static_nocallback_library"]) + [":cufftw_static_library"], [":cufft_shared_library"]),
     %{comment}linkopts = if_cuda_newer_than(
         %{comment}"13_0",
         %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"),
         %{comment}if_false = cuda_rpath_flags("nvidia/cufft/lib"),
-    %{comment}),
+    %{comment}) + if_static_cuda(["-Wl,--no-relax"]),
     visibility = ["//visibility:public"],
 )
 
diff --git a/third_party/gpus/cuda/hermetic/cuda_culibos.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_culibos.BUILD.tpl
new file mode 100644
index 0000000..4957dee
--- /dev/null
+++ b/third_party/gpus/cuda/hermetic/cuda_culibos.BUILD.tpl
@@ -0,0 +1,11 @@
+licenses(["restricted"])  # NVIDIA proprietary license
+
+load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_newer_than")
+
+%{multiline_comment}
+cc_import(
+    name = "culibos_static_library",
+    static_library = if_cuda_newer_than("13_0", "lib/libculibos.a", None),
+    visibility = ["//visibility:public"],
+)
+%{multiline_comment}
diff --git a/third_party/gpus/cuda/hermetic/cuda_cupti.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_cupti.BUILD.tpl
index 6780ca3..c7bde53 100644
--- a/third_party/gpus/cuda/hermetic/cuda_cupti.BUILD.tpl
+++ b/third_party/gpus/cuda/hermetic/cuda_cupti.BUILD.tpl
@@ -2,6 +2,7 @@ licenses(["restricted"])  # NVIDIA proprietary license
 load(
     "@local_config_cuda//cuda:build_defs.bzl",
     "if_cuda_newer_than",
+    "if_static_cuda",
     "if_version_equal_or_greater_than",
 )
 load(
@@ -15,10 +16,22 @@ cc_import(
     hdrs = [":headers"],
     shared_library = "lib/libcupti.so.%{libcupti_version}",
 )
+
+cc_import(
+    name = "cupti_static_library",
+    hdrs = [":headers"],
+    static_library = "lib/libcupti_static.a",
+)
+
+cc_import(
+    name = "nvperf_host_static_library",
+    hdrs = [":headers"],
+    static_library = "lib/libnvperf_host_static.a",
+)
 %{multiline_comment}
 cc_library(
     name = "cupti",
-    %{comment}deps = [":cupti_shared_library"],
+    %{comment}deps = if_static_cuda([":cupti_static_library"], [":cupti_shared_library"]),
     %{comment}linkopts = if_cuda_newer_than(
         %{comment}"13_0",
         %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"),
diff --git a/third_party/gpus/cuda/hermetic/cuda_curand.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_curand.BUILD.tpl
index 093c7c0..686bafb 100644
--- a/third_party/gpus/cuda/hermetic/cuda_curand.BUILD.tpl
+++ b/third_party/gpus/cuda/hermetic/cuda_curand.BUILD.tpl
@@ -2,6 +2,7 @@ licenses(["restricted"])  # NVIDIA proprietary license
 load(
     "@local_config_cuda//cuda:build_defs.bzl",
     "if_cuda_newer_than",
+    "if_static_cuda",
 )
 load(
     "@rules_ml_toolchain//third_party/gpus:nvidia_common_rules.bzl",
@@ -14,10 +15,16 @@ cc_import(
     hdrs = [":headers"],
     shared_library = "lib/libcurand.so.%{libcurand_version}",
 )
+
+cc_import(
+    name = "curand_static_library",
+    hdrs = [":headers"],
+    static_library = "lib/libcurand_static.a",
+)
 %{multiline_comment}
 cc_library(
     name = "curand",
-    %{comment}deps = [":curand_shared_library"],
+    %{comment}deps = if_static_cuda([":curand_static_library"], [":curand_shared_library"]),
     %{comment}linkopts = if_cuda_newer_than(
         %{comment}"13_0",
         %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"),
diff --git a/third_party/gpus/cuda/hermetic/cuda_cusolver.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_cusolver.BUILD.tpl
index 23be787..69552a7 100644
--- a/third_party/gpus/cuda/hermetic/cuda_cusolver.BUILD.tpl
+++ b/third_party/gpus/cuda/hermetic/cuda_cusolver.BUILD.tpl
@@ -2,6 +2,7 @@ licenses(["restricted"])  # NVIDIA proprietary license
 load(
     "@local_config_cuda//cuda:build_defs.bzl",
     "if_cuda_newer_than",
+    "if_static_cuda",
 )
 load(
     "@rules_ml_toolchain//third_party/gpus:nvidia_common_rules.bzl",
@@ -13,17 +14,41 @@ cc_import(
     name = "cusolver_shared_library",
     hdrs = [":headers"],
     shared_library = "lib/libcusolver.so.%{libcusolver_version}",
-    deps = [
-        "@cuda_nvjitlink//:nvjitlink",
-        "@cuda_cusparse//:cusparse",
-        "@cuda_cublas//:cublas",
-        "@cuda_cublas//:cublasLt",
-    ],
+)
+
+cc_import(
+    name = "cusolver_lapack_static_library",
+    hdrs = [":headers"],
+    static_library = "lib/libcusolver_lapack_static.a",
+)
+
+cc_import(
+    name = "cusolver_metis_static_library",
+    hdrs = [":headers"],
+    static_library = "lib/libcusolver_metis_static.a",
+)
+
+cc_import(
+    name = "cusolver_static_library",
+    hdrs = [":headers"],
+    static_library = "lib/libcusolver_static.a",
+)
+
+cc_import(
+    name = "metis_static_library",
+    hdrs = [":headers"],
+    static_library = "lib/libmetis_static.a",
 )
 %{multiline_comment}
 cc_library(
     name = "cusolver",
-    %{comment}deps = [":cusolver_shared_library"],
+    %{comment}deps = if_static_cuda([":cusolver_static_library", ":cusolver_lapack_static_library", ":cusolver_metis_static_library", ":metis_static_library"], [":cusolver_shared_library"])
+    %{comment}+ [
+        %{comment}"@cuda_nvjitlink//:nvjitlink",
+        %{comment}"@cuda_cusparse//:cusparse",
+        %{comment}"@cuda_cublas//:cublas",
+        %{comment}"@cuda_cublas//:cublasLt",
+    %{comment}],
     %{comment}linkopts = if_cuda_newer_than(
         %{comment}"13_0",
         %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"),
diff --git a/third_party/gpus/cuda/hermetic/cuda_cusparse.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_cusparse.BUILD.tpl
index 5c67c11..65f37f3 100644
--- a/third_party/gpus/cuda/hermetic/cuda_cusparse.BUILD.tpl
+++ b/third_party/gpus/cuda/hermetic/cuda_cusparse.BUILD.tpl
@@ -2,6 +2,7 @@ licenses(["restricted"])  # NVIDIA proprietary license
 load(
     "@local_config_cuda//cuda:build_defs.bzl",
     "if_cuda_newer_than",
+    "if_static_cuda",
 )
 load(
     "@rules_ml_toolchain//third_party/gpus:nvidia_common_rules.bzl",
@@ -13,12 +14,17 @@ cc_import(
     name = "cusparse_shared_library",
     hdrs = [":headers"],
     shared_library = "lib/libcusparse.so.%{libcusparse_version}",
-    deps = ["@cuda_nvjitlink//:nvjitlink"],
+)
+
+cc_import(
+    name = "cusparse_static_library",
+    hdrs = [":headers"],
+    static_library = "lib/libcusparse_static.a",
 )
 %{multiline_comment}
 cc_library(
     name = "cusparse",
-    %{comment}deps = [":cusparse_shared_library"],
+    %{comment}deps = if_static_cuda([":cusparse_static_library"], [":cusparse_shared_library"]) + ["@cuda_nvjitlink//:nvjitlink"],
     %{comment}linkopts = if_cuda_newer_than(
         %{comment}"13_0",
         %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"),
diff --git a/third_party/gpus/cuda/hermetic/cuda_nvcc.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_nvcc.BUILD.tpl
index e311c43..f0f42bf 100644
--- a/third_party/gpus/cuda/hermetic/cuda_nvcc.BUILD.tpl
+++ b/third_party/gpus/cuda/hermetic/cuda_nvcc.BUILD.tpl
@@ -1,5 +1,9 @@
 licenses(["restricted"])  # NVIDIA proprietary license
 
+load(
+    "@local_config_cuda//cuda:build_defs.bzl",
+    "if_static_cuda",
+)
 load(
     "@rules_ml_toolchain//cc/cuda/features:cuda_nvcc_feature.bzl",
     "cuda_nvcc_feature",
@@ -84,6 +88,24 @@ cuda_nvcc_feature(
     ],
 )
 
+%{multiline_comment}
+cc_import(
+    name = "nvptxcompiler_static_library",
+    hdrs = [":headers"],
+    static_library = if_cuda_newer_than("13_0", None, "lib/libnvptxcompiler_static.a"),
+)
+%{multiline_comment}
+
+cc_library(
+    name = "nvptxcompiler",
+    %{comment}deps = if_static_cuda(if_cuda_newer_than(
+        %{comment}"13_0",
+        %{comment}["@cuda_nvptxcompiler//:nvptxcompiler"],
+        %{comment}[":nvptxcompiler_static_library"],
+    %{comment})),
+    visibility = ["//visibility:public"],
+)
+
 cc_library(
     name = "headers",
     %{comment}hdrs = glob([
diff --git a/third_party/gpus/cuda/hermetic/cuda_nvjitlink.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_nvjitlink.BUILD.tpl
index 5ca20a2..c4b3ab0 100644
--- a/third_party/gpus/cuda/hermetic/cuda_nvjitlink.BUILD.tpl
+++ b/third_party/gpus/cuda/hermetic/cuda_nvjitlink.BUILD.tpl
@@ -2,6 +2,7 @@ licenses(["restricted"])  # NVIDIA proprietary license
 load(
     "@local_config_cuda//cuda:build_defs.bzl",
     "if_cuda_newer_than",
+    "if_static_cuda",
 )
 load(
     "@rules_ml_toolchain//third_party/gpus:nvidia_common_rules.bzl",
@@ -14,10 +15,16 @@ cc_import(
     hdrs = [":headers"],
     shared_library = "lib/libnvJitLink.so.%{libnvjitlink_version}",
 )
+
+cc_import(
+    name = "nvjitlink_static_library",
+    hdrs = [":headers"],
+    static_library = "lib/libnvJitLink_static.a",
+)
 %{multiline_comment}
 cc_library(
     name = "nvjitlink",
-    %{comment}deps = [":nvjitlink_shared_library"],
+    %{comment}deps = if_static_cuda([":nvjitlink_static_library"], [":nvjitlink_shared_library"]),
     %{comment}linkopts = if_cuda_newer_than(
         %{comment}"13_0",
         %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"),
diff --git a/third_party/gpus/cuda/hermetic/cuda_nvptxcompiler.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_nvptxcompiler.BUILD.tpl
new file mode 100644
index 0000000..7a0fd3e
--- /dev/null
+++ b/third_party/gpus/cuda/hermetic/cuda_nvptxcompiler.BUILD.tpl
@@ -0,0 +1,18 @@
+licenses(["restricted"])  # NVIDIA proprietary license
+
+load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_newer_than")
+
+%{multiline_comment}
+cc_import(
+    name = "nvptxcompiler_static_library",
+    hdrs = ["include/nvPTXCompiler.h"],
+    static_library = if_cuda_newer_than("13_0", "lib/libnvptxcompiler_static.a", None),
+)
+%{multiline_comment}
+
+cc_library(
+    name = "nvptxcompiler",
+    %{comment}deps = [":nvptxcompiler_static_library"],
+    visibility = ["//visibility:public"],
+)
+
diff --git a/third_party/gpus/cuda/hermetic/cuda_nvrtc.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_nvrtc.BUILD.tpl
index 883966b..449f7e8 100644
--- a/third_party/gpus/cuda/hermetic/cuda_nvrtc.BUILD.tpl
+++ b/third_party/gpus/cuda/hermetic/cuda_nvrtc.BUILD.tpl
@@ -2,6 +2,7 @@ licenses(["restricted"])  # NVIDIA proprietary license
 load(
     "@local_config_cuda//cuda:build_defs.bzl",
     "if_cuda_newer_than",
+    "if_static_nvrtc",
 )
 load(
     "@rules_ml_toolchain//third_party/gpus:nvidia_common_rules.bzl",
@@ -20,13 +21,38 @@ cc_import(
     hdrs = [":headers"],
     shared_library = "lib/libnvrtc-builtins.so.%{libnvrtc-builtins_version}",
 )
+
+cc_import(
+    name = "nvrtc_builtins_static_alt",
+    static_library = "lib/libnvrtc-builtins_static.alt.a",
+)
+
+cc_import(
+    name = "nvrtc_static",
+    static_library = "lib/libnvrtc_static.a",
+)
+
+cc_import(
+    name = "nvrtc_builtins_static",
+    static_library = "lib/libnvrtc-builtins_static.a",
+)
+
+cc_import(
+    name = "nvrtc_static_alt",
+    static_library = "lib/libnvrtc_static.alt.a",
+)
 %{multiline_comment}
 cc_library(
     name = "nvrtc",
-    %{comment}deps = [
+    %{comment}deps = if_static_nvrtc([
+        %{comment}":nvrtc_static",
+        %{comment}":nvrtc_builtins_static",
+        %{comment}"@cuda_nvcc//:nvptxcompiler",
+    %{comment}],
+    %{comment}[
         %{comment}":nvrtc_main",
         %{comment}":nvrtc_builtins",
-    %{comment}],
+    %{comment}]),
     %{comment}linkopts = if_cuda_newer_than(
         %{comment}"13_0",
         %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"),
diff --git a/third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl b/third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl
index 5461d8e..b802370 100644
--- a/third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl
+++ b/third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl
@@ -517,6 +517,12 @@ REDIST_VERSIONS_TO_BUILD_TEMPLATES = {
             "11": "//third_party/gpus/cuda/hermetic:cuda_cudart.BUILD.tpl",
         },
     },
+    "cuda_culibos": {
+        "repo_name": "cuda_culibos",
+        "version_to_template": {
+            "13": "//third_party/gpus/cuda/hermetic:cuda_culibos.BUILD.tpl",
+        },
+    },
     "libcufft": {
         "repo_name": "cuda_cufft",
         "version_to_template": {
@@ -637,6 +643,12 @@ REDIST_VERSIONS_TO_BUILD_TEMPLATES = {
             "11": "//third_party/gpus/cuda/hermetic:cuda_nvtx.BUILD.tpl",
         },
     },
+    "libnvptxcompiler": {
+        "repo_name": "cuda_nvptxcompiler",
+        "version_to_template": {
+            "13": "//third_party/gpus/cuda/hermetic:cuda_nvptxcompiler.BUILD.tpl",
+        },
+    },
 }
 
 NVSHMEM_REDIST_VERSIONS_TO_BUILD_TEMPLATES = {