diff --git a/MODULE.bazel b/MODULE.bazel index 1144af2..ace6fd6 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -44,6 +44,7 @@ use_repo( "cuda_nvtx", "cuda_nvvm", "cuda_profiler_api", + "cuda_nvptxcompiler", ) ############################################################## diff --git a/common/BUILD b/common/BUILD index 62393ea..217cb57 100644 --- a/common/BUILD +++ b/common/BUILD @@ -96,6 +96,46 @@ config_setting( flag_values = {":enable_cuda": "False"}, ) +# Flag for linking static CUDA libs +bool_flag( + name = "link_cuda_static_libs", + build_setting_default = False, +) + +config_setting( + name = "is_cuda_static_linking_enabled", + flag_values = { + ":link_cuda_static_libs": "True", + }, +) + + +# Flag for linking static CUDA NVRTC libs +bool_flag( + name = "link_nvrtc_static_libs", + build_setting_default = False, +) + +config_setting( + name = "is_nvrtc_static_linking_enabled", + flag_values = { + ":link_nvrtc_static_libs": "True", + }, +) + +# Flag for linking static CUDA CUDNN libs +bool_flag( + name = "link_cudnn_static_libs", + build_setting_default = False, +) + +config_setting( + name = "is_cudnn_static_linking_enabled", + flag_values = { + ":link_cudnn_static_libs": "True", + }, +) + ####################################################### # Enable SYCL support flags diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl index 2cfa7cd..0f225e1 100755 --- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl +++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl @@ -224,17 +224,17 @@ def InvokeNvcc(argv, log=False): # Unfortunately, there are other options that have -c prefix too. # So allowing only those look like C/C++ files. src_files = [f for f in src_files if - re.search(r'\.cpp$|\.cc$|\.c$|\.cxx$|\.C$|\.cu$', f)] + re.search(r'\.cpp$|\.cc$|\.c$|\.cxx$|\.C$|\.cu$', f)] srcs = ' '.join(src_files) out = ' -o ' + out_file[0] nvccopts = '-D_FORCE_INLINES ' - capabilities_sm = set(get_option_value(argv, '--cuda-gpu-arch')) - set( - get_option_value(argv, '--no-cuda-gpu-arch') + capabilities_sm = set(GetOptionValue(argv, '--cuda-gpu-arch')) - set( + GetOptionValue(argv, '--no-cuda-gpu-arch') ) capabilities_compute = set( - get_option_value(argv, '--cuda-include-ptx') - ) - set(get_option_value(argv, '--no-cuda-include-ptx')) + GetOptionValue(argv, '--cuda-include-ptx') + ) - set(GetOptionValue(argv, '--no-cuda-include-ptx')) # When both "code=sm_xy" and "code=compute_xy" are requested for a single # arch, they can be combined using "code=xy,compute_xy" which avoids a # redundant PTX generation during compilation. @@ -257,6 +257,7 @@ def InvokeNvcc(argv, log=False): nvccopts += std_options nvccopts += m_options nvccopts += warning_options + # nvccopts += ' -rdc=true ' # Force C++17 dialect (note, everything in just one string!) nvccopts += ' --std c++17 ' nvccopts += fatbin_options diff --git a/third_party/gpus/cuda/build_defs.bzl.tpl b/third_party/gpus/cuda/build_defs.bzl.tpl index 09c3c59..c2dc2a9 100644 --- a/third_party/gpus/cuda/build_defs.bzl.tpl +++ b/third_party/gpus/cuda/build_defs.bzl.tpl @@ -20,10 +20,47 @@ def if_cuda(if_true, if_false = []): with CUDA enabled. Otherwise, the select statement evaluates to if_false. """ return select({ - "@local_config_cuda//:is_cuda_enabled": if_true, + "@rules_ml_toolchain//common:is_cuda_enabled": if_true, "//conditions:default": if_false, }) +# Macros for building CUDA static code. +def if_static_cuda(if_true, if_false = []): + """Shorthand for select()'ing on whether we're building with static CUDA libs. + + Returns a select statement which evaluates to if_true if we're building + with static CUDA enabled. Otherwise, the select statement evaluates to if_false. + """ + return select({ + "@rules_ml_toolchain//common:is_cuda_static_linking_enabled": if_true, + "//conditions:default": if_false, + }) + +# Macros for building NVRTC static code. +def if_static_nvrtc(if_true, if_false = []): + """Shorthand for select()'ing on whether we're building with static NVRTC libs. + + Returns a select statement which evaluates to if_true if we're building + with static NVRTC enabled. Otherwise, the select statement evaluates to if_false. + """ + return select({ + "@rules_ml_toolchain//common:is_nvrtc_static_linking_enabled": if_true, + "//conditions:default": if_false, + }) + +# Macros for building CUDNN static code. +def if_static_cudnn(if_true, if_false = []): + """Shorthand for select()'ing on whether we're building with static CUDNN libs. + + Returns a select statement which evaluates to if_true if we're building + with static CUDNN enabled. Otherwise, the select statement evaluates to if_false. + """ + return select({ + "@rules_ml_toolchain//common:is_cudnn_static_linking_enabled": if_true, + "//conditions:default": if_false, + }) + + def if_cuda_clang(if_true, if_false = []): """Shorthand for select()'ing on wheteher we're building with cuda-clang. diff --git a/third_party/gpus/cuda/hermetic/BUILD.tpl b/third_party/gpus/cuda/hermetic/BUILD.tpl index 103465b..8ff4d66 100644 --- a/third_party/gpus/cuda/hermetic/BUILD.tpl +++ b/third_party/gpus/cuda/hermetic/BUILD.tpl @@ -326,10 +326,9 @@ selects.config_setting_group( ], ) -cc_library( - # This is not yet fully supported, but we need the rule - # to make bazel query happy. +alias( name = "nvptxcompiler", + actual = "@cuda_nvcc//:nvptxcompiler", ) alias( diff --git a/third_party/gpus/cuda/hermetic/cuda_configure.bzl b/third_party/gpus/cuda/hermetic/cuda_configure.bzl index 4bd735c..f91c263 100644 --- a/third_party/gpus/cuda/hermetic/cuda_configure.bzl +++ b/third_party/gpus/cuda/hermetic/cuda_configure.bzl @@ -54,6 +54,7 @@ load("@cuda_nvml//:version.bzl", _nvml_version = "VERSION") load("@cuda_nvtx//:version.bzl", _nvtx_version = "VERSION") load("@cuda_nvvm//:version.bzl", _nvvm_version = "VERSION") load("@cuda_profiler_api//:version.bzl", _cuda_profiler_api_version = "VERSION") +load("@cuda_nvptxcompiler//:version.bzl", _cuda_nvptxcompiler_version = "VERSION") load("@llvm_linux_aarch64//:version.bzl", _llvm_aarch64_hermetic_version = "VERSION") load("@llvm_linux_x86_64//:version.bzl", _llvm_x86_64_hermetic_version = "VERSION") load( @@ -366,6 +367,7 @@ def _get_cuda_config(repository_ctx): cupti_version = _cupti_version, cudart_version = _cudart_version, cuda_profiler_api_version = _cuda_profiler_api_version, + cuda_nvptxcompiler_version = _cuda_nvptxcompiler_version, cublas_version = _cublas_version, cusolver_version = _cusolver_version, curand_version = _curand_version, diff --git a/third_party/gpus/cuda/hermetic/cuda_cublas.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_cublas.BUILD.tpl index 7f2c111..45fafff 100644 --- a/third_party/gpus/cuda/hermetic/cuda_cublas.BUILD.tpl +++ b/third_party/gpus/cuda/hermetic/cuda_cublas.BUILD.tpl @@ -2,6 +2,7 @@ licenses(["restricted"]) # NVIDIA proprietary license load( "@local_config_cuda//cuda:build_defs.bzl", "if_cuda_newer_than", + "if_static_cuda", ) load( "@rules_ml_toolchain//third_party/gpus:nvidia_common_rules.bzl", @@ -13,7 +14,6 @@ cc_import( name = "cublas_shared_library", hdrs = [":headers"], shared_library = "lib/libcublas.so.%{libcublas_version}", - deps = [":cublasLt"], ) cc_import( @@ -21,11 +21,26 @@ cc_import( hdrs = [":headers"], shared_library = "lib/libcublasLt.so.%{libcublaslt_version}", ) + +cc_import( + name = "cublasLt_static_library", + hdrs = [":headers"], + static_library = "lib/libcublasLt_static.a", +) + +cc_import( + name = "cublas_static_library", + hdrs = [":headers"], + static_library = "lib/libcublas_static.a", +) %{multiline_comment} cc_library( name = "cublas", visibility = ["//visibility:public"], - %{comment}deps = [":cublas_shared_library"], + %{comment}deps = if_static_cuda( + %{comment}[":cublas_static_library"], + %{comment}[":cublas_shared_library"], + %{comment}) + [":cublasLt"], %{comment}linkopts = if_cuda_newer_than( %{comment}"13_0", %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"), @@ -36,7 +51,10 @@ cc_library( cc_library( name = "cublasLt", visibility = ["//visibility:public"], - %{comment}deps = [":cublasLt_shared_library"], + %{comment}deps = if_static_cuda( + %{comment}[":cublasLt_static_library"], + %{comment}[":cublasLt_shared_library"], + %{comment}), %{comment}linkopts = if_cuda_newer_than( %{comment}"13_0", %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"), diff --git a/third_party/gpus/cuda/hermetic/cuda_cudart.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_cudart.BUILD.tpl index 2668c3b..f09d1d3 100644 --- a/third_party/gpus/cuda/hermetic/cuda_cudart.BUILD.tpl +++ b/third_party/gpus/cuda/hermetic/cuda_cudart.BUILD.tpl @@ -2,6 +2,7 @@ licenses(["restricted"]) # NVIDIA proprietary license load( "@local_config_cuda//cuda:build_defs.bzl", "if_cuda_newer_than", + "if_static_cuda", ) load( "@rules_ml_toolchain//third_party/gpus:nvidia_common_rules.bzl", @@ -29,6 +30,24 @@ cc_import( hdrs = [":headers"], shared_library = "lib/libcudart.so.%{libcudart_version}", ) + +cc_import( + name = "cudart_static_library", + hdrs = [":headers"], + static_library = "lib/libcudart_static.a", +) + +cc_import( + name = "culibos_static_library", + hdrs = [":headers"], + static_library = if_cuda_newer_than("13_0", None, "lib/libculibos.a"), +) + +cc_import( + name = "cudadevrt_static_library", + hdrs = [":headers"], + static_library = "lib/libcudadevrt.a", +) %{multiline_comment} cc_library( name = "cuda_driver", @@ -44,9 +63,10 @@ cc_library( %{comment}"@cuda_driver//:nvidia_ptxjitcompiler", %{comment}], %{comment}"//conditions:default": [":cuda_driver"], - %{comment}}) + [ - %{comment}":cudart_shared_library", - %{comment}], + %{comment}}) + if_static_cuda( + %{comment}[":cudart_static_library", ":cudadevrt_static_library"] + if_cuda_newer_than("13_0", ["@cuda_culibos//:culibos_static_library"], [":culibos_static_library"]), + %{comment}[":cudart_shared_library"], + %{comment}), %{comment}linkopts = if_cuda_newer_than( %{comment}"13_0", %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"), diff --git a/third_party/gpus/cuda/hermetic/cuda_cudnn9.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_cudnn9.BUILD.tpl index e8f571f..ebcaf3b 100644 --- a/third_party/gpus/cuda/hermetic/cuda_cudnn9.BUILD.tpl +++ b/third_party/gpus/cuda/hermetic/cuda_cudnn9.BUILD.tpl @@ -1,4 +1,8 @@ licenses(["restricted"]) # NVIDIA proprietary license +load( + "@local_config_cuda//cuda:build_defs.bzl", + "if_static_cudnn", +) load( "@rules_ml_toolchain//third_party/gpus:nvidia_common_rules.bzl", "cuda_rpath_flags", @@ -52,20 +56,72 @@ cc_import( hdrs = [":headers"], shared_library = "lib/libcudnn.so.%{libcudnn_version}", ) + +cc_import( + name = "cudnn_graph_static", + hdrs = [":headers"], + static_library = "lib/libcudnn_graph_static_v9.a", +) + +cc_import( + name = "cudnn_adv_static", + hdrs = [":headers"], + static_library = "lib/libcudnn_adv_static_v9.a", +) + +cc_import( + name = "cudnn_engines_runtime_compiled_static", + hdrs = [":headers"], + static_library = "lib/libcudnn_engines_runtime_compiled_static_v9.a", +) + +cc_import( + name = "cudnn_engines_precompiled_static", + hdrs = [":headers"], + static_library = "lib/libcudnn_engines_precompiled_static_v9.a", +) + +cc_import( + name = "cudnn_ops_static", + hdrs = [":headers"], + static_library = "lib/libcudnn_ops_static_v9.a", +) + +cc_import( + name = "cudnn_heuristic_static", + hdrs = [":headers"], + static_library = "lib/libcudnn_heuristic_static_v9.a", +) + +cc_import( + name = "cudnn_cnn_static", + hdrs = [":headers"], + static_library = "lib/libcudnn_cnn_static_v9.a", +) %{multiline_comment} cc_library( name = "cudnn", - %{comment}deps = [ - %{comment}":cudnn_engines_precompiled", + %{comment}alwayslink = if_static_cudnn(True, False), + %{comment}srcs = if_static_cudnn( + %{comment}[":lib/libcudnn_engines_precompiled_static_v9.a", + %{comment} ":lib/libcudnn_ops_static_v9.a", + %{comment} ":lib/libcudnn_cnn_static_v9.a", + %{comment} ":lib/libcudnn_adv_static_v9.a", + %{comment} ":lib/libcudnn_heuristic_static_v9.a", + %{comment} ":lib/libcudnn_graph_static_v9.a", + %{comment} ":lib/libcudnn_engines_runtime_compiled_static_v9.a", + %{comment}], []), + %{comment}deps = if_static_cudnn( + %{comment}[], + %{comment}[":cudnn_engines_precompiled", %{comment}":cudnn_ops", %{comment}":cudnn_graph", %{comment}":cudnn_cnn", %{comment}":cudnn_adv", %{comment}":cudnn_engines_runtime_compiled", %{comment}":cudnn_heuristic", - %{comment}"@cuda_nvrtc//:nvrtc", %{comment}":cudnn_main", - %{comment}], + %{comment}]) + ["@cuda_nvrtc//:nvrtc"], %{comment}linkopts = cuda_rpath_flags("nvidia/cudnn/lib"), visibility = ["//visibility:public"], ) diff --git a/third_party/gpus/cuda/hermetic/cuda_cufft.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_cufft.BUILD.tpl index 3fdd402..305451b 100644 --- a/third_party/gpus/cuda/hermetic/cuda_cufft.BUILD.tpl +++ b/third_party/gpus/cuda/hermetic/cuda_cufft.BUILD.tpl @@ -2,6 +2,7 @@ licenses(["restricted"]) # NVIDIA proprietary license load( "@local_config_cuda//cuda:build_defs.bzl", "if_cuda_newer_than", + "if_static_cuda", ) load( "@rules_ml_toolchain//third_party/gpus:nvidia_common_rules.bzl", @@ -14,15 +15,33 @@ cc_import( hdrs = [":headers"], shared_library = "lib/libcufft.so.%{libcufft_version}", ) + +cc_import( + name = "cufft_static_library", + hdrs = [":headers"], + static_library = "lib/libcufft_static.a", +) + +cc_import( + name = "cufftw_static_library", + hdrs = [":headers"], + static_library = "lib/libcufftw_static.a", +) + +cc_import( + name = "cufft_static_nocallback_library", + hdrs = [":headers"], + static_library = if_cuda_newer_than("13_0", None, "lib/libcufft_static_nocallback.a"), +) %{multiline_comment} cc_library( name = "cufft", - %{comment}deps = [":cufft_shared_library"], + %{comment}deps = if_static_cuda(if_cuda_newer_than("13_0", [":cufft_static_library"], [":cufft_static_nocallback_library"]) + [":cufftw_static_library"], [":cufft_shared_library"]), %{comment}linkopts = if_cuda_newer_than( %{comment}"13_0", %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"), %{comment}if_false = cuda_rpath_flags("nvidia/cufft/lib"), - %{comment}), + %{comment}) + if_static_cuda(["-Wl,--no-relax"]), visibility = ["//visibility:public"], ) diff --git a/third_party/gpus/cuda/hermetic/cuda_culibos.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_culibos.BUILD.tpl new file mode 100644 index 0000000..4957dee --- /dev/null +++ b/third_party/gpus/cuda/hermetic/cuda_culibos.BUILD.tpl @@ -0,0 +1,11 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_newer_than") + +%{multiline_comment} +cc_import( + name = "culibos_static_library", + static_library = if_cuda_newer_than("13_0", "lib/libculibos.a", None), + visibility = ["//visibility:public"], +) +%{multiline_comment} diff --git a/third_party/gpus/cuda/hermetic/cuda_cupti.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_cupti.BUILD.tpl index 6780ca3..c7bde53 100644 --- a/third_party/gpus/cuda/hermetic/cuda_cupti.BUILD.tpl +++ b/third_party/gpus/cuda/hermetic/cuda_cupti.BUILD.tpl @@ -2,6 +2,7 @@ licenses(["restricted"]) # NVIDIA proprietary license load( "@local_config_cuda//cuda:build_defs.bzl", "if_cuda_newer_than", + "if_static_cuda", "if_version_equal_or_greater_than", ) load( @@ -15,10 +16,22 @@ cc_import( hdrs = [":headers"], shared_library = "lib/libcupti.so.%{libcupti_version}", ) + +cc_import( + name = "cupti_static_library", + hdrs = [":headers"], + static_library = "lib/libcupti_static.a", +) + +cc_import( + name = "nvperf_host_static_library", + hdrs = [":headers"], + static_library = "lib/libnvperf_host_static.a", +) %{multiline_comment} cc_library( name = "cupti", - %{comment}deps = [":cupti_shared_library"], + %{comment}deps = if_static_cuda([":cupti_static_library"], [":cupti_shared_library"]), %{comment}linkopts = if_cuda_newer_than( %{comment}"13_0", %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"), diff --git a/third_party/gpus/cuda/hermetic/cuda_curand.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_curand.BUILD.tpl index 093c7c0..686bafb 100644 --- a/third_party/gpus/cuda/hermetic/cuda_curand.BUILD.tpl +++ b/third_party/gpus/cuda/hermetic/cuda_curand.BUILD.tpl @@ -2,6 +2,7 @@ licenses(["restricted"]) # NVIDIA proprietary license load( "@local_config_cuda//cuda:build_defs.bzl", "if_cuda_newer_than", + "if_static_cuda", ) load( "@rules_ml_toolchain//third_party/gpus:nvidia_common_rules.bzl", @@ -14,10 +15,16 @@ cc_import( hdrs = [":headers"], shared_library = "lib/libcurand.so.%{libcurand_version}", ) + +cc_import( + name = "curand_static_library", + hdrs = [":headers"], + static_library = "lib/libcurand_static.a", +) %{multiline_comment} cc_library( name = "curand", - %{comment}deps = [":curand_shared_library"], + %{comment}deps = if_static_cuda([":curand_static_library"], [":curand_shared_library"]), %{comment}linkopts = if_cuda_newer_than( %{comment}"13_0", %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"), diff --git a/third_party/gpus/cuda/hermetic/cuda_cusolver.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_cusolver.BUILD.tpl index 23be787..69552a7 100644 --- a/third_party/gpus/cuda/hermetic/cuda_cusolver.BUILD.tpl +++ b/third_party/gpus/cuda/hermetic/cuda_cusolver.BUILD.tpl @@ -2,6 +2,7 @@ licenses(["restricted"]) # NVIDIA proprietary license load( "@local_config_cuda//cuda:build_defs.bzl", "if_cuda_newer_than", + "if_static_cuda", ) load( "@rules_ml_toolchain//third_party/gpus:nvidia_common_rules.bzl", @@ -13,17 +14,41 @@ cc_import( name = "cusolver_shared_library", hdrs = [":headers"], shared_library = "lib/libcusolver.so.%{libcusolver_version}", - deps = [ - "@cuda_nvjitlink//:nvjitlink", - "@cuda_cusparse//:cusparse", - "@cuda_cublas//:cublas", - "@cuda_cublas//:cublasLt", - ], +) + +cc_import( + name = "cusolver_lapack_static_library", + hdrs = [":headers"], + static_library = "lib/libcusolver_lapack_static.a", +) + +cc_import( + name = "cusolver_metis_static_library", + hdrs = [":headers"], + static_library = "lib/libcusolver_metis_static.a", +) + +cc_import( + name = "cusolver_static_library", + hdrs = [":headers"], + static_library = "lib/libcusolver_static.a", +) + +cc_import( + name = "metis_static_library", + hdrs = [":headers"], + static_library = "lib/libmetis_static.a", ) %{multiline_comment} cc_library( name = "cusolver", - %{comment}deps = [":cusolver_shared_library"], + %{comment}deps = if_static_cuda([":cusolver_static_library", ":cusolver_lapack_static_library", ":cusolver_metis_static_library", ":metis_static_library"], [":cusolver_shared_library"]) + %{comment}+ [ + %{comment}"@cuda_nvjitlink//:nvjitlink", + %{comment}"@cuda_cusparse//:cusparse", + %{comment}"@cuda_cublas//:cublas", + %{comment}"@cuda_cublas//:cublasLt", + %{comment}], %{comment}linkopts = if_cuda_newer_than( %{comment}"13_0", %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"), diff --git a/third_party/gpus/cuda/hermetic/cuda_cusparse.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_cusparse.BUILD.tpl index 5c67c11..65f37f3 100644 --- a/third_party/gpus/cuda/hermetic/cuda_cusparse.BUILD.tpl +++ b/third_party/gpus/cuda/hermetic/cuda_cusparse.BUILD.tpl @@ -2,6 +2,7 @@ licenses(["restricted"]) # NVIDIA proprietary license load( "@local_config_cuda//cuda:build_defs.bzl", "if_cuda_newer_than", + "if_static_cuda", ) load( "@rules_ml_toolchain//third_party/gpus:nvidia_common_rules.bzl", @@ -13,12 +14,17 @@ cc_import( name = "cusparse_shared_library", hdrs = [":headers"], shared_library = "lib/libcusparse.so.%{libcusparse_version}", - deps = ["@cuda_nvjitlink//:nvjitlink"], +) + +cc_import( + name = "cusparse_static_library", + hdrs = [":headers"], + static_library = "lib/libcusparse_static.a", ) %{multiline_comment} cc_library( name = "cusparse", - %{comment}deps = [":cusparse_shared_library"], + %{comment}deps = if_static_cuda([":cusparse_static_library"], [":cusparse_shared_library"]) + ["@cuda_nvjitlink//:nvjitlink"], %{comment}linkopts = if_cuda_newer_than( %{comment}"13_0", %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"), diff --git a/third_party/gpus/cuda/hermetic/cuda_nvcc.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_nvcc.BUILD.tpl index e311c43..f0f42bf 100644 --- a/third_party/gpus/cuda/hermetic/cuda_nvcc.BUILD.tpl +++ b/third_party/gpus/cuda/hermetic/cuda_nvcc.BUILD.tpl @@ -1,5 +1,9 @@ licenses(["restricted"]) # NVIDIA proprietary license +load( + "@local_config_cuda//cuda:build_defs.bzl", + "if_static_cuda", +) load( "@rules_ml_toolchain//cc/cuda/features:cuda_nvcc_feature.bzl", "cuda_nvcc_feature", @@ -84,6 +88,24 @@ cuda_nvcc_feature( ], ) +%{multiline_comment} +cc_import( + name = "nvptxcompiler_static_library", + hdrs = [":headers"], + static_library = if_cuda_newer_than("13_0", None, "lib/libnvptxcompiler_static.a"), +) +%{multiline_comment} + +cc_library( + name = "nvptxcompiler", + %{comment}deps = if_static_cuda(if_cuda_newer_than( + %{comment}"13_0", + %{comment}["@cuda_nvptxcompiler//:nvptxcompiler"], + %{comment}[":nvptxcompiler_static_library"], + %{comment})), + visibility = ["//visibility:public"], +) + cc_library( name = "headers", %{comment}hdrs = glob([ diff --git a/third_party/gpus/cuda/hermetic/cuda_nvjitlink.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_nvjitlink.BUILD.tpl index 5ca20a2..c4b3ab0 100644 --- a/third_party/gpus/cuda/hermetic/cuda_nvjitlink.BUILD.tpl +++ b/third_party/gpus/cuda/hermetic/cuda_nvjitlink.BUILD.tpl @@ -2,6 +2,7 @@ licenses(["restricted"]) # NVIDIA proprietary license load( "@local_config_cuda//cuda:build_defs.bzl", "if_cuda_newer_than", + "if_static_cuda", ) load( "@rules_ml_toolchain//third_party/gpus:nvidia_common_rules.bzl", @@ -14,10 +15,16 @@ cc_import( hdrs = [":headers"], shared_library = "lib/libnvJitLink.so.%{libnvjitlink_version}", ) + +cc_import( + name = "nvjitlink_static_library", + hdrs = [":headers"], + static_library = "lib/libnvJitLink_static.a", +) %{multiline_comment} cc_library( name = "nvjitlink", - %{comment}deps = [":nvjitlink_shared_library"], + %{comment}deps = if_static_cuda([":nvjitlink_static_library"], [":nvjitlink_shared_library"]), %{comment}linkopts = if_cuda_newer_than( %{comment}"13_0", %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"), diff --git a/third_party/gpus/cuda/hermetic/cuda_nvptxcompiler.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_nvptxcompiler.BUILD.tpl new file mode 100644 index 0000000..7a0fd3e --- /dev/null +++ b/third_party/gpus/cuda/hermetic/cuda_nvptxcompiler.BUILD.tpl @@ -0,0 +1,18 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_newer_than") + +%{multiline_comment} +cc_import( + name = "nvptxcompiler_static_library", + hdrs = ["include/nvPTXCompiler.h"], + static_library = if_cuda_newer_than("13_0", "lib/libnvptxcompiler_static.a", None), +) +%{multiline_comment} + +cc_library( + name = "nvptxcompiler", + %{comment}deps = [":nvptxcompiler_static_library"], + visibility = ["//visibility:public"], +) + diff --git a/third_party/gpus/cuda/hermetic/cuda_nvrtc.BUILD.tpl b/third_party/gpus/cuda/hermetic/cuda_nvrtc.BUILD.tpl index 883966b..449f7e8 100644 --- a/third_party/gpus/cuda/hermetic/cuda_nvrtc.BUILD.tpl +++ b/third_party/gpus/cuda/hermetic/cuda_nvrtc.BUILD.tpl @@ -2,6 +2,7 @@ licenses(["restricted"]) # NVIDIA proprietary license load( "@local_config_cuda//cuda:build_defs.bzl", "if_cuda_newer_than", + "if_static_nvrtc", ) load( "@rules_ml_toolchain//third_party/gpus:nvidia_common_rules.bzl", @@ -20,13 +21,38 @@ cc_import( hdrs = [":headers"], shared_library = "lib/libnvrtc-builtins.so.%{libnvrtc-builtins_version}", ) + +cc_import( + name = "nvrtc_builtins_static_alt", + static_library = "lib/libnvrtc-builtins_static.alt.a", +) + +cc_import( + name = "nvrtc_static", + static_library = "lib/libnvrtc_static.a", +) + +cc_import( + name = "nvrtc_builtins_static", + static_library = "lib/libnvrtc-builtins_static.a", +) + +cc_import( + name = "nvrtc_static_alt", + static_library = "lib/libnvrtc_static.alt.a", +) %{multiline_comment} cc_library( name = "nvrtc", - %{comment}deps = [ + %{comment}deps = if_static_nvrtc([ + %{comment}":nvrtc_static", + %{comment}":nvrtc_builtins_static", + %{comment}"@cuda_nvcc//:nvptxcompiler", + %{comment}], + %{comment}[ %{comment}":nvrtc_main", %{comment}":nvrtc_builtins", - %{comment}], + %{comment}]), %{comment}linkopts = if_cuda_newer_than( %{comment}"13_0", %{comment}if_true = cuda_rpath_flags("nvidia/cu13/lib"), diff --git a/third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl b/third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl index 5461d8e..b802370 100644 --- a/third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl +++ b/third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl @@ -517,6 +517,12 @@ REDIST_VERSIONS_TO_BUILD_TEMPLATES = { "11": "//third_party/gpus/cuda/hermetic:cuda_cudart.BUILD.tpl", }, }, + "cuda_culibos": { + "repo_name": "cuda_culibos", + "version_to_template": { + "13": "//third_party/gpus/cuda/hermetic:cuda_culibos.BUILD.tpl", + }, + }, "libcufft": { "repo_name": "cuda_cufft", "version_to_template": { @@ -637,6 +643,12 @@ REDIST_VERSIONS_TO_BUILD_TEMPLATES = { "11": "//third_party/gpus/cuda/hermetic:cuda_nvtx.BUILD.tpl", }, }, + "libnvptxcompiler": { + "repo_name": "cuda_nvptxcompiler", + "version_to_template": { + "13": "//third_party/gpus/cuda/hermetic:cuda_nvptxcompiler.BUILD.tpl", + }, + }, } NVSHMEM_REDIST_VERSIONS_TO_BUILD_TEMPLATES = {