From ddb9a4ac67d910472ce40997573e489345121248 Mon Sep 17 00:00:00 2001
From: Han Zhu <zhuhan@meta.com>
Date: Sat, 22 Feb 2025 13:13:01 -0800
Subject: [PATCH] Skeleton code for nvcc compilation planner

Summary:
Add a `-_NVCC_DRYRUN_` option to wrap_nvcc.py. If provided, wrap_nvcc.py will generate several files related to how the CUDA file will be compiled (to be implemented):
* A plain dump of the `nvcc -dryrun` output.
* A list of environment variables for the compilation sub-commands to run with.
* A dependency graph of the compilation sub-commands.

This diff adds the skeleton API for this `nvcc_compilation_plan`. It currently runs in addition to the regular monolithic CUDA compilation, so that we still produce a valid C++ output object when `-_NVCC_DRYRUN_` is passed. Once the implementation is complete, I'll stop running the monolithic CUDA compilation in this branch.

Reviewed By: get9

Differential Revision: D69757705

fbshipit-source-id: 5f5c2dc2074c1d9dc6debc20545aaa6d7dcef464
---
 prelude/cxx/compile.bzl | 30 +++++++++++++++++++++---------
 prelude/cxx/cuda.bzl    | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/prelude/cxx/compile.bzl b/prelude/cxx/compile.bzl
index 2657d0f5814a6..aadfedf07a56d 100644
--- a/prelude/cxx/compile.bzl
+++ b/prelude/cxx/compile.bzl
@@ -20,6 +20,7 @@ load(
     "HeaderExtension",
     "HeadersDepFiles",
 )
+load("@prelude//cxx:cuda.bzl", "cuda_compile")
 load("@prelude//cxx:cxx_toolchain_types.bzl", "CxxToolchainInfo")
 load("@prelude//cxx:cxx_utility.bzl", "cxx_attrs_get_allow_cache_upload")
 load(
@@ -385,15 +386,26 @@ def _compile_single_cxx(
         )
         cmd.add(cmd_args(external_debug_info.as_output(), format = "--fbcc-create-external-debug-info={}"))
 
-    ctx.actions.run(
-        cmd,
-        category = src_compile_cmd.cxx_compile_cmd.category,
-        identifier = identifier,
-        dep_files = action_dep_files,
-        allow_cache_upload = src_compile_cmd.cxx_compile_cmd.allow_cache_upload,
-        allow_dep_file_cache_upload = False,
-        **error_handler_args
-    )
+    if src_compile_cmd.src.extension == ".cu":
+        cuda_compile(
+            ctx,
+            cmd,
+            src_compile_cmd,
+            identifier,
+            action_dep_files,
+            allow_dep_file_cache_upload = False,
+            error_handler_args = error_handler_args,
+        )
+    else:
+        ctx.actions.run(
+            cmd,
+            category = src_compile_cmd.cxx_compile_cmd.category,
+            identifier = identifier,
+            dep_files = action_dep_files,
+            allow_cache_upload = src_compile_cmd.cxx_compile_cmd.allow_cache_upload,
+            allow_dep_file_cache_upload = False,
+            **error_handler_args
+        )
 
     # If we're building with split debugging, where the debug info is in the
     # original object, then add the object as external debug info
diff --git a/prelude/cxx/cuda.bzl b/prelude/cxx/cuda.bzl
index 1ffb2d2bf1ce4..16e1545778151 100644
--- a/prelude/cxx/cuda.bzl
+++ b/prelude/cxx/cuda.bzl
@@ -5,6 +5,8 @@
 # License, Version 2.0 found in the LICENSE-APACHE file in the root directory
 # of this source tree.
 
+load("@prelude//cxx:compile_types.bzl", "CxxSrcCompileCommand")
+
 CudaCompileStyle = enum(
     # Use NVCC as the compiler driver and compile a CUDA file in a single Buck
     # action.
@@ -13,3 +15,35 @@ CudaCompileStyle = enum(
     # sub-command.
     "dist",
 )
+
+def cuda_compile(  # Registers the compile action for one source, shaped by ctx.attrs.cuda_compile_style.
+        ctx: AnalysisContext,
+        cmd: cmd_args,  # compile command line; NOTE: the "dist" branch appends a flag to it in place
+        src_compile_cmd: CxxSrcCompileCommand,  # supplies the category and cache-upload setting for "mono"
+        identifier: str,  # forwarded unchanged as the action identifier
+        action_dep_files: dict[str, ArtifactTag],  # forwarded unchanged as the action's dep_files
+        allow_dep_file_cache_upload: bool,  # forwarded unchanged to ctx.actions.run in both branches
+        error_handler_args: dict[str, [typing.Callable, None]]):  # splatted as extra kwargs to ctx.actions.run
+    if ctx.attrs.cuda_compile_style == CudaCompileStyle("mono").value:  # attr is compared against the enum's raw value
+        ctx.actions.run(  # "mono": run cmd as-is in a single action
+            cmd,
+            category = src_compile_cmd.cxx_compile_cmd.category,  # reuse the compile command's own category
+            identifier = identifier,
+            dep_files = action_dep_files,
+            allow_cache_upload = src_compile_cmd.cxx_compile_cmd.allow_cache_upload,
+            allow_dep_file_cache_upload = allow_dep_file_cache_upload,
+            **error_handler_args
+        )
+    elif ctx.attrs.cuda_compile_style == CudaCompileStyle("dist").value:
+        cmd.add("-_NVCC_DRYRUN_")  # mutates the caller's cmd; presumably consumed by the nvcc wrapper script -- TODO confirm
+        ctx.actions.run(  # "dist": same cmd plus the dry-run flag, under a separate category
+            cmd,
+            category = "cuda_compile_prepare",
+            identifier = identifier,
+            dep_files = action_dep_files,
+            allow_cache_upload = True,  # hard-coded here, unlike "mono" which defers to cxx_compile_cmd.allow_cache_upload
+            allow_dep_file_cache_upload = allow_dep_file_cache_upload,
+            **error_handler_args
+        )
+    else:
+        fail("Unsupported CUDA compile style: {}".format(ctx.attrs.cuda_compile_style))  # reject any other style value