Skip to content

Commit 28e7ae9

Browse files
committed
fixup! Add minimal clang-tidy pass to cmake and CI
1 parent b0cbe1f commit 28e7ae9

File tree

6 files changed

+43
-6
lines changed

6 files changed

+43
-6
lines changed

ci/matrix.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ workflows:
2121
# args: '--preset libcudacxx --lit-tests "cuda/utility/basic_any.pass.cpp"' }
2222
#
2323
override:
24+
- { jobs: ['build'], project: 'tidy', std: 'min', cxx: ['clang'], cudacxx: ['clang'], ctk: 'clang-cuda', sm: '75' }
2425

2526
pull_request:
2627
# Old CTK: Oldest/newest supported host compilers:

cudax/include/cuda/experimental/__cuco/hyperloglog_ref.cuh

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#endif // no system header
2323

2424
#include <cuda/std/__cstddef/types.h>
25+
#include <cuda/std/__type_traits/enable_if.h>
26+
#include <cuda/std/__type_traits/is_convertible.h>
2527
#include <cuda/std/span>
2628
#include <cuda/stream>
2729

@@ -105,8 +107,38 @@ public:
105107
//!
106108
//! @param __group CUDA Cooperative group this operation is executed in
107109
template <class _CG>
108-
_CCCL_DEVICE constexpr void clear(_CG __group) noexcept
110+
_CCCL_DEVICE constexpr ::cuda::std::enable_if_t<!::cuda::std::is_convertible_v<_CG, ::cuda::stream_ref>, void>
111+
clear(_CG __group) noexcept
109112
{
113+
// The enable_if above is to work around an incompatibility between host and device
114+
// overload preference for clang and NVCC. See
115+
// https://llvm.org/docs/CompileCudaWithLLVM.html#overloading-based-on-host-and-device-attributes
116+
// for further reading, but the bottom line is when:
117+
//
118+
// 1. Compiling in device mode (and clang compiles CUDA in a "hybrid" host-device mode,
119+
// also explained by the link above).
120+
// 2. And the current function is __host__ __device__.
121+
// 3. And the function whose overload needs to be resolved has both a __host__ __device__,
122+
// and __device__ (and/or __host__) overload.
123+
//
124+
// Then clang will prefer these overloads (assuming they have equal priority under C++
125+
// rules) in the following order:
126+
//
127+
// 1. __host__ __device__
128+
// 2. __device__
129+
// 3. __host__
130+
//
131+
// In this particular case, `clear(_CG)` conflicts with `clear(::cuda::stream_ref)` when called
132+
// from `hyperloglog::clear(::cuda::stream_ref)`. `hyperloglog::clear(::cuda::stream_ref)`
133+
// is constexpr, and therefore implicitly __host__ __device__. Since
134+
// `clear(::cuda::stream_ref)` on this class is only __host__, it will take lower priority
135+
// than `clear(_CG)`, and we get:
136+
//
137+
// cudax/include/cuda/experimental/__cuco/__hyperloglog/hyperloglog_impl.cuh:131:28: error: no member named
138+
// 'thread_rank' in 'cuda::stream_ref' [clang-diagnostic-error]
139+
//
140+
// 131 | for (int __i = __group.thread_rank(); __i < __sketch.size(); __i += __group.size())
141+
// | ~~~~~~~ ^
110142
__impl.__clear(__group);
111143
}
112144

cudax/include/cuda/experimental/__execution/stream/adaptor.cuh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,11 @@ private:
301301

302302
// without the following, the kernel in __host_start will fail to launch with
303303
// cudaErrorInvalidDeviceFunction.
304+
#if _CCCL_HAS_CDP()
305+
// clang<22 errors when compiling this, complaining that we are taking a reference to
306+
// __global__ function inside a __device__ function.
304307
::__cccl_unused(&__completion_kernel<__block_threads, _Rcvr, __results_t>);
308+
#endif
305309
__state.__state_.__complete_inline_ = true;
306310
execution::start(__state.__opstate_);
307311
}

cudax/include/cuda/experimental/__stf/internal/launch.cuh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,8 @@ public:
181181
template <typename Fun>
182182
void operator->*(Fun&& f)
183183
{
184-
# if __NVCOMPILER
185-
// With nvc++, all lambdas can run on host and device.
184+
# if __NVCOMPILER || _CCCL_CUDA_COMPILER(CLANG)
185+
// With nvc++ or clang, all lambdas can run on host and device.
186186
static constexpr bool is_extended_host_device_lambda_closure_type = true,
187187
is_extended_device_lambda_closure_type = false;
188188
# else
@@ -306,7 +306,7 @@ public:
306306
template <typename Fun>
307307
void operator->*(Fun&& f)
308308
{
309-
# if __NVCOMPILER
309+
# if __NVCOMPILER || _CCCL_CUDA_COMPILER(CLANG)
310310
// With nvc++ or clang, all lambdas can run on host and device.
311311
static constexpr bool is_extended_host_device_lambda_closure_type = true,
312312
is_extended_device_lambda_closure_type = false;

cudax/include/cuda/experimental/__stf/internal/parallel_for_scope.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -614,7 +614,7 @@ public:
614614

615615
static constexpr bool need_reduction = (deps_ops_t::does_work || ...);
616616

617-
# if __NVCOMPILER
617+
# if __NVCOMPILER || _CCCL_CUDA_COMPILER(CLANG)
618618
// With nvc++ or clang, all lambdas can run on host and device.
619619
static constexpr bool is_extended_host_device_lambda_closure_type = true,
620620
is_extended_device_lambda_closure_type = false;

cudax/test/execution/test_let_value.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ C2H_TEST("let_value works when the function returns a dependent sender", "[adapt
467467

468468
#endif // _CCCL_HOST_COMPILATION()
469469

470-
#if !_CCCL_CUDA_COMPILER(NVCC) || !defined(_CCCL_CLANG_TIDY_INVOKED)
470+
#if !_CCCL_CUDA_COMPILER(NVCC) && !defined(_CCCL_CLANG_TIDY_INVOKED)
471471
// This example causes nvcc to segfault, and clang-tidy to error out with
472472
//
473473
// cudax/test/execution/test_let_value.cu:487:17: error: static assertion failed due to requirement

0 commit comments

Comments
 (0)