cccl/cudax/include/cuda/experimental/__places/data_place_interface.cuh at 7aa966706ad9720be72b81c10c1f2cde47f86e0d · caugonnet/cccl · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
//===----------------------------------------------------------------------===//
//
// Part of CUDASTF in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

/**
 * @file
 * @brief Abstract interface for data_place implementations
 *
 * This interface defines the contract that all data_place implementations must satisfy.
 * It enables a clean polymorphic design where host, managed, device, composite, and
 * custom places (e.g. green contexts) all implement a common interface.
 */

#pragma once

#include <cuda/__cccl_config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
#  pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
#  pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
#  pragma system_header
#endif // no system header

#include <cuda/std/__cccl/assert.h>

#include <cstddef>
#include <functional>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <typeinfo>

#include <cuda.h>
#include <cuda_runtime.h>

namespace cuda::experimental::stf
{
// Forward declarations: these types are only named in this header, they are
// not defined here.
class exec_place;
class pos4;
class dim4;

//! Function type for computing executor placement from data coordinates.
//! This is a plain function pointer: it takes one pos4 and two dim4 values
//! (all by value) and returns a pos4.
using partition_fn_t = pos4 (*)(pos4, dim4, dim4);

/**
 * @brief Polymorphic base class implemented by every kind of data_place
 *
 * Host, managed, device, composite and future/custom places all derive from
 * this interface. The data_place class owns a shared_ptr to an implementation
 * of this interface and forwards its operations to it.
 */
class data_place_interface
{
public:
  virtual ~data_place_interface() = default;

  /**
   * @brief Reserved ordinal values identifying places that are not a CUDA device
   *
   * get_device_ordinal() returns one of these negative values when the place
   * does not map to a specific CUDA device. Non-negative ordinals are device
   * indices.
   */
  enum ord : int
  {
    host        = -1,
    managed     = -2,
    affine      = -3,
    device_auto = -4,
    composite   = -5,
    invalid     = ::std::numeric_limits<int>::min(),
  };

  // === Core properties ===

  /**
   * @brief Tell whether this place designates a concrete memory target
   *
   * @return true for places that can be allocated from directly
   *         (host, managed, device(N), composite, green_ctx, ...);
   *         false for abstract or deferred places that still have to be
   *         resolved (invalid, affine, device_auto).
   */
  virtual bool is_resolved() const = 0;

  /**
   * @brief Device ordinal associated with this place
   *
   * @return A value >= 0 for a specific CUDA device, otherwise one of the
   *         `ord` enumerators:
   *         - ord::host (-1) for host
   *         - ord::managed (-2) for managed
   *         - ord::affine (-3) for affine
   *         - ord::device_auto (-4) for device_auto
   *         - ord::composite (-5) for composite
   *         - ord::invalid for invalid
   */
  virtual int get_device_ordinal() const = 0;

  /**
   * @brief Textual description of this place
   */
  virtual ::std::string to_string() const = 0;

  /**
   * @brief Hash value of this place
   */
  virtual size_t hash() const = 0;

  /**
   * @brief Order this place relative to another one
   *
   * @param other Place to compare against
   * @return -1 if *this < other, 0 if *this == other, 1 if *this > other
   */
  virtual int cmp(const data_place_interface& other) const = 0;

  // === Memory allocation ===

  /**
   * @brief Allocate a buffer at this place
   *
   * Note that the size is a signed ::std::ptrdiff_t here, whereas deallocate()
   * takes an unsigned size_t.
   *
   * @param size Number of bytes to allocate
   * @param stream CUDA stream used when the allocation is stream-ordered
   * @return Pointer to the newly allocated memory
   * @throws std::runtime_error if this kind of place cannot allocate memory
   */
  virtual void* allocate(::std::ptrdiff_t size, cudaStream_t stream) const = 0;

  /**
   * @brief Release a buffer previously allocated at this place
   *
   * @param ptr Pointer to the memory to release
   * @param size Size of the allocation
   * @param stream CUDA stream used when the deallocation is stream-ordered
   */
  virtual void deallocate(void* ptr, size_t size, cudaStream_t stream) const = 0;

  /**
   * @brief Tell whether allocate()/deallocate() are stream-ordered
   */
  virtual bool allocation_is_stream_ordered() const = 0;

  /**
   * @brief Create a physical memory allocation for this place (VMM API)
   *
   * The base implementation ignores both arguments and reports
   * CUDA_ERROR_NOT_SUPPORTED; implementations that support VMM are expected
   * to override it.
   *
   * @param handle Output parameter receiving the allocation handle
   * @param size Size of the allocation in bytes
   * @return CUresult describing success or failure
   */
  virtual CUresult mem_create(CUmemGenericAllocationHandle* /*handle*/, size_t /*size*/) const
  {
    return CUDA_ERROR_NOT_SUPPORTED;
  }

  /**
   * @brief Implementation object of the affine exec_place (custom place types)
   *
   * Custom data_place implementations (e.g. green contexts) override this to
   * supply their own affine exec_place. The base implementation returns an
   * empty pointer, which makes data_place::affine_exec_place() take its error
   * path. A non-empty result should be castable to
   * shared_ptr<exec_place::impl>.
   */
  virtual ::std::shared_ptr<void> get_affine_exec_impl() const
  {
    return ::std::shared_ptr<void>{};
  }

  // === Composite-specific (throw by default) ===

  /**
   * @brief Partitioner function of a composite place
   *
   * The base implementation never returns: it always throws.
   *
   * @throws std::logic_error if this is not a composite place
   */
  virtual const partition_fn_t& get_partitioner() const
  {
    throw ::std::logic_error("get_partitioner() called on non-composite data_place");
  }
};
} // end namespace cuda::experimental::stf