|
| 1 | +//==-- p2p_usm_residency.cpp - P2P USM residency test ---------------------==// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | +// |
| 9 | +// Verify that the Level Zero v2 adapter correctly makes USM device memory |
| 10 | +// resident on peer devices when P2P access is enabled. |
| 11 | +// |
| 12 | +// Phase 1: Allocates memory on dev0, fills it with a known pattern, enables |
| 13 | +// P2P access from dev1 to dev0, then uses dev1's queue to copy the data to |
| 14 | +// the host and verifies all values match the fill pattern. |
| 15 | +// |
| 16 | +// Phase 2 (opposite direction): Allocates memory on dev1, fills it with a |
| 17 | +// different pattern, enables P2P access from dev0 to dev1, then uses dev0's |
| 18 | +// queue to copy the data to the host and verifies correctness. |
| 19 | +// |
| 20 | +// Phase 3 (negative): Allocates memory on dev0, enables then disables P2P |
| 21 | +// access from dev1, and verifies that a subsequent device-to-device memcpy |
| 22 | +// via dev1's queue throws an exception. |
| 23 | +// |
| 24 | +// REQUIRES: level_zero && two-or-more-gpu-devices |
| 25 | +// UNSUPPORTED: level_zero_v1_adapter |
| 26 | +// UNSUPPORTED-INTENDED: Test is specific to the Level Zero v2 adapter. |
| 27 | +// |
| 28 | +// RUN: %{build} -o %t.out |
| 29 | +// RUN: env UR_LOADER_USE_LEVEL_ZERO_V2=1 %{run} %t.out |
| 30 | + |
| 31 | +#include <iostream> |
| 32 | +#include <vector> |
| 33 | + |
| 34 | +#include <sycl/detail/core.hpp> |
| 35 | +#include <sycl/platform.hpp> |
| 36 | +#include <sycl/usm.hpp> |
| 37 | + |
| 38 | +using namespace sycl; |
| 39 | + |
| 40 | +// Allocate N ints on srcQueue's device, fill with fillVal, enable P2P so that |
| 41 | +// dstDev can access srcDev's allocations, copy to host via dstQueue, verify |
| 42 | +// all values, then clean up. Returns false on failure. |
| 43 | +static bool testP2PRead(context &ctx, queue &srcQueue, device &srcDev, |
| 44 | + queue &dstQueue, device &dstDev, size_t N, int fillVal, |
| 45 | + const char *label) { |
| 46 | + int *src = sycl::malloc_device<int>(N, srcQueue); |
| 47 | + if (!src) { |
| 48 | + std::cout << label << ": device alloc failed. Skipping.\n"; |
| 49 | + return true; // not a test failure |
| 50 | + } |
| 51 | + srcQueue.fill(src, fillVal, N).wait(); |
| 52 | + |
| 53 | + // Enable P2P: dstDev may now access allocations on srcDev. Under the |
| 54 | + // Level Zero v2 adapter this also makes the srcDev allocation resident |
| 55 | + // on dstDev. |
| 56 | + std::cout << "Enabling P2P: dstDev may now access allocations on srcDev.\n"; |
| 57 | + dstDev.ext_oneapi_enable_peer_access(srcDev); |
| 58 | + |
| 59 | + std::vector<int> result(N, 0); |
| 60 | + dstQueue.memcpy(result.data(), src, N * sizeof(int)).wait(); |
| 61 | + |
| 62 | + std::cout |
| 63 | + << "Disabling P2P: dstDev may no longer access allocations on srcDev.\n"; |
| 64 | + dstDev.ext_oneapi_disable_peer_access(srcDev); |
| 65 | + sycl::free(src, ctx); |
| 66 | + |
| 67 | + for (size_t i = 0; i < N; ++i) { |
| 68 | + if (result[i] != fillVal) { |
| 69 | + std::cout << label << ": FAIL at index " << i << ": got " << result[i] |
| 70 | + << ", expected " << fillVal << "\n"; |
| 71 | + return false; |
| 72 | + } |
| 73 | + } |
| 74 | + std::cout << label << ": OK\n"; |
| 75 | + return true; |
| 76 | +} |
| 77 | + |
| 78 | +// Allocate N ints on srcQueue's device, fill with fillVal, enable P2P, then |
| 79 | +// disable P2P, and verify that a device-to-device memcpy from dstQueue fails |
| 80 | +// (since dstDev should no longer be able to access srcDev's allocations after |
| 81 | +// P2P is disabled). |
| 82 | +static bool testP2PReadFailsAfterDisable(context &ctx, queue &srcQueue, |
| 83 | + device &srcDev, queue &dstQueue, |
| 84 | + device &dstDev, size_t N, int fillVal, |
| 85 | + const char *label) { |
| 86 | + int *src = sycl::malloc_device<int>(N, srcQueue); |
| 87 | + if (!src) { |
| 88 | + std::cout << label << ": device alloc failed (src). Skipping.\n"; |
| 89 | + return true; |
| 90 | + } |
| 91 | + |
| 92 | + int *dst = sycl::malloc_device<int>(N, dstQueue); |
| 93 | + if (!dst) { |
| 94 | + std::cout << label << ": device alloc failed (dst). Skipping.\n"; |
| 95 | + sycl::free(src, ctx); |
| 96 | + return true; |
| 97 | + } |
| 98 | + |
| 99 | + srcQueue.fill(src, fillVal, N).wait(); |
| 100 | + |
| 101 | + // Enable then disable P2P: dstDev should no longer be able to access |
| 102 | + // allocations on srcDev. |
| 103 | + std::cout << "Enabling P2P (temporarily).\n"; |
| 104 | + dstDev.ext_oneapi_enable_peer_access(srcDev); |
| 105 | + std::cout << "Disabling P2P: dstDev should no longer access srcDev.\n"; |
| 106 | + dstDev.ext_oneapi_disable_peer_access(srcDev); |
| 107 | + |
| 108 | + // Attempt a device-to-device memcpy from src (on srcDev) to dst (on dstDev) |
| 109 | + // via dstQueue after P2P has been revoked — this should fail. |
| 110 | + bool gotException = false; |
| 111 | + try { |
| 112 | + dstQueue.memcpy(dst, src, N * sizeof(int)).wait(); |
| 113 | + } catch (sycl::exception &e) { |
| 114 | + std::cout << label << ": memcpy threw exception: " << e.what() << "\n"; |
| 115 | + gotException = true; |
| 116 | + } |
| 117 | + |
| 118 | + sycl::free(dst, ctx); |
| 119 | + sycl::free(src, ctx); |
| 120 | + |
| 121 | + if (!gotException) { |
| 122 | + std::cout << label |
| 123 | + << ": FAIL — device-to-device memcpy succeeded after P2P was " |
| 124 | + "disabled\n"; |
| 125 | + return false; |
| 126 | + } |
| 127 | + std::cout << label << ": OK (memcpy correctly failed after P2P disable)\n"; |
| 128 | + return true; |
| 129 | +} |
| 130 | + |
| 131 | +// Allocate N ints on srcQueue's device, fill with fillVal, and verify that a |
| 132 | +// device-to-device memcpy from dstQueue fails without ever enabling P2P (since |
| 133 | +// dstDev must not access srcDev's allocations when P2P has never been enabled). |
| 134 | +static bool testP2PReadFailsWithoutEnable(context &ctx, queue &srcQueue, |
| 135 | + device &srcDev, queue &dstQueue, |
| 136 | + device &dstDev, size_t N, int fillVal, |
| 137 | + const char *label) { |
| 138 | + (void)srcDev; |
| 139 | + (void)dstDev; |
| 140 | + |
| 141 | + int *src = sycl::malloc_device<int>(N, srcQueue); |
| 142 | + if (!src) { |
| 143 | + std::cout << label << ": device alloc failed (src). Skipping.\n"; |
| 144 | + return true; |
| 145 | + } |
| 146 | + |
| 147 | + int *dst = sycl::malloc_device<int>(N, dstQueue); |
| 148 | + if (!dst) { |
| 149 | + std::cout << label << ": device alloc failed (dst). Skipping.\n"; |
| 150 | + sycl::free(src, ctx); |
| 151 | + return true; |
| 152 | + } |
| 153 | + |
| 154 | + srcQueue.fill(src, fillVal, N).wait(); |
| 155 | + |
| 156 | + // Attempt a device-to-device memcpy without ever enabling P2P — must fail. |
| 157 | + bool gotException = false; |
| 158 | + try { |
| 159 | + dstQueue.memcpy(dst, src, N * sizeof(int)).wait(); |
| 160 | + } catch (sycl::exception &e) { |
| 161 | + std::cout << label << ": memcpy threw exception: " << e.what() << "\n"; |
| 162 | + gotException = true; |
| 163 | + } |
| 164 | + |
| 165 | + sycl::free(dst, ctx); |
| 166 | + sycl::free(src, ctx); |
| 167 | + |
| 168 | + if (!gotException) { |
| 169 | + std::cout << label |
| 170 | + << ": FAIL — device-to-device memcpy succeeded without P2P\n"; |
| 171 | + return false; |
| 172 | + } |
| 173 | + std::cout << label << ": OK (memcpy correctly failed without P2P)\n"; |
| 174 | + return true; |
| 175 | +} |
| 176 | + |
| 177 | +// Verify the transition from blocked to permitted using the same allocation: |
| 178 | +// first attempt a device-to-device memcpy from dstQueue without P2P (must |
| 179 | +// fail), then enable P2P and retry the copy (must succeed with correct data). |
| 180 | +static bool testP2PReadFailsThenSucceedsAfterEnable( |
| 181 | + context &ctx, queue &srcQueue, device &srcDev, queue &dstQueue, |
| 182 | + device &dstDev, size_t N, int fillVal, const char *label) { |
| 183 | + int *src = sycl::malloc_device<int>(N, srcQueue); |
| 184 | + if (!src) { |
| 185 | + std::cout << label << ": device alloc failed (src). Skipping.\n"; |
| 186 | + return true; |
| 187 | + } |
| 188 | + |
| 189 | + int *dst = sycl::malloc_device<int>(N, dstQueue); |
| 190 | + if (!dst) { |
| 191 | + std::cout << label << ": device alloc failed (dst). Skipping.\n"; |
| 192 | + sycl::free(src, ctx); |
| 193 | + return true; |
| 194 | + } |
| 195 | + |
| 196 | + srcQueue.fill(src, fillVal, N).wait(); |
| 197 | + |
| 198 | + // Without P2P the copy must fail. |
| 199 | + bool gotException = false; |
| 200 | + try { |
| 201 | + dstQueue.memcpy(dst, src, N * sizeof(int)).wait(); |
| 202 | + } catch (sycl::exception &e) { |
| 203 | + std::cout << label << ": first memcpy (no P2P) threw: " << e.what() << "\n"; |
| 204 | + gotException = true; |
| 205 | + } |
| 206 | + |
| 207 | + if (!gotException) { |
| 208 | + std::cout << label << ": FAIL — first memcpy succeeded without P2P\n"; |
| 209 | + sycl::free(dst, ctx); |
| 210 | + sycl::free(src, ctx); |
| 211 | + return false; |
| 212 | + } |
| 213 | + |
| 214 | + // Enable P2P: dstDev may now access allocations on srcDev. |
| 215 | + std::cout << label << ": enabling P2P.\n"; |
| 216 | + dstDev.ext_oneapi_enable_peer_access(srcDev); |
| 217 | + |
| 218 | + // Retry — must succeed now. |
| 219 | + bool copyOk = true; |
| 220 | + std::vector<int> result(N, 0); |
| 221 | + try { |
| 222 | + dstQueue.memcpy(dst, src, N * sizeof(int)).wait(); |
| 223 | + // Read back to host for verification. |
| 224 | + dstQueue.memcpy(result.data(), dst, N * sizeof(int)).wait(); |
| 225 | + } catch (sycl::exception &e) { |
| 226 | + std::cout << label << ": second memcpy (P2P enabled) threw: " << e.what() |
| 227 | + << "\n"; |
| 228 | + copyOk = false; |
| 229 | + } |
| 230 | + |
| 231 | + std::cout << label << ": disabling P2P.\n"; |
| 232 | + dstDev.ext_oneapi_disable_peer_access(srcDev); |
| 233 | + sycl::free(dst, ctx); |
| 234 | + sycl::free(src, ctx); |
| 235 | + |
| 236 | + if (!copyOk) |
| 237 | + return false; |
| 238 | + |
| 239 | + for (size_t i = 0; i < N; ++i) { |
| 240 | + if (result[i] != fillVal) { |
| 241 | + std::cout << label << ": FAIL at index " << i << ": got " << result[i] |
| 242 | + << ", expected " << fillVal << "\n"; |
| 243 | + return false; |
| 244 | + } |
| 245 | + } |
| 246 | + std::cout << label << ": OK (failed without P2P, succeeded after enable)\n"; |
| 247 | + return true; |
| 248 | +} |
| 249 | + |
| 250 | +int main() { |
| 251 | + // Find a platform with at least two GPU devices. |
| 252 | + std::vector<device> gpus; |
| 253 | + for (auto &plat : platform::get_platforms()) { |
| 254 | + gpus = plat.get_devices(info::device_type::gpu); |
| 255 | + if (gpus.size() >= 2) |
| 256 | + break; |
| 257 | + } |
| 258 | + |
| 259 | + if (gpus.size() < 2) { |
| 260 | + std::cout << "Test requires at least two GPU devices on the same platform. " |
| 261 | + "Skipping.\n"; |
| 262 | + return 0; |
| 263 | + } |
| 264 | + |
| 265 | + device &dev0 = gpus[0]; |
| 266 | + device &dev1 = gpus[1]; |
| 267 | + |
| 268 | + std::cout << "Device 0: " << dev0.get_info<info::device::name>() << "\n"; |
| 269 | + std::cout << "Device 1: " << dev1.get_info<info::device::name>() << "\n"; |
| 270 | + |
| 271 | + // Both devices share a single context for cross-device USM. |
| 272 | + context ctx({dev0, dev1}); |
| 273 | + queue q0(ctx, dev0); |
| 274 | + queue q1(ctx, dev1); |
| 275 | + |
| 276 | + // Allocation size must exceed the disjoint pool's MaxPoolableSize (4 MB for |
| 277 | + // device memory) so that the allocation goes directly to the memory provider |
| 278 | + // where residency is established. |
| 279 | + constexpr size_t N = 2 * 1024 * 1024; // 2M ints = 8 MB |
| 280 | + |
| 281 | + // Phase 1: dev1 reads dev0's memory (P2P: dev1 -> dev0). |
| 282 | + std::cout << "Phase 1: dev1 reads dev0's memory (P2P: dev1 -> dev0).\n"; |
| 283 | + if (!dev1.ext_oneapi_can_access_peer( |
| 284 | + dev0, ext::oneapi::peer_access::access_supported)) { |
| 285 | + std::cout << "No hardware P2P support (dev1->dev0). Skipping.\n"; |
| 286 | + return 0; |
| 287 | + } |
| 288 | + if (!testP2PRead(ctx, q0, dev0, q1, dev1, N, 0x42, |
| 289 | + "Phase 1 (dev1 reads dev0)")) |
| 290 | + return 1; |
| 291 | + |
| 292 | + // Phase 2 (opposite): dev0 reads dev1's memory (P2P: dev0 -> dev1). |
| 293 | + std::cout |
| 294 | + << "Phase 2 (opposite): dev0 reads dev1's memory (P2P: dev0 -> dev1).\n"; |
| 295 | + if (!dev0.ext_oneapi_can_access_peer( |
| 296 | + dev1, ext::oneapi::peer_access::access_supported)) { |
| 297 | + std::cout << "No hardware P2P support (dev0->dev1). Skipping phase 2.\n"; |
| 298 | + std::cout << "PASS\n"; |
| 299 | + return 0; |
| 300 | + } |
| 301 | + if (!testP2PRead(ctx, q1, dev1, q0, dev0, N, 0x55, |
| 302 | + "Phase 2 (dev0 reads dev1)")) |
| 303 | + return 1; |
| 304 | + |
| 305 | + // Phase 3: verify that memcpy fails after P2P is disabled. |
| 306 | + std::cout << "Phase 3: verify memcpy fails after P2P is disabled.\n"; |
| 307 | + if (!testP2PReadFailsAfterDisable(ctx, q0, dev0, q1, dev1, N, 0x77, |
| 308 | + "Phase 3 (dev1 reads dev0 after disable)")) |
| 309 | + return 1; |
| 310 | + |
| 311 | + // Phase 4: verify that memcpy fails without ever enabling P2P. |
| 312 | + std::cout << "Phase 4: verify memcpy fails without ever enabling P2P.\n"; |
| 313 | + if (!testP2PReadFailsWithoutEnable(ctx, q0, dev0, q1, dev1, N, 0x99, |
| 314 | + "Phase 4 (dev1 reads dev0 without P2P)")) |
| 315 | + return 1; |
| 316 | + |
| 317 | + // Phase 5: verify the transition from blocked to permitted. |
| 318 | + std::cout << "Phase 5: verify memcpy fails without P2P then succeeds after " |
| 319 | + "enabling it.\n"; |
| 320 | + if (!testP2PReadFailsThenSucceedsAfterEnable( |
| 321 | + ctx, q0, dev0, q1, dev1, N, 0xAA, |
| 322 | + "Phase 5 (dev1 reads dev0: fail then succeed)")) |
| 323 | + return 1; |
| 324 | + |
| 325 | + std::cout << "PASS\n"; |
| 326 | + return 0; |
| 327 | +} |
0 commit comments