Skip to content

Commit 34b1504

Browse files
authored
[SYCL][UR][L0] Restrict USM residency to peers with enabled P2P access (intel#21889)
Implement P2P access control for the Level Zero v2 adapter so that USM device memory is made resident on a peer device only when peer access has been explicitly enabled by the user. - platform.cpp: Populate a per-device peers[] table during device-cache initialisation using zeDeviceGetP2PProperties + zeDeviceCanAccessPeer. Each connectable pair starts as DISABLED; pairs without hardware connectivity are NO_CONNECTION. - device.hpp/cpp: Introduce PeerStatus enum (NO_CONNECTION / DISABLED / ENABLED) and a peers[] vector on ur_device_handle_t_. - usm_p2p.cpp (v2, new): Implement P2P enable/disable/query. Enable/disable update peerDevice->peers[commandDevice->Id] and call Context->changeResidentDevice() for every context on the platform. - context.cpp/hpp: Replace the static p2pAccessDevices table and getP2PDevices() with two dynamic helpers that consult the live peers[] table: getDevicesWhoseAllocationsCanBeAccessedFrom() and getDevicesWhichCanAccessAllocationsPresentOn(). Add changeResidentDevice() to propagate P2P changes to all USM pools. - usm.cpp/hpp: Update makeProvider() to use getDevicesWhichCanAccessAllocationsPresentOn() so providers only pin memory on peers with actively enabled access. Add changeResidentDevice() to ur_usm_pool_handle_t_ which calls umfLevelZeroMemoryProviderResidentDeviceChange() for the affected peer. - command_list_manager.cpp: Add checkP2PAccess() helper and call it from appendUSMMemcpy() to reject copies between devices without enabled P2P, returning UR_RESULT_ERROR_INVALID_OPERATION. - ur_pool_manager.hpp: Add supportsResidentDevices() and forEachPoolWithDesc() to pool_descriptor / pool_manager. - sycl/source/device.cpp: Extract p2pAccessHelper<> template shared by ext_oneapi_enable_peer_access and ext_oneapi_disable_peer_access to avoid duplication and enforce the same-platform constraint. Co-authored-by: Łukasz Ślusarczyk <lukasz.slusarczyk at intel.com> Co-authored-by: Lukasz Dorau <lukasz.dorau at intel.com> --------- Signed-off-by: Lukasz Dorau <lukasz.dorau@intel.com>
1 parent eca4c9f commit 34b1504

24 files changed

Lines changed: 1444 additions & 149 deletions

sycl/source/device.cpp

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -209,22 +209,32 @@ ur_native_handle_t device::getNative() const { return impl->getNative(); }
209209

210210
bool device::has(aspect Aspect) const { return impl->has(Aspect); }
211211

212+
template <detail::UrApiKind ApiKind>
213+
static void p2pAccessHelper(const device &self, const device &peer,
214+
ur_device_handle_t Device, ur_device_handle_t Peer,
215+
detail::adapter_impl &Adapter,
216+
const char *errorMsg) {
217+
if (Device == Peer)
218+
return;
219+
220+
if (peer.get_platform() != self.get_platform())
221+
throw exception(errc::invalid, errorMsg);
222+
223+
Adapter.call<ApiKind>(Device, Peer);
224+
}
225+
212226
void device::ext_oneapi_enable_peer_access(const device &peer) {
213-
ur_device_handle_t Device = impl->getHandleRef();
214-
ur_device_handle_t Peer = peer.impl->getHandleRef();
215-
if (Device != Peer) {
216-
detail::adapter_impl &Adapter = impl->getAdapter();
217-
Adapter.call<detail::UrApiKind::urUsmP2PEnablePeerAccessExp>(Device, Peer);
218-
}
227+
p2pAccessHelper<detail::UrApiKind::urUsmP2PEnablePeerAccessExp>(
228+
*this, peer, impl->getHandleRef(), peer.impl->getHandleRef(),
229+
impl->getAdapter(),
230+
"Cannot enable peer access between different platforms");
219231
}
220232

221233
void device::ext_oneapi_disable_peer_access(const device &peer) {
222-
ur_device_handle_t Device = impl->getHandleRef();
223-
ur_device_handle_t Peer = peer.impl->getHandleRef();
224-
if (Device != Peer) {
225-
detail::adapter_impl &Adapter = impl->getAdapter();
226-
Adapter.call<detail::UrApiKind::urUsmP2PDisablePeerAccessExp>(Device, Peer);
227-
}
234+
p2pAccessHelper<detail::UrApiKind::urUsmP2PDisablePeerAccessExp>(
235+
*this, peer, impl->getHandleRef(), peer.impl->getHandleRef(),
236+
impl->getAdapter(),
237+
"Cannot disable peer access between different platforms");
228238
}
229239

230240
bool device::ext_oneapi_can_access_peer(const device &peer,

sycl/test-e2e/USM/P2P/p2p_copy.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,9 @@ int main() {
3737
return 0;
3838
}
3939

40-
// Enables Devs[0] to access Devs[1] memory.
41-
Devs[0].ext_oneapi_enable_peer_access(Devs[1]);
40+
// Enables Devs[1] to access Devs[0] memory (Devs[1]'s queue will read
41+
// from arr0 which lives on Devs[0]).
42+
Devs[1].ext_oneapi_enable_peer_access(Devs[0]);
4243

4344
std::vector<int> input(N);
4445
std::iota(input.begin(), input.end(), 0);
@@ -53,6 +54,8 @@ int main() {
5354
int out[N];
5455
Queues[1].copy(arr1, out, N).wait();
5556

57+
// Disable P2P before releasing the allocation it was guarding.
58+
Devs[1].ext_oneapi_disable_peer_access(Devs[0]);
5659
sycl::free(arr0, Queues[0]);
5760
sycl::free(arr1, Queues[1]);
5861

Lines changed: 327 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,327 @@
1+
//==-- p2p_usm_residency.cpp - P2P USM residency test ---------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Verify that the Level Zero v2 adapter correctly makes USM device memory
10+
// resident on peer devices when P2P access is enabled.
11+
//
12+
// Phase 1: Allocates memory on dev0, fills it with a known pattern, enables
13+
// P2P access from dev1 to dev0, then uses dev1's queue to copy the data to
14+
// the host and verifies all values match the fill pattern.
15+
//
16+
// Phase 2 (opposite direction): Allocates memory on dev1, fills it with a
17+
// different pattern, enables P2P access from dev0 to dev1, then uses dev0's
18+
// queue to copy the data to the host and verifies correctness.
19+
//
20+
// Phase 3 (negative): Allocates memory on dev0, enables then disables P2P
21+
// access from dev1, and verifies that a subsequent device-to-device memcpy
22+
// via dev1's queue throws an exception.
23+
//
24+
// REQUIRES: level_zero && two-or-more-gpu-devices
25+
// UNSUPPORTED: level_zero_v1_adapter
26+
// UNSUPPORTED-INTENDED: Test is specific to the Level Zero v2 adapter.
27+
//
28+
// RUN: %{build} -o %t.out
29+
// RUN: env UR_LOADER_USE_LEVEL_ZERO_V2=1 %{run} %t.out
30+
31+
#include <iostream>
32+
#include <vector>
33+
34+
#include <sycl/detail/core.hpp>
35+
#include <sycl/platform.hpp>
36+
#include <sycl/usm.hpp>
37+
38+
using namespace sycl;
39+
40+
// Allocate N ints on srcQueue's device, fill with fillVal, enable P2P so that
41+
// dstDev can access srcDev's allocations, copy to host via dstQueue, verify
42+
// all values, then clean up. Returns false on failure.
43+
static bool testP2PRead(context &ctx, queue &srcQueue, device &srcDev,
44+
queue &dstQueue, device &dstDev, size_t N, int fillVal,
45+
const char *label) {
46+
int *src = sycl::malloc_device<int>(N, srcQueue);
47+
if (!src) {
48+
std::cout << label << ": device alloc failed. Skipping.\n";
49+
return true; // not a test failure
50+
}
51+
srcQueue.fill(src, fillVal, N).wait();
52+
53+
// Enable P2P: dstDev may now access allocations on srcDev. Under the
54+
// Level Zero v2 adapter this also makes the srcDev allocation resident
55+
// on dstDev.
56+
std::cout << "Enabling P2P: dstDev may now access allocations on srcDev.\n";
57+
dstDev.ext_oneapi_enable_peer_access(srcDev);
58+
59+
std::vector<int> result(N, 0);
60+
dstQueue.memcpy(result.data(), src, N * sizeof(int)).wait();
61+
62+
std::cout
63+
<< "Disabling P2P: dstDev may no longer access allocations on srcDev.\n";
64+
dstDev.ext_oneapi_disable_peer_access(srcDev);
65+
sycl::free(src, ctx);
66+
67+
for (size_t i = 0; i < N; ++i) {
68+
if (result[i] != fillVal) {
69+
std::cout << label << ": FAIL at index " << i << ": got " << result[i]
70+
<< ", expected " << fillVal << "\n";
71+
return false;
72+
}
73+
}
74+
std::cout << label << ": OK\n";
75+
return true;
76+
}
77+
78+
// Allocate N ints on srcQueue's device, fill with fillVal, enable P2P, then
79+
// disable P2P, and verify that a device-to-device memcpy from dstQueue fails
80+
// (since dstDev should no longer be able to access srcDev's allocations after
81+
// P2P is disabled).
82+
static bool testP2PReadFailsAfterDisable(context &ctx, queue &srcQueue,
83+
device &srcDev, queue &dstQueue,
84+
device &dstDev, size_t N, int fillVal,
85+
const char *label) {
86+
int *src = sycl::malloc_device<int>(N, srcQueue);
87+
if (!src) {
88+
std::cout << label << ": device alloc failed (src). Skipping.\n";
89+
return true;
90+
}
91+
92+
int *dst = sycl::malloc_device<int>(N, dstQueue);
93+
if (!dst) {
94+
std::cout << label << ": device alloc failed (dst). Skipping.\n";
95+
sycl::free(src, ctx);
96+
return true;
97+
}
98+
99+
srcQueue.fill(src, fillVal, N).wait();
100+
101+
// Enable then disable P2P: dstDev should no longer be able to access
102+
// allocations on srcDev.
103+
std::cout << "Enabling P2P (temporarily).\n";
104+
dstDev.ext_oneapi_enable_peer_access(srcDev);
105+
std::cout << "Disabling P2P: dstDev should no longer access srcDev.\n";
106+
dstDev.ext_oneapi_disable_peer_access(srcDev);
107+
108+
// Attempt a device-to-device memcpy from src (on srcDev) to dst (on dstDev)
109+
// via dstQueue after P2P has been revoked — this should fail.
110+
bool gotException = false;
111+
try {
112+
dstQueue.memcpy(dst, src, N * sizeof(int)).wait();
113+
} catch (sycl::exception &e) {
114+
std::cout << label << ": memcpy threw exception: " << e.what() << "\n";
115+
gotException = true;
116+
}
117+
118+
sycl::free(dst, ctx);
119+
sycl::free(src, ctx);
120+
121+
if (!gotException) {
122+
std::cout << label
123+
<< ": FAIL — device-to-device memcpy succeeded after P2P was "
124+
"disabled\n";
125+
return false;
126+
}
127+
std::cout << label << ": OK (memcpy correctly failed after P2P disable)\n";
128+
return true;
129+
}
130+
131+
// Allocate N ints on srcQueue's device, fill with fillVal, and verify that a
132+
// device-to-device memcpy from dstQueue fails without ever enabling P2P (since
133+
// dstDev must not access srcDev's allocations when P2P has never been enabled).
134+
static bool testP2PReadFailsWithoutEnable(context &ctx, queue &srcQueue,
135+
device &srcDev, queue &dstQueue,
136+
device &dstDev, size_t N, int fillVal,
137+
const char *label) {
138+
(void)srcDev;
139+
(void)dstDev;
140+
141+
int *src = sycl::malloc_device<int>(N, srcQueue);
142+
if (!src) {
143+
std::cout << label << ": device alloc failed (src). Skipping.\n";
144+
return true;
145+
}
146+
147+
int *dst = sycl::malloc_device<int>(N, dstQueue);
148+
if (!dst) {
149+
std::cout << label << ": device alloc failed (dst). Skipping.\n";
150+
sycl::free(src, ctx);
151+
return true;
152+
}
153+
154+
srcQueue.fill(src, fillVal, N).wait();
155+
156+
// Attempt a device-to-device memcpy without ever enabling P2P — must fail.
157+
bool gotException = false;
158+
try {
159+
dstQueue.memcpy(dst, src, N * sizeof(int)).wait();
160+
} catch (sycl::exception &e) {
161+
std::cout << label << ": memcpy threw exception: " << e.what() << "\n";
162+
gotException = true;
163+
}
164+
165+
sycl::free(dst, ctx);
166+
sycl::free(src, ctx);
167+
168+
if (!gotException) {
169+
std::cout << label
170+
<< ": FAIL — device-to-device memcpy succeeded without P2P\n";
171+
return false;
172+
}
173+
std::cout << label << ": OK (memcpy correctly failed without P2P)\n";
174+
return true;
175+
}
176+
177+
// Verify the transition from blocked to permitted using the same allocation:
178+
// first attempt a device-to-device memcpy from dstQueue without P2P (must
179+
// fail), then enable P2P and retry the copy (must succeed with correct data).
180+
static bool testP2PReadFailsThenSucceedsAfterEnable(
181+
context &ctx, queue &srcQueue, device &srcDev, queue &dstQueue,
182+
device &dstDev, size_t N, int fillVal, const char *label) {
183+
int *src = sycl::malloc_device<int>(N, srcQueue);
184+
if (!src) {
185+
std::cout << label << ": device alloc failed (src). Skipping.\n";
186+
return true;
187+
}
188+
189+
int *dst = sycl::malloc_device<int>(N, dstQueue);
190+
if (!dst) {
191+
std::cout << label << ": device alloc failed (dst). Skipping.\n";
192+
sycl::free(src, ctx);
193+
return true;
194+
}
195+
196+
srcQueue.fill(src, fillVal, N).wait();
197+
198+
// Without P2P the copy must fail.
199+
bool gotException = false;
200+
try {
201+
dstQueue.memcpy(dst, src, N * sizeof(int)).wait();
202+
} catch (sycl::exception &e) {
203+
std::cout << label << ": first memcpy (no P2P) threw: " << e.what() << "\n";
204+
gotException = true;
205+
}
206+
207+
if (!gotException) {
208+
std::cout << label << ": FAIL — first memcpy succeeded without P2P\n";
209+
sycl::free(dst, ctx);
210+
sycl::free(src, ctx);
211+
return false;
212+
}
213+
214+
// Enable P2P: dstDev may now access allocations on srcDev.
215+
std::cout << label << ": enabling P2P.\n";
216+
dstDev.ext_oneapi_enable_peer_access(srcDev);
217+
218+
// Retry — must succeed now.
219+
bool copyOk = true;
220+
std::vector<int> result(N, 0);
221+
try {
222+
dstQueue.memcpy(dst, src, N * sizeof(int)).wait();
223+
// Read back to host for verification.
224+
dstQueue.memcpy(result.data(), dst, N * sizeof(int)).wait();
225+
} catch (sycl::exception &e) {
226+
std::cout << label << ": second memcpy (P2P enabled) threw: " << e.what()
227+
<< "\n";
228+
copyOk = false;
229+
}
230+
231+
std::cout << label << ": disabling P2P.\n";
232+
dstDev.ext_oneapi_disable_peer_access(srcDev);
233+
sycl::free(dst, ctx);
234+
sycl::free(src, ctx);
235+
236+
if (!copyOk)
237+
return false;
238+
239+
for (size_t i = 0; i < N; ++i) {
240+
if (result[i] != fillVal) {
241+
std::cout << label << ": FAIL at index " << i << ": got " << result[i]
242+
<< ", expected " << fillVal << "\n";
243+
return false;
244+
}
245+
}
246+
std::cout << label << ": OK (failed without P2P, succeeded after enable)\n";
247+
return true;
248+
}
249+
250+
int main() {
251+
// Find a platform with at least two GPU devices.
252+
std::vector<device> gpus;
253+
for (auto &plat : platform::get_platforms()) {
254+
gpus = plat.get_devices(info::device_type::gpu);
255+
if (gpus.size() >= 2)
256+
break;
257+
}
258+
259+
if (gpus.size() < 2) {
260+
std::cout << "Test requires at least two GPU devices on the same platform. "
261+
"Skipping.\n";
262+
return 0;
263+
}
264+
265+
device &dev0 = gpus[0];
266+
device &dev1 = gpus[1];
267+
268+
std::cout << "Device 0: " << dev0.get_info<info::device::name>() << "\n";
269+
std::cout << "Device 1: " << dev1.get_info<info::device::name>() << "\n";
270+
271+
// Both devices share a single context for cross-device USM.
272+
context ctx({dev0, dev1});
273+
queue q0(ctx, dev0);
274+
queue q1(ctx, dev1);
275+
276+
// Allocation size must exceed the disjoint pool's MaxPoolableSize (4 MB for
277+
// device memory) so that the allocation goes directly to the memory provider
278+
// where residency is established.
279+
constexpr size_t N = 2 * 1024 * 1024; // 2M ints = 8 MB
280+
281+
// Phase 1: dev1 reads dev0's memory (P2P: dev1 -> dev0).
282+
std::cout << "Phase 1: dev1 reads dev0's memory (P2P: dev1 -> dev0).\n";
283+
if (!dev1.ext_oneapi_can_access_peer(
284+
dev0, ext::oneapi::peer_access::access_supported)) {
285+
std::cout << "No hardware P2P support (dev1->dev0). Skipping.\n";
286+
return 0;
287+
}
288+
if (!testP2PRead(ctx, q0, dev0, q1, dev1, N, 0x42,
289+
"Phase 1 (dev1 reads dev0)"))
290+
return 1;
291+
292+
// Phase 2 (opposite): dev0 reads dev1's memory (P2P: dev0 -> dev1).
293+
std::cout
294+
<< "Phase 2 (opposite): dev0 reads dev1's memory (P2P: dev0 -> dev1).\n";
295+
if (!dev0.ext_oneapi_can_access_peer(
296+
dev1, ext::oneapi::peer_access::access_supported)) {
297+
std::cout << "No hardware P2P support (dev0->dev1). Skipping phase 2.\n";
298+
std::cout << "PASS\n";
299+
return 0;
300+
}
301+
if (!testP2PRead(ctx, q1, dev1, q0, dev0, N, 0x55,
302+
"Phase 2 (dev0 reads dev1)"))
303+
return 1;
304+
305+
// Phase 3: verify that memcpy fails after P2P is disabled.
306+
std::cout << "Phase 3: verify memcpy fails after P2P is disabled.\n";
307+
if (!testP2PReadFailsAfterDisable(ctx, q0, dev0, q1, dev1, N, 0x77,
308+
"Phase 3 (dev1 reads dev0 after disable)"))
309+
return 1;
310+
311+
// Phase 4: verify that memcpy fails without ever enabling P2P.
312+
std::cout << "Phase 4: verify memcpy fails without ever enabling P2P.\n";
313+
if (!testP2PReadFailsWithoutEnable(ctx, q0, dev0, q1, dev1, N, 0x99,
314+
"Phase 4 (dev1 reads dev0 without P2P)"))
315+
return 1;
316+
317+
// Phase 5: verify the transition from blocked to permitted.
318+
std::cout << "Phase 5: verify memcpy fails without P2P then succeeds after "
319+
"enabling it.\n";
320+
if (!testP2PReadFailsThenSucceedsAfterEnable(
321+
ctx, q0, dev0, q1, dev1, N, 0xAA,
322+
"Phase 5 (dev1 reads dev0: fail then succeed)"))
323+
return 1;
324+
325+
std::cout << "PASS\n";
326+
return 0;
327+
}

0 commit comments

Comments
 (0)