forked from XpuOS/xsched
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrt-xsched-2206.patch
More file actions
76 lines (71 loc) · 2.28 KB
/
trt-xsched-2206.patch
File metadata and controls
76 lines (71 loc) · 2.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5da6ecd..6471619 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -94,6 +94,8 @@ if(${TRITON_ENABLE_NVTX})
add_definitions(-DTRITON_ENABLE_NVTX=1)
endif() # TRITON_ENABLE_NVTX
+find_package(XSched REQUIRED)  # REQUIRED: configuration stops with an error if the XSched package cannot be located
+
#
# Shared library implementing the Triton Backend API
#
@@ -197,6 +199,13 @@ target_link_libraries(
CUDA::cudart
)
+target_link_libraries(  # additional link step that adds the XSched libraries to the backend target
+ triton-tensorrt-backend
+ PRIVATE  # used only to build this target; not propagated to targets that link against it
+ XSched::preempt  # namespaced imported target; presumably provided by find_package(XSched) - confirm
+ XSched::halcuda  # namespaced imported target; presumably provided by find_package(XSched) - confirm
+)
+
#
# Install
diff --git a/src/tensorrt.cc b/src/tensorrt.cc
index a9a023b..cef3c1f 100644
--- a/src/tensorrt.cc
+++ b/src/tensorrt.cc
@@ -45,6 +45,8 @@
#include <set>
#include <thread>
#include <unordered_map>
+#include "xsched/cuda/hal.h"
+#include "xsched/xsched.h"
//
// TensorRT Backend that implements the TRITONBACKEND API.
@@ -2926,6 +2928,15 @@ ModelInstanceState::EvaluateTensorRTContext(
return nullptr;
}
+void SetStreamXQueuePriority(CUstream stream, int priority)  // Wraps `stream` in an XSched queue and hints that queue's priority; returns nothing.
+{
+ HwQueueHandle hwqueue;  // filled in by CudaQueueCreate below
+ CudaQueueCreate(&hwqueue, (CUstream)stream);  // NOTE(review): any returned status is discarded - confirm failures may be ignored; the cast is a no-op because `stream` is already a CUstream
+ XQueueHandle xqueue;  // filled in by XQueueCreate below
+ XQueueCreate(&xqueue, hwqueue, kPreemptLevelDeactivate, kQueueCreateFlagNone);  // builds the XQueue on top of hwqueue; result is likewise unchecked
+ XHintPriority(xqueue, -priority); // Sign is flipped on purpose: in XSched a lower number means lower priority, whereas the callers pass CUDA stream priorities, where a lower number means higher priority
+}  // NOTE(review): hwqueue and xqueue are locals and are not handed back, so callers get no handle from this function to destroy them later
+
TRITONSERVER_Error*
ModelInstanceState::InitStreamsAndEvents()
{
@@ -2961,15 +2972,19 @@ ModelInstanceState::InitStreamsAndEvents()
CreateCudaStream(DeviceId(), cuda_stream_priority_, &stream_));
}
}
+ SetStreamXQueuePriority(stream_, cuda_stream_priority_);
#ifdef TRITON_ENABLE_STATS
RETURN_IF_ERROR(
CreateCudaStream(DeviceId(), cuda_stream_priority_, &signal_stream_));
+ SetStreamXQueuePriority(signal_stream_, cuda_stream_priority_);
#endif // TRITON_ENABLE_STATS
RETURN_IF_ERROR(
CreateCudaStream(DeviceId(), cuda_stream_priority_, &input_copy_stream_));
+ SetStreamXQueuePriority(input_copy_stream_, cuda_stream_priority_);
if (model_state_->SeparateOutputStream()) {
RETURN_IF_ERROR(CreateCudaStream(
DeviceId(), cuda_stream_priority_, &output_copy_stream_));
+ SetStreamXQueuePriority(output_copy_stream_, cuda_stream_priority_);
}
// Create CUDA events associated with the execution states
RETURN_IF_ERROR(InitEventSet(model_state_->BusyWaitEvents()));