Skip to content

Commit fe5035a

Browse files
iassiouriassiourclaude
authored and committed
Port graph segment scheduling changes to 7.2 (#5520)
## Motivation Cherry-pick of the graph segment scheduling foundation commit from develop (#1372) to `release/rocm-rel-7.2`. <!-- Explain the purpose of this PR and the goals it aims to achieve. --> ## Technical Details The segment scheduling architecture partitions graph nodes into segments based on execution paths, assigns segments to parallel streams by dependency level, and dispatches them in batches. <!-- Explain the changes along with any relevant GitHub links. --> ## JIRA ID ROC-23586 <!-- If applicable, mention the JIRA ID resolved by this PR (Example: Resolves SWDEV-12345). --> <!-- Do not post any JIRA links here. --> ## Test Plan 1) Run the HIP graph tests with this change and check for regressions. 2) Confirm that the change improves the hipGraphLaunch latency in the benchmark used in ROC-23586. <!-- Explain any relevant testing done to verify this PR. --> ## Test Result 1) Graph tests pass. 2) Improves hipGraphLaunch latency by around 10x. <!-- Briefly summarize test outcomes. --> ## Submission Checklist - [x] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests. --------- Co-authored-by: iassiour <iassiour@dell-tester1.aus-b200.dcgpu> Co-authored-by: Claude Opus 4 (1M context) <noreply@anthropic.com>
1 parent 4aacd01 commit fe5035a

10 files changed

Lines changed: 1534 additions & 487 deletions

File tree

hipamd/src/hip_graph.cpp

Lines changed: 60 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -1418,9 +1418,9 @@ hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, hipGraph
14181418
if (status != hipSuccess) {
14191419
HIP_RETURN(status);
14201420
}
1421-
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
1422-
status = reinterpret_cast<hip::GraphExec*>(hGraphExec)
1423-
->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
1421+
auto graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
1422+
if (graphExec->IsSegmentSchedulingEnabled()) {
1423+
status = graphExec->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
14241424
}
14251425
HIP_RETURN(status);
14261426
}
@@ -1516,12 +1516,14 @@ hipError_t ihipGraphInstantiate(hip::GraphExec** pGraphExec, hip::Graph* graph,
15161516
return hipErrorOutOfMemory;
15171517
}
15181518
graph->clone(*pGraphExec, true);
1519-
(*pGraphExec)->ScheduleNodes();
1520-
if (false == (*pGraphExec)->TopologicalOrder()) {
1519+
1520+
hipError_t scheduleStatus = (*pGraphExec)->ScheduleNodes();
1521+
if (scheduleStatus != hipSuccess) {
15211522
delete *pGraphExec;
1522-
return hipErrorInvalidValue;
1523+
*pGraphExec = nullptr;
1524+
return scheduleStatus;
15231525
}
1524-
graph->SetGraphInstantiated(true);
1526+
15251527
if (DEBUG_HIP_GRAPH_DOT_PRINT) {
15261528
static int i = 1;
15271529
std::string filename =
@@ -1531,7 +1533,10 @@ hipError_t ihipGraphInstantiate(hip::GraphExec** pGraphExec, hip::Graph* graph,
15311533
LogPrintfInfo("[hipGraph] graph dump:%s", filename.c_str());
15321534
}
15331535
}
1534-
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
1536+
1537+
graph->SetGraphInstantiated(true);
1538+
1539+
if ((*pGraphExec)->IsSegmentSchedulingEnabled()) {
15351540
(*pGraphExec)->SetKernelArgManager(new hip::GraphKernelArgManager());
15361541
}
15371542
return (*pGraphExec)->Init();
@@ -1548,7 +1553,7 @@ hipError_t hipGraphInstantiate(hipGraphExec_t* pGraphExec, hipGraph_t graph,
15481553
if (status == hipSuccess) {
15491554
*pGraphExec = reinterpret_cast<hipGraphExec_t>(ge);
15501555
}
1551-
HIP_RETURN(status);
1556+
HIP_RETURN(status, ReturnPtrValue(pGraphExec));
15521557
}
15531558

15541559
hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t* pGraphExec, hipGraph_t graph,
@@ -1567,7 +1572,7 @@ hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t* pGraphExec, hipGraph_t g
15671572
hip::GraphExec* ge;
15681573
hipError_t status = ihipGraphInstantiate(&ge, reinterpret_cast<hip::Graph*>(graph), flags);
15691574
*pGraphExec = reinterpret_cast<hipGraphExec_t>(ge);
1570-
HIP_RETURN(status);
1575+
HIP_RETURN(status, ReturnPtrValue(pGraphExec));
15711576
}
15721577

15731578
hipError_t hipGraphInstantiateWithParams(hipGraphExec_t* pGraphExec, hipGraph_t graph,
@@ -1602,7 +1607,7 @@ hipError_t hipGraphInstantiateWithParams(hipGraphExec_t* pGraphExec, hipGraph_t
16021607
HIP_RETURN(status);
16031608
}
16041609

1605-
HIP_RETURN(hipSuccess);
1610+
HIP_RETURN(hipSuccess, ReturnPtrValue(pGraphExec));
16061611
}
16071612

16081613
hipError_t hipGraphExecDestroy(hipGraphExec_t pGraphExec) {
@@ -1813,9 +1818,9 @@ hipError_t hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNo
18131818
if (status != hipSuccess) {
18141819
HIP_RETURN(status);
18151820
}
1816-
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
1817-
status = reinterpret_cast<hip::GraphExec*>(hGraphExec)
1818-
->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
1821+
auto graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
1822+
if (graphExec->IsSegmentSchedulingEnabled()) {
1823+
status = graphExec->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
18191824
}
18201825
HIP_RETURN(status);
18211826
}
@@ -1864,9 +1869,9 @@ hipError_t hipGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNo
18641869
if (status != hipSuccess) {
18651870
HIP_RETURN(status);
18661871
}
1867-
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
1868-
status = reinterpret_cast<hip::GraphExec*>(hGraphExec)
1869-
->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
1872+
auto graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
1873+
if (graphExec->IsSegmentSchedulingEnabled()) {
1874+
status = graphExec->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
18701875
}
18711876
HIP_RETURN(status);
18721877
}
@@ -1924,9 +1929,9 @@ hipError_t hipGraphExecKernelNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNo
19241929
if (status != hipSuccess) {
19251930
HIP_RETURN(status);
19261931
}
1927-
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
1928-
status = reinterpret_cast<hip::GraphExec*>(hGraphExec)
1929-
->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
1932+
auto graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
1933+
if (graphExec->IsSegmentSchedulingEnabled()) {
1934+
status = graphExec->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
19301935
}
19311936
HIP_RETURN(status);
19321937
}
@@ -2001,13 +2006,18 @@ hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGra
20012006
if (status != hipSuccess) {
20022007
return status;
20032008
}
2004-
if (reinterpret_cast<hip::ChildGraphNode*>(clonedNode)->GetGraphCaptureStatus()) {
2009+
2010+
hip::ChildGraphNode* childNode = reinterpret_cast<hip::ChildGraphNode*>(clonedNode);
2011+
2012+
// After SetParams updates node parameters in-place, we need to update the cached AQL packets
2013+
auto graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
2014+
if (graphExec->IsSegmentSchedulingEnabled() || childNode->GetGraphCaptureStatus()) {
20052015
std::vector<hip::GraphNode*> childGraphNodes;
2006-
reinterpret_cast<hip::ChildGraphNode*>(clonedNode)->TopologicalOrder(childGraphNodes);
2016+
childNode->TopologicalOrder(childGraphNodes);
20072017
for (std::vector<hip::GraphNode*>::size_type i = 0; i != childGraphNodes.size(); i++) {
20082018
if (childGraphNodes[i]->GraphCaptureEnabled()) {
2009-
status = reinterpret_cast<hip::ChildGraphNode*>(clonedNode)
2010-
->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(childGraphNodes[i]));
2019+
status =
2020+
childNode->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(childGraphNodes[i]));
20112021
if (status != hipSuccess) {
20122022
return status;
20132023
}
@@ -2407,9 +2417,9 @@ hipError_t hipGraphExecMemcpyNodeSetParamsFromSymbol(hipGraphExec_t hGraphExec,
24072417
if (status != hipSuccess) {
24082418
HIP_RETURN(status);
24092419
}
2410-
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
2411-
status = reinterpret_cast<hip::GraphExec*>(hGraphExec)
2412-
->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
2420+
auto graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
2421+
if (graphExec->IsSegmentSchedulingEnabled()) {
2422+
status = graphExec->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
24132423
}
24142424
HIP_RETURN(status);
24152425
}
@@ -2490,9 +2500,9 @@ hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol(hipGraphExec_t hGraphExec, hi
24902500
if (status != hipSuccess) {
24912501
HIP_RETURN(status);
24922502
}
2493-
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
2494-
status = reinterpret_cast<hip::GraphExec*>(hGraphExec)
2495-
->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
2503+
auto graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
2504+
if (graphExec->IsSegmentSchedulingEnabled()) {
2505+
status = graphExec->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(clonedNode));
24962506
}
24972507
HIP_RETURN(status);
24982508
}
@@ -2727,10 +2737,11 @@ hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph,
27272737
*updateResult_out = hipGraphExecUpdateErrorNotSupported;
27282738
}
27292739
HIP_RETURN(hipErrorGraphExecUpdateFailure);
2730-
} else if (DEBUG_CLR_GRAPH_PACKET_CAPTURE && newGraphNodes[i]->GraphCaptureEnabled()) {
2731-
status =
2732-
reinterpret_cast<hip::GraphExec*>(hGraphExec)
2733-
->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(oldGraphExecNodes[i]));
2740+
} else {
2741+
auto graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
2742+
if (graphExec->IsSegmentSchedulingEnabled() && newGraphNodes[i]->GraphCaptureEnabled()) {
2743+
status = graphExec->UpdateAQLPacket(reinterpret_cast<hip::GraphKernelNode*>(oldGraphExecNodes[i]));
2744+
}
27342745
}
27352746
} else {
27362747
*hErrorNode_out = reinterpret_cast<hipGraphNode_t>(newGraphNodes[i]);
@@ -3084,12 +3095,16 @@ hipError_t hipGraphNodeSetEnabled(hipGraphExec_t hGraphExec, hipGraphNode_t hNod
30843095
HIP_RETURN(hipErrorInvalidValue);
30853096
}
30863097
clonedNode->SetEnabled(isEnabled);
3087-
// Update packet batches when node is enabled/disabled
3088-
hipError_t status = graphExec->UpdatePacketBatchesForNodeEnableDisable(clonedNode, isEnabled != 0);
3089-
if (status != hipSuccess) {
3090-
HIP_RETURN(status);
3098+
3099+
hipError_t status = hipSuccess;
3100+
if (graphExec->IsSegmentSchedulingEnabled()) {
3101+
// Update packet batches when node is enabled/disabled
3102+
status = graphExec->UpdatePacketBatchesForNodeEnableDisable(clonedNode, isEnabled != 0);
3103+
if (status != hipSuccess) {
3104+
HIP_RETURN(status);
3105+
}
30913106
}
3092-
HIP_RETURN(hipSuccess);
3107+
HIP_RETURN(status);
30933108
}
30943109

30953110
hipError_t hipGraphNodeGetEnabled(hipGraphExec_t hGraphExec, hipGraphNode_t hNode,
@@ -3442,8 +3457,9 @@ hipError_t hipDrvGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipGrap
34423457
if (status != hipSuccess) {
34433458
HIP_RETURN(status);
34443459
}
3445-
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
3446-
status = reinterpret_cast<hip::GraphExec*>(hGraphExec)->UpdateAQLPacket(clonedNode);
3460+
auto graphExec = reinterpret_cast<hip::GraphExec*>(hGraphExec);
3461+
if (graphExec->IsSegmentSchedulingEnabled()) {
3462+
status = graphExec->UpdateAQLPacket(clonedNode);
34473463
}
34483464
HIP_RETURN(status);
34493465
}
@@ -3565,8 +3581,9 @@ hipError_t hipGraphExecNodeSetParams(hipGraphExec_t graphExec, hipGraphNode_t no
35653581
return status;
35663582
}
35673583

3568-
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
3569-
status = reinterpret_cast<hip::GraphExec*>(graphExec)->UpdateAQLPacket(clonedNode);
3584+
auto graphExecPtr = reinterpret_cast<hip::GraphExec*>(graphExec);
3585+
if (graphExecPtr->IsSegmentSchedulingEnabled()) {
3586+
status = graphExecPtr->UpdateAQLPacket(clonedNode);
35703587
}
35713588
return status;
35723589
}

0 commit comments

Comments
 (0)