Skip to content

Commit f8ae943

Browse files
committed
Merge branch 'develop' into poyenc/integrate-fmha-fwd-v2-v3-apis
2 parents 4464745 + 6cb0bc2 commit f8ae943

File tree

3,340 files changed

+21314
-9299
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

3,340 files changed

+21314
-9299
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ Documentation for Composable Kernel available at [https://rocm.docs.amd.com/proj
66

77
### Added
88
* Added support for bf16 data type to grouped_gemm and grouped_gemm_preshuffle.
9+
* Added Col-Col-Row-Col layout support for aquant mode in blockscale GEMM.
910
* Added support for mixed precision fp8 x bf8 universal GEMM and weight preshuffle GEMM
1011
* Added a compute async pipeline in the CK TILE universal GEMM on gfx950
1112
* Added support for B Tensor type pk_int4_t in the CK TILE weight preshuffle GEMM.

CMakeLists.txt

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2+
# SPDX-License-Identifier: MIT
3+
14
cmake_minimum_required(VERSION 3.14)
25
if(POLICY CMP0140)
36
# policies CMP0140 not known to CMake until 3.25
@@ -39,10 +42,12 @@ option(ENABLE_CLANG_CPP_CHECKS "Enables clang tidy, cppcheck" ON)
3942
option(MIOPEN_REQ_LIBS_ONLY "Build only the MIOpen required libraries" OFF)
4043
option(CK_EXPERIMENTAL_BUILDER "Enable experimental builder" OFF)
4144
option(BUILD_MHA_LIB "Build the static library for flash attention" OFF)
45+
option(FORCE_DISABLE_XDL "Skip compiling XDL specific instances (even if supported GPUs are included in GPU_TARGETS)" OFF)
46+
option(FORCE_DISABLE_WMMA "Skip compiling WMMA specific instances (even if supported GPUs are included in GPU_TARGETS)" OFF)
4247

4348
if(CK_EXPERIMENTAL_BUILDER)
4449
add_definitions(-DCK_EXPERIMENTAL_BUILDER)
45-
include_directories(${PROJECT_SOURCE_DIR}/experimental/builder/include)
50+
include_directories(${PROJECT_SOURCE_DIR}/experimental/builder/include)
4651
endif()
4752

4853
# Usage: for customized Python location cmake -DCK_USE_ALTERNATIVE_PYTHON="/opt/Python-3.8.13/bin/python3.8"
@@ -229,12 +234,12 @@ message(STATUS "Building CK for the following targets: ${SUPPORTED_GPU_TARGETS}"
229234
# Cache SUPPORTED_GPU_TARGETS for debug
230235
set(SUPPORTED_GPU_TARGETS "${SUPPORTED_GPU_TARGETS}" CACHE STRING "List of supported GPU targets")
231236

232-
if (SUPPORTED_GPU_TARGETS MATCHES "gfx9|gfx11|gfx12")
237+
if (SUPPORTED_GPU_TARGETS MATCHES "gfx9|gfx11|gfx12" AND NOT FORCE_DISABLE_XDL)
233238
message(STATUS "Enabling XDL instances")
234239
add_definitions(-DCK_USE_XDL)
235240
set(CK_USE_XDL "ON")
236241
endif()
237-
if (SUPPORTED_GPU_TARGETS MATCHES "gfx94" OR SUPPORTED_GPU_TARGETS MATCHES "gfx95")
242+
if ((SUPPORTED_GPU_TARGETS MATCHES "gfx94" OR SUPPORTED_GPU_TARGETS MATCHES "gfx95") AND NOT FORCE_DISABLE_XDL)
238243
message(STATUS "Enabling XDL FP8 gemms on native architectures")
239244
add_definitions(-DCK_USE_GFX94)
240245
set(CK_USE_GFX94 "ON")
@@ -247,7 +252,7 @@ if (SUPPORTED_GPU_TARGETS MATCHES "gfx10")
247252
add_definitions(-DCK_GFX1030_SUPPORT)
248253
endif()
249254

250-
if (SUPPORTED_GPU_TARGETS MATCHES "gfx11" OR SUPPORTED_GPU_TARGETS MATCHES "gfx12")
255+
if ((SUPPORTED_GPU_TARGETS MATCHES "gfx11" OR SUPPORTED_GPU_TARGETS MATCHES "gfx12") AND NOT FORCE_DISABLE_WMMA)
251256
message(STATUS "Enabling WMMA instances")
252257
add_definitions(-DCK_USE_WMMA)
253258
set(CK_USE_WMMA "ON")
@@ -257,7 +262,7 @@ endif()
257262
# define the macro with the current value (0 or 1)
258263
add_definitions(-DCK_TILE_USE_WMMA=${CK_TILE_USE_WMMA})
259264

260-
if (SUPPORTED_GPU_TARGETS MATCHES "gfx12")
265+
if (SUPPORTED_GPU_TARGETS MATCHES "gfx12" AND NOT FORCE_DISABLE_WMMA)
261266
message(STATUS "Enabling WMMA FP8 gemms on native architectures")
262267
add_definitions(-DCK_USE_WMMA_FP8)
263268
set(CK_USE_WMMA_FP8 "ON")
@@ -739,6 +744,13 @@ rocm_install(FILES
739744
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/ck/
740745
)
741746

747+
if(CK_EXPERIMENTAL_BUILDER)
748+
rocm_install(DIRECTORY
749+
${PROJECT_SOURCE_DIR}/experimental/builder/include/ck_tile/builder
750+
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/ck_tile
751+
)
752+
endif()
753+
742754
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
743755
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
744756

Jenkinsfile

Lines changed: 152 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,129 @@ def sendFailureNotifications() {
7272
}
7373
}
7474

75+
def generateAndArchiveBuildTraceVisualization() {
76+
try {
77+
def buildTraceFileName = "ck_build_trace.json";
78+
79+
// Attempt to download the build trace file to check if it exists
80+
def traceFileExists = false
81+
try {
82+
copyArtifacts(
83+
projectName: env.JOB_NAME,
84+
selector: specific(env.BUILD_NUMBER),
85+
filter: buildTraceFileName
86+
)
87+
traceFileExists = fileExists(buildTraceFileName)
88+
} catch (Exception e) {
89+
echo "Could not copy artifacts: ${e.getMessage()}"
90+
traceFileExists = false
91+
}
92+
93+
sh """
94+
echo "post download:"
95+
ls -la
96+
"""
97+
98+
if (traceFileExists) {
99+
// Move the build trace file to a temporary location to preserve it during checkout
100+
sh """
101+
mkdir -p /tmp/jenkins_artifacts
102+
cp ${buildTraceFileName} /tmp/jenkins_artifacts/${buildTraceFileName}
103+
ls -la /tmp/jenkins_artifacts/
104+
"""
105+
} else {
106+
echo "Build trace archive not found"
107+
return
108+
}
109+
110+
// Checkout source code to get required files
111+
checkout scm
112+
113+
// Restore the build trace file after checkout
114+
sh """
115+
ls -la
116+
cp /tmp/jenkins_artifacts/${buildTraceFileName} ${buildTraceFileName}
117+
ls -la ${buildTraceFileName}
118+
"""
119+
120+
// Pull image
121+
def image = "ghcr.io/puppeteer/puppeteer:24.30.0"
122+
echo "Pulling image: ${image}"
123+
def retimage = docker.image("${image}")
124+
retimage.pull()
125+
126+
// Create a temporary workspace
127+
sh """#!/bin/bash
128+
ls -la
129+
mkdir -p workspace
130+
cp ./script/infra_helper/capture_build_trace.js ./workspace
131+
cp ${buildTraceFileName} ./workspace/${buildTraceFileName}
132+
chmod 777 ./workspace
133+
ls -la ./workspace
134+
"""
135+
136+
// Run container to get snapshot
137+
def dockerOpts = "--cap-add=SYS_ADMIN -v \"\$(pwd)/workspace:/workspace\" -e NODE_PATH=/home/pptruser/node_modules"
138+
// Create unique image name by sanitizing job name
139+
def sanitizedJobName = env.JOB_NAME.replaceAll(/[\/\\:*?"<>| ]/, '_')
140+
def imageName = "perfetto_snapshot_${sanitizedJobName}_build_${env.BUILD_NUMBER}.png"
141+
sh """
142+
docker run --rm ${dockerOpts} ${image} node /workspace/capture_build_trace.js
143+
mv ./workspace/perfetto_snapshot_build.png ./workspace/${imageName}
144+
"""
145+
146+
// Archive the snapshot
147+
sh """
148+
mv ./workspace/${imageName} ${imageName}
149+
"""
150+
archiveArtifacts "${imageName}"
151+
152+
// Notify the channel
153+
withCredentials([string(credentialsId: 'ck_ci_build_perf_webhook_url', variable: 'WEBHOOK_URL')]) {
154+
sh '''
155+
# Create build trace filename with build number based on the original filename
156+
BUILD_TRACE_WITH_NUMBER=$(echo "''' + buildTraceFileName + '''" | sed 's/.json/_''' + sanitizedJobName + '''_''' + env.BUILD_NUMBER + '''.json/')
157+
158+
# Convert image to base64
159+
echo "Converting image to base64..."
160+
IMAGE_BASE64=$(base64 -w 0 ''' + imageName + ''')
161+
echo "Image base64 length: ${#IMAGE_BASE64}"
162+
163+
# Convert build trace to base64
164+
echo "Converting build trace to base64..."
165+
BUILD_TRACE_BASE64=$(base64 -w 0 ''' + buildTraceFileName + ''')
166+
echo "Build trace base64 length: ${#BUILD_TRACE_BASE64}"
167+
168+
# Create JSON payload with base64 data
169+
echo "Creating JSON payload..."
170+
{
171+
printf '{\n'
172+
printf ' "jobName": "%s",\n' "''' + env.JOB_NAME + '''"
173+
printf ' "buildNumber": "%s",\n' "''' + env.BUILD_NUMBER + '''"
174+
printf ' "jobUrl": "%s",\n' "''' + env.RUN_DISPLAY_URL + '''"
175+
printf ' "imageName": "%s",\n' "''' + imageName + '''"
176+
printf ' "imageData": "%s",\n' "$IMAGE_BASE64"
177+
printf ' "buildTraceName": "%s",\n' "$BUILD_TRACE_WITH_NUMBER"
178+
printf ' "buildTraceData": "%s"\n' "$BUILD_TRACE_BASE64"
179+
printf '}\n'
180+
} > webhook_payload.json
181+
182+
echo "JSON payload created, size: $(wc -c < webhook_payload.json) bytes"
183+
184+
curl -X POST "${WEBHOOK_URL}" \
185+
-H "Content-Type: application/json" \
186+
-d @webhook_payload.json
187+
188+
# Clean up temporary file
189+
rm -f webhook_payload.json
190+
'''
191+
}
192+
} catch (Exception e) {
193+
echo "Throwing error exception while generating build trace visualization"
194+
echo 'Exception occurred: ' + e.toString()
195+
}
196+
}
197+
75198
class Version {
76199
int major, minor, patch
77200
@Override
@@ -474,6 +597,9 @@ def cmake_build(Map conf=[:]){
474597
if (params.NINJA_BUILD_TRACE) {
475598
echo "running ninja build trace"
476599
}
600+
if (params.RUN_BUILDER_TESTS && !setup_args.contains("-DCK_CXX_STANDARD=") && !setup_args.contains("gfx10") && !setup_args.contains("gfx11")) {
601+
setup_args = " -D CK_EXPERIMENTAL_BUILDER=ON " + setup_args
602+
}
477603
setup_cmd = conf.get(
478604
"setup_cmd",
479605
"""${cmake_envs} cmake -G Ninja ${setup_args} -DCMAKE_CXX_FLAGS=" -O3 " .. """
@@ -520,6 +646,9 @@ def cmake_build(Map conf=[:]){
520646
else{
521647
sh "ninja check"
522648
}
649+
if (params.RUN_BUILDER_TESTS && !setup_args.contains("-DCK_CXX_STANDARD=") && !setup_args.contains("gfx10") && !setup_args.contains("gfx11")) {
650+
sh 'ninja check-builder'
651+
}
523652
if(params.BUILD_PACKAGES){
524653
echo "Build ckProfiler packages"
525654
sh 'ninja -j64 package'
@@ -545,6 +674,9 @@ def cmake_build(Map conf=[:]){
545674
else{
546675
sh "ninja check"
547676
}
677+
if (params.RUN_BUILDER_TESTS && !setup_args.contains("-DCK_CXX_STANDARD=") && !setup_args.contains("gfx10") && !setup_args.contains("gfx11")) {
678+
sh 'ninja check-builder'
679+
}
548680
if(params.BUILD_PACKAGES){
549681
echo "Build ckProfiler packages"
550682
sh 'ninja -j64 package'
@@ -1077,7 +1209,7 @@ pipeline {
10771209
description: "Build CK and run tests on gfx950 (default: ON)")
10781210
booleanParam(
10791211
name: "BUILD_GFX101",
1080-
defaultValue: true,
1212+
defaultValue: false,
10811213
description: "Build CK and run tests on gfx101 (default: OFF)")
10821214
booleanParam(
10831215
name: "BUILD_GFX103",
@@ -1107,6 +1239,10 @@ pipeline {
11071239
name: "RUN_INDUCTOR_TESTS",
11081240
defaultValue: true,
11091241
description: "Run inductor codegen tests (default: ON)")
1242+
booleanParam(
1243+
name: "RUN_BUILDER_TESTS",
1244+
defaultValue: true,
1245+
description: "Run CK_BUILDER tests (default: ON)")
11101246
booleanParam(
11111247
name: "RUN_ALL_UNIT_TESTS",
11121248
defaultValue: false,
@@ -1479,11 +1615,13 @@ pipeline {
14791615
-D GPU_TARGETS="gfx90a" \
14801616
-D GEMM_DATATYPE="fp8;fp16" \
14811617
-D GEMM_LAYOUT="rcr;rrr;crr;ccr" \
1618+
-D GEMM_STREAMK_DATATYPE="fp8;fp16" \
1619+
-D GEMM_STREAMK_LAYOUT="rcr" \
14821620
-D GEMM_MULTI_D_DATATYPE="fp16" \
14831621
-D GEMM_MULTI_D_LAYOUT="rcrr;rrrr;crrr;ccrr" \
14841622
-D GEMM_PRESHUFFLE_DATATYPE="fp16;fp8;bf16;bf8" \
14851623
-D GEMM_PRESHUFFLE_LAYOUT="rcr" .. && \
1486-
ninja -j64 benchmark_gemm_all benchmark_gemm_preshuffle_all benchmark_gemm_multi_d_all && \
1624+
ninja -j64 benchmark_gemm_all benchmark_gemm_preshuffle_all benchmark_gemm_multi_d_all benchmark_gemm_streamk_all && \
14871625
python3 ../tile_engine/ops/gemm/gemm_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json results.json && \
14881626
python3 ../tile_engine/ops/gemm_preshuffle/gemm_preshuffle_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json results.json && \
14891627
python3 ../tile_engine/ops/gemm_multi_d/gemm_multi_d_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json results.json """
@@ -1508,11 +1646,13 @@ pipeline {
15081646
-D GPU_TARGETS="gfx942" \
15091647
-D GEMM_DATATYPE="fp8;fp16" \
15101648
-D GEMM_LAYOUT="rcr;rrr;crr;ccr" \
1649+
-D GEMM_STREAMK_DATATYPE="fp8;fp16" \
1650+
-D GEMM_STREAMK_LAYOUT="rcr" \
15111651
-D GEMM_MULTI_D_DATATYPE="fp16" \
15121652
-D GEMM_MULTI_D_LAYOUT="rcrr;rrrr;crrr;ccrr" \
15131653
-D GEMM_PRESHUFFLE_DATATYPE="fp16;fp8;bf16;bf8" \
15141654
-D GEMM_PRESHUFFLE_LAYOUT="rcr" .. && \
1515-
ninja -j64 benchmark_gemm_all benchmark_gemm_preshuffle_all benchmark_gemm_multi_d_all && \
1655+
ninja -j64 benchmark_gemm_all benchmark_gemm_preshuffle_all benchmark_gemm_multi_d_all benchmark_gemm_streamk_all && \
15161656
python3 ../tile_engine/ops/gemm/gemm_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json results.json && \
15171657
python3 ../tile_engine/ops/gemm_preshuffle/gemm_preshuffle_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json results.json && \
15181658
python3 ../tile_engine/ops/gemm_multi_d/gemm_multi_d_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json results.json """
@@ -1737,6 +1877,15 @@ pipeline {
17371877
}
17381878
}
17391879
post {
1880+
always {
1881+
node(rocmnode("nogpu")) {
1882+
script {
1883+
// Simulate capture
1884+
generateAndArchiveBuildTraceVisualization()
1885+
}
1886+
cleanWs()
1887+
}
1888+
}
17401889
success {
17411890
script {
17421891
// Report the parent stage build ck and run tests status
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
1+
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2+
# SPDX-License-Identifier: MIT
3+
14
add_executable(client_gemm gemm.cpp)
25
target_link_libraries(client_gemm PRIVATE composable_kernel::device_other_operations composable_kernel::device_gemm_operations)

client_example/02_gemm_add_add_fastgelu/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2+
# SPDX-License-Identifier: MIT
3+
14
if(GPU_TARGETS MATCHES "gfx9")
25
add_custom_target(client_gemm_fastgelu_examples)
36

client_example/03_gemm_layernorm/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2+
# SPDX-License-Identifier: MIT
3+
14
if(GPU_TARGETS MATCHES "gfx9")
25
add_executable(client_gemm_add_add_layernorm_naive gemm_add_add_layernorm_naive.cpp)
36
target_link_libraries(client_gemm_add_add_layernorm_naive PRIVATE composable_kernel::device_gemm_operations composable_kernel::device_other_operations)

client_example/04_contraction/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2+
# SPDX-License-Identifier: MIT
3+
14
if(GPU_TARGETS MATCHES "gfx9")
25
add_executable(client_contraction_scale_fp32 contraction_scale_fp32.cpp)
36
target_link_libraries(client_contraction_scale_fp32 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations)

client_example/05_layernorm/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2+
# SPDX-License-Identifier: MIT
3+
14
add_executable(client_layernorm2d_bwd_data layernorm2d_bwd_data.cpp)
25
target_link_libraries(client_layernorm2d_bwd_data PRIVATE composable_kernel::device_other_operations)
36

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
1+
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2+
# SPDX-License-Identifier: MIT
3+
14
add_executable(client_softmax4d softmax4d.cpp)
25
target_link_libraries(client_softmax4d PRIVATE composable_kernel::device_other_operations composable_kernel::device_reduction_operations)

client_example/07_grouped_convnd_fwd/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2+
# SPDX-License-Identifier: MIT
3+
14
if(GPU_TARGETS MATCHES "gfx9")
25
add_executable(client_grouped_conv2d_fwd grouped_conv2d_fwd.cpp)
36
target_link_libraries(client_grouped_conv2d_fwd PRIVATE composable_kernel::device_conv_operations)

0 commit comments

Comments
 (0)