Skip to content

Commit de0a195

Browse files
committed
Testcases: Lower the L2/L3 cache size for GPU targets
1 parent 06a6219 commit de0a195

File tree

4 files changed

+16
-4
lines changed

4 files changed

+16
-4
lines changed

apps/bilateral_grid/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,11 @@ add_halide_library(bilateral_grid_auto_schedule FROM bilateral_grid.generator
2525
GENERATOR bilateral_grid
2626
STMT bilateral_grid_auto_schedule_STMT
2727
SCHEDULE bilateral_grid_auto_schedule_SCHEDULE
28-
AUTOSCHEDULER Halide::Mullapudi2016)
28+
AUTOSCHEDULER Halide::Mullapudi2016
29+
# When target=host-cuda or host-metal, limit the GPU shared
30+
# memory per block to avoid GPU kernel launch failure.
31+
PARAMS autoscheduler.last_level_cache_size=20000
32+
)
2933

3034
# Main executable
3135
add_executable(bilateral_grid_process filter.cpp)

apps/local_laplacian/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,11 @@ add_halide_generator(local_laplacian.generator
2020
add_halide_library(local_laplacian FROM local_laplacian.generator)
2121
add_halide_library(local_laplacian_auto_schedule FROM local_laplacian.generator
2222
GENERATOR local_laplacian
23-
AUTOSCHEDULER Halide::Mullapudi2016)
23+
AUTOSCHEDULER Halide::Mullapudi2016
24+
# When target=host-cuda or host-metal, limit the GPU shared
25+
# memory per block to avoid GPU kernel launch failure.
26+
PARAMS autoscheduler.last_level_cache_size=30000
27+
)
2428

2529
# Main executable
2630
add_executable(local_laplacian_process process.cpp)

apps/stencil_chain/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,11 @@ add_halide_generator(stencil_chain.generator SOURCES stencil_chain_generator.cpp
1818
add_halide_library(stencil_chain FROM stencil_chain.generator)
1919
add_halide_library(stencil_chain_auto_schedule FROM stencil_chain.generator
2020
GENERATOR stencil_chain
21-
AUTOSCHEDULER Halide::Mullapudi2016)
21+
AUTOSCHEDULER Halide::Mullapudi2016
22+
# When target=host-cuda or host-metal, limit the GPU shared
23+
# memory per block to avoid GPU kernel launch failure.
24+
PARAMS autoscheduler.last_level_cache_size=15000
25+
)
2226

2327
# Main executable
2428
add_executable(stencil_chain_process process.cpp)

src/autoschedulers/mullapudi2016/AutoSchedule.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ struct ArchParams {
4242
* CACHE_SIZE to 48 KB.
4343
*/
4444
constexpr ArchParams(bool has_gpu_feature)
45-
: parallelism(has_gpu_feature ? 128 : 16), last_level_cache_size(has_gpu_feature ? 48 * 1024 : 16 * 1024 * 1024),
45+
: parallelism(has_gpu_feature ? 128 : 16), last_level_cache_size(has_gpu_feature ? 35 * 1024 : 16 * 1024 * 1024),
4646
balance(has_gpu_feature ? 20 : 40) {
4747
}
4848
};

0 commit comments

Comments
 (0)