Skip to content

Commit 65a793c

Browse files
committed
Default machine params depends on has_gpu_feature
1 parent cdc64c8 commit 65a793c

File tree

1 file changed

+21
-4
lines changed

1 file changed

+21
-4
lines changed

src/autoschedulers/mullapudi2016/AutoSchedule.cpp

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,27 @@ namespace {
2424

2525
struct ArchParams {
2626
/** Maximum level of parallelism available. */
27-
int parallelism = 16;
27+
int parallelism{};
2828

2929
/** Size of the last-level cache (in bytes). */
30-
uint64_t last_level_cache_size = 16 * 1024 * 1024;
30+
uint64_t last_level_cache_size{};
3131

3232
/** Indicates how much more expensive is the cost of a load compared to
3333
* the cost of an arithmetic operation at last level cache. */
34-
float balance = 40;
34+
float balance{};
35+
36+
/** If GPU target is detected, but machine parameters are not specified, *
37+
* make a realistic estimate based on consumer-grade GPUs (Nvidia GTX *
38+
* 1660/Turing), or low-cost scientific-grade GPUs (Nvidia K40/Tesla).
39+
*
40+
* Section 5.4 of the Mullapudi2016 article: We configure the auto-scheduler
41+
* to target the GPU by setting the PARALLELISM_THRESHOLD to 128, ..., and
42+
* CACHE_SIZE to 48 KB.
43+
*/
44+
constexpr ArchParams(bool has_gpu_feature)
45+
: parallelism(has_gpu_feature ? 128 : 16), last_level_cache_size(has_gpu_feature ? 48 * 1024 : 16 * 1024 * 1024),
46+
balance(has_gpu_feature ? 20 : 40) {
47+
}
3548
};
3649

3750
// Substitute parameter estimates into the exprs describing the box bounds.
@@ -2823,6 +2836,10 @@ void Partitioner::vectorize_stage(const Group &g, Stage f_handle, int stage_num,
28232836
// values produced by the function.
28242837
const auto vec_len = [&]() -> int {
28252838
if (t.has_gpu_feature()) {
2839+
/** Section 5.4 of the Mullapudi2016 article: We configure the
2840+
* auto-scheduler to target the GPU by setting the ...,
2841+
* VECTOR_WIDTH to 32.
2842+
*/
28262843
return GPUTilingDedup::min_n_threads;
28272844
}
28282845

@@ -3851,7 +3868,7 @@ struct Mullapudi2016 {
38513868
pipeline_outputs.push_back(f.function());
38523869
}
38533870

3854-
ArchParams arch_params;
3871+
ArchParams arch_params{target.has_gpu_feature()};
38553872
{
38563873
ParamParser parser(params_in.extra);
38573874
parser.parse("parallelism", &arch_params.parallelism);

0 commit comments

Comments
 (0)