@@ -1154,7 +1154,11 @@ class GPUTilingDedup {
        VarOrRVar inner{var + "_i", v.is_rvar};

        split_t entry{v, outer, inner, factor, TailStrategy::Auto};
-        parallelize.try_emplace(var, entry);
+        const auto [_, insertion_happened] = parallelize.try_emplace(var, entry);
+        if (!insertion_happened) {
+            return std::nullopt;
+        }
+
        return entry;
    }
@@ -1163,15 +1167,16 @@ class GPUTilingDedup {
     * @param[in] vo split into outer dimension
     * @param[in] vi split into inner dimension
     * @param[in] factor the partition size.
+     * @return whether the vectorize() request is accepted or rejected.
     */
-    void can_vectorize(const VarOrRVar &v, const VarOrRVar &vo, const VarOrRVar &vi, const Expr &factor) {
+    bool can_vectorize(const VarOrRVar &v, const VarOrRVar &vo, const VarOrRVar &vi, const Expr &factor) {
        const auto &var = v.name();

        if (is_inner(var)) {
            // For CPU, it makes sense to further split the inner loop and run
            // SIMD instruction. But this operation is redundant in GPU as the
            // gpu_block is already specified.
-            return;
+            return false;
        }

        debug(2) << f.name() << ".vectorize(" << v.name() << "," << factor << ")\n";
@@ -1180,10 +1185,11 @@ class GPUTilingDedup {
            // vectorized dimension is treated as a thread in GPU. No need to
            // further split it to match the natural_vector_size() of CPUs.
            inner_vars.emplace(v.name());
-            return;
+            return false;
        }

        parallelize.try_emplace(var, split_t{v, vo, vi, factor, TailStrategy::Auto});
+        return true;
    }

    /** Mark the current dimension is already split by Mullapudi2016's
@@ -2880,11 +2886,11 @@ std::optional<pair<VarOrRVar, VarOrRVar>> Partitioner::vectorize_stage(const Gro
    internal_assert(is_rvar == dims[vec_dim_index].is_rvar());

    VarOrRVar vec_var(vec_dim_name, is_rvar);
-    auto [inner, outer] = [&]() -> std::pair<VarOrRVar, VarOrRVar> {
+    auto [inner, outer, accepted] = [&]() -> std::tuple<VarOrRVar, VarOrRVar, bool> {
        if (t.has_gpu_feature()) {
            VarOrRVar inner{vec_var.name() + "_vi", vec_var.is_rvar}, outer{vec_var.name() + "_vo", vec_var.is_rvar};
-            gpu_tiling.can_vectorize(vec_var, outer, inner, vec_len);
-            return {inner, outer};
+            const bool accepted = gpu_tiling.can_vectorize(vec_var, outer, inner, vec_len);
+            return {inner, outer, accepted};
        }

        auto split_vars = split_dim(g, f_handle, stage_num, def, is_group_output, vec_var, vec_len,
@@ -2894,7 +2900,7 @@ std::optional<pair<VarOrRVar, VarOrRVar>> Partitioner::vectorize_stage(const Gro
        sched.push_schedule(f_handle.name(), stage_num,
                            "vectorize(" + split_vars.first.name() + ")",
                            {split_vars.first.name()});
-        return split_vars;
+        return std::make_tuple(split_vars.first, split_vars.second, true);
    }();

    if (is_rvar) {
@@ -2912,6 +2918,10 @@ std::optional<pair<VarOrRVar, VarOrRVar>> Partitioner::vectorize_stage(const Gro
                 << "\" in function \"" << f_handle.name() << "\"\n";
    }

+    if (!accepted) {
+        return std::nullopt;
+    }
+
    return make_pair(inner, outer);
}
@@ -3284,7 +3294,8 @@ void Partitioner::generate_group_cpu_schedule(
    }

    // Find the level at which group members will be computed.
-    int tile_inner_index = dims.size() - outer_dims.size() - 1;
+    internal_assert(dims.size() > outer_dims.size());
+    const auto tile_inner_index = dims.size() - outer_dims.size() - 1;
    VarOrRVar tile_inner_var(Var::outermost());
    if (!outer_dims.empty()) {
        string var_name = get_base_name(dims[tile_inner_index].var);
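
Note (not part of the patch): the dedup in the first hunk relies on std::map::try_emplace reporting, via the bool in its returned pair, whether the key was actually inserted. A minimal standalone sketch of that idiom follows; the names record_once and requests are hypothetical and only illustrate the pattern.

#include <iostream>
#include <map>
#include <optional>
#include <string>

// Records a split request for `var` once; a second request for the same
// variable is rejected, mirroring how the patch turns a duplicate
// parallelize.try_emplace() into a std::nullopt return.
std::optional<int> record_once(std::map<std::string, int> &requests,
                               const std::string &var, int factor) {
    const auto [it, inserted] = requests.try_emplace(var, factor);
    if (!inserted) {
        return std::nullopt;  // key already present: reject the duplicate
    }
    return it->second;
}

int main() {
    std::map<std::string, int> requests;
    std::cout << record_once(requests, "x", 8).has_value() << "\n";  // prints 1: first request accepted
    std::cout << record_once(requests, "x", 4).has_value() << "\n";  // prints 0: duplicate rejected
}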