We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent f7dbd7d commit 33be906 Copy full SHA for 33be906
fbgemm_gpu/experimental/gemm/triton_gemm/fp8_gemm.py
@@ -3342,6 +3342,34 @@ def get_full_non_persistent_tuning_space():
3342
num_warps=8,
3343
num_stages=2,
3344
),
3345
+ triton.Config(
3346
+ {
3347
+ "BLOCK_M": 256,
3348
+ "BLOCK_N": 256,
3349
+ "BLOCK_K": 128,
3350
+ "GROUP_M": 2,
3351
+ "SPLIT_K": 1,
3352
+ "waves_per_eu": 0,
3353
+ "matrix_instr_nonkdim": 32,
3354
+ "kpack": 2,
3355
+ },
3356
+ num_warps=8,
3357
+ num_stages=2,
3358
+ ),
3359
3360
3361
3362
+ "BLOCK_N": 128,
3363
3364
+ "GROUP_M": 4,
3365
3366
3367
+ "matrix_instr_nonkdim": 16,
3368
+ "kpack": 1,
3369
3370
3371
3372
3373
]
3374
3375
0 commit comments