|
41 | 41 | "^test_attention_4d_with_past_and_present_qk_matmul_bias_4d_mask_causal*", // location of infinities |
42 | 42 | "^test_attention_4d_attn_mask_3d_causal_expanded*", // webgpu |
43 | 43 | "^test_attention_4d_diff_heads_mask4d_padded_kv*", // Need nonpad_kv_seqlen |
44 | | - // TODO: support the following tests in Attention-cuda |
| 44 | + // TODO: support the following tests in Attention-cuda (softcap, softmax precision and qk_matmul_mode) |
45 | 45 | "^test_attention_3d_diff_heads_sizes_softcap_cuda", // softcap not supported in Attention-cuda |
46 | 46 | "^test_attention_4d_diff_heads_sizes_softcap_cuda", // softcap not supported in Attention-cuda |
47 | 47 | "^test_attention_3d_softcap_cuda", // softcap not supported in Attention-cuda |
|
66 | 66 | "^test_attention_4d_attn_mask_4d_causal_cuda", |
67 | 67 | "^test_attention_4d_causal_cuda", |
68 | 68 | "^test_attention_4d_diff_heads_sizes_causal_cuda", |
| 69 | + // GQA Attention-cuda does not support fp32 or 4d QKV |
| 70 | + "^test_attention_4d_gqa_with_past_and_present_fp16_cuda", // 4d QKV |
| 71 | + "^test_attention_4d_gqa_with_past_and_present_cuda", // fp32 |
| 72 | + "^test_attention_4d_gqa_softcap_cuda", // fp32 |
| 73 | + "^test_attention_4d_gqa_scaled_cuda", // fp32 |
| 74 | + "^test_attention_4d_gqa_cuda", // fp32 |
| 75 | + "^test_attention_3d_gqa_attn_mask_cuda", // fp32 |
| 76 | + "^test_attention_3d_gqa_causal_cuda", // fp32 |
| 77 | + "^test_attention_3d_gqa_cuda", // fp32 |
| 78 | + "^test_attention_3d_gqa_scaled_cuda", // fp32 |
| 79 | + "^test_attention_3d_gqa_softcap_cuda", // fp32 |
| 80 | + "^test_attention_3d_gqa_with_past_and_present_cuda", // fp32 |
| 81 | + "^test_attention_4d_gqa_attn_mask_cuda", // fp32 |
| 82 | + "^test_attention_4d_gqa_causal_cuda", // fp32 |
69 | 83 | "^test_tensorscatter*", // TensorScatter(24) not implemented |
70 | 84 | "^test_castlike_no_saturate_FLOAT_to_FLOAT8*", // ORT does not support ml_dtypes |
71 | 85 | "^test_castlike_UINT4_to*", // ORT does not support ml_dtypes |
|
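For context on how these entries take effect: a minimal sketch, assuming the strings above are regular expressions that the test runner matches against generated ONNX backend test names in order to skip them. The `EXCLUDED_PATTERNS` subset, the `should_skip` helper, and the sample test name are illustrative only and not part of this change.

```python
import re

# Hypothetical subset of the exclusion patterns listed in the diff above.
EXCLUDED_PATTERNS = [
    "^test_attention_3d_softcap_cuda",     # softcap not supported in Attention-cuda
    "^test_attention_4d_gqa_causal_cuda",  # GQA limitation in Attention-cuda
    "^test_tensorscatter*",                # TensorScatter(24) not implemented
]

def should_skip(test_name: str) -> bool:
    """Return True when the test name matches any exclusion pattern (assumed regex search)."""
    return any(re.search(pattern, test_name) for pattern in EXCLUDED_PATTERNS)

# Both of these would be skipped under the sample patterns above.
assert should_skip("test_attention_3d_softcap_cuda")
assert should_skip("test_tensorscatter_basic")
```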