Skip to content

Commit 51edb6a

Browse files
committed
allow clip fa for anything besides cuda on gpu
1 parent 442fa7c commit 51edb6a

1 file changed

Lines changed: 6 additions & 1 deletion

File tree

gpttype_adapter.cpp

Lines changed: 6 additions & 1 deletion
```diff
@@ -2665,14 +2665,19 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
             printf("Clip will use CPU for this model!\n");
         }
         #endif
+        clip_flash_attn_type clip_fa = (kcpp_data->flash_attn?CLIP_FLASH_ATTN_TYPE_ENABLED:CLIP_FLASH_ATTN_TYPE_DISABLED); //kcpp: disabled in 1.102.2 as some headsizes break on turing
+        #if defined(GGML_USE_CUDA)
+        clip_fa = CLIP_FLASH_ATTN_TYPE_DISABLED; //kcpp: disabled in 1.102.2 as some headsizes break on turing
+        #endif
         if(inputs.mmproj_cpu)
         {
             set_clip_uses_gpu(false);
             printf("Clip forced to use CPU!\n");
+            clip_fa = (kcpp_data->flash_attn?CLIP_FLASH_ATTN_TYPE_ENABLED:CLIP_FLASH_ATTN_TYPE_DISABLED); //however if using CPU, fa is fine
         }
         clip_context_params ctx_clip_params {
             /* use_gpu */ true,
-            /* flash_attn_type */ CLIP_FLASH_ATTN_TYPE_DISABLED, //kcpp: disabled in 1.102.2 as some headsizes break on turing
+            /* flash_attn_type */ clip_fa,
             /* image_min_tokens */ -1,
             /* image_max_tokens */ -1,
         };
```

0 commit comments

Comments
 (0)