Skip to content

Commit 611e41a

Browse files
authored
Merge pull request #7 from Thireus/revert-6-revert-4-revert-3-ik/check_up_gate_fmoe
Revert "Revert "Revert "Check if ffn_up and ffn_gate are of the same type before using fmoe"""
2 parents 916c9c3 + 2a35bed commit 611e41a

File tree

1 file changed

+1
-21
lines changed

1 file changed

+1
-21
lines changed

src/llama.cpp

Lines changed: 1 addition & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -3447,26 +3447,6 @@ static bool llama_kv_cache_init(
3447 3447
buft_layer_count[llama_default_buffer_type_cpu(true)] = n_layer;
3448 3448
}
3449 3449

3450-
if (cparams.fused_moe_up_gate) {
3451-
int nbad = 0;
3452-
for (int i = 0; i < (int) n_layer; i++) {
3453-
auto& layer = model.layers[i];
3454-
if (layer.ffn_gate_exps && layer.ffn_up_exps && layer.ffn_gate_exps->type != layer.ffn_up_exps->type) {
3455-
++nbad;
3456-
}
3457-
}
3458-
if (nbad > 0) {
3459-
if (nbad == (int)n_layer) {
3460-
LLAMA_LOG_WARN("=============== ffn_up and ffn_gate are of different type => disabling fmoe\n");
3461-
const_cast<llama_cparams&>(cparams).fused_moe_up_gate = false;
3462-
}
3463-
else {
3464-
LLAMA_LOG_WARN("=============== ffn_up and ffn_gate are of different in %d out of %d layers, where fmoe will be disabled\n",
3465-
nbad, (int)n_layer);
3466-
}
3467-
}
3468-
}
3469-
3470 3450
// create a context for each buffer type
3471 3451
std::map<ggml_backend_buffer_type_t, ggml_context *> ctx_map;
3472 3452
for (auto & it : buft_layer_count) {
@@ -9861,7 +9841,7 @@ llm_expert_gating_func_type gating_op,
9861 9841
}
9862 9842

9863 9843
ggml_tensor * par;
9864-
if (lctx.cparams.fused_moe_up_gate && up_exps->type == gate_exps->type) {
9844+
if (lctx.cparams.fused_moe_up_gate) {
9865 9845
par = ggml_moe_up_gate(ctx, up_exps, gate_exps, cur, selected_experts, type_op == LLM_FFN_SILU ? GGML_UNARY_OP_SILU : GGML_UNARY_OP_GELU);
9866 9846
} else {
9867 9847
ggml_tensor * up = llm_build_lora_mm_id(lctx, ctx, up_exps, cur, selected_experts); // [n_ff, n_expert_used, n_tokens]

0 commit comments

Comments
 (0)