Skip to content

Commit 371869d

Browse files
committed
w_scale_gate and w_scale_up also need to be swapped for gate_up_swapped cases
1 parent 5405515 commit 371869d

File tree

1 file changed

+12
-2
lines changed

1 file changed

+12
-2
lines changed

src/plugins/intel_cpu/src/nodes/llm_mlp.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -401,10 +401,20 @@ struct LLMMLP::Executor : public LLMMLP::ExecutorBase {
401401
if (m_config.gate_up_combined) {
402402
w_scale_up = w_scale_gate + N;
403403
}
404+
405+
// When gate_up_combined=true and gate_up_swapped=true, we need to swap the scales
406+
// to match the swapped weight layout
407+
auto* scale_first = w_scale_gate;
408+
auto* scale_second = w_scale_up;
409+
if (m_config.gate_up_combined && m_config.gate_up_swapped) {
410+
scale_first = w_scale_up;
411+
scale_second = w_scale_gate;
412+
}
413+
404414
for (size_t i = 0; i < N; i += 16) {
405-
memcpy(dst, w_scale_gate + i, 16 * sizeof(float));
415+
memcpy(dst, scale_first + i, 16 * sizeof(float));
406416
dst += 16;
407-
memcpy(dst, w_scale_up + i, 16 * sizeof(float));
417+
memcpy(dst, scale_second + i, 16 * sizeof(float));
408418
dst += 16;
409419
}
410420
}

0 commit comments

Comments
 (0)