Skip to content

Commit 901a2b8

Browse files
committed
Restore flow for longrope ordering
1 parent fc7ac2a commit 901a2b8

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -826,8 +826,6 @@ void ov::npuw::LLMInferRequest::infer_prefill(ov::SoPtr<ov::ITensor> input_ids,
826826
"\"NPUW_LLM_MAX_PROMPT_LEN\" or shorten the prompt.");
827827
}
828828

829-
process_longrope(m_prefill_request, m_prefill_in_ports, position_ids);
830-
831829
m_llm_profile["1/prefill:1.prepare_for_new_conversation"].record([&]() {
832830
prepare_for_new_conversation(prompt_length);
833831
});
@@ -836,6 +834,8 @@ void ov::npuw::LLMInferRequest::infer_prefill(ov::SoPtr<ov::ITensor> input_ids,
836834
apply_lora();
837835
});
838836

837+
process_longrope(m_prefill_request, m_prefill_in_ports, position_ids);
838+
839839
const bool use_chunk_prefill = m_npuw_llm_compiled_model->m_use_chunk_prefill;
840840
m_llm_profile["1/prefill:3.infer"].record([&]() {
841841
if (use_chunk_prefill) {

0 commit comments

Comments
 (0)