Skip to content

Commit 46aba01

Browse files
committed
logits_to_keep & use_query_position
1 parent e198e59 commit 46aba01

File tree

2 files changed

+4
-8
lines changed

2 files changed

+4
-8
lines changed

src/optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -145,13 +145,10 @@ def prepare_forward_args(self, *args):
145145
local_block_tables = args.pop(0) if self.rbln_config.use_local_attention else None
146146
query_position = (
147147
args.pop(0)
148-
# query_position usage: 1. causal_lm prefill or 2. sliding_window cache_position
148+
# query_position usage: 1. prefill & logits_to_keep == 1 or 2. sliding_window cache_position
149149
if (
150-
"prefill" in self.phase
151-
and (
152-
(self.is_causal_lm and self.rbln_config.logits_to_keep == 1)
153-
or self.rbln_config.use_local_attention
154-
)
150+
("prefill" in self.phase and self.rbln_config.logits_to_keep == 1)
151+
or self.rbln_config.use_local_attention
155152
)
156153
else None
157154
)

src/optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@
3333
Qwen2_5_VLRotaryEmbedding,
3434
)
3535

36-
from ....configuration_utils import RBLNCompileConfig, RBLNModelConfig
36+
from ....configuration_utils import RBLNCompileConfig
3737
from ....modeling import RBLNModel
3838
from ....utils.logging import get_logger
3939
from ...modeling_outputs import RBLNDecoderOnlyOutput
@@ -387,7 +387,6 @@ def prefill_output_size(self):
387387
hidden_size,
388388
)
389389

390-
391390
@classmethod
392391
def get_input_info(
393392
cls,

0 commit comments

Comments
 (0)