Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion python/sglang/srt/managers/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1488,7 +1488,9 @@ def handle_generate_request(
else:
# Create a new request from a previous session
session = self.sessions[recv_req.session_params.id]
- req = session.create_req(recv_req, self.tokenizer)
+ req = session.create_req(
+ recv_req, self.tokenizer, self.model_config.vocab_size
+ )
if isinstance(req.finished_reason, FINISH_ABORT):
self.init_req_max_new_tokens(req)
self._add_request_to_queue(req)
Expand Down
4 changes: 2 additions & 2 deletions python/sglang/srt/managers/session_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def __init__(self, capacity_of_str_len: int, session_id: Optional[str] = None):
self.capacity_of_str_len = capacity_of_str_len
self.req_nodes: Dict[str, SessionReqNode] = {}

- def create_req(self, req: TokenizedGenerateReqInput, tokenizer):
+ def create_req(self, req: TokenizedGenerateReqInput, tokenizer, vocab_size: int):
assert req.session_params is not None
session_params = req.session_params

Expand Down Expand Up @@ -144,7 +144,7 @@ def create_req(self, req: TokenizedGenerateReqInput, tokenizer):
return_logprob=req.return_logprob,
top_logprobs_num=req.top_logprobs_num,
token_ids_logprob=req.token_ids_logprob,
- vocab_size=tokenizer.vocab_size,
+ vocab_size=vocab_size,
)
if last_req is not None:
new_req.multimodal_inputs = last_req.multimodal_inputs
Expand Down
Loading