Skip to content

Commit e20b89a

Browse files
authored
Improve error logging when invalid number of tokens is requested. (#3680)
1 parent 07e512a commit e20b89a

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

megatron/core/inference/engines/dynamic_engine.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -839,10 +839,16 @@ def _add_request(
             len(request.prompt_tokens) + request.sampling_params.num_tokens_to_generate
             > self.context.max_sequence_length
         ) or (request.sampling_params.num_tokens_to_generate < 0):
+            logging.error(
+                f"{request_id=} Invalid number of tokens to generate. Prompt len: {len(request.prompt_tokens)}, tokens to generate: {request.sampling_params.num_tokens_to_generate}, max seq len: {self.context.max_sequence_length}."
+            )
             request.status = Status.FAILED
             request.add_event_error_nontransient(MaxSequenceLengthOverflowError(request_id))

         if len(request.prompt_tokens) > self.context.max_tokens and not self.enable_chunked_prefill:
+            logging.error(
+                f"{request_id=} Prompt is longer than context.max_tokens. Prompt tokens: {len(request.prompt_tokens)}, context.max_tokens: {self.context.max_tokens}, chunked_prefill: {self.enable_chunked_prefill}"
+            )
             request.status = Status.FAILED
             request.add_event_error_nontransient(TokenOverflowError(request_id))

0 commit comments

Comments
 (0)