-
Notifications
You must be signed in to change notification settings - Fork 3.7k
Track errors through the inference return path #3776
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 5 commits
72cfb39
3b0e2ac
271d2ef
41767bc
86193b8
2f0bc7f
b4f3b69
182367c
6e6ebbb
e706d19
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
```diff
@@ -8,6 +8,7 @@
 import uuid
 import warnings

+from megatron.core.inference.inference_request import DynamicInferenceEventType
 from megatron.core.inference.sampling_params import SamplingParams
 from megatron.core.tokenizers.text.parsers import PARSER_MAPPING
@@ -151,7 +152,29 @@ async def chat_completions():
         f"{time.perf_counter() - start_time:.2f}s"
     )

-    # --- 4. Format OpenAI Response ---
+    # --- 4. Check for failed requests ---
+    failed_errors = []
+    for i, record in enumerate(batch_results):
+        last_request = record.requests[-1]
+        if last_request.failed():
+            error_events = [
+                e
+                for e in last_request.events
+                if e.type
+                in (
+                    DynamicInferenceEventType.ERROR_NONTRANSIENT,
+                    DynamicInferenceEventType.ERROR_TRANSIENT,
+                )
+            ]
+            error_msg = str(error_events[-1].payload) if error_events else "Unknown error"
+            failed_errors.append(f"Request {i}: {error_msg}")
+
+    if failed_errors:
+        error_detail = "; ".join(failed_errors)
+        logger.error(f"Inference request(s) failed: {error_detail}")
+        return Response(f"Inference request(s) failed: {error_detail}", status=400)
+
+    # --- 5. Format OpenAI Response ---
     choices = []
     total_completion_tokens = 0
     prompt_tokens_counts = []
```
```diff
@@ -4,6 +4,7 @@
 import logging
 import time

+from megatron.core.inference.inference_request import DynamicInferenceEventType
 from megatron.core.inference.sampling_params import SamplingParams

 logger = logging.getLogger(__name__)
@@ -124,7 +125,29 @@ async def completions():
         f"{time.perf_counter() - start_time:.2f}s"
     )

-    # --- 4. Format Response (matching old_completions.py) ---
+    # --- 4. Check for failed requests ---
+    failed_errors = []
+    for i, record in enumerate(batch_results):
+        last_request = record.requests[-1]
+        if last_request.failed():
+            error_events = [
+                e
+                for e in last_request.events
+                if e.type
+                in (
+                    DynamicInferenceEventType.ERROR_NONTRANSIENT,
+                    DynamicInferenceEventType.ERROR_TRANSIENT,
+                )
+            ]
+            error_msg = str(error_events[-1].payload) if error_events else "Unknown error"
+            failed_errors.append(f"Request {i}: {error_msg}")
+
+    if failed_errors:
+        error_detail = "; ".join(failed_errors)
+        logger.error(f"Inference request(s) failed: {error_detail}")
+        return f"Inference request(s) failed: {error_detail}", 400
+
+    # --- 5. Format Response (matching old_completions.py) ---
     choices = []

     request_idx = 0
```
Uh oh!
There was an error while loading. Please reload this page.