-
Notifications
You must be signed in to change notification settings - Fork 32
feat: add metrics CLI filter, skip field, retry improvements, and infer endpoint #222
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 5 commits
e4ab579
b563026
4efda3c
8171fa2
ce00854
be8019d
3e0af30
47f06b6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,12 +27,19 @@ | |
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| def _is_too_many_requests_error(exception: BaseException) -> bool: | ||
| """Check if exception is a 429 error.""" | ||
| return ( | ||
| isinstance(exception, httpx.HTTPStatusError) | ||
| and exception.response.status_code == 429 | ||
| ) | ||
| def _is_retryable_server_error(exception: BaseException) -> bool: | ||
| """Check if exception is a retryable HTTP error (429 or 5xx). | ||
|
|
||
| Args: | ||
| exception: The exception to check. | ||
|
|
||
| Returns: | ||
| True if the exception is a retryable HTTP status error. | ||
| """ | ||
| if not isinstance(exception, httpx.HTTPStatusError): | ||
| return False | ||
| status = exception.response.status_code | ||
| return status == 429 or 500 <= status < 600 | ||
|
|
||
|
|
||
| class APIClient: | ||
|
|
@@ -59,10 +66,11 @@ def __init__( | |
| retry_decorator = self._create_retry_decorator() | ||
| self._standard_query_with_retry = retry_decorator(self._standard_query) | ||
| self._streaming_query_with_retry = retry_decorator(self._streaming_query) | ||
| self._rlsapi_infer_query_with_retry = retry_decorator(self._rlsapi_infer_query) | ||
|
|
||
| def _create_retry_decorator(self) -> Any: | ||
| return retry( | ||
| retry=retry_if_exception(_is_too_many_requests_error), | ||
| retry=retry_if_exception(_is_retryable_server_error), | ||
| stop=stop_after_attempt( | ||
| self.config.num_retries + 1 | ||
| ), # +1 to account for the initial attempt | ||
|
|
@@ -186,6 +194,8 @@ def query( | |
|
|
||
| if self.config.endpoint_type == "streaming": | ||
| response = self._streaming_query_with_retry(api_request) | ||
| elif self.config.endpoint_type == "infer": | ||
| response = self._rlsapi_infer_query_with_retry(api_request) | ||
| else: | ||
| response = self._standard_query_with_retry(api_request) | ||
|
|
||
|
|
@@ -196,7 +206,7 @@ def query( | |
| except RetryError as e: | ||
| raise APIError( | ||
| f"Maximum retry attempts ({self.config.num_retries}) reached " | ||
| "due to persistent rate limiting (HTTP 429)." | ||
| "due to retryable server errors (HTTP 429/5xx)." | ||
| ) from e | ||
|
|
||
| def _prepare_request( | ||
|
|
@@ -285,8 +295,7 @@ def _standard_query(self, api_request: APIRequest) -> APIResponse: | |
| except httpx.TimeoutException as e: | ||
| raise self._handle_timeout_error("standard", self.config.timeout) from e | ||
| except httpx.HTTPStatusError as e: | ||
| # Re-raise 429 errors without conversion to allow retry decorator to handle them | ||
| if e.response.status_code == 429: | ||
| if _is_retryable_server_error(e): | ||
| raise | ||
| raise self._handle_http_error(e) from e | ||
| except ValueError as e: | ||
|
|
@@ -313,8 +322,7 @@ def _streaming_query(self, api_request: APIRequest) -> APIResponse: | |
| except httpx.TimeoutException as e: | ||
| raise self._handle_timeout_error("streaming", self.config.timeout) from e | ||
| except httpx.HTTPStatusError as e: | ||
| # Re-raise 429 errors without conversion to allow retry decorator to handle them | ||
| if e.response.status_code == 429: | ||
| if _is_retryable_server_error(e): | ||
| raise | ||
| raise self._handle_http_error(e) from e | ||
| except ValueError as e: | ||
|
|
@@ -324,6 +332,118 @@ def _streaming_query(self, api_request: APIRequest) -> APIResponse: | |
| except Exception as e: | ||
| raise self._handle_unexpected_error(e, "streaming query") from e | ||
|
|
||
| def _rlsapi_infer_query(self, api_request: APIRequest) -> APIResponse: | ||
| """Query the RLSAPI /infer endpoint for tool call and RAG metadata. | ||
|
|
||
| The infer endpoint uses a different request/response format than | ||
| the standard query/streaming endpoints, converting "query" to | ||
| "question" and parsing tool_calls and rag_chunks from tool_results. | ||
|
|
||
| Args: | ||
| api_request: The prepared API request. | ||
|
|
||
| Returns: | ||
| APIResponse with response text, tool calls, and RAG contexts. | ||
|
|
||
| Raises: | ||
| APIError: If the request fails or response is invalid. | ||
| """ | ||
| if not self.client: | ||
| raise APIError("HTTP client not initialized") | ||
| try: | ||
| request_data = api_request.model_dump(exclude_none=True) | ||
| infer_request: dict[str, object] = { | ||
| "question": request_data.pop("query"), | ||
| "include_metadata": True, | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don’t drop prepared request parameters for Line 356 sends only Proposed fix- request_data = api_request.model_dump(exclude_none=True)
+ request_data = self._serialize_request(api_request)
+ question = request_data.pop("query")
infer_request: dict[str, object] = {
- "question": request_data.pop("query"),
+ **request_data,
+ "question": question,
"include_metadata": True,
}🤖 Prompt for AI Agents
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed — added a code comment explaining that
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Only users with a collaborator, contributor, member, or owner role can interact with CodeRabbit. |
||
|
|
||
| logger.debug( | ||
| "RLSAPI infer request URL: /api/lightspeed/%s/infer", | ||
| self.config.version, | ||
| ) | ||
| logger.debug("RLSAPI infer request body: %s", infer_request) | ||
|
coderabbitai[bot] marked this conversation as resolved.
Outdated
|
||
|
|
||
| response = self.client.post( | ||
| f"/api/lightspeed/{self.config.version}/infer", | ||
| json=infer_request, | ||
| ) | ||
| response.raise_for_status() | ||
|
|
||
| response_data = response.json() | ||
|
|
||
| if "data" in response_data: | ||
| data = response_data["data"] | ||
| if "text" in data: | ||
| response_data["response"] = data["text"] | ||
| if "request_id" in data: | ||
| response_data["conversation_id"] = data["request_id"] | ||
| if "input_tokens" in data: | ||
| response_data["input_tokens"] = data["input_tokens"] | ||
| if "output_tokens" in data: | ||
| response_data["output_tokens"] = data["output_tokens"] | ||
| if "tool_calls" in data: | ||
| response_data["tool_calls"] = data["tool_calls"] | ||
| if "tool_results" in data: | ||
| tool_results = data["tool_results"] | ||
| for result in tool_results: | ||
| if result.get("type") == "mcp_call": | ||
| content = result["content"].split("---") | ||
| response_data["rag_chunks"] = [ | ||
| {"content": chunk} for chunk in content | ||
| ] | ||
|
coderabbitai[bot] marked this conversation as resolved.
Outdated
|
||
|
|
||
| if "response" not in response_data: | ||
| raise APIError("API response missing 'response' field") | ||
|
|
||
| if "tool_calls" in response_data and response_data["tool_calls"]: | ||
| raw_tool_calls = response_data["tool_calls"] | ||
| formatted_tool_calls = [] | ||
|
|
||
| for tool_call in raw_tool_calls: | ||
| if isinstance(tool_call, dict): | ||
| formatted_tool: dict[str, object] = { | ||
| "tool_name": ( | ||
| tool_call.get("tool_name") | ||
| or tool_call.get("name") | ||
| or "" | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. tool_name & name were used to make it compatible with older LCORE versions. But is it the same scenario for RLSAPI? Can we use the actual property name?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point — RLSAPI uses its own native fields (
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looking at the whole flow, I think that this (overall API interaction/data processing) can be modularized further. Non-blocker for this PR.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed. The latest refactoring commit (3e0af30) broke |
||
| ), | ||
| "arguments": ( | ||
| tool_call.get("arguments") | ||
| or tool_call.get("args") | ||
| or {} | ||
| ), | ||
| } | ||
| if "tool_results" in response_data.get("data", {}): | ||
| tool_call_id = tool_call.get("id") | ||
| matching_result = next( | ||
| ( | ||
| r | ||
| for r in response_data["data"]["tool_results"] | ||
| if r.get("id") == tool_call_id | ||
| ), | ||
| None, | ||
| ) | ||
| if matching_result: | ||
| formatted_tool["result"] = matching_result["status"] | ||
| formatted_tool_calls.append([formatted_tool]) | ||
|
coderabbitai[bot] marked this conversation as resolved.
Outdated
|
||
|
|
||
| response_data["tool_calls"] = formatted_tool_calls | ||
|
|
||
| return APIResponse.from_raw_response(response_data) | ||
|
|
||
| except httpx.TimeoutException as e: | ||
| raise self._handle_timeout_error("infer", self.config.timeout) from e | ||
| except httpx.HTTPStatusError as e: | ||
| if _is_retryable_server_error(e): | ||
| raise | ||
| raise self._handle_http_error(e) from e | ||
| except ValueError as e: | ||
| raise self._handle_validation_error(e) from e | ||
| except APIError: | ||
| raise | ||
| except Exception as e: | ||
| raise self._handle_unexpected_error(e, "infer query") from e | ||
|
|
||
| def _handle_response_errors(self, response: httpx.Response) -> None: | ||
| """Handle HTTP response errors for streaming endpoint.""" | ||
| if response.status_code != 200: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -56,7 +56,7 @@ | |
| DEFAULT_API_VERSION = "v1" | ||
| DEFAULT_API_TIMEOUT = 300 | ||
| DEFAULT_ENDPOINT_TYPE = "streaming" | ||
| SUPPORTED_ENDPOINT_TYPES = ["streaming", "query"] | ||
| SUPPORTED_ENDPOINT_TYPES = ["streaming", "query", "infer"] | ||
| DEFAULT_API_CACHE_DIR = ".caches/api_cache" | ||
|
|
||
| DEFAULT_API_NUM_RETRIES = 3 | ||
|
Comment on lines
+59
to
62
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Inconsistent retry default: API retries still 3 while LLM retries bumped to 5. The PR description states default retry attempts were increased from 3 to 5, but 🤖 Prompt for AI Agents
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Addressed — reverted
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Only users with a collaborator, contributor, member, or owner role can interact with CodeRabbit. |
||
|
|
@@ -70,7 +70,7 @@ | |
| DEFAULT_SSL_CERT_FILE = None | ||
| DEFAULT_LLM_TEMPERATURE = 0.0 | ||
| DEFAULT_LLM_MAX_TOKENS = 512 | ||
| DEFAULT_LLM_RETRIES = 3 | ||
| DEFAULT_LLM_RETRIES = 5 | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is for JudgeLLM, not for API calls. Also, this is just the default value; the actual value can be set in config as needed. Do we really need to change this?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed, reverted to 3. Will configure via eval config when needed. |
||
| DEFAULT_LLM_CACHE_DIR = ".caches/llm_cache" | ||
|
|
||
| DEFAULT_EMBEDDING_PROVIDER = "openai" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -171,6 +171,7 @@ def load_evaluation_data( | |
| data_path: str, | ||
| tags: Optional[list[str]] = None, | ||
| conv_ids: Optional[list[str]] = None, | ||
| metrics: Optional[list[str]] = None, | ||
| ) -> list[EvaluationData]: | ||
| """Load, filter, and validate evaluation data from YAML file. | ||
|
|
||
|
|
@@ -184,6 +185,7 @@ def load_evaluation_data( | |
| data_path: Path to the evaluation data YAML file | ||
| tags: Optional list of tags to filter by | ||
| conv_ids: Optional list of conversation group IDs to filter by | ||
| metrics: Optional list of metrics to run (filters each turn's turn_metrics) | ||
|
|
||
| Returns: | ||
| Filtered and validated list of Evaluation Data | ||
|
|
@@ -230,6 +232,19 @@ def load_evaluation_data( | |
| # Filter by scope before validation | ||
| evaluation_data = self._filter_by_scope(evaluation_data, tags, conv_ids) | ||
|
|
||
| # Remove skipped conversations | ||
| evaluation_data = [e for e in evaluation_data if not e.skip] | ||
|
|
||
| # Filter turn_metrics if --metrics was specified | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Based on this logic, I see two limitations
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Both issues addressed:
Added 3 tests covering all cases.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry, I didn't mention that metric resolution is already handled. Here we are adding (1) duplicate logic to process turn- and conversation-level metrics, and (2) metric resolution from config/system. But this is already managed in MetricManager, where we resolve the final turn- and conversation-level metrics. Moving the CLI filter there will eliminate the duplicate code significantly and will be more consistent. This can be refactored later (follow-up PR). I am considering this a non-blocker.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed — I can see how MetricManager is the right home for this. Happy to have it backlogged and refactored in a follow-up. Thanks for flagging it. |
||
| if metrics: | ||
| metrics_set = set(metrics) | ||
| for eval_data in evaluation_data: | ||
| for turn in eval_data.turns: | ||
| if turn.turn_metrics: | ||
| turn.turn_metrics = [ | ||
| m for m in turn.turn_metrics if m in metrics_set | ||
| ] | ||
|
|
||
| # Semantic validation (metrics availability and requirements) | ||
| if not self._validate_evaluation_data(evaluation_data): | ||
| raise DataValidationError("Evaluation data validation failed") | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Instead of a range, could you please write the exact status codes, like 502, 503, etc.?
I understand the purpose, but at the same time I am concerned that we will end up wasting retries.
In future we will probably make it configurable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done — narrowed to `status in (429, 502, 503, 504)`. Only clearly transient errors are retried: 429 = rate limit, 502 = bad gateway, 503 = service unavailable, 504 = gateway timeout. 500 is excluded since it can indicate permanent server bugs.