-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathapi_client.py
More file actions
148 lines (120 loc) · 5.78 KB
/
api_client.py
File metadata and controls
148 lines (120 loc) · 5.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
"""API interaction functions for LLM evaluation."""
import time
from typing import Any, Dict
import requests
from shared import RESPONSE_TIME_DECIMAL_PLACES
from validation import APIRequest, EvaluationRequest, APIResponseValidator
def get_model_response(endpoint_url: str, model: str, prompt: str, api_key: str = None,
                       system_prompt: str = None, throttling_secs: float = 0.1) -> Dict[str, Any]:
    """Get a chat-completion response from an OpenAI-compatible endpoint.

    Args:
        endpoint_url: Full URL of the chat-completions endpoint.
        model: Model identifier to request.
        prompt: User prompt; sanitized and capped at 10000 characters.
        api_key: Optional bearer token sent in the Authorization header.
        system_prompt: Optional system message; sanitized and capped at 2000 characters.
        throttling_secs: Client-side delay applied before each request.

    Returns:
        The parsed JSON response body, validated to have the OpenAI response shape.

    Raises:
        ValueError: On invalid input, timeout, connection/HTTP failure, or any
            unexpected error — all failures are deliberately normalized to
            ValueError so callers have a single exception type to catch.
    """
    try:
        # Validate required inputs up front.
        if not endpoint_url or not model or not prompt:
            raise ValueError("endpoint_url, model, and prompt are required")

        # Sanitize and bound the size of caller-supplied content.
        prompt = APIResponseValidator.sanitize_content(prompt, 10000)
        if system_prompt:
            system_prompt = APIResponseValidator.sanitize_content(system_prompt, 2000)

        # Simple client-side throttling between successive calls.
        time.sleep(throttling_secs)

        # Build the OpenAI-style message list (system message first, if any).
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        # Validate the request structure via the pydantic model.
        api_request = APIRequest(
            model=model,
            messages=messages,
            stream=False
        )

        # NOTE(review): .dict() is the pydantic v1 API; on v2 this should be model_dump().
        payload = api_request.dict()
        headers = {"Content-Type": "application/json"}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"

        # Generous timeout to accommodate slow LLM generations.
        response = requests.post(
            endpoint_url,
            json=payload,
            headers=headers,
            timeout=700  # 700 second timeout
        )
        response.raise_for_status()

        # Ensure the body actually has the expected OpenAI response shape.
        response_data = response.json()
        APIResponseValidator.validate_openai_response(response_data)
        return response_data
    except requests.exceptions.Timeout as e:
        # Chain the cause so the original traceback is preserved for debugging.
        raise ValueError(f"Request timeout for model {model}") from e
    except requests.exceptions.ConnectionError as e:
        raise ValueError(f"Connection error for endpoint {endpoint_url}") from e
    except requests.exceptions.HTTPError as e:
        # HTTPError always carries .response; it may be None, so only that check is needed.
        if e.response is not None:
            raise ValueError(f"HTTP error {e.response.status_code}: {e.response.text}") from e
        raise ValueError(f"HTTP error: {e}") from e
    except ValueError as e:
        # Input/response validation failures raised inside the try body land here.
        raise ValueError(f"Validation error: {e}") from e
    except Exception as e:
        # Normalize any other failure to ValueError for a uniform caller contract.
        raise ValueError(f"Unexpected error: {e}") from e
def evaluate_correctness(endpoint_url: str, evaluator_model: str, expected_answer: str,
                         generated_answer: str, api_key: str = None,
                         throttling_secs: float = 0.1) -> tuple[bool, str]:
    """Evaluate whether a generated answer matches the expected answer.

    When ``evaluator_model`` is falsy, falls back to case-insensitive exact
    string matching; otherwise asks the evaluator model to judge semantic
    equivalence and parses its CORRECT/INCORRECT verdict.

    Args:
        endpoint_url: Chat-completions endpoint passed to get_model_response.
        evaluator_model: Model used as judge; falsy selects string matching.
        expected_answer: Ground-truth answer.
        generated_answer: Candidate answer to evaluate.
        api_key: Optional bearer token forwarded to the endpoint.
        throttling_secs: Client-side delay forwarded to get_model_response.

    Returns:
        Tuple of (is_correct, note). ``note`` is an explanation for incorrect
        results, or "" when correct. Errors never raise; they return
        (False, <error message>).
    """
    try:
        # Empty answers are treated as incorrect rather than as an error.
        if not expected_answer or not generated_answer:
            return False, "Both expected_answer and generated_answer are required"

        # Fallback: exact (case/whitespace-insensitive) string comparison.
        if not evaluator_model:
            is_correct = generated_answer.lower().strip() == expected_answer.lower().strip()
            note = "Exact string matching evaluation" if not is_correct else ""
            return is_correct, note

        # Validate the evaluation request structure via the pydantic model.
        eval_request = EvaluationRequest(
            expected_answer=expected_answer,
            generated_answer=generated_answer,
            evaluator_model=evaluator_model
        )
        system_prompt = "You are an evaluator. Compare the expected answer with the generated answer. Ignore the tag content. The generated answers may vary slightly in wording but should preserve the original meaning. If the answers are equivalent in meaning, mark as correct. Respond with only 'CORRECT' or 'INCORRECT'. If the answer is INCORRECT, provide a brief explanation of why on a new line starting with 'NOTE:'."
        user_prompt = f"Expected Answer: {eval_request.expected_answer}\nGenerated Answer: {eval_request.generated_answer}"

        # Ask the judge model for a verdict.
        eval_response = get_model_response(
            endpoint_url,
            evaluator_model,
            user_prompt,
            api_key,
            system_prompt,
            throttling_secs
        )
        eval_result = eval_response.get('choices', [{}])[0].get('message', {}).get('content', '').strip()
        if not eval_result:
            raise ValueError("Empty evaluation response")

        # Parse the verdict line plus an optional NOTE: line.
        lines = eval_result.split('\n')
        # FIX: strip the verdict line — trailing whitespace or '\r' previously made
        # a valid "CORRECT" verdict fall into the ambiguous branch.
        verdict = lines[0].strip()
        if verdict == "CORRECT":
            is_correct = True
        elif verdict == "INCORRECT":
            is_correct = False
        else:
            # Default to incorrect when the judge's response is unparseable.
            print(f"⚠️ Ambiguous evaluation response: '{eval_result}', marking as incorrect")
            is_correct = False

        # Extract the first explanation line, if the judge provided one.
        note = ""
        for line in lines[1:]:
            if line.strip().startswith('NOTE:'):
                note = line.strip()[5:].strip()  # Remove 'NOTE:' prefix
                break
        return is_correct, note
    except ValueError as e:
        print(f"❌ Evaluation validation error: {e}")
        return False, f"Evaluation validation error: {e}"
    except Exception as e:
        print(f"❌ Evaluation error: {e}")
        return False, f"Evaluation error: {e}"