Skip to content

Commit e081a58

Browse files
authored
Evaluation: Track evaluator usage through user agent (#38600)
* track evaluator usage through user agent * fix black * fix the tests * fix the type hint
1 parent 69f0c22 commit e081a58

5 files changed

Lines changed: 37 additions & 23 deletions

File tree

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py

Lines changed: 30 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -72,18 +72,21 @@ def get_formatted_template(data: dict, annotation_task: str) -> str:
7272
return user_text.replace("'", '\\"')
7373

7474

75-
def get_common_headers(token: str) -> Dict:
75+
def get_common_headers(token: str, evaluator_name: Optional[str] = None) -> Dict:
7676
"""Get common headers for the HTTP request
7777
7878
:param token: The Azure authentication token.
7979
:type token: str
80+
:param evaluator_name: The evaluator name. Default is None.
81+
:type evaluator_name: Optional[str]
8082
:return: The common headers.
8183
:rtype: Dict
8284
"""
85+
user_agent = f"{USER_AGENT} (type=evaluator; subtype={evaluator_name})" if evaluator_name else USER_AGENT
8386
return {
8487
"Authorization": f"Bearer {token}",
8588
"Content-Type": "application/json",
86-
"User-Agent": USER_AGENT,
89+
"User-Agent": user_agent,
8790
# Handle "RuntimeError: Event loop is closed" from httpx AsyncClient
8891
# https://github.com/encode/httpx/discussions/2959
8992
"Connection": "close",
@@ -175,7 +178,9 @@ def generate_payload(normalized_user_text: str, metric: str, annotation_task: st
175178
)
176179

177180

178-
async def submit_request(data: dict, metric: str, rai_svc_url: str, token: str, annotation_task: str) -> str:
181+
async def submit_request(
182+
data: dict, metric: str, rai_svc_url: str, token: str, annotation_task: str, evaluator_name: str
183+
) -> str:
179184
"""Submit request to Responsible AI service for evaluation and return operation ID
180185
181186
:param data: The data to evaluate.
@@ -188,14 +193,16 @@ async def submit_request(data: dict, metric: str, rai_svc_url: str, token: str,
188193
:type token: str
189194
:param annotation_task: The annotation task to use.
190195
:type annotation_task: str
196+
:param evaluator_name: The evaluator name.
197+
:type evaluator_name: str
191198
:return: The operation ID.
192199
:rtype: str
193200
"""
194201
normalized_user_text = get_formatted_template(data, annotation_task)
195202
payload = generate_payload(normalized_user_text, metric, annotation_task=annotation_task)
196203

197204
url = rai_svc_url + "/submitannotation"
198-
headers = get_common_headers(token)
205+
headers = get_common_headers(token, evaluator_name)
199206

200207
async with get_async_http_client_with_timeout() as client:
201208
http_response = await client.post(url, json=payload, headers=headers)
@@ -493,24 +500,26 @@ async def evaluate_with_rai_service(
493500
credential: TokenCredential,
494501
annotation_task: str = Tasks.CONTENT_HARM,
495502
metric_display_name=None,
503+
evaluator_name=None,
496504
) -> Dict[str, Union[str, float]]:
497-
""" "Evaluate the content safety of the response using Responsible AI service
505+
"""Evaluate the content safety of the response using Responsible AI service
498506
499-
:param data: The data to evaluate.
500-
:type data: dict
501-
:param metric_name: The evaluation metric to use.
502-
:type metric_name: str
503-
:param project_scope: The Azure AI project scope details.
504-
:type project_scope: Dict
505-
:param credential: The Azure authentication credential.
506-
:type credential:
507-
~azure.core.credentials.TokenCredential
508-
:param annotation_task: The annotation task to use.
509-
:type annotation_task: str
510-
:param metric_display_name: The display name of metric to use.
511-
:type metric_display_name: str
512-
:return: The parsed annotation result.
513-
:rtype: Dict[str, Union[str, float]]
507+
:param data: The data to evaluate.
508+
:type data: dict
509+
:param metric_name: The evaluation metric to use.
510+
:type metric_name: str
511+
:param project_scope: The Azure AI project scope details.
512+
:type project_scope: Dict
513+
:param credential: The Azure authentication credential.
514+
:type credential: ~azure.core.credentials.TokenCredential
515+
:param annotation_task: The annotation task to use.
516+
:type annotation_task: str
517+
:param metric_display_name: The display name of metric to use.
518+
:type metric_display_name: str
519+
:param evaluator_name: The evaluator name to use.
520+
:type evaluator_name: Optional[str]
521+
:return: The parsed annotation result.
522+
:rtype: Dict[str, Union[str, float]]
514523
"""
515524

516525
# Get RAI service URL from discovery service and check service availability
@@ -519,7 +528,7 @@ async def evaluate_with_rai_service(
519528
await ensure_service_availability(rai_svc_url, token, annotation_task)
520529

521530
# Submit annotation request and fetch result
522-
operation_id = await submit_request(data, metric_name, rai_svc_url, token, annotation_task)
531+
operation_id = await submit_request(data, metric_name, rai_svc_url, token, annotation_task, evaluator_name)
523532
annotation_response = cast(List[Dict], await fetch_result(operation_id, rai_svc_url, credential, token))
524533
result = parse_response(annotation_response, metric_name, metric_display_name)
525534

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,12 @@ def __init__(self, *, result_key: str, prompty_file: str, model_config: dict, ev
4747
self._prompty_file = prompty_file
4848
super().__init__(eval_last_turn=eval_last_turn)
4949

50+
subclass_name = self.__class__.__name__
51+
# Separate the fields with ";" so this user agent uses the same "(type=...; subtype=...)" layout as the RAI service and simulator user agents.
user_agent = f"{USER_AGENT} (type=evaluator; subtype={subclass_name})"
5052
prompty_model_config = construct_prompty_model_config(
5153
validate_model_config(model_config),
5254
self._DEFAULT_OPEN_API_VERSION,
53-
USER_AGENT,
55+
user_agent,
5456
)
5557

5658
self._flow = AsyncPrompty.load(source=prompty_file, model=prompty_model_config)

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, T]:
111111
project_scope=self._azure_ai_project,
112112
credential=self._credential,
113113
annotation_task=self._get_task(),
114+
evaluator_name=self.__class__.__name__,
114115
)
115116

116117
def _get_task(self):

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
from ._utils import JsonLineChatProtocol
2626

2727

28-
USER_AGENT += " (type=simulator subtype=Simulator)"
28+
USER_AGENT += " (type=simulator; subtype=Simulator)"
2929

3030

3131
@experimental

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_content_safety_rai_script.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ async def test_submit_request(self, client_mock):
178178
rai_svc_url="www.notarealurl.com",
179179
token="dummy",
180180
annotation_task=Tasks.CONTENT_HARM,
181+
evaluator_name="dummy-evaluator",
181182
)
182183
assert result == "dummy-operation-id"
183184

@@ -198,6 +199,7 @@ async def test_submit_request_not_found(self, client_mock):
198199
rai_svc_url="www.notarealurl.com",
199200
token="dummy",
200201
annotation_task=Tasks.CONTENT_HARM,
202+
evaluator_name="dummy-evaluator",
201203
)
202204
assert "Operation returned an invalid status '404 Not Found'" in str(exc_info._excinfo[1])
203205

0 commit comments

Comments
 (0)