-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathserver.py
More file actions
171 lines (147 loc) · 5.09 KB
/
server.py
File metadata and controls
171 lines (147 loc) · 5.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
"""
Quotient MCP Server
"""
import argparse
import sys
from starlette.requests import Request
from starlette.responses import JSONResponse
from fastmcp import FastMCP
from pydantic import BaseModel
import requests
from typing import Literal
# Module-level FastMCP server instance; the route and tool decorators in this
# file register against it, and main() starts it.
mcp = FastMCP("quotient-mcp-server")
@mcp.custom_route("/health", methods=["GET"])
async def health_check(_request: Request):
    """Answer GET /health with a fixed JSON body so deployment platforms can probe the server.

    The incoming request is not inspected. This route is only relevant when
    the server runs with the HTTP transport.
    """
    status_payload = {"status": "ok"}
    return JSONResponse(status_payload)
class EvaluationResult(BaseModel):
    """
    Result of evaluating tool calls.
    """

    # Verdict label taken from the "score" field of the evaluator's response.
    score: str
    # Explanations for the verdict, taken from the response's "reasoning" field.
    reason: list[str]
@mcp.tool(
    name="evaluate_tool_call",
    description="""
Double check Agent Tool Calls with evaluate_tool_call. Evaluate whether an AI agent correctly used available tools in a conversation context.
Parameters:
- available_tools: Array of tool definitions
- message_history: Array of conversation messages
Example available_tools:
[
{
"name": "google-play-developer",
"description": "Get apps by a developer on Google Play",
"input_schema": {
"type": "object",
"properties": {
"devId": {"type": "string", "description": "Developer ID"},
"num": {"type": "number", "default": 60, "description": "Number of results"},
"lang": {"type": "string", "default": "en", "description": "Language code"},
"country": {"type": "string", "default": "us", "description": "Country code"}
},
"required": ["devId"]
}
}
]
Example message_history:
[
{
"role": "user",
"content": "Get 50 apps by 'Example Developer' for US market in English"
},
{
"role": "assistant",
"content": "I'll fetch the apps for you.",
"tool_calls": [{
"function": {
"name": "google-play-developer",
"arguments": {
"devId": "com.example.developer",
"num": 50,
"lang": "en",
"country": "us"
}
}
}]
}
]
Returns:
{
"score": "correct|incorrect_tool|incorrect_parameter_names|incorrect_parameter_values",
"reason": ["Detailed explanation of any issues found"]
}
""",
    annotations={
        "title": "Evaluate Tool Call",
        "readOnlyHint": True,
        "openWorldHint": False,
    },
)
def evaluate_tool_call(
    available_tools: list[dict], message_history: list[dict], model_size: Literal["3B", "7B"] = "7B"
) -> EvaluationResult:
    """
    Evaluate tool calling behavior via the Limbic Tool Use API.

    Args:
        available_tools: Tool definitions; sent to the API unchanged under "available_tools".
        message_history: Conversation messages; sent to the API unchanged under "messages".
        model_size: Which evaluator deployment to query, "3B" or "7B" (default: "7B").

    Returns:
        EvaluationResult built from the response's "score" and "reasoning" fields.
        A missing or null "reasoning" becomes an empty list.

    Raises:
        ValueError: If model_size is neither "3B" nor "7B".
        RuntimeError: If the API answers with a non-200 status, with a body that
            is not valid JSON, or with JSON that lacks a "score" field.
        requests.RequestException: Connection failures and timeouts from the
            HTTP call are not caught here and propagate to the caller.
    """
    if model_size == "3B":
        inference_endpoint_url = "https://quotient-ai--tool-call-evaluator-3b-api-v0-fastapi-app.modal.run/api/v1/detections/tool-use"
    elif model_size == "7B":
        inference_endpoint_url = "https://quotient-ai--tool-call-evaluator-7b-api-v0-fastapi-app.modal.run/api/v1/detections/tool-use"
    else:
        raise ValueError(f"Invalid model size: {model_size}. Must be one of: 3B, 7B")

    payload = {"messages": message_history, "available_tools": available_tools}

    # Deliberately no blanket try/except: any failure is left to propagate so
    # that fastmcp can turn it into a tool error.
    response = requests.post(inference_endpoint_url, json=payload, timeout=120)
    if response.status_code != 200:
        raise RuntimeError(f"API request failed with status {response.status_code}")

    # A 200 status does not guarantee a usable body, so validate its shape
    # before indexing into it.
    try:
        result = response.json()
    except ValueError as err:  # JSON decode errors subclass ValueError
        raise RuntimeError("API returned a response body that is not valid JSON") from err

    if not isinstance(result, dict) or "score" not in result:
        raise RuntimeError("API response is missing the required 'score' field")

    # `or []` also covers an explicit `"reasoning": null` in the response.
    return EvaluationResult(score=result["score"], reason=result.get("reasoning") or [])
def main():
    """Parse command-line arguments and start the server on the chosen transport.

    Options:
        --transport: "stdio" for local MCP clients or "http" (default) for remote access.
        --port: Port for the HTTP transport. When omitted, the PORT environment
            variable is used, falling back to 8888.
        --host: Host for the HTTP transport (default: 0.0.0.0).

    Exits with an argparse usage error if the port has to come from the PORT
    environment variable and that value is not an integer.
    """
    parser = argparse.ArgumentParser(description="Quotient MCP Server")
    parser.add_argument(
        "--transport",
        choices=["stdio", "http"],
        default="http",
        help="Transport method: stdio for local MCP clients, http for remote access"
    )
    parser.add_argument(
        "--port",
        type=int,
        default=None,
        help="Port for HTTP transport (defaults to PORT env var or 8888)"
    )
    parser.add_argument(
        "--host",
        default="0.0.0.0",
        help="Host for HTTP transport (default: 0.0.0.0)"
    )
    args = parser.parse_args()

    if args.transport == "stdio":
        # Run with stdio transport for local MCP clients
        mcp.run(transport="stdio")
        return

    # Run with HTTP transport for remote access
    import os  # only the HTTP path needs the environment lookup

    # Compare against None rather than relying on truthiness, so an explicit
    # `--port 0` is honored instead of being treated as "not given".
    if args.port is not None:
        port = args.port
    else:
        env_port = os.environ.get("PORT", "8888")
        try:
            port = int(env_port)
        except ValueError:
            # parser.error prints a usage message and exits; it does not return.
            parser.error(f"PORT environment variable must be an integer, got {env_port!r}")

    mcp.run(
        transport="http",
        host=args.host,
        port=port,
        stateless_http=True
    )
# Start the server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()