"""Use the agent-server through an OpenAI-compatible Chat Completions client.

This example starts a local agent-server, stores an LLM profile, lists it through
``GET /v1/models``, then calls ``POST /v1/chat/completions`` with the OpenAI
Python SDK. The returned ``X-OpenHands-ServerConversation-ID`` header is passed
back on a second call to continue the same OpenHands conversation.

Environment variables read by this script:

* ``LLM_API_KEY`` or ``OPENAI_API_KEY`` (required): key stored in the profile.
* ``LLM_MODEL`` (optional, default ``gpt-5-nano``): model stored in the profile.
* ``LLM_BASE_URL`` (optional): base URL stored in the profile when set.
* ``SESSION_API_KEY`` / ``OH_SESSION_API_KEYS_0`` (optional): take precedence
  over the session key reported by the managed server.
"""

import os
from uuid import UUID

import httpx
from openai import OpenAI
from scripts.utils import ManagedAPIServer


# The gateway runs a full OpenHands agent, but OpenAI clients still need a
# normal model-like name. We create an LLM profile below and expose it as
# `openhands_<profile_name>` through `/v1/models`.

api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY")
# Truthiness check (not `is not None`): a variable that is set but empty would
# otherwise slip through and only fail later, inside the first LLM call.
if not api_key:
    raise RuntimeError("Set LLM_API_KEY or OPENAI_API_KEY.")

llm_model = os.getenv("LLM_MODEL", "gpt-5-nano")
llm_base_url = os.getenv("LLM_BASE_URL")
profile_name = "gateway_demo"
gateway_model = f"openhands_{profile_name}"
conversation_header = "X-OpenHands-ServerConversation-ID"

# Start a local agent-server for the demo. `use_session_api_key=True` turns on
# authentication; the same key works as both `X-Session-API-Key` for native
# agent-server routes and `Authorization: Bearer ...` for OpenAI SDK calls.

with ManagedAPIServer(
    port=8770,
    use_session_api_key=True,
    extra_env={
        "OH_ENABLE_VNC": "0",
        "OH_ENABLE_VSCODE": "0",
        "OH_PRELOAD_TOOLS": "0",
        "OH_SECRET_KEY": "example-secret-key-for-demo-only-32b",
        "OH_WEBHOOKS": "[]",
    },
    health_request_timeout=2.0,
) as server:
    session_api_key = (
        os.getenv("SESSION_API_KEY")
        or os.getenv("OH_SESSION_API_KEYS_0")
        or server.session_api_key
    )
    if not session_api_key:
        raise RuntimeError("No session API key is available for the agent-server.")

    # Use the native REST API once to create the profile that backs the gateway
    # model. After that, normal OpenAI SDK calls are enough for chat traffic.
    api_client = httpx.Client(
        base_url=server.base_url,
        headers={"X-Session-API-Key": session_api_key},
        timeout=120.0,
    )
    openai_client = OpenAI(
        api_key=session_api_key,
        base_url=f"{server.base_url}/v1",
        timeout=120.0,
    )

    llm_config = {"model": llm_model, "api_key": api_key}
    if llm_base_url:
        llm_config["base_url"] = llm_base_url

    # Track what this run created so the `finally` below only deletes resources
    # that belong to the demo, even when a step in between fails.
    profile_created = False
    conversation_id = None
    accumulated_cost = 0.0

    # Entering both clients as context managers closes their connection pools
    # on every exit path, not only after a fully successful run.
    with api_client, openai_client:
        try:
            # `gateway_demo` becomes visible to OpenAI clients as
            # `openhands_gateway_demo`.
            profile_response = api_client.post(
                f"/api/profiles/{profile_name}",
                json={"llm": llm_config, "include_secrets": True},
            )
            assert profile_response.status_code == 201, profile_response.text
            profile_created = True

            models = openai_client.models.list()
            model_ids = [model.id for model in models.data]
            assert gateway_model in model_ids
            print(f"Gateway models include: {gateway_model}")

            # Ask through the OpenAI SDK. `with_raw_response` lets us read the
            # custom response header that identifies the OpenHands conversation
            # created behind this otherwise OpenAI-shaped request.
            first_response = openai_client.chat.completions.with_raw_response.create(
                model=gateway_model,
                messages=[
                    {
                        "role": "system",
                        "content": "Answer directly and do not use tools.",
                    },
                    {
                        "role": "user",
                        "content": (
                            "In one sentence, explain what an OpenAI-compatible "
                            "agent-server gateway does."
                        ),
                    },
                ],
            )
            first_completion = first_response.parse()
            conversation_id = first_response.headers.get(conversation_header)
            assert conversation_id is not None
            # Constructing a UUID is only a format check: it raises ValueError
            # if the header value is not a well-formed UUID string.
            UUID(conversation_id)

            first_answer = first_completion.choices[0].message.content
            print(f"First answer: {first_answer}")
            print(f"OpenHands conversation ID: {conversation_id}")

            persisted_response = api_client.get(
                f"/api/conversations/{conversation_id}"
            )
            assert persisted_response.status_code == 200, persisted_response.text

            # The gateway keeps conversations by default. Passing the header
            # back lets another OpenAI-compatible request continue the same
            # server-side agent conversation instead of starting over.
            second_completion = openai_client.chat.completions.create(
                model=gateway_model,
                messages=[
                    {
                        "role": "user",
                        "content": (
                            "Now answer in five words or fewer: what did I ask about?"
                        ),
                    }
                ],
                extra_headers={conversation_header: conversation_id},
            )
            second_answer = second_completion.choices[0].message.content
            print(f"Second answer using same conversation: {second_answer}")

            conversation_response = api_client.get(
                f"/api/conversations/{conversation_id}"
            )
            assert conversation_response.status_code == 200, (
                conversation_response.text
            )
            # Either level may be missing or null in the response, so fall back
            # to empty mappings and report a cost of zero in that case.
            stats = conversation_response.json().get("stats") or {}
            usage_to_metrics = stats.get("usage_to_metrics") or {}
            accumulated_cost = sum(
                metrics.get("accumulated_cost", 0.0)
                for metrics in usage_to_metrics.values()
            )
        finally:
            # Clean up the demo resources. Real applications can keep the
            # conversation ID and inspect it later through the native
            # agent-server API. Delete responses are intentionally not checked.
            if conversation_id is not None:
                api_client.delete(f"/api/conversations/{conversation_id}")
            if profile_created:
                api_client.delete(f"/api/profiles/{profile_name}")

    print(f"EXAMPLE_COST: {accumulated_cost}")