|
1 | 1 | // Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. |
2 | 2 | // SPDX-License-Identifier: MIT |
3 | 3 | // |
4 | | -// In-process mock HTTP server mimicking the Lemonade Server API. |
5 | | -// Used by benchmarks to avoid requiring a real LLM backend. |
| 4 | +// The canonical mock server now lives under cpp/tests/support/. This header |
| 5 | +// remains as a thin re-include so existing benchmark sources compile |
| 6 | +// unchanged. Do not add new contents here. |
6 | 7 |
|
7 | 8 | #pragma once |
8 | 9 |
|
#include <atomic>
#include <chrono>
#include <deque>
#include <memory>
#include <mutex>
#include <stdexcept>
#include <string>
#include <thread>

#include <httplib.h>
18 | | - |
19 | | -namespace bench { |
20 | | - |
21 | | -// Default chat completion response — agent returns a final answer immediately. |
22 | | -static const std::string kDefaultAnswer = R"({"choices":[{"message":{"content":"{\"thought\":\"done\",\"goal\":\"complete\",\"answer\":\"benchmark result\"}"}}]})"; |
23 | | - |
24 | | -// Tool-call response — agent calls the echo tool first. |
25 | | -static const std::string kToolCall = R"({"choices":[{"message":{"content":"{\"thought\":\"calling tool\",\"goal\":\"test\",\"tool\":\"echo\",\"tool_args\":{\"message\":\"bench\"}}"}}]})"; |
26 | | - |
27 | | -// Health response — reports mock-model as already loaded so ensureModelLoaded() skips /load. |
28 | | -static const std::string kHealthOk = R"({"status":"ok","all_models_loaded":[{"model_name":"mock-model","recipe_options":{"ctx_size":16384}}]})"; |
29 | | - |
30 | | -// Models list response |
31 | | -static const std::string kModelsList = R"({"data":[{"id":"mock-model"}]})"; |
32 | | - |
33 | | -// Load response |
34 | | -static const std::string kLoadOk = R"({"status":"ok"})"; |
35 | | - |
36 | | -class MockLlmServer { |
37 | | -public: |
38 | | - /// Start server on an OS-assigned port. |
39 | | - /// Constructor blocks until the server is accepting connections. |
40 | | - MockLlmServer() : server_(std::make_unique<httplib::Server>()) { |
41 | | - registerHandlers(); |
42 | | - |
43 | | - // bind_to_any_port returns the OS-assigned port (avoids CI port conflicts) |
44 | | - port_ = server_->bind_to_any_port("127.0.0.1"); |
45 | | - if (port_ <= 0) { |
46 | | - throw std::runtime_error("MockLlmServer: failed to bind to any port"); |
47 | | - } |
48 | | - |
49 | | - thread_ = std::thread([this]() { server_->listen_after_bind(); }); |
50 | | - |
51 | | - waitUntilReady(); |
52 | | - } |
53 | | - |
54 | | - ~MockLlmServer() { |
55 | | - server_->stop(); |
56 | | - if (thread_.joinable()) { |
57 | | - thread_.join(); |
58 | | - } |
59 | | - } |
60 | | - |
61 | | - // Non-copyable, non-movable |
62 | | - MockLlmServer(const MockLlmServer&) = delete; |
63 | | - MockLlmServer& operator=(const MockLlmServer&) = delete; |
64 | | - |
65 | | - /// The port the server is listening on. |
66 | | - int port() const { return port_; } |
67 | | - |
68 | | - /// Base URL suitable for AgentConfig::baseUrl (without /api/v1 — LemonadeClient adds it). |
69 | | - std::string baseUrl() const { return "http://127.0.0.1:" + std::to_string(port_); } |
70 | | - |
71 | | - /// Push a response to return for the next POST /chat/completions call. |
72 | | - /// When the queue is empty the default answer response is returned. |
73 | | - void pushResponse(const std::string& body) { |
74 | | - std::lock_guard<std::mutex> lk(mu_); |
75 | | - responseQueue_.push_back(body); |
76 | | - } |
77 | | - |
78 | | - /// Push N copies of a response. |
79 | | - void pushResponses(const std::string& body, int n) { |
80 | | - std::lock_guard<std::mutex> lk(mu_); |
81 | | - for (int i = 0; i < n; ++i) { |
82 | | - responseQueue_.push_back(body); |
83 | | - } |
84 | | - } |
85 | | - |
86 | | - /// Clear pending queued responses. |
87 | | - void clearQueue() { |
88 | | - std::lock_guard<std::mutex> lk(mu_); |
89 | | - responseQueue_.clear(); |
90 | | - } |
91 | | - |
92 | | - /// Number of chat completion requests handled so far. |
93 | | - int requestCount() const { return requestCount_.load(); } |
94 | | - |
95 | | -private: |
96 | | - void registerHandlers() { |
97 | | - // Health check — always reports mock-model loaded |
98 | | - server_->Get("/api/v1/health", [](const httplib::Request&, httplib::Response& res) { |
99 | | - res.set_content(kHealthOk, "application/json"); |
100 | | - }); |
101 | | - |
102 | | - // Load model — no-op safety fallback |
103 | | - server_->Post("/api/v1/load", [](const httplib::Request&, httplib::Response& res) { |
104 | | - res.set_content(kLoadOk, "application/json"); |
105 | | - }); |
106 | | - |
107 | | - // Models list |
108 | | - server_->Get("/api/v1/models", [](const httplib::Request&, httplib::Response& res) { |
109 | | - res.set_content(kModelsList, "application/json"); |
110 | | - }); |
111 | | - |
112 | | - // Chat completions — dequeue a pre-loaded response or return default answer |
113 | | - server_->Post("/api/v1/chat/completions", |
114 | | - [this](const httplib::Request&, httplib::Response& res) { |
115 | | - ++requestCount_; |
116 | | - std::string body; |
117 | | - { |
118 | | - std::lock_guard<std::mutex> lk(mu_); |
119 | | - if (!responseQueue_.empty()) { |
120 | | - body = responseQueue_.front(); |
121 | | - responseQueue_.pop_front(); |
122 | | - } else { |
123 | | - body = kDefaultAnswer; |
124 | | - } |
125 | | - } |
126 | | - res.set_content(body, "application/json"); |
127 | | - }); |
128 | | - } |
129 | | - |
130 | | - void waitUntilReady() { |
131 | | - // Poll health endpoint until the server responds |
132 | | - httplib::Client cli("127.0.0.1", port_); |
133 | | - cli.set_connection_timeout(1); |
134 | | - cli.set_read_timeout(1); |
135 | | - |
136 | | - for (int attempt = 0; attempt < 50; ++attempt) { |
137 | | - auto res = cli.Get("/api/v1/health"); |
138 | | - if (res && res->status == 200) { |
139 | | - return; |
140 | | - } |
141 | | - std::this_thread::sleep_for(std::chrono::milliseconds(20)); |
142 | | - } |
143 | | - throw std::runtime_error("MockLlmServer: server did not become ready"); |
144 | | - } |
145 | | - |
146 | | - std::unique_ptr<httplib::Server> server_; |
147 | | - std::thread thread_; |
148 | | - int port_ = 0; |
149 | | - std::mutex mu_; |
150 | | - std::deque<std::string> responseQueue_; |
151 | | - std::atomic<int> requestCount_{0}; |
152 | | -}; |
153 | | - |
154 | | -} // namespace bench |
| 10 | +#include "../tests/support/mock_llm_server.h" |
0 commit comments