Skip to content

Commit 421b113

Browse files
authored
allow api server to be terminated through requests from clients (#3533)
* allow terminate * fix engine role for turbomind
1 parent 6fc2d54 commit 421b113

File tree

4 files changed

+89
-10
lines changed

4 files changed

+89
-10
lines changed

lmdeploy/cli/serve.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ def add_parser_api_server():
133133
ArgumentHelper.model_name(parser)
134134
ArgumentHelper.max_log_len(parser)
135135
ArgumentHelper.disable_fastapi_docs(parser)
136-
136+
ArgumentHelper.allow_terminate_by_client(parser)
137137
# chat template args
138138
ArgumentHelper.chat_template(parser)
139139

@@ -361,6 +361,7 @@ def api_server(args):
361361
allow_credentials=args.allow_credentials,
362362
allow_methods=args.allow_methods,
363363
allow_headers=args.allow_headers,
364+
allow_terminate_by_client=args.allow_terminate_by_client,
364365
log_level=args.log_level.upper(),
365366
api_keys=args.api_keys,
366367
ssl=args.ssl,

lmdeploy/cli/utils.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,15 @@ def tool_call_parser(parser):
406406
default=None,
407407
help=f'The registered tool parser name {ToolParserManager.module_dict.keys()}. Default to None.')
408408

409+
@staticmethod
410+
def allow_terminate_by_client(parser):
411+
"""Add argument allow_terminate_by_client to parser."""
412+
413+
return parser.add_argument('--allow-terminate-by-client',
414+
action='store_true',
415+
default=False,
416+
help='Enable server to be terminated by request from client')
417+
409418
@staticmethod
410419
def cache_max_entry_count(parser):
411420
"""Add argument cache_max_entry_count to parser."""

lmdeploy/serve/openai/api_server.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ class VariableInterface:
5454
reasoning_parser: Optional[ReasoningParser] = None
5555
# following is for tool parsers
5656
tool_parser: Optional[ToolParser] = None
57+
allow_terminate_by_client: bool = False
5758

5859

5960
router = APIRouter()
@@ -234,6 +235,19 @@ async def health() -> Response:
234235
return Response(status_code=200)
235236

236237

238+
@router.get('/terminate')
239+
async def terminate():
240+
"""terminate server."""
241+
import signal
242+
243+
if not VariableInterface.allow_terminate_by_client:
244+
return create_error_response(
245+
HTTPStatus.BAD_REQUEST,
246+
'The server can not be terminated. Please add --allow-terminate-by-client when start the server.')
247+
os.kill(os.getpid(), signal.SIGTERM)
248+
return Response(status_code=200)
249+
250+
237251
# modified from https://github.com/vllm-project/vllm/blob/v0.5.4/vllm/entrypoints/openai/logits_processors.py#L51 # noqa
238252
def logit_bias_logits_processor(logit_bias: Union[Dict[int, float], Dict[str, float]], tokenizer) -> LogitsProcessor:
239253
try:
@@ -1056,21 +1070,16 @@ async def startup_event():
10561070
try:
10571071
import requests
10581072
engine_config = VariableInterface.async_engine.engine.engine_config
1073+
engine_role = engine_config.role.value if hasattr(engine_config, 'role') else 1
10591074
url = f'{VariableInterface.proxy_url}/nodes/add'
1060-
data = {
1061-
'url': VariableInterface.api_server_url,
1062-
'status': {
1063-
'models': get_model_list(),
1064-
'role': engine_config.role.value
1065-
}
1066-
}
1075+
data = {'url': VariableInterface.api_server_url, 'status': {'models': get_model_list(), 'role': engine_role}}
10671076
headers = {'accept': 'application/json', 'Content-Type': 'application/json'}
10681077
response = requests.post(url, headers=headers, json=data)
10691078

10701079
if response.status_code != 200:
10711080
raise HTTPException(status_code=400, detail='Service registration failed')
10721081
except Exception as e:
1073-
print(f'Service registration failed: {e}')
1082+
logger.error(f'Service registration failed: {e}')
10741083

10751084

10761085
class ConcurrencyLimitMiddleware(BaseHTTPMiddleware):
@@ -1127,6 +1136,7 @@ def serve(model_path: str,
11271136
max_concurrent_requests: Optional[int] = None,
11281137
reasoning_parser: Optional[str] = None,
11291138
tool_call_parser: Optional[str] = None,
1139+
allow_terminate_by_client: bool = False,
11301140
**kwargs):
11311141
"""An example to perform model inference through the command line
11321142
interface.
@@ -1178,6 +1188,7 @@ def serve(model_path: str,
11781188
clients concurrently during that time. Default to None.
11791189
reasoning_parser (str): The reasoning parser name.
11801190
tool_call_parser (str): The tool call parser name.
1191+
allow_terminate_by_client (bool): Allow request from client to terminate server.
11811192
"""
11821193
if os.getenv('TM_LOG_LEVEL') is None:
11831194
os.environ['TM_LOG_LEVEL'] = log_level
@@ -1207,6 +1218,7 @@ def serve(model_path: str,
12071218
if max_concurrent_requests is not None:
12081219
app.add_middleware(ConcurrencyLimitMiddleware, max_concurrent_requests=max_concurrent_requests)
12091220

1221+
VariableInterface.allow_terminate_by_client = allow_terminate_by_client
12101222
if api_keys is not None:
12111223
if isinstance(api_keys, str):
12121224
api_keys = api_keys.split(',')

lmdeploy/serve/proxy/proxy.py

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,35 @@ def remove(self, node_url: str):
179179
for conn in dropped_conn:
180180
self.pd_connection_pool.drop(*conn)
181181

182+
def terminate_node(self, node_url: str):
183+
"""terminate a node."""
184+
success = True
185+
if node_url in self.nodes:
186+
self.nodes.pop(node_url)
187+
headers = {'accept': 'application/json'}
188+
try:
189+
response = requests.get(f'{node_url}/terminate', headers=headers)
190+
if response.status_code != 200:
191+
success = False
192+
logger.error(f'Failed to terminate node {node_url}, '
193+
f'error_code={response.status_code}, '
194+
f'error_msg={response.text}')
195+
except: # noqa
196+
success = False
197+
else:
198+
success = False
199+
self.update_config_file()
200+
return success
201+
202+
def terminate_all_nodes(self):
203+
"""terminate all nodes."""
204+
node_url_li = list(self.nodes.keys())
205+
all_success = True
206+
for node_url in node_url_li:
207+
if not self.terminate_node(node_url):
208+
all_success = False
209+
return all_success
210+
182211
def remove_stale_nodes_by_expiration(self):
183212
"""remove stale nodes."""
184213
to_be_deleted = []
@@ -433,9 +462,10 @@ def add_node(node: Node, raw_request: Request = None):
433462

434463

435464
@app.post('/nodes/remove', dependencies=[Depends(check_api_key)])
436-
def remove_node(node_url: str):
465+
def remove_node(node: Node):
437466
"""Show available models."""
438467
try:
468+
node_url = node.url
439469
node_manager.remove(node_url)
440470
logger.info(f'delete node {node_url} successfully')
441471
return 'Deleted successfully'
@@ -444,6 +474,33 @@ def remove_node(node_url: str):
444474
return 'Failed to delete, please check the input url.'
445475

446476

477+
@app.post('/nodes/terminate', dependencies=[Depends(check_api_key)])
478+
def terminate_node(node: Node):
479+
"""terminate nodes."""
480+
try:
481+
node_url = node.url
482+
success = node_manager.terminate_node(node_url)
483+
if not success:
484+
return f'Failed to terminate node {node_url}'
485+
return 'Terminated successfully'
486+
except: # noqa
487+
logger.error(f'Terminate node {node_url} failed.')
488+
return 'Failed to terminate node {node_url}, please check the input url.'
489+
490+
491+
@app.get('/nodes/terminate_all', dependencies=[Depends(check_api_key)])
492+
def terminate_node_all():
493+
"""terminate nodes."""
494+
try:
495+
success = node_manager.terminate_all_nodes()
496+
if not success:
497+
return 'Failed to terminate all nodes'
498+
return 'All nodes terminated successfully'
499+
except: # noqa
500+
logger.error('Failed to terminate all nodes')
501+
return 'Failed to terminate all nodes.'
502+
503+
447504
@app.post('/distserve/connection_warmup')
448505
async def connection_warmup():
449506
await asyncio.gather(*[

0 commit comments

Comments
 (0)