
Commit 963def7

Authored by Satyam Kumar (satyamk7054)
Move lora request validation to tokenizer_manager from server (#18962)
Co-authored-by: Satyam Kumar <satyamk@linkedin.com>
1 parent d07e8aa commit 963def7
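
This change removes the per-endpoint _validate_lora_enabled check from the OpenAI serving layer and routes every request through a new TokenizerManager._validate_and_resolve_lora helper instead: any request carrying a lora_path on a server launched without --enable-lora now fails fast with the same actionable ValueError, including entry points that bypass the OpenAI routes, such as Engine.encode (covered by the new embedding test below).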

7 files changed: +40 additions, -114 deletions

python/sglang/srt/entrypoints/openai/serving_base.py

Lines changed: 0 additions & 13 deletions
@@ -70,19 +70,6 @@ def _resolve_lora_path(
         # Fall back to explicit lora_path
         return explicit_lora_path

-    def _validate_lora_enabled(self, adapter_name: str) -> None:
-        """Check that LoRA is enabled before attempting to use an adapter.
-
-        Raises ValueError with actionable guidance if --enable-lora flag is missing.
-        Adapter existence is validated later by TokenizerManager.lora_registry.
-        """
-        if not self.tokenizer_manager.server_args.enable_lora:
-            raise ValueError(
-                f"LoRA adapter '{adapter_name}' was requested, but LoRA is not enabled. "
-                "Please launch the server with --enable-lora flag and preload adapters "
-                "using --lora-paths or /load_lora_adapter endpoint."
-            )
-
     async def handle_request(
         self, request: OpenAIServingRequest, raw_request: Request
     ) -> Union[Any, StreamingResponse, ErrorResponse]:

python/sglang/srt/entrypoints/openai/serving_chat.py

Lines changed: 0 additions & 9 deletions
@@ -277,15 +277,6 @@ def _convert_to_internal_request(

         # Resolve LoRA adapter from model parameter or explicit lora_path
         lora_path = self._resolve_lora_path(request.model, request.lora_path)
-        if lora_path:
-            first_adapter = (
-                lora_path
-                if isinstance(lora_path, str)
-                else next((a for a in lora_path if a), None)
-            )
-            if first_adapter:
-                self._validate_lora_enabled(first_adapter)
-
         img_max_dynamic_patch, vid_max_dynamic_patch = _extract_max_dynamic_patch(
             request
         )

python/sglang/srt/entrypoints/openai/serving_completions.py

Lines changed: 0 additions & 8 deletions
@@ -98,14 +98,6 @@ def _convert_to_internal_request(

         # Resolve LoRA adapter from model parameter or explicit lora_path
         lora_path = self._resolve_lora_path(request.model, request.lora_path)
-        if lora_path:
-            first_adapter = (
-                lora_path
-                if isinstance(lora_path, str)
-                else next((a for a in lora_path if a), None)
-            )
-            if first_adapter:
-                self._validate_lora_enabled(first_adapter)

         adapted_request = GenerateReqInput(
             **prompt_kwargs,

python/sglang/srt/entrypoints/openai/serving_embedding.py

Lines changed: 0 additions & 8 deletions
@@ -128,14 +128,6 @@ def _convert_to_internal_request(

         # Resolve LoRA adapter from model parameter or explicit lora_path
         lora_path = self._resolve_lora_path(request.model, request.lora_path)
-        if lora_path:
-            first_adapter = (
-                lora_path
-                if isinstance(lora_path, str)
-                else next((a for a in lora_path if a), None)
-            )
-            if first_adapter:
-                self._validate_lora_enabled(first_adapter)

         adapted_request = EmbeddingReqInput(
             **prompt_kwargs,
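
The three deleted call sites above share one idiom for picking a representative adapter name out of a possibly-batched lora_path, and the new tokenizer_manager helper below reuses it. A minimal standalone sketch of its semantics (the function name here is illustrative, not part of the codebase):

from typing import List, Optional, Union

def first_adapter(lora_path: Union[str, List[Optional[str]]]) -> Optional[str]:
    # A plain string already names the adapter; for a batch list, take the
    # first truthy entry (entries are None for rows targeting the base model).
    if isinstance(lora_path, str):
        return lora_path
    return next((a for a in lora_path if a), None)

assert first_adapter("sql-expert") == "sql-expert"
assert first_adapter([None, "adapter-b", "adapter-c"]) == "adapter-b"
assert first_adapter([None, None]) is None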

python/sglang/srt/managers/tokenizer_manager.py

Lines changed: 22 additions & 2 deletions
@@ -511,8 +511,7 @@ async def generate_request(
             await self.is_pause_cond.wait_for(lambda: not self.is_pause)

         async with self.model_update_lock.reader_lock:
-            if self.server_args.enable_lora and obj.lora_path:
-                await self._resolve_lora_path(obj)
+            await self._validate_and_resolve_lora(obj)

             # Tokenize the request and send it to the scheduler
             if obj.is_single:
@@ -2213,6 +2212,27 @@ def _handle_update_weights_from_disk_req_output(self, recv_obj):
             if len(self.model_update_tmp) == self.server_args.dp_size:
                 self.model_update_result.set_result(self.model_update_tmp)

+    async def _validate_and_resolve_lora(
+        self, obj: Union[GenerateReqInput, EmbeddingReqInput]
+    ) -> None:
+        if not obj.lora_path:
+            return
+
+        if not self.server_args.enable_lora:
+            first_adapter = (
+                obj.lora_path
+                if isinstance(obj.lora_path, str)
+                else next((a for a in obj.lora_path if a), None)
+            )
+
+            raise ValueError(
+                f"LoRA adapter '{first_adapter}' was requested, but LoRA is not enabled. "
+                "Please launch the server with --enable-lora flag and preload adapters "
+                "using --lora-paths or /load_lora_adapter endpoint."
+            )
+
+        await self._resolve_lora_path(obj)
+
     async def _resolve_lora_path(self, obj: Union[GenerateReqInput, EmbeddingReqInput]):
         if isinstance(obj.lora_path, str):
             unique_lora_paths = set([obj.lora_path])
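
A behavioral note on the hunk above: generate_request now calls the helper unconditionally instead of gating on enable_lora and obj.lora_path, so a lora_path sent to a server launched without --enable-lora fails fast with the ValueError for every caller that funnels through TokenizerManager, not only the OpenAI routes. A minimal sketch of the offline flow that now trips the check, assuming an Engine constructed without LoRA (the model path and adapter name are placeholders, and exact Engine keyword arguments may vary by version):

import sglang as sgl

# No enable_lora here, mirroring the setup in the new embedding test.
engine = sgl.Engine(model_path="meta-llama/Llama-3.1-8B")

try:
    engine.generate(prompt="Hello", lora_path="fake-adapter")
except ValueError as e:
    # Expected: "LoRA adapter 'fake-adapter' was requested, but LoRA is not enabled. ..."
    print(e)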

test/registered/lora/test_embedding_lora_support.py

Lines changed: 18 additions & 0 deletions
@@ -46,6 +46,24 @@
 class TestEmbeddingLoraSupport(unittest.TestCase):
     """Test LoRA support in embedding request structures."""

+    def test_engine_encode_validates_enable_lora(self):
+        """Test Engine.encode() validates enable_lora before processing lora_path."""
+        # Use a simple non-gated model for this validation test
+        with SRTRunner(
+            MODEL_PATH,
+            torch_dtype=torch.float16,
+            model_type="embedding",
+            port=DEFAULT_PORT_FOR_SRT_TEST_RUNNER,
+        ) as runner:
+            # Should raise ValueError because enable_lora was not set for the server
+            with self.assertRaises(ValueError) as context:
+                runner.engine.encode(prompt="Test", lora_path="fake-adapter")
+
+            error_msg = str(context.exception)
+            self.assertIn("not enabled", error_msg.lower())
+            self.assertIn("--enable-lora", error_msg)
+            self.assertIn("fake-adapter", error_msg)
+
     def test_embedding_lora_fields(self):
         """Test LoRA fields exist and work correctly across all embedding structures."""
         # EmbeddingReqInput: fields exist, normalization expands single to batch, indexing works
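
The happy path complementing this test is the one the error message prescribes: launch with --enable-lora and preload adapters via --lora-paths (or the /load_lora_adapter endpoint). A hedged offline equivalent, where the adapter name and path are placeholders and the lora_paths keyword shape is an assumption about how Engine forwards server args:

import sglang as sgl

# enable_lora plus a preloaded adapter; name and path are placeholders.
engine = sgl.Engine(
    model_path="meta-llama/Llama-3.1-8B",
    enable_lora=True,
    lora_paths={"sql-expert": "/path/to/sql-expert-adapter"},
)

# With LoRA enabled, _validate_and_resolve_lora falls through to
# _resolve_lora_path and the adapter is resolved by name.
out = engine.generate(prompt="SELECT", lora_path="sql-expert")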

test/registered/lora/test_lora_openai_api.py

Lines changed: 0 additions & 74 deletions
@@ -142,44 +142,6 @@ def test_complex_model_name_with_adapter(self):
         self.assertEqual(result, "adapter-name")


-class TestValidateLoraEnabled(unittest.TestCase):
-    """Test _validate_lora_enabled method."""
-
-    def test_validation_passes_when_lora_enabled(self):
-        """Test validation passes when LoRA is enabled."""
-        tokenizer_manager = MockTokenizerManager(enable_lora=True)
-        serving = ConcreteServingBase(tokenizer_manager)
-
-        # Should not raise
-        try:
-            serving._validate_lora_enabled("sql-expert")
-        except ValueError:
-            self.fail("_validate_lora_enabled raised ValueError unexpectedly")
-
-    def test_validation_fails_when_lora_disabled(self):
-        """Test validation fails with helpful message when LoRA is disabled."""
-        tokenizer_manager = MockTokenizerManager(enable_lora=False)
-        serving = ConcreteServingBase(tokenizer_manager)
-
-        with self.assertRaises(ValueError) as context:
-            serving._validate_lora_enabled("sql-expert")
-
-        error_message = str(context.exception)
-        self.assertIn("sql-expert", error_message)
-        self.assertIn("--enable-lora", error_message)
-        self.assertIn("not enabled", error_message)
-
-    def test_validation_error_mentions_adapter_name(self):
-        """Test that error message includes the requested adapter name."""
-        tokenizer_manager = MockTokenizerManager(enable_lora=False)
-        serving = ConcreteServingBase(tokenizer_manager)
-
-        with self.assertRaises(ValueError) as context:
-            serving._validate_lora_enabled("my-custom-adapter")
-
-        self.assertIn("my-custom-adapter", str(context.exception))
-
-
 class TestIntegrationScenarios(unittest.TestCase):
     """Integration tests for common usage scenarios."""
@@ -196,9 +158,6 @@ def test_openai_compatible_usage(self):
         lora_path = self.serving._resolve_lora_path(model, explicit_lora)
         self.assertEqual(lora_path, "sql-expert")

-        # Validation should pass
-        self.serving._validate_lora_enabled(lora_path)
-
     def test_backward_compatible_usage(self):
         """Test backward-compatible usage with explicit lora_path."""
         model = "meta-llama/Llama-3.1-8B"
@@ -207,9 +166,6 @@ def test_backward_compatible_usage(self):
         lora_path = self.serving._resolve_lora_path(model, explicit_lora)
         self.assertEqual(lora_path, "sql-expert")

-        # Validation should pass
-        self.serving._validate_lora_enabled(lora_path)
-
     def test_base_model_usage(self):
         """Test using base model without any adapter."""
         model = "meta-llama/Llama-3.1-8B"
@@ -228,10 +184,6 @@ def test_batch_request_scenario(self):
         lora_path = self.serving._resolve_lora_path(model, explicit_lora)
         self.assertEqual(lora_path, explicit_lora)

-        # Validate first adapter in list
-        if isinstance(lora_path, list) and lora_path[0]:
-            self.serving._validate_lora_enabled(lora_path[0])
-
     def test_adapter_in_model_overrides_batch_list(self):
         """Test that adapter in model parameter overrides batch list."""
         model = "meta-llama/Llama-3.1-8B:preferred-adapter"
@@ -240,24 +192,6 @@ def test_adapter_in_model_overrides_batch_list(self):
         lora_path = self.serving._resolve_lora_path(model, explicit_lora)
         self.assertEqual(lora_path, "preferred-adapter")

-    def test_error_when_lora_not_enabled(self):
-        """Test comprehensive error flow when LoRA is not enabled."""
-        # Setup server without LoRA enabled
-        tokenizer_manager = MockTokenizerManager(enable_lora=False)
-        serving = ConcreteServingBase(tokenizer_manager)
-
-        # User tries to use adapter
-        model = "meta-llama/Llama-3.1-8B:sql-expert"
-        lora_path = serving._resolve_lora_path(model, None)
-
-        # Should get helpful error
-        with self.assertRaises(ValueError) as context:
-            serving._validate_lora_enabled(lora_path)
-
-        error = str(context.exception)
-        self.assertIn("--enable-lora", error)
-        self.assertIn("sql-expert", error)
-

 class TestEdgeCases(unittest.TestCase):
     """Test edge cases and error conditions."""
@@ -318,14 +252,6 @@ def test_empty_string_as_explicit_lora_path(self):
         result = self.serving._resolve_lora_path("model-name", "")
         self.assertEqual(result, "")

-    def test_validation_with_empty_adapter_name(self):
-        """Test validation with empty adapter name still raises error."""
-        tokenizer_manager = MockTokenizerManager(enable_lora=False)
-        serving = ConcreteServingBase(tokenizer_manager)
-
-        with self.assertRaises(ValueError):
-            serving._validate_lora_enabled("")
-

 if __name__ == "__main__":
     unittest.main()
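
The surviving integration tests pin down _resolve_lora_path's precedence rule: an adapter suffix on the model parameter ("base:adapter") wins over an explicit lora_path, and with no suffix the explicit value (even an empty string) passes through. A behavior-only sketch of that rule, reconstructed from the assertions above rather than from the real implementation:

from typing import List, Optional, Union

LoraPath = Optional[Union[str, List[Optional[str]]]]

def resolve_lora_path_sketch(model: str, explicit_lora_path: LoraPath) -> LoraPath:
    # "meta-llama/Llama-3.1-8B:sql-expert" -> "sql-expert"; the suffix overrides
    # any explicit lora_path, per test_adapter_in_model_overrides_batch_list.
    base_and_adapter = model.rsplit(":", 1)
    if len(base_and_adapter) == 2:
        return base_and_adapter[1]
    return explicit_lora_path

assert resolve_lora_path_sketch("meta-llama/Llama-3.1-8B:sql-expert", None) == "sql-expert"
assert resolve_lora_path_sketch(
    "meta-llama/Llama-3.1-8B:preferred-adapter", ["other", None]
) == "preferred-adapter"
assert resolve_lora_path_sketch("meta-llama/Llama-3.1-8B", "sql-expert") == "sql-expert"
assert resolve_lora_path_sketch("model-name", "") == ""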
