Skip to content

Commit f5e68d8

Browse files
erictang000 and claude committed
[multi-lora-rl] Address review feedback: path safety + legacy lora_name
Two reviewer concerns: 1. Path traversal in os.path.join(base_sync_path, model_id). model_id is server-generated (api.py validates against ID_PATTERN), so this is defense in depth, but route through os.path.basename in both Megatron and FSDP workers so a misformed id can't escape lora_sync_path. Also add _cleanup_lora_sync_subdir on per-adapter delete_model so the per-tenant subdirs don't accumulate as adapters churn. 2. Legacy update_named_weights path didn't carry the adapter name — vllm_engine generated a numeric name from time.time_ns(), making the adapter inaccessible by tenant model_id. Add lora_name to LoraLoadRequest, plumb through both BaseVLLMInferenceEngine variants (sync + async _load_lora_from_disk), and pass lora_name from both worker files in the legacy branch. Empty string preserves the pre-existing single-tenant behavior. All 6 existing multi-LoRA tests still pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 468d231 commit f5e68d8

5 files changed

Lines changed: 67 additions & 14 deletions

File tree

skyrl/backends/skyrl_train/inference_engines/vllm/vllm_engine.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -308,10 +308,16 @@ async def init_weight_update_communicator(self, init_info: "WeightSyncInitInfo")
308308
args=(pickled_init_info,),
309309
)
310310

311-
async def _load_lora_from_disk(self, lora_path: str):
312-
"""Load LoRA adapters from disk using vLLM's native add_lora method."""
311+
async def _load_lora_from_disk(self, lora_path: str, lora_name: str = ""):
312+
"""Load LoRA adapters from disk using vLLM's native add_lora method.
313+
314+
When ``lora_name`` is empty (legacy single-tenant), a numeric name is
315+
generated. Multi-tenant callers pass ``lora_name`` so subsequent
316+
``model=<lora_name>`` sampling routes to the right adapter.
317+
"""
313318
lora_id = int(time.time_ns() % 0x7FFFFFFF)
314-
lora_request = LoRARequest(lora_name=f"{lora_id}", lora_int_id=lora_id, lora_path=lora_path)
319+
name = lora_name or f"{lora_id}"
320+
lora_request = LoRARequest(lora_name=name, lora_int_id=lora_id, lora_path=lora_path)
315321
result = self.llm.llm_engine.add_lora(lora_request)
316322
return result
317323

@@ -320,7 +326,7 @@ async def update_named_weights(self, request: WeightUpdateRequest):
320326

321327
# Handle LoRA disk loading request
322328
if isinstance(request, LoraLoadRequest):
323-
return await self._load_lora_from_disk(request.lora_path)
329+
return await self._load_lora_from_disk(request.lora_path, lora_name=request.lora_name)
324330

325331
if not len(request):
326332
raise ValueError("Weight update request must not be empty")
@@ -453,10 +459,16 @@ def _create_ray_prometheus_stat_loggers(self):
453459
)
454460
return None
455461

456-
async def _load_lora_from_disk(self, lora_path: str):
457-
"""Load LoRA adapters from disk using vLLM's native add_lora method."""
462+
async def _load_lora_from_disk(self, lora_path: str, lora_name: str = ""):
463+
"""Load LoRA adapters from disk using vLLM's native add_lora method.
464+
465+
When ``lora_name`` is empty (legacy single-tenant), a numeric name is
466+
generated. Multi-tenant callers pass ``lora_name`` so subsequent
467+
``model=<lora_name>`` sampling routes to the right adapter.
468+
"""
458469
lora_id = int(time.time_ns() % 0x7FFFFFFF)
459-
lora_request = LoRARequest(lora_name=f"{lora_id}", lora_int_id=lora_id, lora_path=lora_path)
470+
name = lora_name or f"{lora_id}"
471+
lora_request = LoRARequest(lora_name=name, lora_int_id=lora_id, lora_path=lora_path)
460472
result = await self.llm.add_lora(lora_request)
461473
return result
462474

@@ -539,7 +551,7 @@ async def update_named_weights(self, request: WeightUpdateRequest):
539551

540552
# Check for LoRA disk loading request
541553
if isinstance(request, LoraLoadRequest):
542-
return await self._load_lora_from_disk(request.lora_path)
554+
return await self._load_lora_from_disk(request.lora_path, lora_name=request.lora_name)
543555

544556
if not len(request):
545557
raise ValueError("Weight update request must not be empty")

skyrl/backends/skyrl_train/weight_sync/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,18 @@ class LoraLoadRequest(WeightUpdateRequest):
4747
from disk rather than transferring weights from training. Unlike other
4848
WeightUpdateRequest subclasses, this doesn't transfer weights - it tells
4949
the inference engine to load LoRA from a path.
50+
51+
``lora_name`` is the name vLLM should register the adapter under and is
52+
what callers later pass as ``model=<lora_name>`` when sampling. Empty
53+
string preserves the legacy single-tenant behavior where the engine
54+
generates a numeric name itself.
5055
"""
5156

5257
names: List[str] = field(default_factory=list)
5358
dtypes: List[str] = field(default_factory=list)
5459
shapes: List[List[int]] = field(default_factory=list)
5560
lora_path: str = ""
61+
lora_name: str = ""
5662

5763

5864
@dataclass

skyrl/backends/skyrl_train/workers/fsdp/fsdp_worker.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ async def _save_lora_adapters_and_sync(
273273
if isinstance(inference_engine_client, RemoteInferenceClient):
274274
await inference_engine_client.load_lora_adapter(lora_name, lora_sync_path)
275275
else:
276-
lora_request = LoraLoadRequest(lora_path=lora_sync_path)
276+
lora_request = LoraLoadRequest(lora_path=lora_sync_path, lora_name=lora_name)
277277
await inference_engine_client.update_named_weights(lora_request)
278278

279279
torch.distributed.barrier()
@@ -302,8 +302,12 @@ async def broadcast_to_inference_engines(
302302
# Multi-tenant: per-adapter subdir + per-adapter vLLM name. Single
303303
# tenant (model_id=None) keeps the legacy single-path behavior.
304304
base_sync_path = self.cfg.policy.model.lora.lora_sync_path
305-
lora_name = model_id if model_id is not None else SKYRL_LORA_ADAPTER_NAME
306-
lora_sync_path = os.path.join(base_sync_path, model_id) if model_id is not None else base_sync_path
305+
# Defense in depth: api.py validates model_id against ID_PATTERN,
306+
# but route everything through basename here so that even an
307+
# internally-misformed id can't escape lora_sync_path.
308+
safe_model_id = os.path.basename(model_id) if model_id is not None else None
309+
lora_name = safe_model_id if safe_model_id else SKYRL_LORA_ADAPTER_NAME
310+
lora_sync_path = os.path.join(base_sync_path, safe_model_id) if safe_model_id else base_sync_path
307311
await self._save_lora_adapters_and_sync(
308312
peft_model, lora_sync_path, inference_engine_client, lora_name=lora_name
309313
)

skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -898,7 +898,7 @@ async def _save_lora_adapters_and_sync(
898898
if isinstance(inference_engine_client, RemoteInferenceClient):
899899
await inference_engine_client.load_lora_adapter(lora_name, lora_sync_path)
900900
else:
901-
lora_request = LoraLoadRequest(lora_path=lora_sync_path)
901+
lora_request = LoraLoadRequest(lora_path=lora_sync_path, lora_name=lora_name)
902902
await inference_engine_client.update_named_weights(lora_request)
903903

904904
torch.distributed.barrier()
@@ -927,8 +927,12 @@ async def broadcast_to_inference_engines(
927927
# works: model_id is None, we fall back to the legacy single
928928
# adapter name + shared path.
929929
base_sync_path = self.cfg.policy.model.lora.lora_sync_path
930-
lora_name = model_id if model_id is not None else SKYRL_LORA_ADAPTER_NAME
931-
lora_sync_path = os.path.join(base_sync_path, model_id) if model_id is not None else base_sync_path
930+
# Defense in depth: api.py validates model_id against ID_PATTERN,
931+
# but route everything through basename here so that even an
932+
# internally-misformed id can't escape lora_sync_path.
933+
safe_model_id = os.path.basename(model_id) if model_id is not None else None
934+
lora_name = safe_model_id if safe_model_id else SKYRL_LORA_ADAPTER_NAME
935+
lora_sync_path = os.path.join(base_sync_path, safe_model_id) if safe_model_id else base_sync_path
932936
await self._save_lora_adapters_and_sync(lora_sync_path, inference_engine_client, lora_name=lora_name)
933937
else:
934938
# Extract and send weights using the sender created at init time

skyrl/backends/skyrl_train_backend.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import asyncio
44
import io
55
import os
6+
import shutil
67
import tarfile
78
import tempfile
89

@@ -459,6 +460,31 @@ def _create_colocate_pg(self):
459460

460461
return ResolvedPlacementGroup(pg)
461462

463+
def _cleanup_lora_sync_subdir(self, model_id: str) -> None:
464+
"""Remove the per-tenant lora_sync_path subdir written by the worker.
465+
466+
The Megatron / FSDP workers write each adapter's safetensors into
467+
``lora_sync_path/<basename(model_id)>/`` on every save_weights_for_sampler.
468+
Without cleanup these subdirs accumulate as adapters churn. Mirror the
469+
worker's path construction (incl. basename sanitization) to avoid
470+
deleting anything outside the configured base.
471+
"""
472+
try:
473+
base = self._cfg.policy.model.lora.lora_sync_path
474+
except AttributeError:
475+
return
476+
if not base:
477+
return
478+
safe_id = os.path.basename(model_id)
479+
if not safe_id:
480+
return
481+
subdir = os.path.join(base, safe_id)
482+
try:
483+
shutil.rmtree(subdir, ignore_errors=True)
484+
except OSError as e:
485+
# Best-effort cleanup — log but don't propagate.
486+
logger.warning(f"Failed to remove lora_sync subdir {subdir}: {e}")
487+
462488
def delete_model(self, model_id: str) -> None:
463489
role = self._get_role(model_id)
464490

@@ -469,6 +495,7 @@ def delete_model(self, model_id: str) -> None:
469495
if len(self._model_ids_to_role) > 1:
470496
if role == "policy" and self._base_lora_signature is not None:
471497
self._dispatch.delete_adapter("policy", model_id)
498+
self._cleanup_lora_sync_subdir(model_id)
472499
del self._model_ids_to_role[model_id]
473500
self._model_metadata.pop(model_id, None)
474501
logger.info(f"Removed LoRA adapter '{model_id}'")

0 commit comments

Comments (0)