remove autoscale and use multiplexed_model_id

nilsmechtel · nilsmechtel · commit e05c0b23796e · 2025-01-28T13:10:36.000+01:00
diff --git a/bioimageio_colab/__main__.py b/bioimageio_colab/__main__.py
@@ -35,6 +35,12 @@
         default=None,
         help="Address of the Ray cluster for running SAM",
     )
+    parser.add_argument(
+        "--num_replicas",
+        default=2,
+        type=int,
+        help="Number of replicas for the SAM deployment",
+    )
     parser.add_argument(
         "--restart_deployment",
         default=False,
@@ -49,7 +55,7 @@
     )
     parser.add_argument(
         "--max_concurrent_requests",
-        default=10,
+        default=4,
         type=int,
         help="Maximum number of concurrent requests to the service",
     )
diff --git a/bioimageio_colab/models/sam_deployment.py b/bioimageio_colab/models/sam_deployment.py
@@ -58,6 +58,9 @@ async def get_model(self, model_id: str):
             model_architecture=self.models[model_id]["architecture"],
         )
 
-    async def __call__(self, model_id: str, array: np.ndarray):
+    async def __call__(self, array: np.ndarray) -> np.ndarray:
+        # Get the model from the request
+        model_id = serve.get_multiplexed_model_id()
         model = await self.get_model(model_id)
+        
         return model.encode(array)
diff --git a/bioimageio_colab/register_sam_service.py b/bioimageio_colab/register_sam_service.py
@@ -9,6 +9,7 @@
 from dotenv import find_dotenv, load_dotenv
 from hypha_rpc import connect_to_server
 from hypha_rpc.rpc import RemoteService
+
 # from kaibu_utils import mask_to_features
 from ray.serve.config import AutoscalingConfig
 from tifffile import imread
@@ -73,8 +74,8 @@ def connect_to_ray(address: str = None) -> None:
 async def deploy_to_ray(
     cache_dir: str,
     app_name: str = "SAM Image Encoder",
-    min_replicas: int = 1,
-    max_replicas: int = 2,
+    num_replicas: int = 2,
+    max_queued_requests: int = 10,
     restart_deployment: bool = False,
     skip_test_runs: bool = False,
 ) -> None:
@@ -86,16 +87,15 @@ async def deploy_to_ray(
     Returns:
         dict: Handles to the deployed image encoders.
     """
-    logger.info(f"Deploying the app '{app_name}' with {min_replicas} to {max_replicas} replicas on Ray Serve...")
-    # Set autoscaling configuration
-    autoscaling_config = AutoscalingConfig(
-        min_replicas=min_replicas,
-        max_replicas=max_replicas,
+    logger.info(
+        f"Deploying the app '{app_name}' with {num_replicas} replicas on Ray Serve..."
     )
 
     # Create a new Ray Serve deployment
     deployment = SamDeployment.options(
-        autoscaling_config=autoscaling_config,
+        num_replicas=num_replicas,
+        max_replicas_per_node=1,
+        max_queued_requests=max_queued_requests,
     )
 
     # Bind the arguments to the deployment and return an Application.
@@ -113,22 +113,28 @@ async def deploy_to_ray(
     # Deploy the application
     ray.serve.run(app, name=app_name, route_prefix=None)
 
+    # Get the application handle
+    app_handle = ray.serve.get_app_handle(name=app_name)
+    if not app_handle:
+        raise ConnectionError("Failed to get the application handle.")
+
     if app_name in applications:
         logger.info(f"Updated application deployment '{app_name}'.")
     else:
         logger.info(f"Deployed application '{app_name}'.")
-    
+
     if skip_test_runs:
         logger.info("Skipping test runs for each model.")
     else:
         # Test run each model
-        handle = ray.serve.get_app_handle(name=app_name)
         img_file = os.path.join(BASE_DIR, "data/example_image.tif")
         image = imread(img_file)
         for model_id in SAM_MODELS.keys():
-            await handle.remote(model_id=model_id, array=image)
+            await app_handle.options(multiplexed_model_id=model_id).remote(image)
             logger.info(f"Test run for model '{model_id}' completed successfully.")
 
+    return app_handle
+
 
 def hello(context: dict = None) -> str:
     return "Welcome to the Interactive Segmentation service!"
@@ -139,8 +145,8 @@ def ping(context: dict = None) -> str:
 
 
 async def compute_image_embedding(
+    app_handle: ray.serve.handle.DeploymentHandle,
     semaphore: asyncio.Semaphore,
-    app_name: str,
     image: np.ndarray,
     model_id: str,
     require_login: bool = False,
@@ -162,8 +168,9 @@ async def compute_image_embedding(
 
             # Compute the embedding
             # Returns: {"features": embedding, "input_size": input_size}
-            handle = ray.serve.get_app_handle(name=app_name)
-            result = await handle.remote(model_id=model_id, array=image)
+            result = await app_handle.options(multiplexed_model_id=model_id).remote(
+                image
+            )
 
             logger.info(f"User '{user_id}' - Embedding computed successfully.")
 
@@ -232,19 +239,21 @@ async def check_readiness(client: RemoteService, service_id: str) -> dict:
     """
     Readiness probe for the SAM service.
     """
-    logger.info("Checking the readiness of the SAM service...")
-
-    services = await client.list_services()
-    service_found = False
-    for service in services:
-        if service["id"] == service_id:
-            service_found = True
-            break
-    assert service_found, f"Service with ID '{service_id}' not found."
+    try:
+        services = await client.list_services()
+        service_found = False
+        for service in services:
+            if service["id"] == service_id:
+                service_found = True
+                break
+        assert service_found, f"Service with ID '{service_id}' not found."
 
-    logger.info(f"Service with ID '{service_id}' is ready.")
+        logger.info(f"Service with ID '{service_id}' is ready.")
 
-    return {"status": "ready"}
+        return {"status": "ready"}
+    except Exception as e:
+        logger.error(f"Error during readiness probe: {e}")
+        raise e
 
 
 def format_time(last_deployed_time_s, tz: timezone = timezone.utc) -> str:
@@ -285,40 +294,41 @@ async def check_liveness(app_name: str, client: RemoteService, service_id: str)
     """
     Liveness probe for the SAM service.
     """
-    logger.info(f"Checking the liveness of the SAM service...")
-    output = {}
-
-    # Check the Ray Serve application status
-    serve_status = ray.serve.status()
-    application = serve_status.applications[app_name]
-    assert application.status == "RUNNING"
-    formatted_time = format_time(application.last_deployed_time_s)
-    output[f"application: {app_name}"] = {
-        "status": application.status.value,
-        "last_deployed_at": formatted_time["last_deployed_at"],
-        "duration_since": formatted_time["duration_since"],
-    }
-
-    for name, deployment in serve_status.applications[app_name].deployments.items():
-        assert deployment.status == "HEALTHY"
-        # assert deployment.replica_states == {"RUNNING": 1}
-        output[f"application: {app_name}"][f"deployment: {name}"] = {
-            "status": deployment.status.value,
-            # "replica_states": deployment.replica_states,
+    try:
+        output = {}
+
+        # Check the Ray Serve application status
+        serve_status = ray.serve.status()
+        application = serve_status.applications[app_name]
+        assert application.status == "RUNNING"
+        formatted_time = format_time(application.last_deployed_time_s)
+        output[f"application: {app_name}"] = {
+            "status": application.status.value,
+            "last_deployed_at": formatted_time["last_deployed_at"],
+            "duration_since": formatted_time["duration_since"],
         }
 
-    # Check if the service can be accessed
-    service = await client.get_service(service_id)
-    assert await service.ping() == "pong"
-
-    output["service"] = {
-        "status": "RUNNING",
-        "service_id": service_id,
-    }
-
-    logger.info(f"Service with ID '{service_id}' is live.")
+        for name, deployment in serve_status.applications[app_name].deployments.items():
+            assert deployment.status == "HEALTHY"
+            assert deployment.replica_states["RUNNING"] > 0
+            output[f"application: {app_name}"][f"deployment: {name}"] = {
+                "status": deployment.status.value,
+                "replica_states": deployment.replica_states,
+            }
+
+        # Check if the service can be accessed
+        service = await client.get_service(service_id)
+        assert await service.ping() == "pong"
+
+        output["service"] = {
+            "status": "RUNNING",
+            "service_id": service_id,
+        }
 
-    return output
+        return output
+    except Exception as e:
+        logger.error(f"Error during liveness probe: {e}")
+        raise e
 
 
 async def register_service(args: dict) -> None:
@@ -353,18 +363,24 @@ async def register_service(args: dict) -> None:
     # Deploy SAM image encoders
     cache_dir = os.path.abspath(args.cache_dir)
     app_name = "SAM Image Encoder"
-    await deploy_to_ray(
+    app_handle = await deploy_to_ray(
         cache_dir=cache_dir,
         app_name=app_name,
+        num_replicas=args.num_replicas,
+        max_queued_requests=args.max_concurrent_requests,
         restart_deployment=args.restart_deployment,
         skip_test_runs=args.skip_test_runs,
     )
 
     # Register a new service
     semaphore = asyncio.Semaphore(args.max_concurrent_requests)
-    logger.info(f"Created semaphore for {args.max_concurrent_requests} concurrent requests.")
+    logger.info(
+        f"Created semaphore for {args.max_concurrent_requests} concurrent requests."
+    )
 
-    logger.ingo(f"Registering the SAM service: ID='{args.service_id}', require_login={args.require_login}")
+    logger.info(
+        f"Registering the SAM service: ID='{args.service_id}', require_login={args.require_login}"
+    )
     service_info = await client.register_service(
         {
             "name": "Interactive Segmentation",
@@ -379,8 +395,8 @@ async def register_service(args: dict) -> None:
             "ping": ping,
             "compute_embedding": partial(
                 compute_image_embedding,
+                app_handle=app_handle,
                 semaphore=semaphore,
-                app_name=app_name,
                 require_login=args.require_login,
             ),
             # "compute_mask": partial(
diff --git a/pyproject.toml b/pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "bioimageio-colab"
-version = "0.2.2"
+version = "0.2.3"
 readme = "README.md"
 description = "Collaborative image annotation and model training with human in the loop."
 dependencies = [