Strategy methods act on copies of models if needed

JasonKChow · JasonKChow · commit 61f22b0dd07b · 2025-02-10T01:46:18.000-06:00
Summary: To support a multi-client server, strategy methods will now act on copies of models (when `copy_model` is enabled) to avoid changing tensor gradients between two threads.

Test Plan: New test
diff --git a/aepsych/server/server.py b/aepsych/server/server.py
@@ -53,6 +53,7 @@ def __init__(
         self.host = host
         self.port = port
         self.max_workers = max_workers
+        self.clients_connected = 0
         self.db: db.Database = db.Database(database_path)
         self.is_performing_replay = False
         self.exit_server_loop = False
@@ -323,6 +324,7 @@ async def handle_client(self, reader, writer):
         """
         addr = writer.get_extra_info("peername")
         logger.info(f"Connected to {addr}")
+        self.clients_connected += 1
 
         try:
             while True:
@@ -361,6 +363,7 @@ async def handle_client(self, reader, writer):
             logger.info(f"Connection closed for {addr}")
             writer.close()
             await writer.wait_closed()
+            self.clients_connected -= 1
 
     def handle_request(self, message: Dict[str, Any]) -> Union[Dict[str, Any], str]:
         """Given a message, dispatch the correct handler and return the result.
diff --git a/aepsych/strategy/strategy.py b/aepsych/strategy/strategy.py
@@ -8,6 +8,7 @@
 from __future__ import annotations
 
 import warnings
+from copy import deepcopy
 from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
 
 import numpy as np
@@ -56,6 +57,7 @@ def __init__(
         name: str = "",
         run_indefinitely: bool = False,
         transforms: ChainedInputTransform = ChainedInputTransform(**{}),
+        copy_model: bool = False,
     ) -> None:
         """Initialize the strategy object.
 
@@ -90,6 +92,9 @@ def __init__(
                 should be defined in raw parameter space for initialization. However,
                 if the lb/ub attribute are access from an initialized Strategy object,
                 it will be returned in transformed space.
+            copy_model (bool): Whether to do any model-related methods on a
+                copy or the original. Used for multi-client strategies. Defaults
+                to False.
         """
         self.is_finished = False
 
@@ -160,6 +165,7 @@ def __init__(
         self.min_total_outcome_occurrences = min_total_outcome_occurrences
         self.max_asks = max_asks or generator.max_asks
         self.keep_most_recent = keep_most_recent
+        self.copy_model = copy_model
 
         self.transforms = transforms
         if self.transforms is not None:
@@ -267,7 +273,8 @@ def gen(self, num_points: int = 1, **kwargs) -> torch.Tensor:
             self.model.to(self.generator_device)  # type: ignore
 
         self._count = self._count + num_points
-        points = self.generator.gen(num_points, self.model, **kwargs)
+        model = deepcopy(self.model) if self.copy_model else self.model
+        points = self.generator.gen(num_points, model, **kwargs)
 
         if original_device is not None:
             self.model.to(original_device)  # type: ignore
@@ -295,9 +302,9 @@ def get_max(
             self.model is not None
         ), "model is None! Cannot get the max without a model!"
         self.model.to(self.model_device)
-
+        model = deepcopy(self.model) if self.copy_model else self.model
         val, arg = get_max(
-            self.model,
+            model,
             self.bounds,
             locked_dims=constraints,
             probability_space=probability_space,
@@ -324,9 +331,9 @@ def get_min(
             self.model is not None
         ), "model is None! Cannot get the min without a model!"
         self.model.to(self.model_device)
-
+        model = deepcopy(self.model) if self.copy_model else self.model
         val, arg = get_min(
-            self.model,
+            model,
             self.bounds,
             locked_dims=constraints,
             probability_space=probability_space,
@@ -358,9 +365,9 @@ def inv_query(
             self.model is not None
         ), "model is None! Cannot get the inv_query without a model!"
         self.model.to(self.model_device)
-
+        model = deepcopy(self.model) if self.copy_model else self.model
         val, arg = inv_query(
-            model=self.model,
+            model=model,
             y=y,
             bounds=self.bounds,
             locked_dims=constraints,
@@ -385,7 +392,8 @@ def predict(
         """
         assert self.model is not None, "model is None! Cannot predict without a model!"
         self.model.to(self.model_device)
-        return self.model.predict(x=x, probability_space=probability_space)
+        model = deepcopy(self.model) if self.copy_model else self.model
+        return model.predict(x=x, probability_space=probability_space)
 
     @ensure_model_is_fresh
     def sample(self, x: torch.Tensor, num_samples: int = 1000) -> torch.Tensor:
@@ -400,7 +408,8 @@ def sample(self, x: torch.Tensor, num_samples: int = 1000) -> torch.Tensor:
         """
         assert self.model is not None, "model is None! Cannot sample without a model!"
         self.model.to(self.model_device)
-        return self.model.sample(x, num_samples=num_samples)
+        model = deepcopy(self.model) if self.copy_model else self.model
+        return model.sample(x, num_samples=num_samples)
 
     def finish(self) -> None:
         """Finish the strategy."""
@@ -442,7 +451,8 @@ def finished(self) -> bool:
             assert (
                 self.model is not None
             ), "model is None! Cannot predict without a model!"
-            fmean, _ = self.model.predict(self.eval_grid, probability_space=True)
+            model = deepcopy(self.model) if self.copy_model else self.model
+            fmean, _ = model.predict(self.eval_grid, probability_space=True)
             meets_post_range = bool(
                 ((fmean.max() - fmean.min()) >= self.min_post_range).item()
             )
@@ -504,9 +514,10 @@ def fit(self) -> None:
         """Fit the model."""
         if self.can_fit:
             self.model.to(self.model_device)  # type: ignore
+            model = deepcopy(self.model) if self.copy_model else self.model
             if self.keep_most_recent is not None:
                 try:
-                    self.model.fit(  # type: ignore
+                    model.fit(  # type: ignore
                         self.x[-self.keep_most_recent :],  # type: ignore
                         self.y[-self.keep_most_recent :],  # type: ignore
                     )
@@ -516,21 +527,23 @@ def fit(self) -> None:
                     )
             else:
                 try:
-                    self.model.fit(self.x, self.y)  # type: ignore
+                    model.fit(self.x, self.y)  # type: ignore
                 except ModelFittingError:
                     logger.warning(
                         "Failed to fit model! Predictions may not be accurate!"
                     )
+            self.model = model
         else:
             warnings.warn("Cannot fit: no model has been initialized!", RuntimeWarning)
 
     def update(self) -> None:
         """Update the model."""
         if self.can_fit:
             self.model.to(self.model_device)  # type: ignore
+            model = deepcopy(self.model) if self.copy_model else self.model
             if self.keep_most_recent is not None:
                 try:
-                    self.model.update(  # type: ignore
+                    model.update(  # type: ignore
                         self.x[-self.keep_most_recent :],  # type: ignore
                         self.y[-self.keep_most_recent :],  # type: ignore
                     )
@@ -540,11 +553,13 @@ def update(self) -> None:
                     )
             else:
                 try:
-                    self.model.update(self.x, self.y)  # type: ignore
+                    model.update(self.x, self.y)  # type: ignore
                 except ModelFittingError:
                     logger.warning(
                         "Failed to fit model! Predictions may not be accurate!"
                     )
+
+            self.model = model
         else:
             warnings.warn("Cannot fit: no model has been initialized!", RuntimeWarning)
 
diff --git a/tests/server/test_server.py b/tests/server/test_server.py
@@ -45,6 +45,7 @@
 generator = OptimizeAcqfGenerator
 model = GPClassificationModel
 min_total_outcome_occurrences = 0
+copy_model = True
 
 [OptimizeAcqfGenerator]
 acqf = MCPosteriorVariance
@@ -82,15 +83,17 @@ def database_path(self):
         return "./{}_test_server.db".format(str(uuid.uuid4().hex))
 
     async def asyncSetUp(self):
-        ip = "127.0.0.1"
-        port = 5555
+        self.ip = "127.0.0.1"
+        self.port = 5555
 
         # setup logger
         server.logger = utils_logging.getLogger("unittests")
 
         # random datebase path name without dashes
         database_path = self.database_path
-        self.s = server.AEPsychServer(database_path=database_path, host=ip, port=port)
+        self.s = server.AEPsychServer(
+            database_path=database_path, host=self.ip, port=self.port
+        )
         self.db_name = database_path.split("/")[1]
         self.db_path = database_path
 
@@ -106,7 +109,7 @@ async def asyncSetUp(self):
             self.server_task = asyncio.create_task(self.s.serve())
         await asyncio.sleep(0.1)
 
-        self.reader, self.writer = await asyncio.open_connection(ip, port)
+        self.reader, self.writer = await asyncio.open_connection(self.ip, self.port)
 
     async def asyncTearDown(self):
         # Stops the client
@@ -486,6 +489,48 @@ async def test_receive(self):
             else:
                 self.assertTrue("KeyError" in response["error"])  # Specific error
 
+    async def test_multi_client(self):
+        setup_request = {
+            "type": "setup",
+            "version": "0.01",
+            "message": {"config_str": dummy_config},
+        }
+        ask_request = {"type": "ask", "message": ""}
+        tell_request = {
+            "type": "tell",
+            "message": {"config": {"x": [0.5]}, "outcome": 1},
+            "extra_info": {},
+        }
+
+        await self.mock_client(setup_request)
+
+        # Create second client
+        reader2, writer2 = await asyncio.open_connection(self.ip, self.port)
+
+        async def _mock_client2(request: Dict[str, Any]) -> Any:
+            writer2.write(json.dumps(request).encode())
+            await writer2.drain()
+
+            response = await reader2.read(1024 * 512)
+            return response.decode()
+
+        for _ in range(2):  # 2 loops should do it as we have 2 clients
+            tasks = [
+                asyncio.create_task(self.mock_client(ask_request)),
+                asyncio.create_task(_mock_client2(ask_request)),
+            ]
+            await asyncio.gather(*tasks)
+
+            tasks = [
+                asyncio.create_task(self.mock_client(tell_request)),
+                asyncio.create_task(_mock_client2(tell_request)),
+            ]
+            await asyncio.gather(*tasks)
+
+        self.assertTrue(self.s.strat.finished)
+        self.assertTrue(self.s.strat.x.numel() == 4)
+        self.assertTrue(self.s.clients_connected == 2)
+
 
 if __name__ == "__main__":
     unittest.main()