diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 00000000000..e63080c6ce4 --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,4 @@ + +## 2024-05-24 - [Optimize RequestMetrics.to_dict serialization] +**Learning:** `dataclasses.asdict` recursively deep-copies every field value, which adds significant overhead for frequently serialized dataclasses such as `RequestMetrics` on a hot path like the API server. +**Action:** Iterate `__dataclass_fields__` manually with `getattr` for faster serialization when the dataclass holds mostly primitives, and fall back to `asdict` only for field values that are themselves dataclasses (like `SpeculateMetrics`). Note that other field values, including lists and dicts, are then returned by reference rather than copied. diff --git a/fastdeploy/engine/request.py b/fastdeploy/engine/request.py index 8413ccb7dd3..5fc4ca5f467 100644 --- a/fastdeploy/engine/request.py +++ b/fastdeploy/engine/request.py @@ -894,7 +894,14 @@ def to_dict(self): """ Convert the RequestMetrics object to a dictionary. """ - return {k: v for k, v in asdict(self).items()} + res = {} + for key in self.__dataclass_fields__: + v = getattr(self, key) + if v is not None and hasattr(v, "__dataclass_fields__"): + res[key] = asdict(v) + else: + res[key] = v + return res def record_recv_first_token(self): cur_time = time.time()