Skip to content

Commit 1d24866

Browse files
authored
[https://nvbugs/6000658][fix] Fix disagg gen-only hang where 10s sleep in can_forward blocks KV transfers and overflows CTX memory (NVIDIA#12640)
Signed-off-by: peihengh <259410613+peihu-nv@users.noreply.github.com>
1 parent 3422af5 commit 1d24866

1 file changed

Lines changed: 3 additions & 3 deletions

File tree

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1790,7 +1790,7 @@ def _prepare_and_schedule_batch(self):
1790 1790   new_requests += iter_requests
1791 1791   self.hang_detector.checkpoint()
1792 1792   if self.num_fetch_requests < fill_target:
1793      - time.sleep(1)
     1793 + time.sleep(0.1)
1794 1794
1795 1795   iter_stats = None
1796 1796   if self.enable_iter_perf_stats:
@@ -2199,10 +2199,10 @@ def _executor_loop_overlap(self):
2199 2199   else:
2200 2200   if self.dist.rank == 0:
2201 2201   logger.info(
2202      - f"sleep 10 seconds, num_fetched_requests: {self.num_fetch_requests}, "
     2202 + f"sleep 0.1 seconds, num_fetched_requests: {self.num_fetch_requests}, "
2203 2203   f"total_gen_count: {total_gen_count}, "
2204 2204   f"scheduled_gen_batch: {local_gen_count}")
2205      - time.sleep(10)
     2205 + time.sleep(0.1)
2206 2206   continue
2207 2207   else:
2208 2208   if scheduled_batch.num_generation_requests < self.benchmark_req_queues_size:

0 commit comments

Comments
 (0)