34
34
35
35
logger = logging .getLogger (__name__ )
36
36
37
+ # State used to calculate GPU utilization (approximated here as the share of wall-clock time spent running tasks).
38
+ _service_begin_time = time .time () # wall-clock timestamp taken when this assignment runs; denominator baseline for the ratio
39
+ _busy_time = 0 # accumulated seconds of completed task run time; increased in finish_task
40
+ _tasks_start_at = {} # id_task -> start time in seconds (a time.time() value); set in start_task, popped in finish_task
41
+
42
+
43
def _make_gpu_utilization():
    """Return the fraction of service lifetime spent running tasks.

    The value is ``busy seconds / seconds since _service_begin_time``, where
    busy seconds is the accumulated ``_busy_time`` plus the running time of
    the task currently in progress (if any).

    Returns:
        A float clamped to the range [0.0, 1.0]. ``0.0`` is returned when no
        measurable time has elapsed since the service started.
    """
    now = time.time()
    elapsed = now - _service_begin_time

    # time.time() is a wall clock: it can have coarse resolution or step
    # backwards, so the elapsed time may be zero or negative. Avoid a
    # ZeroDivisionError / meaningless negative ratio in that case.
    if elapsed <= 0:
        return 0.0

    busy_time = _busy_time

    # The current task has not finished yet, so its run time is not part of
    # _busy_time; add what it has consumed so far. If its start time was never
    # recorded, it contributes nothing.
    if current_task:
        busy_time += now - _tasks_start_at.get(current_task, now)

    # Keep the result a valid ratio even if clock adjustments skewed the
    # individual measurements.
    return min(1.0, max(0.0, busy_time / elapsed))
37
52
38
53
# Returns the task-queue state as a tuple. The changed return line appends the
# result of _make_gpu_utilization() as a new last element, growing the tuple
# from seven to eight items; callers that unpack it must account for that.
def get_task_queue_info ():
39
- return current_task , pending_tasks , finished_tasks , finished_task_count , failed_task_count , consecutive_failed_task_count , last_error_message
54
+ return current_task , pending_tasks , finished_tasks , finished_task_count , failed_task_count , consecutive_failed_task_count , last_error_message , _make_gpu_utilization ()
40
55
41
56
42
57
def start_task (id_task ):
@@ -49,6 +64,7 @@ def start_task(id_task):
49
64
50
65
task_info = _pop_task_from_queue (id_task )
51
66
task_info ['started_at' ] = time .time ()
67
+ _tasks_start_at [id_task ] = time .time ()
52
68
53
69
return task_info
54
70
@@ -68,9 +84,16 @@ def finish_task(id_task, task_failed=False, error_message=''):
68
84
global finished_tasks
69
85
global failed_tasks
70
86
global last_error_message
87
+ global _busy_time
88
+
71
89
logger .info (
72
90
f'finish_task, id_task: { id_task } , current_task: { current_task } , current_task_step: { current_task_step } ' )
73
91
92
+ # record gpu busy time
93
+ curr = time .time ()
94
+ task_started_at = _tasks_start_at .pop (id_task , curr )
95
+ _busy_time += (curr - task_started_at )
96
+
74
97
# if a task was finished before it was started, we need to pop it out of the pending queue
75
98
_pop_task_from_queue (id_task )
76
99
0 commit comments