11import logging
2- from typing import List
2+ from typing import List , Generator , Optional
33from eudoxia .utils import DISK_SCAN_GB_SEC , Priority
44from eudoxia .workload import Operator
55
66logger = logging .getLogger (__name__ )
77
88
9- class Container :
9+ class Container :
1010 """
11- An encapsulation of CPU, RAM, and a list of operators. A container is
12- created and then calculates how many ticks it will need to run with
13- resources provided .
11+ An encapsulation of CPU, RAM, and a list of operators. A container executes
12+ operators tick-by-tick, tracking memory usage and allowing suspension only
13+ between operator boundaries .
1414 """
1515 next_container_num = 1
1616
@@ -21,18 +21,22 @@ def __init__(self, ram, cpu, ops, prty: Priority, pool_id: int, ticks_per_second
2121 self .ram = ram
2222 self .cpu = cpu
2323 self .operators : List [Operator ] = ops
24- self .segment_tick_boundaries = []
2524 self .suspend_ticks = None
2625 self ._suspend_ticks_left = None
2726 self .priority = prty
28- self .error : str = None
27+ self .error : Optional [ str ] = None
2928 self .ticks_per_second = ticks_per_second
3029 self .tick_length_secs = 1.0 / ticks_per_second
3130
32- self .num_ticks = self ._compute_ticks ()
33- self ._num_ticks_left = self .num_ticks
34- self .num_secs = self .num_ticks * self .tick_length_secs
35-
31+ # Tick state (updated by generator)
32+ self ._current_memory : float = 0.0
33+ self ._can_suspend : bool = False
34+ self ._completed : bool = False
35+ self ._ticks_elapsed : int = 0
36+
37+ # Start the generator
38+ self ._tick_iter = self ._tick_generator ()
39+
3640 def get_pipeline_id (self ):
3741 """Get the pipeline ID from operators, handling mixed pipelines"""
3842 pipeline_ids = set ()
@@ -49,62 +53,98 @@ def get_pipeline_id(self):
4953
5054 def __repr__ (self ):
5155 num_ops = len (self .operators )
52- return f"container={ self .container_id } pipeline={ self .get_pipeline_id ()} ops={ num_ops } cpus={ self .cpu } ram_gb={ self .ram } runtime_secs= { self . num_secs :.1f } "
56+ return f"container={ self .container_id } pipeline={ self .get_pipeline_id ()} ops={ num_ops } cpus={ self .cpu } ram_gb={ self .ram } "
5357
54- def _compute_ticks (self ) -> int :
58+ def _tick_generator (self ) -> Generator [ None , None , None ] :
5559 """
56- This function utilizes functions provided by Segment to calculate the
57- amount of CPU and RAM ticks that are needed across all segments. It also
58- computes the boundaries between segments. The reason is that we can only
59- suspend containers between segments (not in the middle of while a segment
60- is running).
61- Returns:
62- int: number of ticks this container will need to run for
60+ Generator that drives tick-by-tick execution, updating container state directly.
61+
62+ Updates self._current_memory and self._can_suspend on each yield.
63+
64+ Memory usage is determined by the current segment:
65+ - If segment.memory_gb is set: fixed memory usage
66+ - If segment.memory_gb is None: memory grows linearly with I/O progress
67+
68+ Suspension is only allowed between operators (not between segments).
6369 """
64- total_ticks = 0
65- for op in self .operators :
66- for seg in op .get_segments ():
67- # will it OOM, and if so, when?
68- oom_seconds = seg .get_seconds_until_oom (self .ram )
69-
70- if oom_seconds is not None :
71- # compute how long it it will be until the OOM occurs
72- self .error = "OOM"
73- seg_ticks_before_OOM = int (oom_seconds / self .tick_length_secs )
74- total_ticks += seg_ticks_before_OOM
75- return total_ticks # after OOM, we cannot run other segments
76- else :
77- # there is no OOM. We will spend all the time
78- # expected on I/O (first), then CPU (second)
79- io_time_secs = seg .get_io_seconds ()
80- cpu_time_secs = seg .get_cpu_time (self .cpu )
81- total_ticks += int ((io_time_secs + cpu_time_secs ) / self .tick_length_secs )
82- self .segment_tick_boundaries .append (total_ticks )
83- return total_ticks
70+
71+ # loop over every tick of every segment of every op
72+ #
73+ # at each iteration, determine memory usage, whether there is
74+ # an OOM, and whether suspension is possible
75+ for op_idx , op in enumerate (self .operators ):
76+ segments = op .get_segments ()
77+ for seg_idx , seg in enumerate (segments ):
78+ # Calculate ticks for I/O phase and CPU phase
79+ io_secs = seg .get_io_seconds ()
80+ cpu_secs = seg .get_cpu_time (self .cpu )
81+ io_ticks = int (io_secs / self .tick_length_secs )
82+ cpu_ticks = int (cpu_secs / self .tick_length_secs )
83+ total_seg_ticks = io_ticks + cpu_ticks
84+
85+ for i in range (total_seg_ticks ):
86+ # determine current memory consumption
87+ if i < io_ticks :
88+ if seg .memory_gb is not None :
89+ self ._current_memory = seg .memory_gb
90+ else :
91+ # Memory grows linearly with I/O progress
92+ io_progress_secs = (i + 1 ) * self .tick_length_secs
93+ self ._current_memory = io_progress_secs * DISK_SCAN_GB_SEC
94+ else :
95+ self ._current_memory = seg .get_peak_memory_gb ()
96+
97+ # have we OOM'd?
98+ if self ._current_memory > self .ram :
99+ self .error = "OOM"
100+ self ._completed = True
101+ yield
102+ return
103+
104+ # are we at the end of the op (last tick of last
105+ # seg)? if so, we're either completed, or we can
106+ # suspend, depending on whether this is the last
107+ # op.
108+ self ._can_suspend = False
109+ if seg_idx == len (segments )- 1 and i == total_seg_ticks - 1 :
110+ if op_idx == len (self .operators ) - 1 :
111+ self ._current_memory = 0.0
112+ self ._completed = True
113+ else :
114+ self ._can_suspend = True
115+ yield
84116
85117 def tick (self ):
86- self ._num_ticks_left -= 1
118+ """
119+ Execute one tick. Advances to next state (OOM checked in _advance_tick).
120+ """
121+ if self ._completed :
122+ return
123+ next (self ._tick_iter )
124+ self ._ticks_elapsed += 1
87125
88126 def is_completed (self ):
89- # Use <= to handle edge case: immediate OOM results in 0 ticks,
90- # but tick() may still be called, causing _num_ticks_left to go negative
91- return (self ._num_ticks_left <= 0 )
127+ return self ._completed
128+
129+ def ticks_elapsed (self ) -> int :
130+ """Returns the number of ticks that have been executed."""
131+ return self ._ticks_elapsed
92132
93133 def can_suspend_container (self ) -> bool :
94- """Can only suspend a container if between execution of
95- segments. Must wait for Segment to complete before the
96- container is pausable"""
97- elapsed = self .num_ticks - self ._num_ticks_left
98- if elapsed in self .segment_tick_boundaries :
99- return True
100- return False
134+ """Can only suspend a container between operators."""
135+ return self ._can_suspend
136+
137+ def get_current_memory_usage (self ) -> float :
138+ """Returns current memory usage in GB."""
139+ return self ._current_memory
101140
102141 def suspend_container (self ):
103142 """
104143 Suspend container execution, free CPUs and RAM. Requires writing
105- current data to disk.
144+ current data to disk.
106145 """
107- write_to_disk_ticks = self .ram / DISK_SCAN_GB_SEC
146+ write_to_disk_secs = self .ram / DISK_SCAN_GB_SEC
147+ write_to_disk_ticks = int (write_to_disk_secs / self .tick_length_secs )
108148 self .suspend_ticks = write_to_disk_ticks
109149 self ._suspend_ticks_left = write_to_disk_ticks
110150
0 commit comments