|
12 | 12 | from cloud_goodput.ml_goodput_measurement.src.checkpoint_badput_calculator import CheckpointBadputCalculator |
13 | 13 | from cloud_goodput.ml_goodput_measurement.src.checkpoint_badput_calculator import CheckpointLoggerOptions |
14 | 14 | from cloud_goodput.ml_goodput_measurement.src.goodput_cache import GoodputCache |
15 | | -from cloud_goodput.ml_goodput_measurement.src.goodput_utils import BadputType, GoodputInfo, get_timestamp_from_log_entry |
16 | | -import numpy as np |
17 | | -from scipy import stats |
| 15 | +from cloud_goodput.ml_goodput_measurement.src.goodput_utils import BadputType, GoodputInfo, StepInfo |
| 16 | +from cloud_goodput.ml_goodput_measurement.src.goodput_utils import compute_ideal_step_time, get_extra_time_from_anomalous_steps, get_timestamp_from_log_entry |
18 | 17 |
|
19 | 18 |
|
20 | 19 | _JOB_NAME = 'job_name' |
@@ -455,31 +454,6 @@ def _get_current_productive_and_unproductive_time( |
455 | 454 | now based on the latest entries retrieved from Cloud Logging. |
456 | 455 | """ |
457 | 456 |
|
458 | | - def get_extra_time_from_anomalous_steps(step_times: list[Any]) -> float: |
459 | | - def get_anomalous_and_normal_step_times( |
460 | | - step_times: list[Any], |
461 | | - ) -> tuple[list[Any], list[Any]]: |
462 | | - mad = stats.median_abs_deviation(step_times) |
463 | | - med = np.median(step_times) |
464 | | - |
465 | | - anomalous_step_times = [] |
466 | | - normal_step_times = [] |
467 | | - for step_time in step_times: |
468 | | - if step_time > (med + mad * 3): |
469 | | - anomalous_step_times.append(step_time) |
470 | | - else: |
471 | | - normal_step_times.append(step_time) |
472 | | - |
473 | | - return anomalous_step_times, normal_step_times |
474 | | - |
475 | | - anomalous_step_times, normal_step_times = ( |
476 | | - get_anomalous_and_normal_step_times(step_times) |
477 | | - ) |
478 | | - normal_step_mean = np.mean(normal_step_times) |
479 | | - return sum(anomalous_step_times) - ( |
480 | | - len(anomalous_step_times) * normal_step_mean |
481 | | - ) |
482 | | - |
483 | 457 | def get_segment_productive_and_unproductive_time( |
484 | 458 | step_start_data: dict[int, float], curr_step: int |
485 | 459 | ) -> tuple[float, dict[BadputType, float]]: |
@@ -1010,6 +984,76 @@ def get_job_goodput_interval( |
1010 | 984 | self._number_of_interruptions, |
1011 | 985 | ) |
1012 | 986 |
|
| 987 | + def _get_step_times(self): |
| 988 | + """Helper function to compute step times from the log entries.""" |
| 989 | + step_times = {} |
| 990 | + previous_step_start_time = None |
| 991 | + previous_step_count = None |
| 992 | + for payload in self._current_entries: |
| 993 | + if _STEP_START_TIME in payload: |
| 994 | + step_start_time = payload[_STEP_START_TIME] |
| 995 | + step_count = int(payload[_STEP_COUNT]) |
| 996 | + if ( |
| 997 | + previous_step_start_time is not None |
| 998 | + and previous_step_count is not None |
| 999 | + and step_count == previous_step_count + 1 |
| 1000 | + ): |
| 1001 | + step_times[previous_step_count] = ( |
| 1002 | + step_start_time - previous_step_start_time |
| 1003 | + ) |
| 1004 | + previous_step_count = step_count |
| 1005 | + previous_step_start_time = step_start_time |
| 1006 | + return step_times |
| 1007 | + |
| 1008 | + def get_step_deviation( |
| 1009 | + self, configured_ideal_step_time: Optional[float] = None |
| 1010 | + ) -> dict[int, float]: |
| 1011 | + """Method to get the step deviation of the current step based on the ideal step time. |
| 1012 | +
|
| 1013 | + This method computes the ideal step time if one is not provided by the user |
| 1014 | + and returns the step deviation of the current step. |
| 1015 | +
|
| 1016 | + Args: |
| 1017 | + configured_ideal_step_time: Optional user-defined ideal step time. |
| 1018 | +
|
| 1019 | + Returns: |
| 1020 | + A dictionary of step deviation for each step. |
| 1021 | + """ |
| 1022 | + # Get the log entries. |
| 1023 | + self._update_log_entries() |
| 1024 | + # Compute step times from the log entries. |
| 1025 | + step_times = self._get_step_times() |
| 1026 | + # Get the previous ideal step time from the cache. |
| 1027 | + previous_ideal_step_time = ( |
| 1028 | + self._goodput_cache._step_info.ideal_step_time |
| 1029 | + if self._goodput_cache._step_info |
| 1030 | + and self._goodput_cache._step_info.ideal_step_time |
| 1031 | + else None |
| 1032 | + ) |
| 1033 | + # Compute ideal step time. |
| 1034 | + ideal_step_time = ( |
| 1035 | + configured_ideal_step_time |
| 1036 | + if configured_ideal_step_time is not None |
| 1037 | + else compute_ideal_step_time( |
| 1038 | + step_times=list(step_times.values()), |
| 1039 | + previous_ideal_step_time=previous_ideal_step_time, |
| 1040 | + ) |
| 1041 | + ) |
| 1042 | + |
| 1043 | + # Compute step deviation. |
| 1044 | + step_deviations = { |
| 1045 | + step_count: abs(step_time - ideal_step_time) |
| 1046 | + for step_count, step_time in step_times.items() |
| 1047 | + } |
| 1048 | + # Update the step information in the cache. |
| 1049 | + self._goodput_cache.update_step_info( |
| 1050 | + StepInfo( |
| 1051 | + ideal_step_time=ideal_step_time, |
| 1052 | + step_deviations=step_deviations, |
| 1053 | + ) |
| 1054 | + ) |
| 1055 | + return step_deviations |
| 1056 | + |
1013 | 1057 | def _get_job_badput_breakdown( |
1014 | 1058 | self, total_productive_time, total_unproductive_time, total_job_time |
1015 | 1059 | ): |
|
0 commit comments