1
- import time
2
- import pandas as pd
3
1
from contextlib import contextmanager
4
2
import numpy as np
5
3
6
- from arcticdb .exceptions import UserInputException
7
4
from arcticdb_ext .tools import QueryStats
5
+ from arcticdb_ext .tools .QueryStats import StatsGroupName , StatsName
8
6
9
7
class QueryStatsTool:
    """Collects and structures query statistics reported by the native layer.

    All mutable state lives in the ``arcticdb_ext.tools.QueryStats`` module;
    this class is a stateless facade over it, so every method is a
    ``@classmethod``.
    """

    # Define enum values as lists since pybind11 enums are not iterable.
    _STATS_NAME_VALUES = [StatsName.result_count, StatsName.total_time_ms, StatsName.count]
    _STATS_GROUP_NAME_VALUES = [StatsGroupName.arcticdb_call, StatsGroupName.key_type, StatsGroupName.storage_ops]

    @classmethod
    def context_manager(cls):
        """Return a context manager that enables stats collection for its scope."""
        @contextmanager
        def _func():
            cls.enable()
            try:
                yield
            finally:
                # BUG FIX: without try/finally, an exception raised in the
                # managed block would leave stats collection enabled forever.
                cls.disable()
        return _func()

    @classmethod
    def get_query_stats(cls):
        """Return the collected stats as a nested dict.

        Returns:
            dict: top-level keys are arcticdb call names; below that, stats
            are grouped by group-type name (e.g. key_type, storage_ops) and
            summed counters keyed by stat name.
        """
        # Get raw stats from the C++ layer.
        raw_stats = QueryStats.root_layers()

        # Transform raw stats into a structured dictionary.
        result = {}
        for layer in raw_stats:
            if layer:
                cls._process_layer(layer, result)
        return result

    @classmethod
    def _process_layer(cls, layer, current_dict):
        """Recursively merge one native stats layer into ``current_dict``.

        ``current_dict`` is mutated in place; counters for the same stat name
        are summed across layers.
        """
        def _get_enum_name(enum_value):
            # pybind11 enums stringify as "EnumType.name"; keep the name only.
            return str(enum_value).split('.')[-1]

        # Accumulate per-stat counters; zero entries are skipped so the output
        # only contains stats that actually occurred.
        stats_array = layer.stats
        for stat_enum in cls._STATS_NAME_VALUES:
            value = stats_array[int(stat_enum)]
            if value > 0:
                stat_name = _get_enum_name(stat_enum)
                current_dict[stat_name] = current_dict.get(stat_name, 0) + value

        # Recurse into each populated child map.
        next_layer_maps = layer.next_layer_maps
        for group_enum in cls._STATS_GROUP_NAME_VALUES:
            next_layer_map = next_layer_maps[int(group_enum)]
            if not next_layer_map:
                continue

            if group_enum == StatsGroupName.arcticdb_call:
                # Top level: each operation appears directly under its name.
                for op_name, op_layer in next_layer_map.items():
                    cls._process_layer(op_layer, current_dict.setdefault(op_name, {}))
            else:
                # Nested levels are grouped under the group-type name.
                group_dict = current_dict.setdefault(_get_enum_name(group_enum), {})
                for sub_name, sub_layer in next_layer_map.items():
                    cls._process_layer(sub_layer, group_dict.setdefault(sub_name, {}))

    @classmethod
    def reset_stats(cls):
        """Discard all stats accumulated so far in the native layer."""
        QueryStats.reset_stats()

    @classmethod
    def enable(cls):
        """Turn on stats collection."""
        QueryStats.enable()

    @classmethod
    def disable(cls):
        """Turn off stats collection."""
        QueryStats.disable()
+
0 commit comments