
Commit 696dc62

Some bug fix
1 parent 310cc3a commit 696dc62

4 files changed: +117 −175 lines

cpp/arcticdb/util/query_stats.cpp (+4 −3)
@@ -15,11 +15,12 @@ namespace arcticdb::util::query_stats {
 std::shared_ptr<StatsGroupLayer> QueryStats::current_layer(){
     // current_layer_ != nullptr && root_layer_ != nullptr -> stats has been setup; Nothing to do
     // current_layer_ == nullptr && root_layer_ == nullptr -> clean slate; Need to setup
-    // current_layer_ != nullptr && root_layer_ == nullptr -> stats have been reset; Need to setup
+    // current_layer_ != nullptr && root_layer_ == nullptr -> Something is off
     // current_layer_ == nullptr && root_layer_ != nullptr -> Something is off
     if (!thread_local_var_.current_layer_ || !thread_local_var_.root_layer_) {
         check(!async::is_folly_thread, "Folly thread should have its StatsGroupLayer passed by caller only");
-        check(thread_local_var_.current_layer_ || !thread_local_var_.root_layer_, "QueryStats root_layer_ should be null if current_layer_ is null");
+        check(thread_local_var_.current_layer_.operator bool() == thread_local_var_.root_layer_.operator bool(),
+              "QueryStats root_layer_ and current_layer_ should be either both null or both non-null");
         thread_local_var_.root_layer_ = std::make_shared<StatsGroupLayer>();
         {
             std::lock_guard<std::mutex> lock(root_layer_mutex_);
@@ -44,7 +45,7 @@ void QueryStats::reset_stats() {
     check(!async::TaskScheduler::instance()->tasks_pending(), "Folly tasks are still running");
     std::lock_guard<std::mutex> lock(root_layer_mutex_);
     for (auto& layer : root_layers_) {
-        layer.reset();
+        layer->reset_stats();
     }
 }
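
The second hunk is the substantive fix here: root_layers_ holds std::shared_ptr<StatsGroupLayer> values, so layer.reset() was calling shared_ptr::reset() and nulling the stored pointer instead of clearing the recorded counters, while layer->reset_stats() clears the pointed-to layer in place. The contract this restores, sketched against the Python toolbox API elsewhere in this commit (lib is an assumed, already-configured store object, not part of the commit):

    QueryStatsTool.enable()
    lib.list_symbols()                           # record some storage operations
    QueryStatsTool.disable()
    QueryStatsTool.reset_stats()                 # clears each layer's counters in place
    assert not QueryStatsTool.get_query_stats()  # accumulated stats are gone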

python/arcticdb/toolbox/query_stats.py (+68 −109)
@@ -1,130 +1,89 @@
-import time
-import pandas as pd
 from contextlib import contextmanager
 import numpy as np
 
-from arcticdb.exceptions import UserInputException
 from arcticdb_ext.tools import QueryStats
+from arcticdb_ext.tools.QueryStats import StatsGroupName, StatsName
 
 class QueryStatsTool:
-    def __init__(self):
-        self._create_time = time.time_ns()
-        self._is_context_manager = False
-        QueryStats.register_new_query_stat_tool()
+    # Define enum values as lists since pybind11 enums are not iterable
+    _STATS_NAME_VALUES = [StatsName.result_count, StatsName.total_time_ms, StatsName.count]
+    _STATS_GROUP_NAME_VALUES = [StatsGroupName.arcticdb_call, StatsGroupName.key_type, StatsGroupName.storage_ops]
 
-    def __del__(self):
-        QueryStats.deregister_query_stat_tool()
-
-    def __sub__(self, other):
-        return self._populate_stats(other._create_time, self._create_time)
+    @classmethod
+    def context_manager(cls):
+        @contextmanager
+        def _func():
+            cls.enable()
+            yield
+            cls.disable()
+        return _func()
 
-    def _populate_stats(self, start_time, end_time):
-        df = pd.DataFrame(QueryStats.get_stats())
-        if df.empty:
-            return {}
+    @classmethod
+    def get_query_stats(cls):
+        # Get raw stats from C++ layer
+        raw_stats = QueryStats.root_layers()
 
-        df["exec_time"] = pd.to_numeric(df["exec_time"], errors="coerce")
-        df = df[df["exec_time"].between(start_time, end_time)]
-        df = df.drop(columns=["exec_time"])
+        # Transform raw stats into structured dictionary
+        result = {}
 
-        if "result_count" in df.columns:
-            df["result_count"] = pd.to_numeric(df["result_count"], errors="coerce")
+        # Process each layer
+        for layer in raw_stats:
+            if layer:
+                cls._process_layer(layer, result)
+
+        return result
+
+    @classmethod
+    def _process_layer(cls, layer, current_dict):
+        def _get_enum_name(enum_value):
+            return str(enum_value).split('.')[-1]
 
-        groupby_cols = ["arcticdb_call", "stage", "key_type", "storage_op"]
+        # Process stats array
+        stats_array = layer.stats
+        for stat_enum in cls._STATS_NAME_VALUES:
+            stat_idx = int(stat_enum)
+            if stats_array[stat_idx] > 0:
+                stat_name = _get_enum_name(stat_enum)
+                if stat_name not in current_dict:
+                    current_dict[stat_name] = stats_array[stat_idx]
+                else:
+                    current_dict[stat_name] += stats_array[stat_idx]
 
-        for col in groupby_cols:
-            if col not in df.columns:
-                df[col] = pd.Series(dtype='object')
-
-        def process_time_values(time_values):
-            time_buckets = {}
-            for time_val in time_values:
-                bucket = (time_val // 10) * 10
-                time_buckets[str(bucket)] = time_buckets.get(str(bucket), 0) + 1
-            return time_buckets
-
-        def get_non_grouped_times(data, current_level):
-            # Only process NaN values for the current grouping level
-            mask = data[current_level].isna()
-            if not mask.any():
-                return {}
+        # Process next_layer_maps
+        next_layer_maps = layer.next_layer_maps
+        for group_enum in cls._STATS_GROUP_NAME_VALUES:
+            group_idx = int(group_enum)
 
-            time_values = pd.to_numeric(data.loc[mask, "time"].dropna(), errors="coerce")
-            if not time_values.empty:
-                time_buckets = process_time_values(time_values)
-                if time_buckets:
-                    return {"time": time_buckets}
-            return {}
-
-        def process_group(group_data, is_leaf):
-            result = {}
-
-            if is_leaf:
-                numeric_cols = [col for col in group_data.columns if col not in groupby_cols and col != "time"]
-                for col in numeric_cols:
-                    values = pd.to_numeric(group_data[col].dropna(), errors="coerce")
-                    if not values.empty:
-                        total = values.sum()
-                        if not np.isnan(total):
-                            result[col] = int(total)
+            if not next_layer_maps[group_idx]:
+                continue
 
-                time_values = pd.to_numeric(group_data["time"].dropna(), errors="coerce")
-                if not time_values.empty:
-                    time_buckets = process_time_values(time_values)
-                    if time_buckets:
-                        result["time"] = time_buckets
+            next_layer_map = next_layer_maps[group_idx]
 
-            return result
+            # top level
+            if group_enum == StatsGroupName.arcticdb_call:
+                for op_name, op_layer in next_layer_map.items():
+                    if op_name not in current_dict:
+                        current_dict[op_name] = {}
+                    cls._process_layer(op_layer, current_dict[op_name])
+            else:
+                layer_type = _get_enum_name(group_enum)
 
-        def group_by_level(data, columns):
-            if not columns:
-                return process_group(data, True)
-
-            result = {}
-            current_col = columns[0]
-
-            non_grouped = get_non_grouped_times(data, current_col)
-            result.update(non_grouped)
-
-            grouped = data[~data[current_col].isna()].groupby(current_col)
-            nested = {}
-
-            for name, group in grouped:
-                sub_result = group_by_level(group, columns[1:])
-                if sub_result:
-                    nested[str(name)] = sub_result
-
-            if nested:
-                result[f"{current_col}s"] = nested
-
-            return result
-
-        result = {}
-        for call_name, call_group in df.groupby("arcticdb_call"):
-            if pd.isna(call_name):
-                continue
-            call_result = group_by_level(call_group, groupby_cols[1:])
-            if call_result:
-                result[str(call_name)] = call_result
-
-        return result
+                if layer_type not in current_dict:
+                    current_dict[layer_type] = {}
+                for sub_name, sub_layer in next_layer_map.items():
+                    if sub_name not in current_dict[layer_type]:
+                        current_dict[layer_type][sub_name] = {}
+                    cls._process_layer(sub_layer, current_dict[layer_type][sub_name])
 
     @classmethod
-    def context_manager(cls):
-        @contextmanager
-        def _func():
-            query_stats_tools = cls()
-            query_stats_tools._is_context_manager = True
-            yield query_stats_tools
-            query_stats_tools._end_time = time.time_ns()
-        return _func()
+    def reset_stats(cls):
+        QueryStats.reset_stats()
 
-    def get_query_stats(self):
-        if self._is_context_manager:
-            return self._populate_stats(self._create_time, self._end_time)
-        else:
-            raise UserInputException("get_query_stats should be used with a context manager initialized QueryStatsTools")
+    @classmethod
+    def enable(cls):
+        QueryStats.enable()
 
     @classmethod
-    def reset_stats(cls):
-        QueryStats.reset_stats()
+    def disable(cls):
+        QueryStats.disable()
+
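
The rewrite above drops the pandas/time-window post-processing in favour of walking the StatsGroupLayer tree exposed by the C++ extension, and turns every entry point into a classmethod. A minimal usage sketch of the new API, assuming an already-configured library object lib (lib itself is not part of this diff):

    from arcticdb.toolbox.query_stats import QueryStatsTool

    QueryStatsTool.enable()                    # start recording
    lib.list_symbols()
    QueryStatsTool.disable()                   # stop recording
    stats = QueryStatsTool.get_query_stats()   # nested dict keyed by API call
    QueryStatsTool.reset_stats()               # discard accumulated counters

    # Equivalent scoped form:
    with QueryStatsTool.context_manager():
        lib.list_symbols()
    stats = QueryStatsTool.get_query_stats()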

python/tests/conftest.py (+7 −0)
@@ -55,6 +55,7 @@
     SSL_TEST_SUPPORTED,
 )
 from arcticdb.storage_fixtures.utils import safer_rmtree
+from arcticdb.toolbox.query_stats import QueryStatsTool
 
 
 # region =================================== Misc. Constants & Setup ====================================
@@ -1089,3 +1090,9 @@ def in_memory_version_store_tiny_segment(in_memory_store_factory):
 @pytest.fixture(params=["lmdb_version_store_tiny_segment", "in_memory_version_store_tiny_segment"])
 def lmdb_or_in_memory_version_store_tiny_segment(request):
     return request.getfixturevalue(request.param)
+
+
+@pytest.fixture
+def clear_query_stats():
+    yield
+    QueryStatsTool.reset_stats()
(+38 −63)
@@ -1,46 +1,33 @@
 from arcticdb.toolbox.query_stats import QueryStatsTool
-from arcticdb_ext.tools import QueryStats
 
-def test_query_stats(s3_version_store_v1):
-    query_stats_tools_write = QueryStatsTool() # For testing whether stats has been filtered
+def test_query_stats(s3_version_store_v1, clear_query_stats):
     s3_version_store_v1.write("a", 1)
-    query_stats_tools_start = QueryStatsTool()
+    QueryStatsTool.enable()
     s3_version_store_v1.list_symbols()
-    query_stats_tools_end = QueryStatsTool()
-    stats = query_stats_tools_end - query_stats_tools_start
+    QueryStatsTool.disable()
+    stats = QueryStatsTool.get_query_stats()
     """
     Sample output:
     {
         "list_symbols": {
-            "time": {
-                "500": 1
-            },
-            "stages": {
-                "list": {
-                    "time": {
-                        "500": 1
-                    },
-                    "key_types": {
-                        "l": {
-                            "storage_ops": {
-                                "ListObjectsV2": {
-                                    "result_count": 1,
-                                    "time": {
-                                        "20": 1,
-                                        "10": 1
-                                    }
-                                }
-                            }
-                        },
-                        "r": {
-                            "storage_ops": {
-                                "ListObjectsV2": {
-                                    "result_count": 1,
-                                    "time": {
-                                        "10": 1
-                                    }
-                                }
-                            }
-                        }
+            "total_time_ms": 476,
+            "count": 1,
+            "key_type": {
+                "l": {
+                    "storage_ops": {
+                        "ListObjectsV2": {
+                            "result_count": 1,
+                            "total_time_ms": 48,
+                            "count": 2
+                        }
+                    }
+                },
+                "r": {
+                    "storage_ops": {
+                        "ListObjectsV2": {
+                            "result_count": 1,
+                            "total_time_ms": 21,
+                            "count": 1
                         }
                     }
                 }
@@ -49,10 +36,8 @@ def test_query_stats(s3_version_store_v1):
     }
     """
     assert "list_symbols" in stats
-    assert "stages" in stats["list_symbols"]
-    assert "list" in stats["list_symbols"]["stages"]
-    assert "key_types" in stats["list_symbols"]["stages"]["list"]
-    key_types = stats["list_symbols"]["stages"]["list"]["key_types"]
+    assert "key_type" in stats["list_symbols"]
+    key_types = stats["list_symbols"]["key_type"]
     assert "l" in key_types
    assert "r" in key_types
 
@@ -63,32 +48,22 @@ def test_query_stats(s3_version_store_v1):
         assert key_types[key_type]["storage_ops"]["ListObjectsV2"]["result_count"] == 1
     # Not asserting the time values as they are non-deterministic
 
-def test_query_stats_context(s3_version_store_v1):
-    with QueryStatsTool.context_manager(): # For testing whether stats has been filtered
-        s3_version_store_v1.write("a", 1)
-    with QueryStatsTool.context_manager() as query_stats_tools:
-        s3_version_store_v1.list_symbols()
-    stats = query_stats_tools.get_query_stats()
-    key_types = stats["list_symbols"]["stages"]["list"]["key_types"]
-    for key_type in ["l", "r"]:
-        assert key_types[key_type]["storage_ops"]["ListObjectsV2"]["result_count"] == 1
-
-
-def test_query_stats_clear(s3_version_store_v1):
+def test_query_stats_context(s3_version_store_v1, clear_query_stats):
     s3_version_store_v1.write("a", 1)
-    query_stats_tools_start = QueryStatsTool()
-    s3_version_store_v1.list_symbols()
-    query_stats_tools_end = QueryStatsTool()
-    QueryStats.reset()
-    assert not (query_stats_tools_end - query_stats_tools_start)
+    with QueryStatsTool.context_manager():
+        s3_version_store_v1.list_symbols()
+    stats = QueryStatsTool.get_query_stats()
+    key_types = stats["list_symbols"]["key_type"]
+    for key_type in ["l", "r"]:
+        assert key_types[key_type]["storage_ops"]["ListObjectsV2"]["result_count"] == 1
 
 
-def test_query_stats_tool_counter(s3_version_store_v1):
-    query_stats_tools_start = QueryStatsTool()
+def test_query_stats_clear(s3_version_store_v1, clear_query_stats):
+    s3_version_store_v1.write("a", 1)
+    QueryStatsTool.enable()
     s3_version_store_v1.list_symbols()
-    query_stats_tools_end = QueryStatsTool()
-    del query_stats_tools_start
-    del query_stats_tools_end
-
-    assert not QueryStats.get_stats()
+    QueryStatsTool.disable()
+    QueryStatsTool.get_query_stats()
+    QueryStatsTool.reset_stats()
+    assert not QueryStatsTool.get_query_stats()
 
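
As the sample output in the test above shows, the returned mapping mixes counter keys (result_count, total_time_ms, count) with grouping keys (the API call name, key_type, storage_ops) at each level. A hedged sketch of a recursive helper that rolls one counter up across all levels (sum_counter is hypothetical, not part of this commit):

    def sum_counter(stats_dict, counter="result_count"):
        # Recursively sum one counter key across the nested stats dict.
        total = 0
        for key, value in stats_dict.items():
            if key == counter and isinstance(value, (int, float)):
                total += value                        # counter at this level
            elif isinstance(value, dict):
                total += sum_counter(value, counter)  # descend into sub-groups
        return total

    # For the sample output above, sum_counter(stats) == 2:
    # one ListObjectsV2 result for each of the "l" and "r" key types.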
