Commit e0badc2

Refactor and formatting improvements (#35)
* Refactor and formatting
* Missing docs
* Rebase
* Fix errata
* snake_case method name
* Fix linting
* Tests

Signed-off-by: Raul Sevilla <[email protected]>
1 parent fcdcf73 commit e0badc2

5 files changed: 151 additions & 120 deletions

fmatch/logrus.py

Lines changed: 4 additions & 4 deletions
@@ -17,27 +17,27 @@ class SingletonLogger:
 
     def __new__(cls, debug: int, name: str):
         if (not cls.instance) or name not in cls.instance:
-            cls.instance[name] = cls._initialize_logger(debug,name)
+            cls.instance[name] = cls._initialize_logger(debug, name)
         return cls.instance[name]
 
     @staticmethod
     def _initialize_logger(debug: int, name: str) -> logging.Logger:
         level = debug  # if debug else logging.INFO
         logger = logging.getLogger(name)
-        logger.propagate=False
+        logger.propagate = False
         if not logger.hasHandlers():
             logger.setLevel(level)
             handler = logging.StreamHandler(sys.stdout)
             handler.setLevel(level)
             formatter = logging.Formatter(
-                "%(asctime)s - %(name)-10s - %(levelname)s - file: %(filename)s - line: %(lineno)d - %(message)s" # pylint: disable = line-too-long
+                "%(asctime)s - %(name)-10s - %(levelname)s - file: %(filename)s - line: %(lineno)d - %(message)s"  # pylint: disable = line-too-long
             )
             handler.setFormatter(formatter)
             logger.addHandler(handler)
         return logger
 
     @classmethod
-    def getLogger(cls, name:str) -> logging.Logger:
+    def getLogger(cls, name: str) -> logging.Logger:
         """Return logger in instance
 
         Args:
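
For context, SingletonLogger caches one configured logger per name in the class-level cls.instance dict, so repeated construction never attaches duplicate handlers. A minimal sketch of the intended call pattern (illustrative only; it assumes the module is importable as fmatch.logrus and that getLogger returns the registered instance, as its docstring states):

import logging

from fmatch.logrus import SingletonLogger

# __new__ returns the cached logging.Logger, so constructing twice with the
# same name yields the same object and handlers are attached only once.
log_a = SingletonLogger(debug=logging.INFO, name="Matcher")
log_b = SingletonLogger(debug=logging.INFO, name="Matcher")
assert log_a is log_b

# The classmethod fetches an already-registered logger by name.
log_c = SingletonLogger.getLogger("Matcher")
log_c.info("handlers were attached once, so this prints a single line to stdout")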

fmatch/matcher.py

Lines changed: 61 additions & 56 deletions
@@ -18,62 +18,67 @@
 
 
 class Matcher:
-    """Matcher"""
     # pylint: disable=too-many-instance-attributes
+    """
+    A class used to match or interact with an Elasticsearch index for performance scale testing.
+
+    Attributes:
+        index (str): Name of the Elasticsearch index to interact with.
+        level (int): Logging level (e.g., logging.INFO).
+        es_url (str): Elasticsearch endpoint, can be specified by the environment variable ES_SERVER
+        verify_certs (bool): Whether to verify SSL certificates when connecting to Elasticsearch.
+        version_field (str): Name of the field containing the OpenShift version.
+        uuid_field (str): Name of the field containing the UUID.
+    """
+
     def __init__(
         self,
         index: str = "ospst-perf-scale-ci",
         level: int = logging.INFO,
-        ES_URL: str = os.getenv("ES_SERVER"),
+        es_url: str = os.getenv("ES_SERVER"),
         verify_certs: bool = True,
         version_field: str = "ocpVersion",
         uuid_field: str = "uuid"
     ):
         self.index = index
-        self.es_url = ES_URL
         self.search_size = 10000
         self.logger = SingletonLogger(debug=level, name="Matcher")
-        self.es = Elasticsearch([self.es_url], timeout=30, verify_certs=verify_certs)
+        self.es = Elasticsearch([es_url], timeout=30, verify_certs=verify_certs)
         self.data = None
         self.version_field = version_field
         self.uuid_field = uuid_field
 
-    def get_metadata_by_uuid(self, uuid: str, index: str = None) -> dict:
+    def get_metadata_by_uuid(self, uuid: str) -> dict:
         """Returns back metadata when uuid is given
 
         Args:
             uuid (str): uuid of the run
-            index (str, optional): index to be searched in. Defaults to None.
 
         Returns:
             _type_: _description_
         """
-        if index is None:
-            index = self.index
         query = Q("match", **{self.uuid_field: {"value": f"{uuid}"}})
         result = {}
-        s = Search(using=self.es, index=index).query(query)
-        res = self.query_index(index, s)
+        s = Search(using=self.es, index=self.index).query(query)
+        res = self.query_index(s)
         hits = res.hits.hits
         if hits:
             result = dict(hits[0].to_dict()["_source"])
         return result
 
-    def query_index(self, index: str, search: Search) -> Response:
+    def query_index(self, search: Search) -> Response:
         """generic query function
 
         Args:
-            index (str): _description_
             search (Search) : Search object with query
         """
-        self.logger.info("Executing query against index=%s", index)
+        self.logger.info("Executing query against index: %s", self.index)
         self.logger.debug("Executing query \r\n%s", search.to_dict())
         return search.execute()
 
     def get_uuid_by_metadata(
         self,
         meta: Dict[str, Any],
-        index: str = None,
         lookback_date: datetime = None,
         lookback_size: int = 10000,
         timestamp_field: str = "timestamp"
@@ -82,37 +87,30 @@ def get_uuid_by_metadata(
 
         Args:
             meta (Dict[str, Any]): metadata of the runs
-            index (str, optional): Index to search. Defaults to None.
-            lookback_date (datetime, optional): 
+            lookback_date (datetime, optional):
                 The cutoff date to get the uuids from. Defaults to None.
-            lookback_size (int, optional): 
+            lookback_size (int, optional):
                 Maximum number of runs to get, gets the latest. Defaults to 10000.
 
-            lookback_size and lookback_date get the data on the 
+            lookback_size and lookback_date get the data on the
             precedency of whichever cutoff comes first.
             Similar to a car manufacturer's warranty limits.
 
         Returns:
             List[Dict[str, str]]: _description_
         """
+        must_clause = []
         must_not_clause = []
-        if index is None:
-            index = self.index
-
         version = str(meta[self.version_field])[:4]
 
-        must_clause = [
-            (
-                Q("match", **{field: str(value)})
-                if isinstance(value, str)
-                else Q("match", **{field: value})
-            )
-            for field, value in meta.items()
-            if field not in [self.version_field, "ocpMajorVersion", "not"]
-        ]
-
-        for field, value in meta.get("not", {}).items():
-            must_not_clause.append(Q("match", **{field: str(value)}))
+        for field, value in meta.items():
+            if field in ["ocpVersion", "ocpMajorVersion"]:
+                continue
+            if field != "not":
+                must_clause.append(Q("match", **{field: str(value)}))
+            else:
+                for not_field, not_value in meta["not"].items():
+                    must_not_clause.append(Q("match", **{not_field: str(not_value)}))
 
         if "ocpMajorVersion" in meta:
             version = meta["ocpMajorVersion"]
@@ -135,26 +133,32 @@ def get_uuid_by_metadata(
             filter=filter_clause,
         )
         s = (
-            Search(using=self.es, index=index)
+            Search(using=self.es, index=self.index)
             .query(query)
             .sort({timestamp_field: {"order": "desc"}})
             .extra(size=lookback_size)
         )
-        result = self.query_index(index, s)
+        result = self.query_index(s)
         hits = result.hits.hits
         uuids_docs = []
         for hit in hits:
             if "buildUrl" in hit["_source"]:
-                uuids_docs.append({
+                uuids_docs.append(
+                    {
                         self.uuid_field: hit.to_dict()["_source"][self.uuid_field],
-                        "buildUrl": hit.to_dict()["_source"]["buildUrl"]})
+                        "buildUrl": hit.to_dict()["_source"]["buildUrl"],
+                    }
+                )
             else:
-                uuids_docs.append({
+                uuids_docs.append(
+                    {
                         self.uuid_field: hit.to_dict()["_source"][self.uuid_field],
-                        "buildUrl": "http://bogus-url"})
+                        "buildUrl": "http://bogus-url",
+                    }
+                )
         return uuids_docs
 
-    def match_kube_burner(self, uuids: List[str], index: str) -> List[Dict[str, Any]]:
+    def match_kube_burner(self, uuids: List[str]) -> List[Dict[str, Any]]:
         """match kube burner runs
         Args:
             uuids (list): list of uuids
@@ -170,9 +174,11 @@ def match_kube_burner(self, uuids: List[str], index: str) -> List[Dict[str, Any]
             ],
         )
         search = (
-            Search(using=self.es, index=index).query(query).extra(size=self.search_size)
+            Search(using=self.es, index=self.index)
+            .query(query)
+            .extra(size=self.search_size)
         )
-        result = self.query_index(index, search)
+        result = self.query_index(search)
         runs = [item.to_dict()["_source"] for item in result.hits.hits]
         return runs
 
@@ -193,10 +199,9 @@ def filter_runs(self, pdata: Dict[Any, Any], data: Dict[Any, Any]) -> List[str]:
         ids_df = ndf.loc[df["jobConfig.jobIterations"] == iterations]
         return ids_df["uuid"].to_list()
 
-    def getResults(
+    def get_results(
         self, uuid: str,
         uuids: List[str],
-        index_str: str,
         metrics: Dict[str, Any]
     ) -> Dict[Any, Any]:
         """
@@ -205,7 +210,6 @@ def getResults(
         Args:
             uuid (str): _description_
             uuids (list): _description_
-            index_str (str): _description_
             metrics (dict): _description_
 
         Returns:
@@ -232,24 +236,23 @@ def getResults(
             ],
         )
         search = (
-            Search(using=self.es, index=index_str)
+            Search(using=self.es, index=self.index)
            .query(query)
            .extra(size=self.search_size)
         )
-        result = self.query_index(index_str, search)
+        result = self.query_index(search)
         runs = [item.to_dict()["_source"] for item in result.hits.hits]
         return runs
 
     def get_agg_metric_query(
         self, uuids: List[str],
-        index: str,
         metrics: Dict[str, Any],
-        timestamp_field: str="timestamp"):
+        timestamp_field: str = "timestamp"
+    ):
         """burner_metric_query will query for specific metrics data.
 
         Args:
             uuids (list): List of uuids
-            index (str): ES/OS Index to query from
             metrics (dict): metrics defined in es index metrics
         """
         metric_queries = []
@@ -266,12 +269,14 @@ def get_agg_metric_query(
         query = Q(
             "bool",
             must=[
-                Q("terms", **{self.uuid_field +".keyword": uuids}),
+                Q("terms", **{self.uuid_field + ".keyword": uuids}),
                 metric_query,
             ],
         )
         search = (
-            Search(using=self.es, index=index).query(query).extra(size=self.search_size)
+            Search(using=self.es, index=self.index)
+            .query(query)
+            .extra(size=self.search_size)
         )
         agg_value = metrics["agg"]["value"]
         agg_type = metrics["agg"]["agg_type"]
@@ -281,15 +286,15 @@
         search.aggs.bucket(
             "uuid", "terms", field=self.uuid_field+".keyword", size=self.search_size
         ).metric(agg_value, agg_type, field=metrics["metric_of_interest"])
-        result = self.query_index(index, search)
+        result = self.query_index(search)
         data = self.parse_agg_results(result, agg_value, agg_type, timestamp_field)
         return data
 
     def parse_agg_results(
         self, data: Dict[Any, Any],
         agg_value: str,
         agg_type: str,
-        timestap_field: str="timestamp"
+        timestamp_field: str = "timestamp"
     ) -> List[Dict[Any, Any]]:
         """parse out CPU data from kube-burner query
         Args:
@@ -306,7 +311,7 @@
         for stamp in stamps:
             dat = {}
             dat[self.uuid_field] = stamp.key
-            dat[timestap_field] = stamp.time.value_as_string
+            dat[timestamp_field] = stamp.time.value_as_string
             agg_values = next(
                 (item for item in agg_buckets if item.key == stamp.key), None
             )
@@ -320,7 +325,7 @@
     def convert_to_df(
         self, data: Dict[Any, Any],
         columns: List[str] = None,
-        timestamp_field: str="timestamp"
+        timestamp_field: str = "timestamp"
     ) -> pd.DataFrame:
         """convert to a dataframe
         Args:
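
Net effect of the signature changes above: the index is bound once at construction and every query helper uses self.index, instead of each call taking its own index argument. A minimal sketch of the new call pattern (the UUID is taken from the test script below; the metadata values are illustrative, and ES_SERVER must point at a reachable Elasticsearch endpoint):

import logging

from fmatch.matcher import Matcher

# es_url defaults to the ES_SERVER environment variable; verify_certs=False
# mirrors the test script for self-signed endpoints.
matcher = Matcher(index="perf_scale_ci*", level=logging.INFO, verify_certs=False)

# No per-call index argument anymore: both helpers query matcher.index.
metadata = matcher.get_metadata_by_uuid("b4afc724-f175-44d1-81ff-a8255fea034f")
uuids = matcher.get_uuid_by_metadata(
    {"masterNodesType": "m6a.xlarge", "ocpVersion": "4.15.0"}  # illustrative values
)

Querying a different index now means constructing another Matcher, as the updated test below does.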

fmatch/test_fmatch.py

Lines changed: 14 additions & 12 deletions
@@ -5,6 +5,7 @@
 from datetime import datetime
 import sys
 import warnings
+
 # pylint: disable=import-error
 import pandas as pd
 
@@ -17,7 +18,7 @@
 )
 
 match = Matcher(index="perf_scale_ci*", verify_certs=False)
-res=match.get_metadata_by_uuid("b4afc724-f175-44d1-81ff-a8255fea034f",'perf_scale_ci*')
+res = match.get_metadata_by_uuid("b4afc724-f175-44d1-81ff-a8255fea034f")
 
 meta = {}
 meta["masterNodesType"] = "m6a.xlarge"
@@ -34,15 +35,16 @@
 # meta['fips'] = "false"
 
 uuids = match.get_uuid_by_metadata(meta)
-print("All uuids",len(uuids))
-date= datetime.strptime("2024-07-01T13:46:24Z","%Y-%m-%dT%H:%M:%SZ")
-uuids2= match.get_uuid_by_metadata(meta,lookback_date=date)
-print("lookback uuids",len(uuids2))
+print("All uuids", len(uuids))
+date = datetime.strptime("2024-07-01T13:46:24Z", "%Y-%m-%dT%H:%M:%SZ")
+uuids2 = match.get_uuid_by_metadata(meta, lookback_date=date)
+print("lookback uuids", len(uuids2))
 uuids2 = match.get_uuid_by_metadata(meta)
 if len(uuids) == 0:
     print("No UUID present for given metadata")
     sys.exit()
-runs = match.match_kube_burner(uuids,"ripsaw-kube-burner*")
+match = Matcher(index="ripsaw-kube-burner*", verify_certs=False)
+runs = match.match_kube_burner(uuids)
 
 ids = match.filter_runs(runs, runs)
 podl_metrics = {
@@ -52,25 +54,25 @@
     "metric_of_interest": "P99",
     "not": {"jobConfig.name": "garbage-collection"},
 }
-podl = match.getResults("", ids, "ripsaw-kube-burner*",metrics=podl_metrics)
+podl = match.get_results("", ids, metrics=podl_metrics)
 kapi_metrics = {
     "name": "apiserverCPU",
     "metricName": "containerCPU",
     "labels.namespace.keyword": "openshift-kube-apiserver",
     "metric_of_interest": "value",
     "agg": {"value": "cpu", "agg_type": "avg"},
 }
-kapi_cpu = match.get_agg_metric_query(ids, "ripsaw-kube-burner*", metrics=kapi_metrics)
+kapi_cpu = match.get_agg_metric_query(ids, metrics=kapi_metrics)
 podl_df = match.convert_to_df(
-    podl, columns=['uuid', 'timestamp', 'quantileName', 'P99'])
+    podl, columns=["uuid", "timestamp", "quantileName", "P99"]
+)
 kapi_cpu_df = match.convert_to_df(kapi_cpu)
 merge_df = pd.merge(kapi_cpu_df, podl_df, on="uuid")
-match.save_results(merge_df, "merged.csv", [
-    "uuid", "timestamp_x", "cpu_avg", "P99"])
+match.save_results(merge_df, "merged.csv", ["uuid", "timestamp_x", "cpu_avg", "P99"])
 
 df = pd.read_csv("merged.csv")
 ls = df["uuid"].to_list()
 # Check merged csv data - Debug
 for i in ls:
     # Debug - Ensure they are all using the same networkType
-    print(match.get_metadata_by_uuid(i)['networkType'])
+    print(match.get_metadata_by_uuid(i)["networkType"])
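
The notable change in this hunk: with the index now fixed per instance, the script switches from perf_scale_ci* to ripsaw-kube-burner* by rebinding match to a second Matcher rather than threading an index string through each call. A sketch of that pattern in isolation (variable names and metadata values are illustrative):

from fmatch.matcher import Matcher

metadata_matcher = Matcher(index="perf_scale_ci*", verify_certs=False)
burner_matcher = Matcher(index="ripsaw-kube-burner*", verify_certs=False)

# Each Matcher queries its own index: metadata lookups go to one,
# kube-burner run matching to the other.
meta = {"masterNodesType": "m6a.xlarge", "ocpVersion": "4.15.0"}  # illustrative
uuids = metadata_matcher.get_uuid_by_metadata(meta)
runs = burner_matcher.match_kube_burner(uuids)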
