Commit e0badc2

Refactor and formatting improvements (#35)
* Refactor and formatting
* Missing docs
* Rebase
* Fix errata
* snake_case method name
* Fix linting
* Tests

Signed-off-by: Raul Sevilla <[email protected]>
1 parent fcdcf73 commit e0badc2

5 files changed: 151 additions & 120 deletions

fmatch/logrus.py

Lines changed: 4 additions & 4 deletions
@@ -17,27 +17,27 @@ class SingletonLogger:
 
     def __new__(cls, debug: int, name: str):
         if (not cls.instance) or name not in cls.instance:
-            cls.instance[name] = cls._initialize_logger(debug,name)
+            cls.instance[name] = cls._initialize_logger(debug, name)
         return cls.instance[name]
 
     @staticmethod
     def _initialize_logger(debug: int, name: str) -> logging.Logger:
         level = debug  # if debug else logging.INFO
         logger = logging.getLogger(name)
-        logger.propagate=False
+        logger.propagate = False
         if not logger.hasHandlers():
             logger.setLevel(level)
             handler = logging.StreamHandler(sys.stdout)
             handler.setLevel(level)
             formatter = logging.Formatter(
-                "%(asctime)s - %(name)-10s - %(levelname)s - file: %(filename)s - line: %(lineno)d - %(message)s" # pylint: disable = line-too-long
+                "%(asctime)s - %(name)-10s - %(levelname)s - file: %(filename)s - line: %(lineno)d - %(message)s"  # pylint: disable = line-too-long
             )
             handler.setFormatter(formatter)
             logger.addHandler(handler)
         return logger
 
     @classmethod
-    def getLogger(cls, name:str) -> logging.Logger:
+    def getLogger(cls, name: str) -> logging.Logger:
         """Return logger in instance
 
         Args:
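
For context, SingletonLogger caches one configured logger per name in the class-level cls.instance dict, so repeated construction never attaches duplicate handlers. A minimal sketch of the intended call pattern (illustrative only; it assumes the module is importable as fmatch.logrus and that getLogger returns the registered instance, as its docstring states):

import logging

from fmatch.logrus import SingletonLogger

# __new__ returns the cached logging.Logger, so constructing twice with the
# same name yields the same object and handlers are attached only once.
log_a = SingletonLogger(debug=logging.INFO, name="Matcher")
log_b = SingletonLogger(debug=logging.INFO, name="Matcher")
assert log_a is log_b

# The classmethod fetches an already-registered logger by name.
log_c = SingletonLogger.getLogger("Matcher")
log_c.info("handlers were attached once, so this prints a single line to stdout")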

fmatch/matcher.py

Lines changed: 61 additions & 56 deletions
@@ -18,62 +18,67 @@
 
 
 class Matcher:
-    """Matcher"""
     # pylint: disable=too-many-instance-attributes
+    """
+    A class used to match or interact with an Elasticsearch index for performance scale testing.
+
+    Attributes:
+        index (str): Name of the Elasticsearch index to interact with.
+        level (int): Logging level (e.g., logging.INFO).
+        es_url (str): Elasticsearch endpoint, can be specified by the environment variable ES_SERVER
+        verify_certs (bool): Whether to verify SSL certificates when connecting to Elasticsearch.
+        version_field (str): Name of the field containing the OpenShift version.
+        uuid_field (str): Name of the field containing the UUID.
+    """
+
     def __init__(
         self,
         index: str = "ospst-perf-scale-ci",
         level: int = logging.INFO,
-        ES_URL: str = os.getenv("ES_SERVER"),
+        es_url: str = os.getenv("ES_SERVER"),
         verify_certs: bool = True,
         version_field: str = "ocpVersion",
         uuid_field: str = "uuid"
     ):
         self.index = index
-        self.es_url = ES_URL
         self.search_size = 10000
         self.logger = SingletonLogger(debug=level, name="Matcher")
-        self.es = Elasticsearch([self.es_url], timeout=30, verify_certs=verify_certs)
+        self.es = Elasticsearch([es_url], timeout=30, verify_certs=verify_certs)
         self.data = None
         self.version_field = version_field
         self.uuid_field = uuid_field
 
-    def get_metadata_by_uuid(self, uuid: str, index: str = None) -> dict:
+    def get_metadata_by_uuid(self, uuid: str) -> dict:
         """Returns back metadata when uuid is given
 
         Args:
             uuid (str): uuid of the run
-            index (str, optional): index to be searched in. Defaults to None.
 
         Returns:
             _type_: _description_
         """
-        if index is None:
-            index = self.index
         query = Q("match", **{self.uuid_field: {"value": f"{uuid}"}})
         result = {}
-        s = Search(using=self.es, index=index).query(query)
-        res = self.query_index(index, s)
+        s = Search(using=self.es, index=self.index).query(query)
+        res = self.query_index(s)
         hits = res.hits.hits
         if hits:
             result = dict(hits[0].to_dict()["_source"])
         return result
 
-    def query_index(self, index: str, search: Search) -> Response:
+    def query_index(self, search: Search) -> Response:
         """generic query function
 
         Args:
-            index (str): _description_
             search (Search) : Search object with query
         """
-        self.logger.info("Executing query against index=%s", index)
+        self.logger.info("Executing query against index: %s", self.index)
         self.logger.debug("Executing query \r\n%s", search.to_dict())
         return search.execute()
 
     def get_uuid_by_metadata(
         self,
         meta: Dict[str, Any],
-        index: str = None,
         lookback_date: datetime = None,
         lookback_size: int = 10000,
         timestamp_field: str = "timestamp"
@@ -82,37 +87,30 @@ def get_uuid_by_metadata(
 
         Args:
             meta (Dict[str, Any]): metadata of the runs
-            index (str, optional): Index to search. Defaults to None.
-            lookback_date (datetime, optional): 
+            lookback_date (datetime, optional):
                 The cutoff date to get the uuids from. Defaults to None.
-            lookback_size (int, optional): 
+            lookback_size (int, optional):
                 Maximum number of runs to get, gets the latest. Defaults to 10000.
 
-            lookback_size and lookback_date get the data on the 
+            lookback_size and lookback_date get the data on the
             precedency of whichever cutoff comes first.
             Similar to a car manufacturer's warranty limits.
 
         Returns:
             List[Dict[str, str]]: _description_
         """
+        must_clause = []
         must_not_clause = []
-        if index is None:
-            index = self.index
-
         version = str(meta[self.version_field])[:4]
 
-        must_clause = [
-            (
-                Q("match", **{field: str(value)})
-                if isinstance(value, str)
-                else Q("match", **{field: value})
-            )
-            for field, value in meta.items()
-            if field not in [self.version_field, "ocpMajorVersion", "not"]
-        ]
-
-        for field, value in meta.get("not", {}).items():
-            must_not_clause.append(Q("match", **{field: str(value)}))
+        for field, value in meta.items():
+            if field in ["ocpVersion", "ocpMajorVersion"]:
+                continue
+            if field != "not":
+                must_clause.append(Q("match", **{field: str(value)}))
+            else:
+                for not_field, not_value in meta["not"].items():
+                    must_not_clause.append(Q("match", **{not_field: str(not_value)}))
 
         if "ocpMajorVersion" in meta:
             version = meta["ocpMajorVersion"]
@@ -135,26 +133,32 @@ def get_uuid_by_metadata(
             filter=filter_clause,
         )
         s = (
-            Search(using=self.es, index=index)
+            Search(using=self.es, index=self.index)
             .query(query)
             .sort({timestamp_field: {"order": "desc"}})
             .extra(size=lookback_size)
         )
-        result = self.query_index(index, s)
+        result = self.query_index(s)
         hits = result.hits.hits
         uuids_docs = []
         for hit in hits:
             if "buildUrl" in hit["_source"]:
-                uuids_docs.append({
+                uuids_docs.append(
+                    {
                         self.uuid_field: hit.to_dict()["_source"][self.uuid_field],
-                        "buildUrl": hit.to_dict()["_source"]["buildUrl"]})
+                        "buildUrl": hit.to_dict()["_source"]["buildUrl"],
+                    }
+                )
             else:
-                uuids_docs.append({
+                uuids_docs.append(
+                    {
                         self.uuid_field: hit.to_dict()["_source"][self.uuid_field],
-                        "buildUrl": "http://bogus-url"})
+                        "buildUrl": "http://bogus-url",
+                    }
+                )
         return uuids_docs
 
-    def match_kube_burner(self, uuids: List[str], index: str) -> List[Dict[str, Any]]:
+    def match_kube_burner(self, uuids: List[str]) -> List[Dict[str, Any]]:
         """match kube burner runs
         Args:
             uuids (list): list of uuids
@@ -170,9 +174,11 @@ def match_kube_burner(self, uuids: List[str], index: str) -> List[Dict[str, Any]
             ],
         )
         search = (
-            Search(using=self.es, index=index).query(query).extra(size=self.search_size)
+            Search(using=self.es, index=self.index)
+            .query(query)
+            .extra(size=self.search_size)
         )
-        result = self.query_index(index, search)
+        result = self.query_index(search)
         runs = [item.to_dict()["_source"] for item in result.hits.hits]
         return runs
 
@@ -193,10 +199,9 @@ def filter_runs(self, pdata: Dict[Any, Any], data: Dict[Any, Any]) -> List[str]:
         ids_df = ndf.loc[df["jobConfig.jobIterations"] == iterations]
         return ids_df["uuid"].to_list()
 
-    def getResults(
+    def get_results(
         self, uuid: str,
         uuids: List[str],
-        index_str: str,
         metrics: Dict[str, Any]
     ) -> Dict[Any, Any]:
         """
@@ -205,7 +210,6 @@ def getResults(
         Args:
             uuid (str): _description_
             uuids (list): _description_
-            index_str (str): _description_
             metrics (dict): _description_
 
         Returns:
@@ -232,24 +236,23 @@ def getResults(
             ],
         )
         search = (
-            Search(using=self.es, index=index_str)
+            Search(using=self.es, index=self.index)
            .query(query)
            .extra(size=self.search_size)
         )
-        result = self.query_index(index_str, search)
+        result = self.query_index(search)
         runs = [item.to_dict()["_source"] for item in result.hits.hits]
         return runs
 
     def get_agg_metric_query(
         self, uuids: List[str],
-        index: str,
         metrics: Dict[str, Any],
-        timestamp_field: str="timestamp"):
+        timestamp_field: str = "timestamp"
+    ):
         """burner_metric_query will query for specific metrics data.
 
         Args:
             uuids (list): List of uuids
-            index (str): ES/OS Index to query from
             metrics (dict): metrics defined in es index metrics
         """
         metric_queries = []
@@ -266,12 +269,14 @@ def get_agg_metric_query(
         query = Q(
             "bool",
             must=[
-                Q("terms", **{self.uuid_field +".keyword": uuids}),
+                Q("terms", **{self.uuid_field + ".keyword": uuids}),
                 metric_query,
             ],
         )
         search = (
-            Search(using=self.es, index=index).query(query).extra(size=self.search_size)
+            Search(using=self.es, index=self.index)
+            .query(query)
+            .extra(size=self.search_size)
         )
         agg_value = metrics["agg"]["value"]
         agg_type = metrics["agg"]["agg_type"]
@@ -281,15 +286,15 @@
         search.aggs.bucket(
             "uuid", "terms", field=self.uuid_field+".keyword", size=self.search_size
         ).metric(agg_value, agg_type, field=metrics["metric_of_interest"])
-        result = self.query_index(index, search)
+        result = self.query_index(search)
         data = self.parse_agg_results(result, agg_value, agg_type, timestamp_field)
         return data
 
     def parse_agg_results(
         self, data: Dict[Any, Any],
         agg_value: str,
         agg_type: str,
-        timestap_field: str="timestamp"
+        timestamp_field: str = "timestamp"
     ) -> List[Dict[Any, Any]]:
         """parse out CPU data from kube-burner query
         Args:
@@ -306,7 +311,7 @@
         for stamp in stamps:
             dat = {}
             dat[self.uuid_field] = stamp.key
-            dat[timestap_field] = stamp.time.value_as_string
+            dat[timestamp_field] = stamp.time.value_as_string
             agg_values = next(
                 (item for item in agg_buckets if item.key == stamp.key), None
             )
@@ -320,7 +325,7 @@
     def convert_to_df(
         self, data: Dict[Any, Any],
         columns: List[str] = None,
-        timestamp_field: str="timestamp"
+        timestamp_field: str = "timestamp"
     ) -> pd.DataFrame:
         """convert to a dataframe
         Args:
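
Net effect of the signature changes above: the index is bound once at construction and every query helper uses self.index, instead of each call taking its own index argument. A minimal sketch of the new call pattern (the UUID is taken from the test script below; the metadata values are illustrative, and ES_SERVER must point at a reachable Elasticsearch endpoint):

import logging

from fmatch.matcher import Matcher

# es_url defaults to the ES_SERVER environment variable; verify_certs=False
# mirrors the test script for self-signed endpoints.
matcher = Matcher(index="perf_scale_ci*", level=logging.INFO, verify_certs=False)

# No per-call index argument anymore: both helpers query matcher.index.
metadata = matcher.get_metadata_by_uuid("b4afc724-f175-44d1-81ff-a8255fea034f")
uuids = matcher.get_uuid_by_metadata(
    {"masterNodesType": "m6a.xlarge", "ocpVersion": "4.15.0"}  # illustrative values
)

Querying a different index now means constructing another Matcher, as the updated test below does.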

fmatch/test_fmatch.py

Lines changed: 14 additions & 12 deletions
@@ -5,6 +5,7 @@
 from datetime import datetime
 import sys
 import warnings
+
 # pylint: disable=import-error
 import pandas as pd
 
@@ -17,7 +18,7 @@
 )
 
 match = Matcher(index="perf_scale_ci*", verify_certs=False)
-res=match.get_metadata_by_uuid("b4afc724-f175-44d1-81ff-a8255fea034f",'perf_scale_ci*')
+res = match.get_metadata_by_uuid("b4afc724-f175-44d1-81ff-a8255fea034f")
 
 meta = {}
 meta["masterNodesType"] = "m6a.xlarge"
@@ -34,15 +35,16 @@
 # meta['fips'] = "false"
 
 uuids = match.get_uuid_by_metadata(meta)
-print("All uuids",len(uuids))
-date= datetime.strptime("2024-07-01T13:46:24Z","%Y-%m-%dT%H:%M:%SZ")
-uuids2= match.get_uuid_by_metadata(meta,lookback_date=date)
-print("lookback uuids",len(uuids2))
+print("All uuids", len(uuids))
+date = datetime.strptime("2024-07-01T13:46:24Z", "%Y-%m-%dT%H:%M:%SZ")
+uuids2 = match.get_uuid_by_metadata(meta, lookback_date=date)
+print("lookback uuids", len(uuids2))
 uuids2 = match.get_uuid_by_metadata(meta)
 if len(uuids) == 0:
     print("No UUID present for given metadata")
     sys.exit()
-runs = match.match_kube_burner(uuids,"ripsaw-kube-burner*")
+match = Matcher(index="ripsaw-kube-burner*", verify_certs=False)
+runs = match.match_kube_burner(uuids)
 
 ids = match.filter_runs(runs, runs)
 podl_metrics = {
@@ -52,25 +54,25 @@
     "metric_of_interest": "P99",
     "not": {"jobConfig.name": "garbage-collection"},
 }
-podl = match.getResults("", ids, "ripsaw-kube-burner*",metrics=podl_metrics)
+podl = match.get_results("", ids, metrics=podl_metrics)
 kapi_metrics = {
     "name": "apiserverCPU",
     "metricName": "containerCPU",
     "labels.namespace.keyword": "openshift-kube-apiserver",
     "metric_of_interest": "value",
     "agg": {"value": "cpu", "agg_type": "avg"},
 }
-kapi_cpu = match.get_agg_metric_query(ids, "ripsaw-kube-burner*", metrics=kapi_metrics)
+kapi_cpu = match.get_agg_metric_query(ids, metrics=kapi_metrics)
 podl_df = match.convert_to_df(
-    podl, columns=['uuid', 'timestamp', 'quantileName', 'P99'])
+    podl, columns=["uuid", "timestamp", "quantileName", "P99"]
+)
 kapi_cpu_df = match.convert_to_df(kapi_cpu)
 merge_df = pd.merge(kapi_cpu_df, podl_df, on="uuid")
-match.save_results(merge_df, "merged.csv", [
-    "uuid", "timestamp_x", "cpu_avg", "P99"])
+match.save_results(merge_df, "merged.csv", ["uuid", "timestamp_x", "cpu_avg", "P99"])
 
 df = pd.read_csv("merged.csv")
 ls = df["uuid"].to_list()
 # Check merged csv data - Debug
 for i in ls:
     # Debug - Ensure they are all using the same networkType
-    print(match.get_metadata_by_uuid(i)['networkType'])
+    print(match.get_metadata_by_uuid(i)["networkType"])
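
The notable change in this hunk: with the index now fixed per instance, the script switches from perf_scale_ci* to ripsaw-kube-burner* by rebinding match to a second Matcher rather than threading an index string through each call. A sketch of that pattern in isolation (variable names and metadata values are illustrative):

from fmatch.matcher import Matcher

metadata_matcher = Matcher(index="perf_scale_ci*", verify_certs=False)
burner_matcher = Matcher(index="ripsaw-kube-burner*", verify_certs=False)

# Each Matcher queries its own index: metadata lookups go to one,
# kube-burner run matching to the other.
meta = {"masterNodesType": "m6a.xlarge", "ocpVersion": "4.15.0"}  # illustrative
uuids = metadata_matcher.get_uuid_by_metadata(meta)
runs = burner_matcher.match_kube_burner(uuids)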
