Skip to content

Commit d47a1b1

Browse files
authored
Merge pull request #729 from openedx/cag/performance-metrics-filter
feat: allow to filter course id for performance metrics
2 parents b105d42 + c124082 commit d47a1b1

3 files changed

Lines changed: 65 additions & 26 deletions

File tree

tutoraspects/commands_v0.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,16 +109,17 @@ def alembic(context, command) -> None:
109109

110110
# Ex: "tutor local do performance-metrics "
111111
@click.command(context_settings={"ignore_unknown_options": True})
112+
@click.option("--course_key", default="", help="A course_key to apply as a filter.")
112113
@click.pass_obj
113-
def performance_metrics(context) -> None:
114+
def performance_metrics(context, course_key) -> None:
114115
"""
115116
Job to measure performance metrics of charts and its queries in Superset and ClickHouse.
116117
"""
117118
config = tutor_config.load(context.root)
118119
runner = context.job_runner(config)
119120

120-
command = """echo 'Performance...' &&
121-
python /app/pythonpath/performance_metrics.py &&
121+
command = f"""echo 'Performance...' &&
122+
python /app/pythonpath/performance_metrics.py '{course_key}' &&
122123
echo 'Done!';
123124
"""
124125
runner.run_job("superset", command)
@@ -262,4 +263,5 @@ def transform_tracking_logs(context, deduplicate, **kwargs) -> None:
262263
dump_data_to_clickhouse,
263264
transform_tracking_logs,
264265
import_assets,
266+
performance_metrics,
265267
)

tutoraspects/commands_v1.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,15 +132,22 @@ def import_assets() -> list[tuple[str, str]]:
132132

133133
# Ex: "tutor local do performance-metrics "
134134
@click.command(context_settings={"ignore_unknown_options": True})
135-
def performance_metrics() -> list[tuple[str, str]]:
135+
@click.option("--course_key", default="", help="A course_key to apply as a filter.")
136+
@click.option(
137+
"--print_sql", is_flag=True, default=False, help="Print the SQL that was run."
138+
)
139+
def performance_metrics(course_key, print_sql) -> list[tuple[str, str]]:
136140
"""
137141
Job to measure performance metrics of charts and its queries in Superset and ClickHouse.
138142
"""
143+
options = f"--course_key {course_key}" if course_key else ""
144+
options += " --print_sql" if print_sql else ""
145+
139146
return [
140147
(
141148
"superset",
142149
"echo 'Performance...' && "
143-
"python /app/pythonpath/performance_metrics.py &&"
150+
f"python /app/pythonpath/performance_metrics.py {options} &&"
144151
"echo 'Done!';",
145152
),
146153
]

tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py

Lines changed: 51 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
1+
import sys
2+
13
from superset.app import create_app
24

35
app = create_app()
46
app.app_context().push()
57

6-
78
import json
89
import logging
910
import time
1011
import uuid
1112
from datetime import datetime
1213
from unittest.mock import patch
1314

15+
import click
1416
import sqlparse
1517
from flask import g
1618
from superset import security_manager
@@ -36,11 +38,23 @@
3638
"Filters: {filters}\n\n"
3739
)
3840

39-
40-
def performance_metrics():
41-
"""Measure the performance of the dashboard."""
42-
# Mock the client name to identify the queries in the clickhouse system.query_log table by
43-
# by the http_user_agent field.
41+
@click.command()
42+
@click.option("--course_key", default="", help="A course_key to apply as a filter.")
43+
@click.option(
44+
"--print_sql",
45+
is_flag=True,
46+
default=False,
47+
help="Whether to print the SQL run."
48+
)
49+
def performance_metrics(course_key, print_sql):
50+
"""
51+
Measure the performance of the dashboard.
52+
"""
53+
# Mock the client name to identify the queries in the clickhouse system.query_log
54+
# table by by the http_user_agent field.
55+
extra_filters = []
56+
if course_key:
57+
extra_filters+=[{"col":"course_key","op":"==","val":course_key}]
4458
with patch("clickhouse_connect.common.build_client_name") as mock_build_client_name:
4559
mock_build_client_name.return_value = RUN_ID
4660
embedable_dashboards = {{SUPERSET_EMBEDDABLE_DASHBOARDS}}
@@ -53,13 +67,18 @@ def performance_metrics():
5367
for dashboard in dashboards:
5468
logger.info(f"Dashboard: {dashboard.slug}")
5569
for slice in dashboard.slices:
56-
result = measure_chart(slice)
70+
result = measure_chart(slice, extra_filters)
5771
if not result:
5872
continue
5973
for query in result["queries"]:
60-
# Remove the data from the query to avoid memory issues on large datasets.
74+
# Remove the data from the query to avoid memory issues on large
75+
# datasets.
6176
query.pop("data")
6277
report.append(result)
78+
79+
logger.info("Waiting for clickhouse log...")
80+
time.sleep(20)
81+
get_query_log_from_clickhouse(report, print_sql)
6382
return report
6483

6584

@@ -82,7 +101,8 @@ def measure_chart(slice, extra_filters=[]):
82101
)
83102

84103
if extra_filters:
85-
query_context["filters"].extend(extra_filters)
104+
for query in query_context["queries"]:
105+
query["filters"]+=extra_filters
86106

87107
g.user = security_manager.find_user(username="{{SUPERSET_ADMIN_USERNAME}}")
88108
query_context = ChartDataQueryContextSchema().load(query_context)
@@ -101,10 +121,11 @@ def measure_chart(slice, extra_filters=[]):
101121
return result
102122

103123

104-
def get_query_log_from_clickhouse(report):
124+
def get_query_log_from_clickhouse(report, print_sql):
105125
"""
106126
Get the query log from clickhouse and print the results.
107127
"""
128+
# This corresponsds to the "Query Performance" chart in Superset
108129
chart_uuid = "bb13bb31-c797-4ed3-a7f9-7825cc6dc482"
109130

110131
slice = db.session.query(Slice).filter(Slice.uuid == chart_uuid).one()
@@ -115,33 +136,46 @@ def get_query_log_from_clickhouse(report):
115136
)
116137
slice.query_context = json.dumps(query_context)
117138

118-
result = measure_chart(slice)
139+
ch_chart_result = measure_chart(slice)
119140

120141
clickhouse_queries = {}
121-
for query in result["queries"]:
142+
for query in ch_chart_result["queries"]:
122143
for row in query["data"]:
123144
parsed_sql = str(sqlparse.parse(row.pop("query"))[0])
124145
clickhouse_queries[parsed_sql] = row
125146

147+
if print_sql:
148+
print("ClickHouse SQL: ")
149+
logger.info(parsed_sql)
150+
126151
# Sort report by slowest queries
127152
report = sorted(report, key=lambda x: x["time_elapsed"], reverse=True)
128153

129154
report_str = f"\nSuperset Reports: {RUN_ID}\n\n"
130-
for i, result in enumerate(report):
155+
for i, chart_result in enumerate(report):
131156
report_str+=(
132157
report_format.format(
133-
i=(i + 1), slice=result["slice"], superset_time=result["time_elapsed"]
158+
i=(i + 1),
159+
slice=chart_result["slice"],
160+
superset_time=chart_result["time_elapsed"]
134161
)
135162
)
136-
for i, query in enumerate(result["queries"]):
163+
for i, query in enumerate(chart_result["queries"]):
137164
parsed_sql = (
138165
str(sqlparse.parse(query["query"])[0]).replace(";", "")
139166
+ "\n FORMAT Native"
140167
)
168+
169+
if print_sql:
170+
print("Superset SQL: ")
171+
logger.info(parsed_sql)
172+
141173
clickhouse_report = clickhouse_queries.get(parsed_sql, {})
142174
report_str+=(
143175
query_format.format(
144-
query_duration_ms=clickhouse_report.get("query_duration_ms") / 1000,
176+
query_duration_ms=clickhouse_report.get(
177+
"query_duration_ms", 0
178+
) / 1000,
145179
memory_usage_mb=clickhouse_report.get("memory_usage_mb"),
146180
result_rows=clickhouse_report.get("result_rows"),
147181
rowcount=query["rowcount"],
@@ -153,8 +187,4 @@ def get_query_log_from_clickhouse(report):
153187

154188
if __name__ == "__main__":
155189
logger.info(f"Running performance metrics. RUN ID: {RUN_ID}")
156-
report = performance_metrics()
157-
# Clickhouse query log takes some seconds to log queries.
158-
logger.info("Waiting for clickhouse log...")
159-
time.sleep(10)
160-
get_query_log_from_clickhouse(report)
190+
performance_metrics()

0 commit comments

Comments
 (0)