1+ import sys
2+
13from superset .app import create_app
24
35app = create_app ()
46app .app_context ().push ()
57
6-
78import json
89import logging
910import time
1011import uuid
1112from datetime import datetime
1213from unittest .mock import patch
1314
15+ import click
1416import sqlparse
1517from flask import g
1618from superset import security_manager
3638 "Filters: {filters}\n \n "
3739)
3840
39-
40- def performance_metrics ():
41- """Measure the performance of the dashboard."""
42- # Mock the client name to identify the queries in the clickhouse system.query_log table by
43- # by the http_user_agent field.
41+ @click .command ()
42+ @click .option ("--course_key" , default = "" , help = "A course_key to apply as a filter." )
43+ @click .option (
44+ "--print_sql" ,
45+ is_flag = True ,
46+ default = False ,
47+ help = "Whether to print the SQL run."
48+ )
49+ def performance_metrics (course_key , print_sql ):
50+ """
51+ Measure the performance of the dashboard.
52+ """
53+ # Mock the client name to identify the queries in the clickhouse system.query_log
54+ # table by by the http_user_agent field.
55+ extra_filters = []
56+ if course_key :
57+ extra_filters += [{"col" :"course_key" ,"op" :"==" ,"val" :course_key }]
4458 with patch ("clickhouse_connect.common.build_client_name" ) as mock_build_client_name :
4559 mock_build_client_name .return_value = RUN_ID
4660 embedable_dashboards = {{SUPERSET_EMBEDDABLE_DASHBOARDS }}
@@ -53,13 +67,18 @@ def performance_metrics():
5367 for dashboard in dashboards :
5468 logger .info (f"Dashboard: { dashboard .slug } " )
5569 for slice in dashboard .slices :
56- result = measure_chart (slice )
70+ result = measure_chart (slice , extra_filters )
5771 if not result :
5872 continue
5973 for query in result ["queries" ]:
60- # Remove the data from the query to avoid memory issues on large datasets.
74+ # Remove the data from the query to avoid memory issues on large
75+ # datasets.
6176 query .pop ("data" )
6277 report .append (result )
78+
79+ logger .info ("Waiting for clickhouse log..." )
80+ time .sleep (20 )
81+ get_query_log_from_clickhouse (report , print_sql )
6382 return report
6483
6584
@@ -82,7 +101,8 @@ def measure_chart(slice, extra_filters=[]):
82101 )
83102
84103 if extra_filters :
85- query_context ["filters" ].extend (extra_filters )
104+ for query in query_context ["queries" ]:
105+ query ["filters" ]+= extra_filters
86106
87107 g .user = security_manager .find_user (username = "{{SUPERSET_ADMIN_USERNAME}}" )
88108 query_context = ChartDataQueryContextSchema ().load (query_context )
@@ -101,10 +121,11 @@ def measure_chart(slice, extra_filters=[]):
101121 return result
102122
103123
104- def get_query_log_from_clickhouse (report ):
124+ def get_query_log_from_clickhouse (report , print_sql ):
105125 """
106126 Get the query log from clickhouse and print the results.
107127 """
128+ # This corresponsds to the "Query Performance" chart in Superset
108129 chart_uuid = "bb13bb31-c797-4ed3-a7f9-7825cc6dc482"
109130
110131 slice = db .session .query (Slice ).filter (Slice .uuid == chart_uuid ).one ()
@@ -115,33 +136,46 @@ def get_query_log_from_clickhouse(report):
115136 )
116137 slice .query_context = json .dumps (query_context )
117138
118- result = measure_chart (slice )
139+ ch_chart_result = measure_chart (slice )
119140
120141 clickhouse_queries = {}
121- for query in result ["queries" ]:
142+ for query in ch_chart_result ["queries" ]:
122143 for row in query ["data" ]:
123144 parsed_sql = str (sqlparse .parse (row .pop ("query" ))[0 ])
124145 clickhouse_queries [parsed_sql ] = row
125146
147+ if print_sql :
148+ print ("ClickHouse SQL: " )
149+ logger .info (parsed_sql )
150+
126151 # Sort report by slowest queries
127152 report = sorted (report , key = lambda x : x ["time_elapsed" ], reverse = True )
128153
129154 report_str = f"\n Superset Reports: { RUN_ID } \n \n "
130- for i , result in enumerate (report ):
155+ for i , chart_result in enumerate (report ):
131156 report_str += (
132157 report_format .format (
133- i = (i + 1 ), slice = result ["slice" ], superset_time = result ["time_elapsed" ]
158+ i = (i + 1 ),
159+ slice = chart_result ["slice" ],
160+ superset_time = chart_result ["time_elapsed" ]
134161 )
135162 )
136- for i , query in enumerate (result ["queries" ]):
163+ for i , query in enumerate (chart_result ["queries" ]):
137164 parsed_sql = (
138165 str (sqlparse .parse (query ["query" ])[0 ]).replace (";" , "" )
139166 + "\n FORMAT Native"
140167 )
168+
169+ if print_sql :
170+ print ("Superset SQL: " )
171+ logger .info (parsed_sql )
172+
141173 clickhouse_report = clickhouse_queries .get (parsed_sql , {})
142174 report_str += (
143175 query_format .format (
144- query_duration_ms = clickhouse_report .get ("query_duration_ms" ) / 1000 ,
176+ query_duration_ms = clickhouse_report .get (
177+ "query_duration_ms" , 0
178+ ) / 1000 ,
145179 memory_usage_mb = clickhouse_report .get ("memory_usage_mb" ),
146180 result_rows = clickhouse_report .get ("result_rows" ),
147181 rowcount = query ["rowcount" ],
@@ -153,8 +187,4 @@ def get_query_log_from_clickhouse(report):
153187
154188if __name__ == "__main__" :
155189 logger .info (f"Running performance metrics. RUN ID: { RUN_ID } " )
156- report = performance_metrics ()
157- # Clickhouse query log takes some seconds to log queries.
158- logger .info ("Waiting for clickhouse log..." )
159- time .sleep (10 )
160- get_query_log_from_clickhouse (report )
190+ performance_metrics ()
0 commit comments