
Commit 897ca55

tanushangural, sjyothi54, and avadla authored
Qpm optimized queries (#254)
* updated wait query
* feat: Add comprehensive SQL Server slow query analysis with performance optimizations
* feat: optimize query anonymization to run only on filtered results
* feat: add system database filtering with intelligent fallback
* updated wait query
* removed the mssql.go from outside of src
* move average calculations before filtering logic
* reverted the changes to literalAnonymizer
* remove slow extra columns
* Updated args for record limit for both wait and slow queries
* Adding trimming of the blocked query text
* Updated the test cases
* updating slow query
* formatted slow query
* removing config for slow and wait
* removing exe file

---------

Co-authored-by: Jyothi Surampudi <jsurampudi@newrelic.com>
Co-authored-by: avadla <avadla@newrelic.com>
1 parent 68c7c66 commit 897ca55

10 files changed: 1067 additions & 301 deletions

src/queryanalysis/config/query_config.go

Lines changed: 228 additions & 219 deletions
Large diffs are not rendered by default.
Lines changed: 19 additions & 0 deletions (new file; name not rendered)
```diff
@@ -0,0 +1,19 @@
+package models
+
+// NewRelicSlowQueryDetails contains only the fields we want to send to New Relic
+// This model excludes internal fields like QueryPlanHash, PlanHandle, and raw totals
+// Only calculated averages and essential metadata are included
+type NewRelicSlowQueryDetails struct {
+	QueryID                *HexString `db:"query_id" metric_name:"query_id" source_type:"attribute"`
+	QueryText              *string    `db:"query_text" metric_name:"query_text" source_type:"attribute"`
+	DatabaseName           *string    `db:"database_name" metric_name:"database_name" source_type:"attribute"`
+	SchemaName             *string    `db:"schema_name" metric_name:"schema_name" source_type:"attribute"`
+	LastExecutionTimestamp *string    `db:"last_execution_timestamp" metric_name:"last_execution_timestamp" source_type:"attribute"`
+	ExecutionCount         *int64     `db:"execution_count" metric_name:"execution_count" source_type:"gauge"`
+	AvgCPUTimeMS           *float64   `db:"avg_cpu_time_ms" metric_name:"avg_cpu_time_ms" source_type:"gauge"`
+	AvgElapsedTimeMS       *float64   `db:"avg_elapsed_time_ms" metric_name:"avg_elapsed_time_ms" source_type:"gauge"`
+	AvgDiskReads           *float64   `db:"avg_disk_reads" metric_name:"avg_disk_reads" source_type:"gauge"`
+	AvgDiskWrites          *float64   `db:"avg_disk_writes" metric_name:"avg_disk_writes" source_type:"gauge"`
+	StatementType          *string    `db:"statement_type" metric_name:"statement_type" source_type:"attribute"`
+	CollectionTimestamp    *string    `db:"collection_timestamp" metric_name:"collection_timestamp" source_type:"attribute"`
+}
```
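The `db` tags map result-set columns onto fields, while `metric_name` and `source_type` drive how each field is published. Below is a minimal sketch of that tag convention in action via reflection; the `row` type and `emit` helper are hypothetical stand-ins, not the integration's actual SDK plumbing:

```go
package main

import (
	"fmt"
	"reflect"
)

// row is a simplified stand-in for a model using the same tag convention.
type row struct {
	QueryText    *string  `metric_name:"query_text" source_type:"attribute"`
	AvgCPUTimeMS *float64 `metric_name:"avg_cpu_time_ms" source_type:"gauge"`
}

// emit walks the struct tags and prints one metric per non-nil field.
func emit(v interface{}) {
	val := reflect.ValueOf(v)
	typ := val.Type()
	for i := 0; i < typ.NumField(); i++ {
		f := val.Field(i)
		if f.Kind() == reflect.Ptr && f.IsNil() {
			continue // a nil pointer means "not collected"; skip it
		}
		name := typ.Field(i).Tag.Get("metric_name")
		kind := typ.Field(i).Tag.Get("source_type")
		fmt.Printf("%s (%s) = %v\n", name, kind, f.Elem())
	}
}

func main() {
	q := "SELECT ?"
	ms := 12.5
	emit(row{QueryText: &q, AvgCPUTimeMS: &ms})
}
```

Pointer fields let a scanner distinguish NULL columns from genuine zero values, which is why nearly every field in these models is a pointer.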

src/queryanalysis/models/top_n_slow_query_details.go

Lines changed: 4 additions & 4 deletions
```diff
@@ -7,10 +7,10 @@ type TopNSlowQueryDetails struct {
 	SchemaName             *string `db:"schema_name" metric_name:"schema_name" source_type:"attribute"`
 	LastExecutionTimestamp *string `db:"last_execution_timestamp" metric_name:"last_execution_timestamp" source_type:"attribute"`
 	ExecutionCount         *int64  `db:"execution_count" metric_name:"execution_count" source_type:"gauge"`
-	AvgCPUTimeMS           *float64 `db:"avg_cpu_time_ms" metric_name:"avg_cpu_time_ms" source_type:"gauge"`
-	AvgElapsedTimeMS       *float64 `db:"avg_elapsed_time_ms" metric_name:"avg_elapsed_time_ms" source_type:"gauge"`
-	AvgDiskReads           *float64 `db:"avg_disk_reads" metric_name:"avg_disk_reads" source_type:"gauge"`
-	AvgDiskWrites          *float64 `db:"avg_disk_writes" metric_name:"avg_disk_writes" source_type:"gauge"`
+	TotalWorkerTime        *int64  `db:"total_worker_time" metric_name:"total_worker_time" source_type:"gauge"`
+	TotalElapsedTime       *int64  `db:"total_elapsed_time" metric_name:"total_elapsed_time" source_type:"gauge"`
+	TotalLogicalReads      *int64  `db:"total_logical_reads" metric_name:"total_logical_reads" source_type:"gauge"`
+	TotalLogicalWrites     *int64  `db:"total_logical_writes" metric_name:"total_logical_writes" source_type:"gauge"`
 	StatementType          *string `db:"statement_type" metric_name:"statement_type" source_type:"attribute"`
 	CollectionTimestamp    *string `db:"collection_timestamp" metric_name:"collection_timestamp" source_type:"attribute"`
 }
```
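The model now carries raw DMV totals instead of precomputed averages, and the averages are derived in Go before filtering ("move average calculations before filtering logic" in the commit message). A sketch of that derivation, assuming `sys.dm_exec_query_stats`-style totals where worker and elapsed time are reported in microseconds; the helper name and signature are illustrative:

```go
package utils

// averagesFromTotals is a hypothetical helper showing how per-execution
// averages fall out of the raw totals kept on TopNSlowQueryDetails.
func averagesFromTotals(workerUS, elapsedUS, reads, writes, execCount int64) (cpuMS, elapsedMS, avgReads, avgWrites float64) {
	if execCount == 0 {
		return 0, 0, 0, 0 // plan cached but never executed; avoid dividing by zero
	}
	n := float64(execCount)
	cpuMS = float64(workerUS) / n / 1000      // microseconds -> milliseconds
	elapsedMS = float64(elapsedUS) / n / 1000 // microseconds -> milliseconds
	avgReads = float64(reads) / n
	avgWrites = float64(writes) / n
	return
}
```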

src/queryanalysis/models/wait_time_analysis.go

Lines changed: 5 additions & 4 deletions
```diff
@@ -4,14 +4,15 @@ import (
 	"time"
 )
 
+// WaitTimeAnalysis represents the structure for analyzing current waiting sessions
+// This has been updated to match the simplified query that shows currently waiting sessions
+// instead of the previous complex query store-based analysis
 type WaitTimeAnalysis struct {
-	QueryID             *HexString `db:"query_id" json:"query_id" metric_name:"query_id" source_type:"attribute"`
+	SessionID           *int64     `db:"session_id" json:"session_id" metric_name:"session_id" source_type:"attribute"`
 	DatabaseName        *string    `db:"database_name" json:"database_name" metric_name:"database_name" source_type:"attribute"`
 	QueryText           *string    `db:"query_text" json:"query_text" metric_name:"query_text" source_type:"attribute"`
 	WaitCategory        *string    `db:"wait_category" json:"wait_category" metric_name:"wait_category" source_type:"attribute"`
 	TotalWaitTimeMs     *float64   `db:"total_wait_time_ms" json:"total_wait_time_ms" metric_name:"total_wait_time_ms" source_type:"gauge"`
-	AvgWaitTimeMs       *float64   `db:"avg_wait_time_ms" json:"avg_wait_time_ms" metric_name:"avg_wait_time_ms" source_type:"gauge"`
-	WaitEventCount      *int64     `db:"wait_event_count" json:"wait_event_count" metric_name:"wait_event_count" source_type:"gauge"`
-	LastExecutionTime   *time.Time `db:"last_execution_time" json:"last_execution_time" metric_name:"last_execution_time" source_type:"attribute"`
+	RequestStartTime    *time.Time `db:"request_start_time" json:"request_start_time" metric_name:"request_start_time" source_type:"attribute"`
 	CollectionTimestamp time.Time  `db:"collection_timestamp" metric_name:"collection_timestamp" source_type:"attribute"`
 }
```
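The new fields line up with a view of currently executing requests rather than the Query Store. A hedged sketch of how such a model could be populated; it assumes sqlx (which matches the `db` struct tags) and a go-mssqldb driver, and the SQL is illustrative only, mapping `wait_type` straight into `wait_category` rather than doing the integration's real categorization:

```go
package main

import (
	"log"

	"github.com/jmoiron/sqlx"
	_ "github.com/microsoft/go-mssqldb" // registers the "sqlserver" driver

	"github.com/newrelic/nri-mssql/src/queryanalysis/models"
)

// collectWaits scans currently waiting requests into the simplified model.
func collectWaits(db *sqlx.DB) ([]models.WaitTimeAnalysis, error) {
	const q = `
SELECT r.session_id,
       DB_NAME(r.database_id)     AS database_name,
       t.text                     AS query_text,
       r.wait_type                AS wait_category,
       CAST(r.wait_time AS float) AS total_wait_time_ms,
       r.start_time               AS request_start_time
FROM sys.dm_exec_requests r
CROSS APPLY sys.dm_exec_sql_text(r.sql_handle) t
WHERE r.wait_type IS NOT NULL`
	var rows []models.WaitTimeAnalysis
	err := db.Select(&rows, q) // the model's db tags map columns to fields
	return rows, err
}

func main() {
	db, err := sqlx.Connect("sqlserver", "sqlserver://user:pass@localhost:1433")
	if err != nil {
		log.Fatal(err)
	}
	waits, err := collectWaits(db)
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("collected %d waiting sessions", len(waits))
}
```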

src/queryanalysis/query_analysis.go

Lines changed: 1 addition & 2 deletions
```diff
@@ -31,8 +31,7 @@ func PopulateQueryPerformanceMetrics(integration *integration.Integration, argum
 
 	utils.ValidateAndSetDefaults(&arguments)
 
-	queries := config.Queries
-	queryDetails, err := utils.LoadQueries(queries, arguments)
+	queryDetails, err := utils.LoadQueries(config.Queries, arguments)
 	if err != nil {
 		log.Error("Error loading query configuration: %v", err)
 		return
```
Lines changed: 94 additions & 0 deletions (new file; name not rendered)
```diff
@@ -0,0 +1,94 @@
+package utils
+
+import (
+	"sort"
+
+	"github.com/newrelic/infra-integrations-sdk/v3/log"
+	"github.com/newrelic/nri-mssql/src/args"
+)
+
+// FilterSlowQueriesByThreshold filters and limits slow queries based on response time threshold and count limit
+// This function:
+// 1. Filters queries where AvgElapsedTimeMS >= QueryMonitoringResponseTimeThreshold
+// 2. Sorts filtered queries by AvgElapsedTimeMS in descending order (slowest first)
+// 3. Returns the top QueryMonitoringCountThreshold queries
+func FilterSlowQueriesByThreshold(enrichedQueries []EnrichedSlowQueryDetails, args args.ArgumentList) []EnrichedSlowQueryDetails {
+	if len(enrichedQueries) == 0 {
+		log.Debug("No slow queries to filter")
+		return enrichedQueries
+	}
+
+	// Step 1: Filter queries based on AvgElapsedTimeMS threshold
+	filteredQueries := make([]EnrichedSlowQueryDetails, 0)
+	thresholdMs := float64(args.QueryMonitoringResponseTimeThreshold)
+
+	for _, query := range enrichedQueries {
+		if query.AvgElapsedTimeMS >= thresholdMs {
+			filteredQueries = append(filteredQueries, query)
+		}
+	}
+
+	log.Debug("Filtered %d queries out of %d based on response time threshold %.2f ms",
+		len(filteredQueries), len(enrichedQueries), thresholdMs)
+
+	// If no queries meet the threshold, return an empty slice
+	if len(filteredQueries) == 0 {
+		log.Debug("No queries meet the response time threshold of %.2f ms", thresholdMs)
+		return []EnrichedSlowQueryDetails{}
+	}
+
+	// Step 2: Sort by AvgElapsedTimeMS in descending order (slowest first)
+	sort.Slice(filteredQueries, func(i, j int) bool {
+		return filteredQueries[i].AvgElapsedTimeMS > filteredQueries[j].AvgElapsedTimeMS
+	})
+
+	// Step 3: Limit to QueryMonitoringCountThreshold
+	countLimit := args.QueryMonitoringCountThreshold
+	if countLimit <= 0 || countLimit > len(filteredQueries) {
+		countLimit = len(filteredQueries)
+	}
+
+	finalQueries := filteredQueries[:countLimit]
+
+	log.Debug("Returning top %d slowest queries out of %d filtered queries",
+		len(finalQueries), len(filteredQueries))
+
+	return finalQueries
+}
+
+// FilterMetrics provides detailed metrics about the filtering process
+type FilterMetrics struct {
+	TotalQueriesFromDB int     `json:"total_queries_from_db"`
+	QueriesAfterFilter int     `json:"queries_after_filter"`
+	QueriesAfterLimit  int     `json:"queries_after_limit"`
+	ThresholdUsed      float64 `json:"threshold_used_ms"`
+	CountLimitUsed     int     `json:"count_limit_used"`
+	SlowestQueryTime   float64 `json:"slowest_query_time_ms"`
+	FastestQueryTime   float64 `json:"fastest_query_time_ms"`
+}
+
+// FilterSlowQueriesWithMetrics performs the same filtering, delegating to the
+// heap-based implementation for better performance on large result sets
+func FilterSlowQueriesWithMetrics(enrichedQueries []EnrichedSlowQueryDetails, args args.ArgumentList) []EnrichedSlowQueryDetails {
+	if len(enrichedQueries) == 0 {
+		return enrichedQueries
+	}
+
+	// Filter using the optimized heap approach for better performance
+	return FilterSlowQueriesByThresholdHeap(enrichedQueries, args)
+}
+
+// LogFilterMetrics logs the filtering metrics for debugging
+func LogFilterMetrics(metrics FilterMetrics) {
+	log.Info("Query Filtering Metrics:")
+	log.Info("  - Total queries from DB: %d", metrics.TotalQueriesFromDB)
+	log.Info("  - Queries after threshold filter (>= %.2f ms): %d", metrics.ThresholdUsed, metrics.QueriesAfterFilter)
+	log.Info("  - Final queries sent to New Relic: %d", metrics.QueriesAfterLimit)
+	log.Info("  - Count limit used: %d", metrics.CountLimitUsed)
+	if metrics.QueriesAfterLimit > 0 {
+		log.Info("  - Slowest query time: %.2f ms", metrics.SlowestQueryTime)
+		log.Info("  - Fastest query time: %.2f ms", metrics.FastestQueryTime)
+	}
+}
```
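A minimal usage sketch of the sort-based filter, written as a test with hypothetical values and import paths inferred from the package layout; it assumes the two threshold arguments are plain integers, as the `float64` conversion in the function suggests:

```go
package utils_test

import (
	"testing"

	"github.com/newrelic/nri-mssql/src/args"
	"github.com/newrelic/nri-mssql/src/queryanalysis/utils"
)

// Only AvgElapsedTimeMS matters to the filter; other fields stay zero.
func TestFilterSlowQueriesByThreshold_sketch(t *testing.T) {
	queries := []utils.EnrichedSlowQueryDetails{
		{AvgElapsedTimeMS: 950},
		{AvgElapsedTimeMS: 120}, // below threshold, filtered out
		{AvgElapsedTimeMS: 430}, // above threshold, trimmed by the count limit
	}
	cfg := args.ArgumentList{
		QueryMonitoringResponseTimeThreshold: 400, // ms
		QueryMonitoringCountThreshold:        1,   // keep only the slowest
	}

	top := utils.FilterSlowQueriesByThreshold(queries, cfg)
	if len(top) != 1 || top[0].AvgElapsedTimeMS != 950 {
		t.Fatalf("expected the single slowest query, got %+v", top)
	}
}
```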
Lines changed: 203 additions & 0 deletions (new file; name not rendered)
```diff
@@ -0,0 +1,203 @@
+package utils
+
+import (
+	"container/heap"
+	"sort"
+
+	"github.com/newrelic/infra-integrations-sdk/v3/log"
+	"github.com/newrelic/nri-mssql/src/args"
+)
+
+// QueryHeap implements heap.Interface for EnrichedSlowQueryDetails
+// It is a min-heap, so the top K slowest queries can be maintained efficiently
+type QueryHeap []EnrichedSlowQueryDetails
+
+func (h QueryHeap) Len() int           { return len(h) }
+func (h QueryHeap) Less(i, j int) bool { return h[i].AvgElapsedTimeMS < h[j].AvgElapsedTimeMS } // Min-heap
+func (h QueryHeap) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }
+
+func (h *QueryHeap) Push(x interface{}) {
+	*h = append(*h, x.(EnrichedSlowQueryDetails))
+}
+
+func (h *QueryHeap) Pop() interface{} {
+	old := *h
+	n := len(old)
+	x := old[n-1]
+	*h = old[0 : n-1]
+	return x
+}
+
+// FilterSlowQueriesByThresholdHeap is the most efficient version, using a heap
+// Time Complexity: O(n log k), where n = total queries and k = count threshold
+// Space Complexity: O(k)
+func FilterSlowQueriesByThresholdHeap(enrichedQueries []EnrichedSlowQueryDetails, args args.ArgumentList) []EnrichedSlowQueryDetails {
+	if len(enrichedQueries) == 0 {
+		log.Debug("No slow queries to filter")
+		return enrichedQueries
+	}
+
+	thresholdMs := float64(args.QueryMonitoringResponseTimeThreshold)
+	countLimit := args.QueryMonitoringCountThreshold
+
+	if countLimit <= 0 {
+		countLimit = len(enrichedQueries)
+	}
+
+	// Use a min-heap to efficiently maintain the top K queries
+	h := &QueryHeap{}
+	heap.Init(h)
+
+	filteredCount := 0
+
+	for _, query := range enrichedQueries {
+		if query.AvgElapsedTimeMS >= thresholdMs {
+			filteredCount++
+
+			if h.Len() < countLimit {
+				// Heap not full, just add the query
+				heap.Push(h, query)
+			} else if query.AvgElapsedTimeMS > (*h)[0].AvgElapsedTimeMS {
+				// Query is slower than the fastest query in our top K, so replace it
+				heap.Pop(h)
+				heap.Push(h, query)
+			}
+		}
+	}
+
+	log.Debug("Filtered %d queries out of %d based on response time threshold %.2f ms",
+		filteredCount, len(enrichedQueries), thresholdMs)
+
+	// Drain the heap into a slice sorted in descending order
+	result := make([]EnrichedSlowQueryDetails, h.Len())
+	for i := len(result) - 1; i >= 0; i-- {
+		result[i] = heap.Pop(h).(EnrichedSlowQueryDetails)
+	}
+
+	log.Debug("Returning top %d slowest queries", len(result))
+	return result
+}
+
+// FilterSlowQueriesByThresholdPartialSort is an alternative approach: filter, sort, slice
+// Time Complexity: O(n log n), since the current implementation sorts all filtered queries
+// Space Complexity: O(n)
+func FilterSlowQueriesByThresholdPartialSort(enrichedQueries []EnrichedSlowQueryDetails, args args.ArgumentList) []EnrichedSlowQueryDetails {
+	if len(enrichedQueries) == 0 {
+		log.Debug("No slow queries to filter")
+		return enrichedQueries
+	}
+
+	// Step 1: Filter by threshold
+	thresholdMs := float64(args.QueryMonitoringResponseTimeThreshold)
+	filteredQueries := make([]EnrichedSlowQueryDetails, 0, len(enrichedQueries))
+
+	for _, query := range enrichedQueries {
+		if query.AvgElapsedTimeMS >= thresholdMs {
+			filteredQueries = append(filteredQueries, query)
+		}
+	}
+
+	log.Debug("Filtered %d queries out of %d based on response time threshold %.2f ms",
+		len(filteredQueries), len(enrichedQueries), thresholdMs)
+
+	if len(filteredQueries) == 0 {
+		return []EnrichedSlowQueryDetails{}
+	}
+
+	// Step 2: Sort all filtered queries in descending order, then keep the top k
+	countLimit := args.QueryMonitoringCountThreshold
+	if countLimit <= 0 || countLimit > len(filteredQueries) {
+		countLimit = len(filteredQueries)
+	}
+
+	sort.Slice(filteredQueries, func(i, j int) bool {
+		return filteredQueries[i].AvgElapsedTimeMS > filteredQueries[j].AvgElapsedTimeMS
+	})
+	filteredQueries = filteredQueries[:countLimit]
+
+	log.Debug("Returning top %d slowest queries", len(filteredQueries))
+	return filteredQueries
+}
+
+// FilterSlowQueriesByThresholdQuickSelect uses the QuickSelect algorithm
+// Time Complexity: O(n) average case, O(n²) worst case
+// Space Complexity: O(1) extra (in-place, recursion stack aside)
+func FilterSlowQueriesByThresholdQuickSelect(enrichedQueries []EnrichedSlowQueryDetails, args args.ArgumentList) []EnrichedSlowQueryDetails {
+	if len(enrichedQueries) == 0 {
+		log.Debug("No slow queries to filter")
+		return enrichedQueries
+	}
+
+	// Step 1: Filter by threshold
+	thresholdMs := float64(args.QueryMonitoringResponseTimeThreshold)
+	filteredQueries := make([]EnrichedSlowQueryDetails, 0, len(enrichedQueries))
+
+	for _, query := range enrichedQueries {
+		if query.AvgElapsedTimeMS >= thresholdMs {
+			filteredQueries = append(filteredQueries, query)
+		}
+	}
+
+	if len(filteredQueries) == 0 {
+		return []EnrichedSlowQueryDetails{}
+	}
+
+	countLimit := args.QueryMonitoringCountThreshold
+	if countLimit <= 0 || countLimit > len(filteredQueries) {
+		countLimit = len(filteredQueries)
+	}
+
+	// Step 2: Use quickselect to move the k largest elements to the front
+	if countLimit < len(filteredQueries) {
+		quickSelect(filteredQueries, 0, len(filteredQueries)-1, countLimit-1)
+		filteredQueries = filteredQueries[:countLimit]
+	}
+
+	// Step 3: Sort only the selected elements
+	sort.Slice(filteredQueries, func(i, j int) bool {
+		return filteredQueries[i].AvgElapsedTimeMS > filteredQueries[j].AvgElapsedTimeMS
+	})
+
+	return filteredQueries
+}
+
+// quickSelect partitions the slice so that the k+1 largest elements occupy indices 0..k
+func quickSelect(queries []EnrichedSlowQueryDetails, left, right, k int) {
+	if left == right {
+		return
+	}
+
+	pivotIndex := partition(queries, left, right)
+
+	if k == pivotIndex {
+		return
+	} else if k < pivotIndex {
+		quickSelect(queries, left, pivotIndex-1, k)
+	} else {
+		quickSelect(queries, pivotIndex+1, right, k)
+	}
+}
+
+// partition rearranges the slice so that elements greater than the pivot are on the left
+func partition(queries []EnrichedSlowQueryDetails, left, right int) int {
+	pivot := queries[right].AvgElapsedTimeMS
+	i := left
+
+	for j := left; j < right; j++ {
+		if queries[j].AvgElapsedTimeMS > pivot { // greater-than for descending order
+			queries[i], queries[j] = queries[j], queries[i]
+			i++
+		}
+	}
+	queries[i], queries[right] = queries[right], queries[i]
+	return i
+}
```
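Why a min-heap for a top-K-slowest problem: the heap root is always the fastest query currently retained, so each candidate costs one O(1) comparison against the root and at most one O(log k) replacement, instead of sorting everything. A short sketch of the resulting behavior, with synthetic values and the same inferred import paths as above:

```go
package main

import (
	"fmt"

	"github.com/newrelic/nri-mssql/src/args"
	"github.com/newrelic/nri-mssql/src/queryanalysis/utils"
)

func main() {
	// Synthetic data: 10,000 queries with latencies spread over 0-996 ms.
	queries := make([]utils.EnrichedSlowQueryDetails, 0, 10000)
	for i := 0; i < 10000; i++ {
		queries = append(queries, utils.EnrichedSlowQueryDetails{
			AvgElapsedTimeMS: float64(i % 997),
		})
	}
	cfg := args.ArgumentList{
		QueryMonitoringResponseTimeThreshold: 500, // ms
		QueryMonitoringCountThreshold:        20,
	}

	top := utils.FilterSlowQueriesByThresholdHeap(queries, cfg)

	// Expect 20 entries, sorted descending, all >= 500 ms; the heap never
	// held more than 20 items while scanning all 10,000 inputs.
	fmt.Println(len(top), top[0].AvgElapsedTimeMS, top[len(top)-1].AvgElapsedTimeMS)
}
```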
