@@ -481,6 +481,8 @@ static void build_pertrans_for_aggref(AggStatePerTrans pertrans,
481
481
482
482
static void ExecEagerFreeAgg (AggState * node );
483
483
484
+ void agg_hash_explain_extra_message (AggState * aggstate );
485
+
484
486
/*
485
487
* Select the current grouping set; affects current_set and
486
488
* curaggcontext.
@@ -1540,6 +1542,16 @@ build_hash_tables(AggState *aggstate)
1540
1542
memory );
1541
1543
1542
1544
build_hash_table (aggstate , setno , nbuckets );
1545
+
1546
+ /* initialize some statistic info of hash table */
1547
+ perhash -> num_output_groups = 0 ;
1548
+ perhash -> num_spill_parts = 0 ;
1549
+ perhash -> num_expansions = 0 ;
1550
+ perhash -> bucket_total = 0 ;
1551
+ perhash -> bucket_used = 0 ;
1552
+ perhash -> chain_count = 0 ;
1553
+ perhash -> chain_length_total = 0 ;
1554
+ perhash -> chain_length_max = 0 ;
1543
1555
}
1544
1556
1545
1557
aggstate -> hash_ngroups_current = 0 ;
@@ -1966,6 +1978,7 @@ hash_agg_enter_spill_mode(AggState *aggstate)
1966
1978
hashagg_spill_init (aggstate , spill , aggstate -> hash_tapeinfo , 0 ,
1967
1979
perhash -> aggnode -> numGroups ,
1968
1980
aggstate -> hashentrysize );
1981
+ perhash -> num_spill_parts += spill -> npartitions ;
1969
1982
}
1970
1983
1971
1984
if (aggstate -> ss .ps .instrument )
@@ -2019,6 +2032,12 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
2019
2032
}
2020
2033
2021
2034
/* update hashentrysize estimate based on contents */
2035
+ /*
2036
+ * Greenplum doesn't use hashentrysize in the instrumentation, it will
2037
+ * calculate hash table chain length to get an accurate number.
2038
+ *
2039
+ * See the following code to collect hash table statistic info.
2040
+ */
2022
2041
if (aggstate -> hash_ngroups_current > 0 )
2023
2042
{
2024
2043
aggstate -> hashentrysize =
@@ -2031,6 +2050,47 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
2031
2050
Instrumentation * instrument = aggstate -> ss .ps .instrument ;
2032
2051
2033
2052
instrument -> workmemused = aggstate -> hash_mem_peak ;
2053
+
2054
+ /*
2055
+ * workmemwanted is how much memory would be needed to avoid scratch i/o,
2056
+ * i.e. to fit everything into the hashtable at once:
2057
+ *
2058
+ * 1. add meta_mem only when from_tape is false, because when we are
2059
+ * reading from tape/spilled file, we can reuse the existing hash
2060
+ * table's meta.
2061
+ * 2. add hash_mem every time.
2062
+ * 3. don't add buffer_mem since it's unnecessary when we can load
2063
+ * everything into memory at once.
2064
+ */
2065
+ if (!from_tape )
2066
+ instrument -> workmemwanted += meta_mem ;
2067
+ instrument -> workmemwanted += hashkey_mem ;
2068
+
2069
+ /* Scan all perhashs and collect hash table statistic info */
2070
+ for (int setno = 0 ; setno < aggstate -> num_hashes ; setno ++ )
2071
+ {
2072
+ AggStatePerHash perhash = & aggstate -> perhash [setno ];
2073
+ tuplehash_hash * hashtab = perhash -> hashtable -> hashtab ;
2074
+
2075
+ Assert (hashtab );
2076
+
2077
+ perhash -> num_expansions += hashtab -> num_expansions ;
2078
+ perhash -> bucket_total += hashtab -> size ;
2079
+ perhash -> bucket_used += hashtab -> members ;
2080
+ if (hashtab -> members > 0 )
2081
+ {
2082
+ uint32 perht_chain_length_total = 0 ;
2083
+ uint32 perht_chain_count = 0 ;
2084
+
2085
+ /* collect statistic info of chain length per hash table */
2086
+ tuplehash_coll_stat (hashtab ,
2087
+ & (perhash -> chain_length_max ),
2088
+ & perht_chain_length_total ,
2089
+ & perht_chain_count );
2090
+ perhash -> chain_count += perht_chain_count ;
2091
+ perhash -> chain_length_total += perht_chain_length_total ;
2092
+ }
2093
+ }
2034
2094
}
2035
2095
}
2036
2096
@@ -2225,6 +2285,7 @@ lookup_hash_entries(AggState *aggstate)
2225
2285
hashagg_spill_init (aggstate , spill , aggstate -> hash_tapeinfo , 0 ,
2226
2286
perhash -> aggnode -> numGroups ,
2227
2287
aggstate -> hashentrysize );
2288
+ perhash -> num_spill_parts += spill -> npartitions ;
2228
2289
2229
2290
hashagg_spill_tuple (aggstate , spill , slot , hash );
2230
2291
pergroup [setno ] = NULL ;
@@ -2277,6 +2338,13 @@ ExecAgg(PlanState *pstate)
2277
2338
return result ;
2278
2339
}
2279
2340
2341
+ /* Save statistics into the cdbexplainbuf for EXPLAIN ANALYZE */
2342
+ if (node -> ss .ps .instrument &&
2343
+ (node -> ss .ps .instrument )-> need_cdb &&
2344
+ (node -> phase -> aggstrategy == AGG_HASHED ||
2345
+ node -> phase -> aggstrategy == AGG_MIXED ))
2346
+ agg_hash_explain_extra_message (node );
2347
+
2280
2348
return NULL ;
2281
2349
}
2282
2350
@@ -2807,6 +2875,7 @@ agg_refill_hash_table(AggState *aggstate)
2807
2875
spill_initialized = true;
2808
2876
hashagg_spill_init (aggstate , & spill , tapeinfo , batch -> used_bits ,
2809
2877
batch -> input_card , aggstate -> hashentrysize );
2878
+ aggstate -> perhash [aggstate -> current_set ].num_spill_parts += spill .npartitions ;
2810
2879
}
2811
2880
/* no memory for a new group, spill */
2812
2881
hashagg_spill_tuple (aggstate , & spill , spillslot , hash );
@@ -2981,6 +3050,8 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
2981
3050
}
2982
3051
}
2983
3052
3053
+ perhash -> num_output_groups ++ ;
3054
+
2984
3055
/*
2985
3056
* Clear the per-output-tuple context for each group
2986
3057
*
@@ -5139,3 +5210,77 @@ ReuseHashTable(AggState *node)
5139
5210
!node -> streaming &&
5140
5211
!bms_overlap (node -> ss .ps .chgParam , aggnode -> aggParams ));
5141
5212
}
5213
+
5214
+ /*
5215
+ * Save statistics into the cdbexplainbuf for EXPLAIN ANALYZE
5216
+ */
5217
+ void
5218
+ agg_hash_explain_extra_message (AggState * aggstate )
5219
+ {
5220
+ /*
5221
+ * Check cdbexplain_depositStatsToNode(), Greenplum only saves extra
5222
+ * message text for the most interesting winning qExecs.
5223
+ */
5224
+ StringInfo hbuf = aggstate -> ss .ps .cdbexplainbuf ;
5225
+ uint64 sum_num_expansions = 0 ;
5226
+ uint64 sum_output_groups = 0 ;
5227
+ uint64 sum_spill_parts = 0 ;
5228
+ uint64 sum_chain_length_total = 0 ;
5229
+ uint64 sum_chain_count = 0 ;
5230
+ uint32 chain_length_max = 0 ;
5231
+ uint64 sum_bucket_used = 0 ;
5232
+ uint64 sum_bucket_total = 0 ;
5233
+
5234
+ Assert (hbuf );
5235
+
5236
+ appendStringInfo (hbuf , "hash table(s): %d" , aggstate -> num_hashes );
5237
+
5238
+ /* Scan all perhashs and collect statistic info */
5239
+ for (int setno = 0 ; setno < aggstate -> num_hashes ; setno ++ )
5240
+ {
5241
+ AggStatePerHash perhash = & aggstate -> perhash [setno ];
5242
+
5243
+ /* spill statistic info */
5244
+ if (aggstate -> hash_ever_spilled )
5245
+ {
5246
+ sum_output_groups += perhash -> num_output_groups ;
5247
+ sum_spill_parts += perhash -> num_spill_parts ;
5248
+ }
5249
+
5250
+ /* inner hash table statistic info */
5251
+ if (perhash -> chain_count > 0 )
5252
+ {
5253
+ sum_chain_length_total += perhash -> chain_length_total ;
5254
+ sum_chain_count += perhash -> chain_count ;
5255
+ if (perhash -> chain_length_max > chain_length_max )
5256
+ chain_length_max = perhash -> chain_length_max ;
5257
+ sum_bucket_used = perhash -> bucket_used ;
5258
+ sum_bucket_total = perhash -> bucket_total ;
5259
+ sum_num_expansions += perhash -> num_expansions ;
5260
+ }
5261
+ }
5262
+
5263
+ if (aggstate -> hash_ever_spilled )
5264
+ {
5265
+ appendStringInfo (hbuf ,
5266
+ "; " UINT64_FORMAT " groups total in %d batches, " UINT64_FORMAT
5267
+ " spill partitions; disk usage: " INT64_FORMAT "KB" ,
5268
+ sum_output_groups ,
5269
+ aggstate -> hash_batches_used ,
5270
+ sum_spill_parts ,
5271
+ aggstate -> hash_disk_used );
5272
+ }
5273
+
5274
+ if (sum_chain_count > 0 )
5275
+ {
5276
+ appendStringInfo (hbuf ,
5277
+ "; chain length %.1f avg, %d max;"
5278
+ " using " INT64_FORMAT " of " INT64_FORMAT " buckets;"
5279
+ " total " INT64_FORMAT " expansions.\n" ,
5280
+ (double )sum_chain_length_total / sum_chain_count ,
5281
+ chain_length_max ,
5282
+ sum_bucket_used ,
5283
+ sum_bucket_total ,
5284
+ sum_num_expansions );
5285
+ }
5286
+ }
0 commit comments