-
Notifications
You must be signed in to change notification settings - Fork 152
Expand file tree
/
Copy pathanalyze.proto
More file actions
174 lines (134 loc) · 4.87 KB
/
analyze.proto
File metadata and controls
174 lines (134 loc) · 4.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
// Message definitions for analyze requests and responses (package tipb).
syntax = "proto2";
package tipb;
// gogo.proto and rustproto.proto supply the (gogoproto.*) and (rustproto.*)
// options used below. ColumnInfo is not defined in this file; presumably it
// comes from schema.proto - confirm.
import "gogoproto/gogo.proto";
import "rustproto.proto";
import "schema.proto";
// Java code generation settings.
option java_multiple_files = true;
option java_package = "com.pingcap.tidb.tipb";
// gogoproto (Go) code generation switches, applied file-wide (`_all`):
// the three goproto_* extras are turned off, and generated
// marshal / size / unmarshal code is turned on.
option (gogoproto.goproto_sizecache_all) = false;
option (gogoproto.goproto_unkeyed_all) = false;
option (gogoproto.goproto_unrecognized_all) = false;
option (gogoproto.marshaler_all) = true;
option (gogoproto.sizer_all) = true;
option (gogoproto.unmarshaler_all) = true;
// rust-protobuf code generation switch, applied file-wide.
option (rustproto.lite_runtime_all) = true;
// AnalyzeType enumerates the kinds of analyze request; it is the type of
// AnalyzeReq.tp.
//
// The numeric values are part of the wire format: never renumber or reuse
// them. TypeIndex is the first value, so under proto2 rules it is also the
// default when the field is not set.
enum AnalyzeType {
  TypeIndex = 0;
  TypeColumn = 1;
  TypeCommonHandle = 2;
  TypeSampleIndex = 3;
  TypeMixed = 4;
  TypeFullSampling = 5;
}
// AnalyzeReq is the top-level analyze request.
message AnalyzeReq {
  // tp selects the kind of analysis to run.
  optional AnalyzeType tp = 1 [(gogoproto.nullable) = false];

  // Deprecated. Start Ts has been moved to coprocessor.Request.
  // The field is kept (and marked deprecated) rather than deleted so that
  // field number 2 stays occupied and existing senders keep working.
  optional uint64 start_ts_fallback = 2 [deprecated = true];

  // flags: meaning of the individual bits is not defined in this file.
  optional uint64 flags = 3 [(gogoproto.nullable) = false];

  // time_zone_offset: unit is not stated here (presumably seconds) - confirm.
  optional int64 time_zone_offset = 4 [(gogoproto.nullable) = false];

  // Index-specific and column-specific parameters. Presumably which of the
  // two is populated depends on tp - confirm against the callers.
  optional AnalyzeIndexReq idx_req = 5;
  optional AnalyzeColumnsReq col_req = 6;
}
// AnalyzeIndexReq carries the parameters for analyzing an index.
message AnalyzeIndexReq {
  // bucket_size is the max histogram bucket size.
  optional int64 bucket_size = 1 [(gogoproto.nullable) = false];

  // num_columns is the number of columns in the index.
  optional int32 num_columns = 2 [(gogoproto.nullable) = false];

  // Requested CM sketch dimensions (see CMSketch).
  optional int32 cmsketch_depth = 3;
  optional int32 cmsketch_width = 4;

  // Presumably the same meaning as the same-named AnalyzeColumnsReq fields
  // (max number of samples / max sketch size) - confirm.
  optional int64 sample_size = 5 [(gogoproto.nullable) = false];
  optional int64 sketch_size = 6 [(gogoproto.nullable) = false];

  // Presumably the number of top-N entries to keep - confirm.
  optional int32 top_n_size = 7;

  // version: semantics are not defined in this file.
  optional int32 version = 8;
}
// AnalyzeColumnsReq carries the parameters for analyzing table columns.
message AnalyzeColumnsReq {
  // bucket_size is the max histogram bucket size. It is needed because when
  // the primary key is the handle, the histogram is built directly.
  optional int64 bucket_size = 1 [(gogoproto.nullable) = false];

  // sample_size is the max number of samples that will be collected.
  optional int64 sample_size = 2 [(gogoproto.nullable) = false];

  // sketch_size is the max sketch size.
  optional int64 sketch_size = 3 [(gogoproto.nullable) = false];

  // columns_info is the info of all the columns that need to be analyzed.
  repeated ColumnInfo columns_info = 4;

  // Requested CM sketch dimensions (see CMSketch).
  optional int32 cmsketch_depth = 5;
  optional int32 cmsketch_width = 6;

  repeated int64 primary_column_ids = 7;

  // version: semantics are not defined in this file.
  optional int32 version = 8;

  repeated int64 primary_prefix_column_ids = 9;
  repeated AnalyzeColumnGroup column_groups = 10;

  // sample_rate is the sampling rate that determines how many samples will
  // be collected.
  // One of sample_rate and sample_size must be non-zero.
  optional double sample_rate = 11;
}
// AnalyzeMixedResp bundles a columns response and an index response in a
// single message (presumably the reply to a TypeMixed request - confirm).
message AnalyzeMixedResp {
  optional AnalyzeColumnsResp columns_resp = 1;
  optional AnalyzeIndexResp index_resp = 2;
}
// AnalyzeColumnGroup describes a group of columns, referenced from
// AnalyzeColumnsReq.column_groups.
// NOTE(review): the two lists look like they are meant to be matched up by
// index (one prefix length per column offset) - confirm.
message AnalyzeColumnGroup {
  repeated int64 column_offsets = 1;
  repeated int64 prefix_lengths = 2;
}
// AnalyzeColumnsResp is the result of analyzing columns.
message AnalyzeColumnsResp {
  // collectors is the sample collectors for columns.
  repeated SampleCollector collectors = 1;

  // pk_hist is the histogram for the primary key when it is the handle.
  optional Histogram pk_hist = 2;

  // row_collector holds row-level samples (see RowSampleCollector).
  optional RowSampleCollector row_collector = 3;
}
// AnalyzeIndexResp is the result of analyzing an index: a histogram, a
// CM sketch and a sample collector, each optional.
message AnalyzeIndexResp {
  optional Histogram hist = 1;
  optional CMSketch cms = 2;
  optional SampleCollector collector = 3;
}
// Bucket is an element of a histogram (see Histogram.buckets).
message Bucket {
  optional int64 count = 1 [(gogoproto.nullable) = false];

  // Encoded bounds of the bucket; the encoding is not defined in this file.
  optional bytes lower_bound = 2;
  optional bytes upper_bound = 3;

  optional int64 repeats = 4 [(gogoproto.nullable) = false];

  // Presumably the number of distinct values in this bucket, by analogy
  // with Histogram.ndv - confirm.
  optional int64 ndv = 5;
}
// Histogram is a list of buckets together with a distinct-value count.
message Histogram {
  // ndv is the number of distinct values.
  optional int64 ndv = 1 [(gogoproto.nullable) = false];

  // buckets represents all the buckets.
  repeated Bucket buckets = 2;
}
// FMSketch is used to count distinct values for columns.
message FMSketch {
  optional uint64 mask = 1 [(gogoproto.nullable) = false];
  repeated uint64 hashset = 2;
}
// SampleCollector is used to collect samples and calculate the count and
// ndv of a column.
message SampleCollector {
  // Sampled values; the encoding is not defined in this file.
  repeated bytes samples = 1;

  optional int64 null_count = 2 [(gogoproto.nullable) = false];
  optional int64 count = 3 [(gogoproto.nullable) = false];

  // Sketches attached to the samples (see FMSketch and CMSketch).
  optional FMSketch fm_sketch = 4;
  optional CMSketch cm_sketch = 5;

  // total_size: unit is not stated here (presumably bytes) - confirm.
  optional int64 total_size = 6;
}
// RowSampleCollector collects samples at row granularity (see RowSample).
// NOTE(review): null_counts, fm_sketch and total_size are all repeated;
// presumably each holds one entry per column, matched up by index - confirm.
message RowSampleCollector {
  repeated RowSample samples = 1;
  repeated int64 null_counts = 2 [(gogoproto.nullable) = false];
  optional int64 count = 3 [(gogoproto.nullable) = false];
  repeated FMSketch fm_sketch = 4;
  repeated int64 total_size = 5;
}
// RowSample is one sampled row plus its weight.
message RowSample {
  // row holds the row as a list of byte strings (presumably one per column;
  // the encoding is not defined in this file).
  repeated bytes row = 1;

  optional int64 weight = 2 [(gogoproto.nullable) = false];
}
// CMSketchRow is a single row of counters in a CMSketch.
// NOTE(review): under proto2 a repeated scalar is not packed unless
// [packed = true] is set. Changing that alters the encoded bytes, so it
// would need to be coordinated with every reader.
message CMSketchRow {
  repeated uint32 counters = 1;
}
// CMSketchTopN is one entry of CMSketch.top_n: a value and its count.
message CMSketchTopN {
  // data is the value as bytes; the encoding is not defined in this file.
  optional bytes data = 1;

  optional uint64 count = 2 [(gogoproto.nullable) = false];
}
// CMSketch is a sketch made of counter rows, a list of top-N entries and a
// default value.
message CMSketch {
  repeated CMSketchRow rows = 1;
  repeated CMSketchTopN top_n = 2;
  optional uint64 default_value = 3 [(gogoproto.nullable) = false];
}