Skip to content

Commit c91972f

Browse files
committed
Merge branch 'master' into feat/add_linters
2 parents 3f55c4d + aab07c9 commit c91972f

File tree

11 files changed

+642
-73
lines changed

11 files changed

+642
-73
lines changed

autocomplete/autocomplete.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -529,8 +529,9 @@ func (h *Handler) ServeValues(w http.ResponseWriter, r *http.Request) {
529529
valueSQL = fmt.Sprintf("substr(Tag1, %d) AS value", len(tag)+2)
530530
wr.And(where.HasPrefix("Tag1", tag+"="+valuePrefix))
531531
} else {
532-
valueSQL = fmt.Sprintf("substr(arrayJoin(Tags), %d) AS value", len(tag)+2)
533-
wr.And(where.HasPrefix("arrayJoin(Tags)", tag+"="+valuePrefix))
532+
prefixSelector := where.HasPrefix("x", tag+"="+valuePrefix)
533+
valueSQL = fmt.Sprintf("substr(arrayFilter(x -> %s, Tags)[1], %d) AS value", prefixSelector, len(tag)+2)
534+
wr.And("arrayExists(x -> " + prefixSelector + ", Tags)")
534535
}
535536

536537
wr.Andf("Date >= '%s' AND Date <= '%s'", fromDate, untilDate)

autocomplete/autocomplete_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ func TestHandler_ServeValues(t *testing.T) {
6868
fromDate, untilDate := dateString(h.config.ClickHouse.TaggedAutocompleDays, now)
6969

7070
srv.AddResponce(
71-
"SELECT substr(arrayJoin(Tags), 6) AS value FROM graphite_tagged WHERE (((Tag1='environment=production') AND (arrayExists((x) -> x='project=web', Tags))) AND (arrayJoin(Tags) LIKE 'host=%')) AND "+
71+
"SELECT substr(arrayFilter(x -> x LIKE 'host=%', Tags)[1], 6) AS value FROM graphite_tagged WHERE (((Tag1='environment=production') AND (has(Tags, 'project=web'))) AND (arrayExists(x -> x LIKE 'host=%', Tags))) AND "+
7272
"(Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 10000",
7373
&chtest.TestResponse{
7474
Body: []byte("host1\nhost2\ndc-host2\ndc-host3\n"),
@@ -129,7 +129,7 @@ func TestTagsAutocomplete_ServeValuesCached(t *testing.T) {
129129
fromDate, untilDate := dateString(h.config.ClickHouse.TaggedAutocompleDays, now)
130130

131131
srv.AddResponce(
132-
"SELECT substr(arrayJoin(Tags), 6) AS value FROM graphite_tagged WHERE (((Tag1='environment=production') AND (arrayExists((x) -> x='project=web', Tags))) AND (arrayJoin(Tags) LIKE 'host=%')) AND "+
132+
"SELECT substr(arrayFilter(x -> x LIKE 'host=%', Tags)[1], 6) AS value FROM graphite_tagged WHERE (((Tag1='environment=production') AND (has(Tags, 'project=web'))) AND (arrayExists(x -> x LIKE 'host=%', Tags))) AND "+
133133
"(Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 10000",
134134
&chtest.TestResponse{
135135
Body: []byte("host1\nhost2\ndc-host2\ndc-host3\n"),

config/config.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ type ClickHouse struct {
235235
IndexReverses IndexReverses `toml:"index-reverses" json:"index-reverses" comment:"see doc/config.md" commented:"true"`
236236
IndexTimeout time.Duration `toml:"index-timeout" json:"index-timeout" comment:"total timeout to fetch series list from index"`
237237
TaggedTable string `toml:"tagged-table" json:"tagged-table" comment:"'tagged' table from carbon-clickhouse, required for seriesByTag"`
238+
TagsCountTable string `toml:"tags-count-table" json:"tags-count-table" comment:"Table that contains the total amounts of each tag-value pair. It is used to avoid usage of high cardinality tag-value pairs when querying TaggedTable. If left empty, basic sorting will be used. See more detailed description in doc/config.md"`
238239
TaggedAutocompleDays int `toml:"tagged-autocomplete-days" json:"tagged-autocomplete-days" comment:"or how long the daemon will query tags during autocomplete"`
239240
TaggedUseDaily bool `toml:"tagged-use-daily" json:"tagged-use-daily" comment:"whether to use date filter when searching for the metrics in the tagged-table"`
240241
TaggedCosts map[string]*Costs `toml:"tagged-costs" json:"tagged-costs" comment:"costs for tags (for tune which tag will be used as primary), by default is 0, increase for costly (with poor selectivity) tags" commented:"true"`

deploy/doc/config.md

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,4 +199,30 @@ When `reverse = true` is set for data-table, there are two possibles cases for [
199199
Depending on which case applies, to get proper retention and aggregation you must additionally set `rollup-use-reverted = true` for the first case and `rollup-use-reverted = false` for the second.
200200

201201
#### Additional tuning tagged find for seriesByTag and autocomplete
202-
Only one tag used as filter for index field Tag1, see graphite_tagged table [structure](https://github.com/lomik/
202+
Only one tag used as filter for index field Tag1, see graphite_tagged table [structure](https://github.com/lomik/
203+
204+
To always choose the best Tag1 you can set the parameter `tags-count-table = <table_name>`. The value should be a table in ClickHouse that has columns (Date, Tag1, Count) similar to the graphite_tagged table. The table can be defined like this:
205+
206+
```
207+
CREATE TABLE IF NOT EXISTS default.tag1_count_per_day
208+
(
209+
Date Date,
210+
Tag1 String,
211+
Count UInt64
212+
)
213+
ENGINE = SummingMergeTree
214+
ORDER BY (Date, Tag1);
215+
216+
CREATE MATERIALIZED VIEW IF NOT EXISTS default.tag1_count_per_day_mv TO default.tag1_count_per_day AS
217+
SELECT Date AS Date,
218+
Tag1 AS Tag1,
219+
count(*) AS Count
220+
FROM default.graphite_tagged
221+
GROUP BY (Date, Tag1);
222+
```
223+
224+
Here we additionally create a materialized view to automatically save the quantities of rows with each unique Tag1 as the metrics are being written.
225+
graphite-clickhouse will query this table when it tries to decide which tag should be used when querying graphite_tagged table.
226+
Overall using this parameter will somewhat increase writing load but can improve reading tagged metrics greatly in some cases.
227+
228+
Note that this option only applies to terms that use the '=' operator.

doc/config.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,33 @@ Depends on it for having a proper retention and aggregation you must additionall
203203

204204
#### Additional tuning tagged find for seriesByTag and autocomplete
205205
Only one tag used as filter for index field Tag1, see graphite_tagged table [structure](https://github.com/lomik/
206+
207+
To always choose the best Tag1 you can set the parameter `tags-count-table = <table_name>`. The value should be a table in ClickHouse that has columns (Date, Tag1, Count) similar to the graphite_tagged table. The table can be defined like this:
208+
209+
```
210+
CREATE TABLE IF NOT EXISTS default.tag1_count_per_day
211+
(
212+
Date Date,
213+
Tag1 String,
214+
Count UInt64
215+
)
216+
ENGINE = SummingMergeTree
217+
ORDER BY (Date, Tag1);
218+
219+
CREATE MATERIALIZED VIEW IF NOT EXISTS default.tag1_count_per_day_mv TO default.tag1_count_per_day AS
220+
SELECT Date AS Date,
221+
Tag1 AS Tag1,
222+
count(*) AS Count
223+
FROM default.graphite_tagged
224+
GROUP BY (Date, Tag1);
225+
```
226+
227+
Here we additionally create a materialized view to automatically save the quantities of rows with each unique Tag1 as the metrics are being written.
228+
graphite-clickhouse will query this table when it tries to decide which tag should be used when querying graphite_tagged table.
229+
Overall using this parameter will somewhat increase writing load but can improve reading tagged metrics greatly in some cases.
230+
231+
Note that this option only applies to terms that use the '=' operator.
232+
206233
```toml
207234
[common]
208235
# general listener
@@ -353,6 +380,8 @@ Only one tag used as filter for index field Tag1, see graphite_tagged table [str
353380
index-timeout = "1m0s"
354381
# 'tagged' table from carbon-clickhouse, required for seriesByTag
355382
tagged-table = "graphite_tagged"
383+
# Table that contains the total amounts of each tag-value pair. It is used to avoid usage of high cardinality tag-value pairs when querying TaggedTable. If left empty, basic sorting will be used. See more detailed description in doc/config.md
384+
tags-count-table = ""
356385
# or how long the daemon will query tags during autocomplete
357386
tagged-autocomplete-days = 7
358387
# whether to use date filter when searching for the metrics in the tagged-table

finder/finder.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ func newPlainFinder(ctx context.Context, config *config.Config, query string, fr
4141
f = NewTagged(
4242
config.ClickHouse.URL,
4343
config.ClickHouse.TaggedTable,
44+
config.ClickHouse.TagsCountTable,
4445
config.ClickHouse.TaggedUseDaily,
4546
config.FeatureFlags.UseCarbonBehavior,
4647
config.FeatureFlags.DontMatchMissingTags,
@@ -147,6 +148,7 @@ func FindTagged(ctx context.Context, config *config.Config, terms []TaggedTerm,
147148
fnd := NewTagged(
148149
config.ClickHouse.URL,
149150
config.ClickHouse.TaggedTable,
151+
config.ClickHouse.TagsCountTable,
150152
config.ClickHouse.TaggedUseDaily,
151153
config.FeatureFlags.UseCarbonBehavior,
152154
config.FeatureFlags.DontMatchMissingTags,

0 commit comments

Comments
 (0)