Skip to content
This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit 259847a

Browse files
authored
Be smarter in how we look at matchers. (#572)
* Add unittests for PostingsForMatcher. * Selector methods are all stateless, don't need a reference. * Be smarter in how we look at matchers. Look at all matchers to see if a label can be empty. Optimise Not handling, so i!="2" is a simple lookup rather than an inverse postings list. All all the Withouts together, rather than having to subtract each from all postings. Change the pre-expand the postings logic to always do it before doing a Without only. Don't do that if it's already a list. The initial goal here was that the oft-seen pattern i=~"something.+",i!="foo",i!="bar" becomes more efficient. benchmark old ns/op new ns/op delta BenchmarkHeadPostingForMatchers/n="1"-4 5888 6160 +4.62% BenchmarkHeadPostingForMatchers/n="1",j="foo"-4 7190 6640 -7.65% BenchmarkHeadPostingForMatchers/j="foo",n="1"-4 6038 5923 -1.90% BenchmarkHeadPostingForMatchers/n="1",j!="foo"-4 6030884 4850525 -19.57% BenchmarkHeadPostingForMatchers/i=~".*"-4 887377940 230329137 -74.04% BenchmarkHeadPostingForMatchers/i=~".+"-4 490316101 319931758 -34.75% BenchmarkHeadPostingForMatchers/i=~""-4 594961991 130279313 -78.10% BenchmarkHeadPostingForMatchers/i!=""-4 537542388 318751015 -40.70% BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-4 10460243 8565195 -18.12% BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-4 44964267 8561546 -80.96% BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-4 42244885 29137737 -31.03% BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-4 35285834 32774584 -7.12% BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-4 8951047 8379024 -6.39% BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-4 63813335 30672688 -51.93% BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-4 45381112 44924397 -1.01% Signed-off-by: Brian Brazil <[email protected]>
1 parent 7ab060c commit 259847a

File tree

6 files changed

+318
-56
lines changed

6 files changed

+318
-56
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
- [BUGFIX] Fsync the meta file to persist it on disk to avoid data loss in case of a host crash.
88
- [BUGFIX] Fix fd and vm_area leak on error path in chunks.NewDirReader.
99
- [BUGFIX] Fix fd and vm_area leak on error path in index.NewFileReader.
10+
- [ENHANCEMENT] Be smarter in how we look at matchers.
11+
- [ENHANCEMENT] PostListings and NotMatcher now public.
1012

1113
## 0.6.1
1214
- [BUGFIX] Update `last` after appending a non-overlapping chunk in `chunks.MergeOverlappingChunks`. [#539](https://github.com/prometheus/tsdb/pull/539)

head_bench_test.go

+6-2
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,7 @@ func BenchmarkHeadStripeSeriesCreateParallel(b *testing.B) {
4949
})
5050
}
5151

52-
// TODO: generalize benchmark and pass all postings for matchers here
5352
func BenchmarkHeadPostingForMatchers(b *testing.B) {
54-
// Put a series, select it. GC it and then access it.
5553
h, err := NewHead(nil, nil, nil, 1000)
5654
testutil.Ok(b, err)
5755
defer func() {
@@ -66,6 +64,12 @@ func BenchmarkHeadPostingForMatchers(b *testing.B) {
6664
// Have some series that won't be matched, to properly test inverted matches.
6765
h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", strconv.Itoa(i), "j", "bar"))
6866
hash++
67+
h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", "0_"+strconv.Itoa(i), "j", "bar"))
68+
hash++
69+
h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", "1_"+strconv.Itoa(i), "j", "bar"))
70+
hash++
71+
h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", "2_"+strconv.Itoa(i), "j", "bar"))
72+
hash++
6973
}
7074
}
7175

index/postings.go

+8-8
Original file line numberDiff line numberDiff line change
@@ -600,8 +600,8 @@ func (rp *removedPostings) Err() error {
600600
return rp.remove.Err()
601601
}
602602

603-
// listPostings implements the Postings interface over a plain list.
604-
type listPostings struct {
603+
// ListPostings implements the Postings interface over a plain list.
604+
type ListPostings struct {
605605
list []uint64
606606
cur uint64
607607
}
@@ -610,15 +610,15 @@ func NewListPostings(list []uint64) Postings {
610610
return newListPostings(list...)
611611
}
612612

613-
func newListPostings(list ...uint64) *listPostings {
614-
return &listPostings{list: list}
613+
func newListPostings(list ...uint64) *ListPostings {
614+
return &ListPostings{list: list}
615615
}
616616

617-
func (it *listPostings) At() uint64 {
617+
func (it *ListPostings) At() uint64 {
618618
return it.cur
619619
}
620620

621-
func (it *listPostings) Next() bool {
621+
func (it *ListPostings) Next() bool {
622622
if len(it.list) > 0 {
623623
it.cur = it.list[0]
624624
it.list = it.list[1:]
@@ -628,7 +628,7 @@ func (it *listPostings) Next() bool {
628628
return false
629629
}
630630

631-
func (it *listPostings) Seek(x uint64) bool {
631+
func (it *ListPostings) Seek(x uint64) bool {
632632
// If the current value satisfies, then return.
633633
if it.cur >= x {
634634
return true
@@ -650,7 +650,7 @@ func (it *listPostings) Seek(x uint64) bool {
650650
return false
651651
}
652652

653-
func (it *listPostings) Err() error {
653+
func (it *ListPostings) Err() error {
654654
return nil
655655
}
656656

labels/selector.go

+17-9
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
package labels
1515

1616
import (
17+
"fmt"
1718
"regexp"
1819
)
1920

@@ -36,6 +37,8 @@ type Matcher interface {
3637
Name() string
3738
// Matches checks whether a value fulfills the constraints.
3839
Matches(v string) bool
40+
// String returns a human readable matcher.
41+
String() string
3942
}
4043

4144
// EqualMatcher matches on equality.
@@ -44,13 +47,16 @@ type EqualMatcher struct {
4447
}
4548

4649
// Name implements Matcher interface.
47-
func (m *EqualMatcher) Name() string { return m.name }
50+
func (m EqualMatcher) Name() string { return m.name }
4851

4952
// Matches implements Matcher interface.
50-
func (m *EqualMatcher) Matches(v string) bool { return v == m.value }
53+
func (m EqualMatcher) Matches(v string) bool { return v == m.value }
54+
55+
// String implements Matcher interface.
56+
func (m EqualMatcher) String() string { return fmt.Sprintf("%s=%q", m.name, m.value) }
5157

5258
// Value returns the matched value.
53-
func (m *EqualMatcher) Value() string { return m.value }
59+
func (m EqualMatcher) Value() string { return m.value }
5460

5561
// NewEqualMatcher returns a new matcher matching an exact label value.
5662
func NewEqualMatcher(name, value string) Matcher {
@@ -62,8 +68,9 @@ type regexpMatcher struct {
6268
re *regexp.Regexp
6369
}
6470

65-
func (m *regexpMatcher) Name() string { return m.name }
66-
func (m *regexpMatcher) Matches(v string) bool { return m.re.MatchString(v) }
71+
func (m regexpMatcher) Name() string { return m.name }
72+
func (m regexpMatcher) Matches(v string) bool { return m.re.MatchString(v) }
73+
func (m regexpMatcher) String() string { return fmt.Sprintf("%s=~%q", m.name, m.re.String()) }
6774

6875
// NewRegexpMatcher returns a new matcher verifying that a value matches
6976
// the regular expression pattern.
@@ -87,14 +94,15 @@ func NewMustRegexpMatcher(name, pattern string) Matcher {
8794

8895
}
8996

90-
// notMatcher inverts the matching result for a matcher.
91-
type notMatcher struct {
97+
// NotMatcher inverts the matching result for a matcher.
98+
type NotMatcher struct {
9299
Matcher
93100
}
94101

95-
func (m *notMatcher) Matches(v string) bool { return !m.Matcher.Matches(v) }
102+
func (m NotMatcher) Matches(v string) bool { return !m.Matcher.Matches(v) }
103+
func (m NotMatcher) String() string { return fmt.Sprintf("not(%s)", m.Matcher.String()) }
96104

97105
// Not inverts the matcher's matching result.
98106
func Not(m Matcher) Matcher {
99-
return &notMatcher{m}
107+
return &NotMatcher{m}
100108
}

querier.go

+76-36
Original file line numberDiff line numberDiff line change
@@ -262,37 +262,92 @@ func (q *blockQuerier) Close() error {
262262
}
263263

264264
// PostingsForMatchers assembles a single postings iterator against the index reader
265-
// based on the given matchers. It returns a list of label names that must be manually
266-
// checked to not exist in series the postings list points to.
267-
// It returns EmptyPostings() if it can be determined beforehand that no results will be found.
265+
// based on the given matchers.
268266
func PostingsForMatchers(ix IndexReader, ms ...labels.Matcher) (index.Postings, error) {
269-
var its []index.Postings
267+
var its, notIts []index.Postings
268+
// See which label must be non-empty.
269+
labelMustBeSet := make(map[string]bool, len(ms))
270+
for _, m := range ms {
271+
if !m.Matches("") {
272+
labelMustBeSet[m.Name()] = true
273+
}
274+
}
270275

271276
for _, m := range ms {
272-
it, err := postingsForMatcher(ix, m)
277+
matchesEmpty := m.Matches("")
278+
if labelMustBeSet[m.Name()] || !matchesEmpty {
279+
// If this matcher must be non-empty, we can be smarter.
280+
nm, isNot := m.(*labels.NotMatcher)
281+
if isNot && matchesEmpty { // l!="foo"
282+
// If the label can't be empty and is a Not and the inner matcher
283+
// doesn't match empty, then subtract it out at the end.
284+
it, err := postingsForMatcher(ix, nm.Matcher)
285+
if err != nil {
286+
return nil, err
287+
}
288+
notIts = append(notIts, it)
289+
} else if isNot && !matchesEmpty { // l!=""
290+
// If the label can't be empty and is a Not, but the inner matcher can
291+
// be empty we need to use inversePostingsForMatcher.
292+
it, err := inversePostingsForMatcher(ix, nm.Matcher)
293+
if err != nil {
294+
return nil, err
295+
}
296+
its = append(its, it)
297+
} else { // l="a"
298+
// Non-Not matcher, use normal postingsForMatcher.
299+
it, err := postingsForMatcher(ix, m)
300+
if err != nil {
301+
return nil, err
302+
}
303+
its = append(its, it)
304+
}
305+
} else { // l=""
306+
// If the matchers for a labelname selects an empty value, it selects all
307+
// the series which don't have the label name set too. See:
308+
// https://github.com/prometheus/prometheus/issues/3575 and
309+
// https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555
310+
it, err := inversePostingsForMatcher(ix, m)
311+
if err != nil {
312+
return nil, err
313+
}
314+
notIts = append(notIts, it)
315+
}
316+
}
317+
318+
// If there's nothing to subtract from, add in everything and remove the notIts later.
319+
if len(its) == 0 && len(notIts) != 0 {
320+
allPostings, err := ix.Postings(index.AllPostingsKey())
273321
if err != nil {
274322
return nil, err
275323
}
276-
its = append(its, it)
324+
its = append(its, allPostings)
325+
}
326+
327+
it := index.Intersect(its...)
328+
329+
for _, n := range notIts {
330+
if _, ok := n.(*index.ListPostings); !ok {
331+
// Best to pre-calculate the merged lists via next rather than have a ton
332+
// of seeks in Without.
333+
pl, err := index.ExpandPostings(n)
334+
if err != nil {
335+
return nil, err
336+
}
337+
n = index.NewListPostings(pl)
338+
}
339+
it = index.Without(it, n)
277340
}
278-
return ix.SortedPostings(index.Intersect(its...)), nil
341+
342+
return ix.SortedPostings(it), nil
279343
}
280344

281345
func postingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error) {
282-
// If the matcher selects an empty value, it selects all the series which don't
283-
// have the label name set too. See: https://github.com/prometheus/prometheus/issues/3575
284-
// and https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555
285-
if m.Matches("") {
286-
return postingsForUnsetLabelMatcher(ix, m)
287-
}
346+
// This method will not return postings for missing labels.
288347

289348
// Fast-path for equal matching.
290349
if em, ok := m.(*labels.EqualMatcher); ok {
291-
it, err := ix.Postings(em.Name(), em.Value())
292-
if err != nil {
293-
return nil, err
294-
}
295-
return it, nil
350+
return ix.Postings(em.Name(), em.Value())
296351
}
297352

298353
tpls, err := ix.LabelValues(m.Name())
@@ -328,7 +383,8 @@ func postingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error
328383
return index.Merge(rit...), nil
329384
}
330385

331-
func postingsForUnsetLabelMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error) {
386+
// inversePostingsForMatcher eeturns the postings for the series with the label name set but not matching the matcher.
387+
func inversePostingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error) {
332388
tpls, err := ix.LabelValues(m.Name())
333389
if err != nil {
334390
return nil, err
@@ -356,23 +412,7 @@ func postingsForUnsetLabelMatcher(ix IndexReader, m labels.Matcher) (index.Posti
356412
rit = append(rit, it)
357413
}
358414

359-
merged := index.Merge(rit...)
360-
// With many many postings, it's best to pre-calculate
361-
// the merged list via next rather than have a ton of seeks
362-
// in Without/Intersection.
363-
if len(rit) > 100 {
364-
pl, err := index.ExpandPostings(merged)
365-
if err != nil {
366-
return nil, err
367-
}
368-
merged = index.NewListPostings(pl)
369-
}
370-
371-
allPostings, err := ix.Postings(index.AllPostingsKey())
372-
if err != nil {
373-
return nil, err
374-
}
375-
return index.Without(allPostings, merged), nil
415+
return index.Merge(rit...), nil
376416
}
377417

378418
func mergeStrings(a, b []string) []string {

0 commit comments

Comments
 (0)