Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
174 changes: 174 additions & 0 deletions lib/logstorage/filter_array_contains.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
package logstorage

import (
"fmt"
"strings"

"github.com/VictoriaMetrics/VictoriaLogs/lib/prefixfilter"
"github.com/valyala/fastjson"
)

// filterArrayContains is the filter behind `array_contains(...)`.
//
// It matches rows where the given field holds a JSON array
// and one of the array items equals the given value.
//
// Example LogsQL: `tags:array_contains("prod")`
type filterArrayContains struct {
	// fieldName is the name of the field expected to hold a JSON array.
	fieldName string

	// value is the item to look for inside the array.
	value string
}

// String returns the LogsQL text for fa.
//
// The result is the output of quoteFieldNameIfNeeded for the field name
// immediately followed by `array_contains(<value>)`, where the value
// is passed through quoteTokenIfNeeded.
func (fa *filterArrayContains) String() string {
	fieldPrefix := quoteFieldNameIfNeeded(fa.fieldName)
	quotedValue := quoteTokenIfNeeded(fa.value)
	return fmt.Sprintf("%sarray_contains(%s)", fieldPrefix, quotedValue)
}

// updateNeededFields adds the filtered field to the allow list of pf,
// since it is the only field the filter reads.
func (fa *filterArrayContains) updateNeededFields(pf *prefixfilter.Filter) {
	neededField := fa.fieldName
	pf.AddAllowFilter(neededField)
}

// matchRow reports whether the fa.fieldName value taken from fields
// is a JSON array containing fa.value.
func (fa *filterArrayContains) matchRow(fields []Field) bool {
	return matchArrayContains(getFieldValueByName(fields, fa.fieldName), fa.value)
}

// applyToBlockResult keeps the bits set in bm only for the rows of br
// where the fa.fieldName column is a JSON array containing fa.value.
func (fa *filterArrayContains) applyToBlockResult(br *blockResult, bm *bitmap) {
	col := br.getColumnByName(fa.fieldName)

	switch {
	case col.isConst:
		// A single check of the const value decides the whole block.
		if !matchArrayContains(col.valuesEncoded[0], fa.value) {
			bm.resetBits()
		}
		return
	case col.isTime:
		// Time columns are never treated as JSON arrays.
		bm.resetBits()
		return
	}

	switch col.valueType {
	case valueTypeString:
		rowValues := col.getValues(br)
		bm.forEachSetBit(func(rowIdx int) bool {
			return matchArrayContains(rowValues[rowIdx], fa.value)
		})
	case valueTypeDict:
		// Evaluate the filter once per dict entry, then resolve every row
		// through the first byte of its encoded value, used as an index into the dict.
		bb := bbPool.Get()
		for _, dictValue := range col.dictValues {
			if matchArrayContains(dictValue, fa.value) {
				bb.B = append(bb.B, 1)
			} else {
				bb.B = append(bb.B, 0)
			}
		}
		encodedValues := col.getValuesEncoded(br)
		bm.forEachSetBit(func(rowIdx int) bool {
			dictIdx := encodedValues[rowIdx][0]
			return bb.B[dictIdx] == 1
		})
		bbPool.Put(bb)
	default:
		// The remaining value types are rejected without inspecting the values.
		bm.resetBits()
	}
}

// applyToBlockSearch keeps the bits set in bm only for the rows of bs
// where the fa.fieldName column is a JSON array containing fa.value.
func (fa *filterArrayContains) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
	// A non-empty const column value decides the whole block at once.
	if constValue := bs.getConstColumnValue(fa.fieldName); constValue != "" {
		if !matchArrayContains(constValue, fa.value) {
			bm.resetBits()
		}
		return
	}

	// Fall back to the regular column with the given name.
	header := bs.getColumnHeader(fa.fieldName)
	if header == nil {
		// Fast path - the block has no such column, so nothing can match.
		bm.resetBits()
		return
	}

	if header.valueType == valueTypeString {
		matchStringByArrayContains(bs, header, bm, fa.value)
		return
	}
	if header.valueType == valueTypeDict {
		matchValuesDictByArrayContains(bs, header, bm, fa.value)
		return
	}

	// The remaining value types are rejected without inspecting the values.
	bm.resetBits()
}

// matchValuesDictByArrayContains applies the array_contains check to a dict-encoded column.
//
// It builds one 0/1 flag per entry of ch.valuesDict.values and passes the flags
// to matchEncodedValuesDict together with bs, ch and bm.
func matchValuesDictByArrayContains(bs *blockSearch, ch *columnHeader, bm *bitmap, value string) {
	bb := bbPool.Get()
	for _, dictValue := range ch.valuesDict.values {
		if matchArrayContains(dictValue, value) {
			bb.B = append(bb.B, 1)
		} else {
			bb.B = append(bb.B, 0)
		}
	}
	matchEncodedValuesDict(bs, ch, bm, bb.B)
	bbPool.Put(bb)
}

// matchStringByArrayContains applies the array_contains check to a string column
// by passing a per-value matcher to visitValues.
func matchStringByArrayContains(bs *blockSearch, ch *columnHeader, bm *bitmap, value string) {
	containsValue := func(rowValue string) bool {
		return matchArrayContains(rowValue, value)
	}
	visitValues(bs, ch, bm, containsValue)
}

// matchArrayContains reports whether s is a JSON array with a top-level scalar item
// whose string form equals value.
//
// String items are compared by their unescaped contents, numbers by their JSON text,
// while true, false and null are compared with "true", "false" and "null".
// Nested arrays and objects are skipped.
//
// False is returned if s is empty, doesn't start with '[' or isn't valid JSON.
func matchArrayContains(s, value string) bool {
	// Only JSON arrays can match. This also rejects the empty string.
	if len(s) == 0 || s[0] != '[' {
		return false
	}

	// Fast path: skip JSON parsing if value isn't a substring of s.
	//
	// This shortcut is valid only when s has no backslashes. Otherwise a string item
	// may be stored in escaped form, which differs from its unescaped contents.
	// For example, `["a\"b"]` contains the item `a"b`, while `["\u0041"]` contains `A`.
	if strings.IndexByte(s, '\\') < 0 && !strings.Contains(s, value) {
		return false
	}

	// Use shared fastjson.ParserPool in order to avoid per-call parser allocations.
	p := jspp.Get()
	defer jspp.Put(p)

	v, err := p.Parse(s)
	if err != nil {
		return false
	}
	items, err := v.Array()
	if err != nil {
		// s is valid JSON, but not an array.
		return false
	}

	for _, item := range items {
		matched := false
		switch item.Type() {
		case fastjson.TypeString:
			// Comparing the converted bytes in place avoids allocating a copy of the item.
			matched = string(item.GetStringBytes()) == value
		case fastjson.TypeNumber:
			matched = item.String() == value
		case fastjson.TypeTrue:
			matched = value == "true"
		case fastjson.TypeFalse:
			matched = value == "false"
		case fastjson.TypeNull:
			matched = value == "null"
		}
		if matched {
			return true
		}
	}

	return false
}
162 changes: 162 additions & 0 deletions lib/logstorage/filter_array_contains_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
package logstorage

import (
"testing"

"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
)

// TestMatchArrayContains checks matchArrayContains against non-array inputs,
// arrays with and without the value, non-string items and nested structures.
func TestMatchArrayContains(t *testing.T) {
	t.Parallel()

	check := func(s, value string, resultExpected bool) {
		t.Helper()
		if result := matchArrayContains(s, value); result != resultExpected {
			t.Fatalf("unexpected result for s=%q, value=%q; got %v; want %v", s, value, result, resultExpected)
		}
	}

	// Inputs that aren't JSON arrays never match
	check("", "foo", false)
	check("foo", "foo", false)
	check("{}", "foo", false)

	// Arrays without the value
	check("[]", "foo", false)
	check(`["bar"]`, "foo", false)
	check(`["bar","baz"]`, "foo", false)
	check(`[1,2]`, "3", false)

	// Arrays with the value at various positions
	check(`["foo"]`, "foo", true)
	check(`["bar","foo"]`, "foo", true)
	check(`["foo","bar"]`, "foo", true)
	check(`["a","foo","b"]`, "foo", true)

	// Non-string items are compared by their text
	check(`[123]`, "123", true)
	check(`[true]`, "true", true)
	check(`["123"]`, "123", true)

	// The value must equal the whole item
	check(`["foo bar"]`, "foo", false) // value is a word of the item
	check(`["foobar"]`, "foo", false)  // value is a prefix of the item
	check(`["foo"]`, "fo", false)      // value is shorter than the item

	// Nested structures are skipped by the current implementation
	check(`[{"a":"b"}]`, `{"a":"b"}`, false) // nested object is skipped
	check(`[["a"]]`, `["a"]`, false)         // nested array is skipped
	check(`[["a"], "b"]`, "b", true)         // scalar next to a nested array
}

func TestFilterArrayContains(t *testing.T) {
t.Parallel()

t.Run("const-column", func(t *testing.T) {
columns := []column{
{
name: "foo",
values: []string{
`["a","b"]`,
`["a","b"]`,
`["a","b"]`,
},
},
}

// match
fa := &filterArrayContains{
fieldName: "foo",
value: "a",
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{0, 1, 2})

fa = &filterArrayContains{
fieldName: "foo",
value: "b",
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{0, 1, 2})

// mismatch
fa = &filterArrayContains{
fieldName: "foo",
value: "c",
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)

fa = &filterArrayContains{
fieldName: "non-existing-column",
value: "a",
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
})

t.Run("dict", func(t *testing.T) {
columns := []column{
{
name: "foo",
values: []string{
"",
`["a"]`,
`["b"]`,
`["a","b"]`,
`"a"`, // not an array
`[1,2]`,
},
},
}

// match
fa := &filterArrayContains{
fieldName: "foo",
value: "a",
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{1, 3})

fa = &filterArrayContains{
fieldName: "foo",
value: "b",
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{2, 3})

// mismatch
fa = &filterArrayContains{
fieldName: "foo",
value: "c",
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
})

t.Run("strings", func(t *testing.T) {
columns := []column{
{
name: "foo",
values: []string{
`["apple", "banana"]`,
`["orange"]`,
`not array`,
`["apple"]`,
`[]`,
},
},
}

// match
fa := &filterArrayContains{
fieldName: "foo",
value: "apple",
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{0, 3})

// mismatch
fa = &filterArrayContains{
fieldName: "foo",
value: "pear",
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
})

// Remove the remaining data files for the test
fs.MustRemoveDir(t.Name())
}
13 changes: 13 additions & 0 deletions lib/logstorage/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -2007,6 +2007,8 @@ func parseFilterGeneric(lex *lexer, fieldName string) (filter, error) {
return parseFilterContainsAll(lex, fieldName)
case lex.isKeyword("contains_any"):
return parseFilterContainsAny(lex, fieldName)
case lex.isKeyword("array_contains"):
return parseFilterArrayContains(lex, fieldName)
Comment on lines +2010 to +2011
Copy link

Copilot AI Dec 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new array_contains filter is missing parser tests. All similar filters (e.g., contains_any, contains_all, ipv4_range) have corresponding parser tests in parser_test.go (see lines 866-924). These tests verify that the parser correctly creates the filter with expected field names and values.

Consider adding a test function like:

// TestParseFilterArrayContains checks that ParseQuery turns array_contains(...)
// into *filterArrayContains with the expected field name and value.
func TestParseFilterArrayContains(t *testing.T) {
	check := func(s, fieldNameExpected, valueExpected string) {
		t.Helper()

		q, err := ParseQuery(s)
		if err != nil {
			t.Fatalf("unexpected error: %s", err)
		}

		fa, isArrayContains := q.f.(*filterArrayContains)
		if !isArrayContains {
			t.Fatalf("unexpected filter type; got %T; want *filterArrayContains; filter: %s", q.f, q.f)
		}
		if got := fa.fieldName; got != fieldNameExpected {
			t.Fatalf("unexpected fieldName; got %q; want %q", got, fieldNameExpected)
		}
		if got := fa.value; got != valueExpected {
			t.Fatalf("unexpected value; got %q; want %q", got, valueExpected)
		}
	}

	check(`array_contains("foo")`, `_msg`, "foo")
	check(`tags:array_contains("prod")`, `tags`, "prod")
	check(`array_contains(123)`, `_msg`, "123")
}

Copilot uses AI. Check for mistakes.
case lex.isKeyword("contains_common_case"):
return parseFilterContainsCommonCase(lex, fieldName)
case lex.isKeyword("eq_field"):
Expand Down Expand Up @@ -2311,6 +2313,16 @@ func parseFilterContainsAny(lex *lexer, fieldName string) (filter, error) {
return parseInValues(lex, fieldName, fi, &fi.values)
}

// parseFilterArrayContains parses `array_contains(arg)` for the given fieldName.
//
// The argument is read by parseFuncArg, while the field name is passed
// through getCanonicalColumnName before it is stored in the returned filter.
func parseFilterArrayContains(lex *lexer, fieldName string) (filter, error) {
	newFilter := func(arg string) (filter, error) {
		return &filterArrayContains{
			fieldName: getCanonicalColumnName(fieldName),
			value:     arg,
		}, nil
	}
	return parseFuncArg(lex, fieldName, newFilter)
}

func parseFilterIn(lex *lexer, fieldName string) (filter, error) {
fi := &filterIn{
fieldName: getCanonicalColumnName(fieldName),
Expand Down Expand Up @@ -3803,6 +3815,7 @@ var reservedKeywords = func() map[string]struct{} {
// functions
"contains_all",
"contains_any",
"array_contains",
Copy link

Copilot AI Dec 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The array_contains filter is missing serialization tests in the main query parsing test. All similar filters have String() serialization tests (see lines 1579-1608 for contains_any and contains_all). These tests verify that the filter correctly serializes back to LogsQL format.

Consider adding tests like:

// array_contains filter
f(`array_contains(foo)`, `array_contains(foo)`)
f(`array_contains("foo bar")`, `array_contains("foo bar")`)
f(`tags:array_contains(prod)`, `tags:array_contains(prod)`)
f(`array_contains(123)`, `array_contains(123)`)

These should be added in the appropriate section of the test that starts around line 1579.

Copilot uses AI. Check for mistakes.
"contains_common_case",
"eq_field",
"equals_common_case",
Expand Down
Loading