Skip to content

Commit fb2564f

Browse files
Al-Pragliolapboyd
andauthored
feat: general include exclude models field in sources file (#1885)
* feat: general include exclude models field in sources file Signed-off-by: Alessio Pragliola <seth.pro@gmail.com> * chore: refactor source filter validation Signed-off-by: Alessio Pragliola <seth.pro@gmail.com> * chore: refactor validation function Signed-off-by: Alessio Pragliola <seth.pro@gmail.com> * feat: apply suggestions from code review Co-authored-by: Paul Boyd <paul@pboyd.io> Signed-off-by: Alessio Pragliola <83355398+Al-Pragliola@users.noreply.github.com> * chore: add case insensitive test Signed-off-by: Alessio Pragliola <seth.pro@gmail.com> * feat: add case-insensitive description Signed-off-by: Alessio Pragliola <seth.pro@gmail.com> --------- Signed-off-by: Alessio Pragliola <seth.pro@gmail.com> Signed-off-by: Alessio Pragliola <83355398+Al-Pragliola@users.noreply.github.com> Co-authored-by: Paul Boyd <paul@pboyd.io>
1 parent 1ae75bb commit fb2564f

8 files changed

Lines changed: 580 additions & 25 deletions

File tree

api/openapi/catalog.yaml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -502,6 +502,27 @@ components:
502502
type: array
503503
items:
504504
type: string
505+
includedModels:
506+
description: |-
507+
Optional allow-list of models that are eligible for this source. Entries can be
508+
exact model names or patterns that use `*` as a wildcard. When provided, only
509+
models matching at least one pattern are considered.
510+
511+
Pattern matching is case-insensitive, so `Granite/*` will match `granite/model`,
512+
`Granite/model`, and `GRANITE/model`.
513+
type: array
514+
items:
515+
type: string
516+
excludedModels:
517+
description: |-
518+
Optional block-list of models that should be removed from the catalog even if
519+
they match `includedModels`. Patterns support the `*` wildcard.
520+
521+
Pattern matching is case-insensitive, so `*-beta` will match `Model-Beta`,
522+
`model-beta`, and `MODEL-BETA`.
523+
type: array
524+
items:
525+
type: string
505526
CatalogSourceList:
506527
description: List of CatalogSource entities.
507528
allOf:

api/openapi/src/catalog.yaml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,27 @@ components:
385385
type: array
386386
items:
387387
type: string
388+
includedModels:
389+
description: |-
390+
Optional allow-list of models that are eligible for this source. Entries can be
391+
exact model names or patterns that use `*` as a wildcard. When provided, only
392+
models matching at least one pattern are considered.
393+
394+
Pattern matching is case-insensitive, so `Granite/*` will match `granite/model`,
395+
`Granite/model`, and `GRANITE/model`.
396+
type: array
397+
items:
398+
type: string
399+
excludedModels:
400+
description: |-
401+
Optional block-list of models that should be removed from the catalog even if
402+
they match `includedModels`. Patterns support the `*` wildcard.
403+
404+
Pattern matching is case-insensitive, so `*-beta` will match `Model-Beta`,
405+
`model-beta`, and `MODEL-BETA`.
406+
type: array
407+
items:
408+
type: string
388409
CatalogSourceList:
389410
description: List of CatalogSource entities.
390411
allOf:

catalog/internal/catalog/loader.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,11 @@ func (l *Loader) updateSources(path string, config *sourceConfig) error {
194194
return fmt.Errorf("invalid source: duplicate id %s", id)
195195
}
196196

197+
// Validate includedModels/excludedModels patterns early
198+
if err := ValidateSourceFilters(source.IncludedModels, source.ExcludedModels); err != nil {
199+
return fmt.Errorf("invalid source %s: %w", id, err)
200+
}
201+
197202
sources[id] = source.CatalogSource
198203

199204
glog.Infof("loaded source %s of type %s", id, source.Type)
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
package catalog
2+
3+
import (
4+
"fmt"
5+
"regexp"
6+
"strings"
7+
)
8+
9+
// ModelFilter encapsulates include/exclude pattern matching for model names.
10+
type ModelFilter struct {
11+
included []*compiledPattern
12+
excluded []*compiledPattern
13+
}
14+
15+
type compiledPattern struct {
16+
raw string
17+
re *regexp.Regexp
18+
}
19+
20+
func newCompiledPattern(field string, idx int, raw string) (*compiledPattern, error) {
21+
value := strings.TrimSpace(raw)
22+
if value == "" {
23+
return nil, fmt.Errorf("%s[%d]: pattern cannot be empty", field, idx)
24+
}
25+
26+
// Convert a simple glob (only supporting '*') into a regexp.
27+
var b strings.Builder
28+
b.WriteString("(?i)^")
29+
for _, r := range value {
30+
if r == '*' {
31+
b.WriteString(".*")
32+
continue
33+
}
34+
b.WriteString(regexp.QuoteMeta(string(r)))
35+
}
36+
b.WriteString("$")
37+
38+
re, err := regexp.Compile(b.String())
39+
if err != nil {
40+
return nil, fmt.Errorf("%s[%d]: invalid pattern %q: %w", field, idx, value, err)
41+
}
42+
43+
return &compiledPattern{
44+
raw: value,
45+
re: re,
46+
}, nil
47+
}
48+
49+
func compilePatterns(field string, patterns []string) ([]*compiledPattern, error) {
50+
if len(patterns) == 0 {
51+
return nil, nil
52+
}
53+
54+
compiled := make([]*compiledPattern, 0, len(patterns))
55+
for i, pattern := range patterns {
56+
cp, err := newCompiledPattern(field, i, pattern)
57+
if err != nil {
58+
return nil, err
59+
}
60+
compiled = append(compiled, cp)
61+
}
62+
return compiled, nil
63+
}
64+
65+
// ValidateSourceFilters validates that the includedModels and excludedModels patterns
66+
// are valid (non-empty, compilable, non-conflicting). This is useful for early validation
67+
// at configuration load time without constructing the full ModelFilter.
68+
func ValidateSourceFilters(included, excluded []string) error {
69+
if err := detectConflictingPatterns(included, excluded); err != nil {
70+
return err
71+
}
72+
73+
if _, err := compilePatterns("includedModels", included); err != nil {
74+
return err
75+
}
76+
77+
if _, err := compilePatterns("excludedModels", excluded); err != nil {
78+
return err
79+
}
80+
81+
return nil
82+
}
83+
84+
// NewModelFilter builds a ModelFilter from the provided include/exclude pattern lists.
85+
func NewModelFilter(included, excluded []string) (*ModelFilter, error) {
86+
if err := ValidateSourceFilters(included, excluded); err != nil {
87+
return nil, err
88+
}
89+
90+
inc, err := compilePatterns("includedModels", included)
91+
if err != nil {
92+
return nil, err
93+
}
94+
95+
exc, err := compilePatterns("excludedModels", excluded)
96+
if err != nil {
97+
return nil, err
98+
}
99+
100+
if len(inc) == 0 && len(exc) == 0 {
101+
return nil, nil
102+
}
103+
104+
return &ModelFilter{
105+
included: inc,
106+
excluded: exc,
107+
}, nil
108+
}
109+
110+
func detectConflictingPatterns(included, excluded []string) error {
111+
if len(included) == 0 || len(excluded) == 0 {
112+
return nil
113+
}
114+
115+
includedIdx := make(map[string]int, len(included))
116+
for i, pattern := range included {
117+
value := strings.TrimSpace(pattern)
118+
includedIdx[value] = i
119+
}
120+
121+
for j, pattern := range excluded {
122+
value := strings.TrimSpace(pattern)
123+
if i, exists := includedIdx[value]; exists {
124+
return fmt.Errorf("pattern %q is defined in both includedModels[%d] and excludedModels[%d]", value, i, j)
125+
}
126+
}
127+
return nil
128+
}
129+
130+
// Allows returns true if the provided model name passes the include/exclude rules.
131+
func (f *ModelFilter) Allows(name string) bool {
132+
if f == nil {
133+
return true
134+
}
135+
136+
if len(f.included) > 0 {
137+
matched := false
138+
for _, pattern := range f.included {
139+
if pattern.re.MatchString(name) {
140+
matched = true
141+
break
142+
}
143+
}
144+
if !matched {
145+
return false
146+
}
147+
}
148+
149+
for _, pattern := range f.excluded {
150+
if pattern.re.MatchString(name) {
151+
return false
152+
}
153+
}
154+
155+
return true
156+
}
157+
158+
// NewModelFilterFromSource composes a ModelFilter using the source-level configuration and any legacy additions.
159+
func NewModelFilterFromSource(source *Source, extraIncluded, extraExcluded []string) (*ModelFilter, error) {
160+
if source == nil {
161+
return nil, fmt.Errorf("source cannot be nil when building filters")
162+
}
163+
164+
included := append([]string{}, source.IncludedModels...)
165+
if len(extraIncluded) > 0 {
166+
included = append(included, extraIncluded...)
167+
}
168+
169+
excluded := append([]string{}, source.ExcludedModels...)
170+
if len(extraExcluded) > 0 {
171+
excluded = append(excluded, extraExcluded...)
172+
}
173+
174+
filter, err := NewModelFilter(included, excluded)
175+
if err != nil {
176+
return nil, fmt.Errorf("invalid include/exclude configuration for source %s: %w", source.Id, err)
177+
}
178+
179+
return filter, nil
180+
}
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
package catalog
2+
3+
import (
4+
"testing"
5+
6+
apimodels "github.com/kubeflow/model-registry/catalog/pkg/openapi"
7+
"github.com/stretchr/testify/assert"
8+
"github.com/stretchr/testify/require"
9+
)
10+
11+
func TestModelFilterAllows(t *testing.T) {
12+
filter, err := NewModelFilter([]string{"Granite/*"}, []string{"Granite/beta-*"})
13+
require.NoError(t, err)
14+
15+
assert.True(t, filter.Allows("Granite/3-1-instruct"))
16+
assert.False(t, filter.Allows("Granite/beta-release"))
17+
assert.False(t, filter.Allows("Other/model"))
18+
19+
// Test case-insensitive matching
20+
assert.True(t, filter.Allows("granite/3-1-instruct"))
21+
assert.True(t, filter.Allows("GRANITE/3-1-instruct"))
22+
assert.False(t, filter.Allows("granite/beta-release"))
23+
24+
allowAll, err := NewModelFilter([]string{"*"}, nil)
25+
require.NoError(t, err)
26+
assert.True(t, allowAll.Allows("anything/goes"))
27+
}
28+
29+
func TestModelFilterConflictsAndValidation(t *testing.T) {
30+
_, err := NewModelFilter([]string{"Granite/*"}, []string{"Granite/*"})
31+
require.Error(t, err)
32+
assert.Contains(t, err.Error(), "pattern \"Granite/*\"")
33+
34+
_, err = NewModelFilter([]string{""}, nil)
35+
require.Error(t, err)
36+
assert.Contains(t, err.Error(), "pattern cannot be empty")
37+
}
38+
39+
func TestNewModelFilterFromSourceMergesLegacy(t *testing.T) {
40+
source := &Source{
41+
CatalogSource: apimodels.CatalogSource{
42+
Id: "test",
43+
Name: "Test source",
44+
Labels: []string{},
45+
IncludedModels: []string{"Granite/*"},
46+
},
47+
}
48+
49+
filter, err := NewModelFilterFromSource(source, nil, []string{"Legacy/*"})
50+
require.NoError(t, err)
51+
52+
assert.True(t, filter.Allows("Granite/model"))
53+
assert.False(t, filter.Allows("Legacy/model"))
54+
}
55+
56+
func TestModelFilterWithWildcardInMiddle(t *testing.T) {
57+
// Test that wildcards match across the entire name
58+
filter, err := NewModelFilter(nil, []string{"*deprecated*", "*old*"})
59+
require.NoError(t, err)
60+
61+
assert.True(t, filter.Allows("Granite/empty-stable"))
62+
assert.False(t, filter.Allows("Mistral/empty-deprecated"))
63+
assert.False(t, filter.Allows("DeepSeek/empty-old-v1"))
64+
assert.False(t, filter.Allows("Foo/old"))
65+
assert.False(t, filter.Allows("Bar/deprecated"))
66+
67+
// Test that */pattern* requires the pattern immediately after /
68+
filter2, err := NewModelFilter(nil, []string{"*/deprecated", "*/old*"})
69+
require.NoError(t, err)
70+
71+
assert.True(t, filter2.Allows("Mistral/empty-deprecated")) // doesn't match */deprecated (no immediate match after /)
72+
assert.False(t, filter2.Allows("Foo/deprecated")) // matches */deprecated
73+
assert.False(t, filter2.Allows("Bar/old-model")) // matches */old*
74+
}
75+
76+
func TestValidateSourceFilters(t *testing.T) {
77+
t.Run("no filters", func(t *testing.T) {
78+
err := ValidateSourceFilters(nil, nil)
79+
assert.NoError(t, err)
80+
})
81+
82+
t.Run("valid patterns", func(t *testing.T) {
83+
err := ValidateSourceFilters([]string{"Granite/*", "Meta/*"}, []string{"*-beta"})
84+
assert.NoError(t, err)
85+
})
86+
87+
t.Run("conflicting patterns", func(t *testing.T) {
88+
err := ValidateSourceFilters([]string{"Granite/*"}, []string{"Granite/*"})
89+
require.Error(t, err)
90+
assert.Contains(t, err.Error(), "Granite/*")
91+
})
92+
93+
t.Run("empty pattern in includedModels", func(t *testing.T) {
94+
err := ValidateSourceFilters([]string{"Granite/*", ""}, nil)
95+
require.Error(t, err)
96+
assert.Contains(t, err.Error(), "pattern cannot be empty")
97+
})
98+
99+
t.Run("whitespace-only pattern", func(t *testing.T) {
100+
err := ValidateSourceFilters([]string{" "}, nil)
101+
require.Error(t, err)
102+
assert.Contains(t, err.Error(), "pattern cannot be empty")
103+
})
104+
105+
t.Run("valid glob patterns", func(t *testing.T) {
106+
err := ValidateSourceFilters([]string{"valid/*"}, nil)
107+
assert.NoError(t, err) // Our conversion always produces valid regex
108+
})
109+
}

0 commit comments

Comments
 (0)