Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions api/openapi/catalog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,27 @@ components:
type: array
items:
type: string
includedModels:
description: |-
Optional allow-list of models that are eligible for this source. Entries can be
exact model names or patterns that use `*` as a wildcard. When provided, only
models matching at least one pattern are considered.
Comment on lines +507 to +509
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To confirm, we want this to be case-insensitive? Maybe worth mentioning that here. I would generally assume a glob is case-sensitive like it would be in a shell.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah making it case insensitive makes totally sense in my opinion in our intended use case, users probably don't even know the exact naming of the models that are needed to be excluded/included or what a glob is

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good. We should just make sure to document that

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes thank you @jonburdo , please check the latest commit c049854


Pattern matching is case-insensitive, so `Granite/*` will match `granite/model`,
`Granite/model`, and `GRANITE/model`.
type: array
items:
type: string
excludedModels:
description: |-
Optional block-list of models that should be removed from the catalog even if
they match `includedModels`. Patterns support the `*` wildcard.

Pattern matching is case-insensitive, so `*-beta` will match `Model-Beta`,
`model-beta`, and `MODEL-BETA`.
type: array
items:
type: string
CatalogSourceList:
description: List of CatalogSource entities.
allOf:
Expand Down
21 changes: 21 additions & 0 deletions api/openapi/src/catalog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,27 @@ components:
type: array
items:
type: string
includedModels:
description: |-
Optional allow-list of models that are eligible for this source. Entries can be
exact model names or patterns that use `*` as a wildcard. When provided, only
models matching at least one pattern are considered.

Pattern matching is case-insensitive, so `Granite/*` will match `granite/model`,
`Granite/model`, and `GRANITE/model`.
type: array
items:
type: string
excludedModels:
description: |-
Optional block-list of models that should be removed from the catalog even if
they match `includedModels`. Patterns support the `*` wildcard.

Pattern matching is case-insensitive, so `*-beta` will match `Model-Beta`,
`model-beta`, and `MODEL-BETA`.
type: array
items:
type: string
CatalogSourceList:
description: List of CatalogSource entities.
allOf:
Expand Down
5 changes: 5 additions & 0 deletions catalog/internal/catalog/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,11 @@ func (l *Loader) updateSources(path string, config *sourceConfig) error {
return fmt.Errorf("invalid source: duplicate id %s", id)
}

// Validate includedModels/excludedModels patterns early
if err := ValidateSourceFilters(source.IncludedModels, source.ExcludedModels); err != nil {
return fmt.Errorf("invalid source %s: %w", id, err)
}

sources[id] = source.CatalogSource

glog.Infof("loaded source %s of type %s", id, source.Type)
Expand Down
180 changes: 180 additions & 0 deletions catalog/internal/catalog/model_filter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
package catalog

import (
"fmt"
"regexp"
"strings"
)

// ModelFilter encapsulates include/exclude pattern matching for model names.
type ModelFilter struct {
included []*compiledPattern
excluded []*compiledPattern
}

type compiledPattern struct {
raw string
re *regexp.Regexp
}

func newCompiledPattern(field string, idx int, raw string) (*compiledPattern, error) {
value := strings.TrimSpace(raw)
if value == "" {
return nil, fmt.Errorf("%s[%d]: pattern cannot be empty", field, idx)
}

// Convert a simple glob (only supporting '*') into a regexp.
var b strings.Builder
b.WriteString("(?i)^")
for _, r := range value {
if r == '*' {
b.WriteString(".*")
continue
}
b.WriteString(regexp.QuoteMeta(string(r)))
}
b.WriteString("$")

re, err := regexp.Compile(b.String())
if err != nil {
return nil, fmt.Errorf("%s[%d]: invalid pattern %q: %w", field, idx, value, err)
}

return &compiledPattern{
raw: value,
re: re,
}, nil
}

func compilePatterns(field string, patterns []string) ([]*compiledPattern, error) {
if len(patterns) == 0 {
return nil, nil
}

compiled := make([]*compiledPattern, 0, len(patterns))
for i, pattern := range patterns {
cp, err := newCompiledPattern(field, i, pattern)
if err != nil {
return nil, err
}
compiled = append(compiled, cp)
}
return compiled, nil
}

// ValidateSourceFilters validates that the includedModels and excludedModels patterns
// are valid (non-empty, compilable, non-conflicting). This is useful for early validation
// at configuration load time without constructing the full ModelFilter.
func ValidateSourceFilters(included, excluded []string) error {
if err := detectConflictingPatterns(included, excluded); err != nil {
return err
}

if _, err := compilePatterns("includedModels", included); err != nil {
return err
}

if _, err := compilePatterns("excludedModels", excluded); err != nil {
return err
}

return nil
}

// NewModelFilter builds a ModelFilter from the provided include/exclude pattern lists.
func NewModelFilter(included, excluded []string) (*ModelFilter, error) {
if err := ValidateSourceFilters(included, excluded); err != nil {
return nil, err
}

inc, err := compilePatterns("includedModels", included)
if err != nil {
return nil, err
}

exc, err := compilePatterns("excludedModels", excluded)
if err != nil {
return nil, err
}

if len(inc) == 0 && len(exc) == 0 {
return nil, nil
}

return &ModelFilter{
included: inc,
excluded: exc,
}, nil
}

func detectConflictingPatterns(included, excluded []string) error {
if len(included) == 0 || len(excluded) == 0 {
return nil
}

includedIdx := make(map[string]int, len(included))
for i, pattern := range included {
value := strings.TrimSpace(pattern)
includedIdx[value] = i
}

for j, pattern := range excluded {
value := strings.TrimSpace(pattern)
if i, exists := includedIdx[value]; exists {
return fmt.Errorf("pattern %q is defined in both includedModels[%d] and excludedModels[%d]", value, i, j)
}
}
return nil
}

// Allows returns true if the provided model name passes the include/exclude rules.
func (f *ModelFilter) Allows(name string) bool {
if f == nil {
return true
}

if len(f.included) > 0 {
matched := false
for _, pattern := range f.included {
if pattern.re.MatchString(name) {
matched = true
break
}
}
if !matched {
return false
}
}

for _, pattern := range f.excluded {
if pattern.re.MatchString(name) {
return false
}
}

return true
}

// NewModelFilterFromSource composes a ModelFilter using the source-level configuration and any legacy additions.
func NewModelFilterFromSource(source *Source, extraIncluded, extraExcluded []string) (*ModelFilter, error) {
if source == nil {
return nil, fmt.Errorf("source cannot be nil when building filters")
}

included := append([]string{}, source.IncludedModels...)
if len(extraIncluded) > 0 {
included = append(included, extraIncluded...)
}

excluded := append([]string{}, source.ExcludedModels...)
if len(extraExcluded) > 0 {
excluded = append(excluded, extraExcluded...)
}

filter, err := NewModelFilter(included, excluded)
if err != nil {
return nil, fmt.Errorf("invalid include/exclude configuration for source %s: %w", source.Id, err)
}

return filter, nil
}
109 changes: 109 additions & 0 deletions catalog/internal/catalog/model_filter_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package catalog

import (
"testing"

apimodels "github.com/kubeflow/model-registry/catalog/pkg/openapi"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestModelFilterAllows(t *testing.T) {
filter, err := NewModelFilter([]string{"Granite/*"}, []string{"Granite/beta-*"})
require.NoError(t, err)

assert.True(t, filter.Allows("Granite/3-1-instruct"))
assert.False(t, filter.Allows("Granite/beta-release"))
assert.False(t, filter.Allows("Other/model"))

// Test case-insensitive matching
assert.True(t, filter.Allows("granite/3-1-instruct"))
assert.True(t, filter.Allows("GRANITE/3-1-instruct"))
assert.False(t, filter.Allows("granite/beta-release"))

allowAll, err := NewModelFilter([]string{"*"}, nil)
require.NoError(t, err)
assert.True(t, allowAll.Allows("anything/goes"))
}

func TestModelFilterConflictsAndValidation(t *testing.T) {
_, err := NewModelFilter([]string{"Granite/*"}, []string{"Granite/*"})
require.Error(t, err)
assert.Contains(t, err.Error(), "pattern \"Granite/*\"")

_, err = NewModelFilter([]string{""}, nil)
require.Error(t, err)
assert.Contains(t, err.Error(), "pattern cannot be empty")
}

func TestNewModelFilterFromSourceMergesLegacy(t *testing.T) {
source := &Source{
CatalogSource: apimodels.CatalogSource{
Id: "test",
Name: "Test source",
Labels: []string{},
IncludedModels: []string{"Granite/*"},
},
}

filter, err := NewModelFilterFromSource(source, nil, []string{"Legacy/*"})
require.NoError(t, err)

assert.True(t, filter.Allows("Granite/model"))
assert.False(t, filter.Allows("Legacy/model"))
}

func TestModelFilterWithWildcardInMiddle(t *testing.T) {
// Test that wildcards match across the entire name
filter, err := NewModelFilter(nil, []string{"*deprecated*", "*old*"})
require.NoError(t, err)

assert.True(t, filter.Allows("Granite/empty-stable"))
assert.False(t, filter.Allows("Mistral/empty-deprecated"))
assert.False(t, filter.Allows("DeepSeek/empty-old-v1"))
assert.False(t, filter.Allows("Foo/old"))
assert.False(t, filter.Allows("Bar/deprecated"))

// Test that */pattern* requires the pattern immediately after /
filter2, err := NewModelFilter(nil, []string{"*/deprecated", "*/old*"})
require.NoError(t, err)

assert.True(t, filter2.Allows("Mistral/empty-deprecated")) // doesn't match */deprecated (no immediate match after /)
assert.False(t, filter2.Allows("Foo/deprecated")) // matches */deprecated
assert.False(t, filter2.Allows("Bar/old-model")) // matches */old*
}

func TestValidateSourceFilters(t *testing.T) {
t.Run("no filters", func(t *testing.T) {
err := ValidateSourceFilters(nil, nil)
assert.NoError(t, err)
})

t.Run("valid patterns", func(t *testing.T) {
err := ValidateSourceFilters([]string{"Granite/*", "Meta/*"}, []string{"*-beta"})
assert.NoError(t, err)
})

t.Run("conflicting patterns", func(t *testing.T) {
err := ValidateSourceFilters([]string{"Granite/*"}, []string{"Granite/*"})
require.Error(t, err)
assert.Contains(t, err.Error(), "Granite/*")
})

t.Run("empty pattern in includedModels", func(t *testing.T) {
err := ValidateSourceFilters([]string{"Granite/*", ""}, nil)
require.Error(t, err)
assert.Contains(t, err.Error(), "pattern cannot be empty")
})

t.Run("whitespace-only pattern", func(t *testing.T) {
err := ValidateSourceFilters([]string{" "}, nil)
require.Error(t, err)
assert.Contains(t, err.Error(), "pattern cannot be empty")
})

t.Run("valid glob patterns", func(t *testing.T) {
err := ValidateSourceFilters([]string{"valid/*"}, nil)
assert.NoError(t, err) // Our conversion always produces valid regex
})
}
Loading
Loading