From b6966d77261f03a865a71a60d0d65d71b4b46ed6 Mon Sep 17 00:00:00 2001 From: Alessio Pragliola Date: Wed, 19 Nov 2025 18:59:57 +0100 Subject: [PATCH 1/6] feat: general include exclude models field in sources file Signed-off-by: Alessio Pragliola --- api/openapi/catalog.yaml | 15 ++ api/openapi/src/catalog.yaml | 15 ++ catalog/internal/catalog/loader.go | 4 + catalog/internal/catalog/model_filter.go | 161 ++++++++++++++++++ catalog/internal/catalog/model_filter_test.go | 69 ++++++++ catalog/internal/catalog/yaml_catalog.go | 42 ++--- catalog/internal/catalog/yaml_catalog_test.go | 153 +++++++++++++++++ catalog/pkg/openapi/model_catalog_source.go | 74 ++++++++ 8 files changed, 508 insertions(+), 25 deletions(-) create mode 100644 catalog/internal/catalog/model_filter.go create mode 100644 catalog/internal/catalog/model_filter_test.go diff --git a/api/openapi/catalog.yaml b/api/openapi/catalog.yaml index 640a823fed..d6df4d3777 100644 --- a/api/openapi/catalog.yaml +++ b/api/openapi/catalog.yaml @@ -502,6 +502,21 @@ components: type: array items: type: string + includedModels: + description: |- + Optional allow-list of models that are eligible for this source. Entries can be + exact model names or patterns that use `*` as a wildcard. When provided, only + models matching at least one pattern are considered. + type: array + items: + type: string + excludedModels: + description: |- + Optional block-list of models that should be removed from the catalog even if + they match `includedModels`. Patterns support the `*` wildcard. + type: array + items: + type: string CatalogSourceList: description: List of CatalogSource entities. allOf: diff --git a/api/openapi/src/catalog.yaml b/api/openapi/src/catalog.yaml index 9480faf409..a1098ace3d 100644 --- a/api/openapi/src/catalog.yaml +++ b/api/openapi/src/catalog.yaml @@ -385,6 +385,21 @@ components: type: array items: type: string + includedModels: + description: |- + Optional allow-list of models that are eligible for this source. Entries can be + exact model names or patterns that use `*` as a wildcard. When provided, only + models matching at least one pattern are considered. + type: array + items: + type: string + excludedModels: + description: |- + Optional block-list of models that should be removed from the catalog even if + they match `includedModels`. Patterns support the `*` wildcard. + type: array + items: + type: string CatalogSourceList: description: List of CatalogSource entities. allOf: diff --git a/catalog/internal/catalog/loader.go b/catalog/internal/catalog/loader.go index a45e29cf4d..faca6e2a06 100644 --- a/catalog/internal/catalog/loader.go +++ b/catalog/internal/catalog/loader.go @@ -194,6 +194,10 @@ func (l *Loader) updateSources(path string, config *sourceConfig) error { return fmt.Errorf("invalid source: duplicate id %s", id) } + if _, err := NewModelFilterFromSource(&source, nil, nil); err != nil { + return fmt.Errorf("invalid source %s: %w", id, err) + } + sources[id] = source.CatalogSource glog.Infof("loaded source %s of type %s", id, source.Type) diff --git a/catalog/internal/catalog/model_filter.go b/catalog/internal/catalog/model_filter.go new file mode 100644 index 0000000000..e84decd5e5 --- /dev/null +++ b/catalog/internal/catalog/model_filter.go @@ -0,0 +1,161 @@ +package catalog + +import ( + "fmt" + "regexp" + "strings" +) + +// ModelFilter encapsulates include/exclude pattern matching for model names. +type ModelFilter struct { + included []*compiledPattern + excluded []*compiledPattern +} + +type compiledPattern struct { + raw string + re *regexp.Regexp +} + +func newCompiledPattern(field string, idx int, raw string) (*compiledPattern, error) { + value := strings.TrimSpace(raw) + if value == "" { + return nil, fmt.Errorf("%s[%d]: pattern cannot be empty", field, idx) + } + + // Convert a simple glob (only supporting '*') into a regexp. + var b strings.Builder + b.WriteString("^") + for _, r := range value { + if r == '*' { + b.WriteString(".*") + continue + } + b.WriteString(regexp.QuoteMeta(string(r))) + } + b.WriteString("$") + + re, err := regexp.Compile(b.String()) + if err != nil { + return nil, fmt.Errorf("%s[%d]: invalid pattern %q: %w", field, idx, value, err) + } + + return &compiledPattern{ + raw: value, + re: re, + }, nil +} + +func compilePatterns(field string, patterns []string) ([]*compiledPattern, error) { + if len(patterns) == 0 { + return nil, nil + } + + compiled := make([]*compiledPattern, 0, len(patterns)) + for i, pattern := range patterns { + cp, err := newCompiledPattern(field, i, pattern) + if err != nil { + return nil, err + } + compiled = append(compiled, cp) + } + return compiled, nil +} + +// NewModelFilter builds a ModelFilter from the provided include/exclude pattern lists. +func NewModelFilter(included, excluded []string) (*ModelFilter, error) { + if err := detectConflictingPatterns(included, excluded); err != nil { + return nil, err + } + + inc, err := compilePatterns("includedModels", included) + if err != nil { + return nil, err + } + + exc, err := compilePatterns("excludedModels", excluded) + if err != nil { + return nil, err + } + + if len(inc) == 0 && len(exc) == 0 { + return nil, nil + } + + return &ModelFilter{ + included: inc, + excluded: exc, + }, nil +} + +func detectConflictingPatterns(included, excluded []string) error { + if len(included) == 0 || len(excluded) == 0 { + return nil + } + + includedIdx := make(map[string]int, len(included)) + for i, pattern := range included { + value := strings.TrimSpace(pattern) + includedIdx[value] = i + } + + for j, pattern := range excluded { + value := strings.TrimSpace(pattern) + if i, exists := includedIdx[value]; exists { + return fmt.Errorf("pattern %q is defined in both includedModels[%d] and excludedModels[%d]", value, i, j) + } + } + return nil +} + +// Allows returns true if the provided model name passes the include/exclude rules. +func (f *ModelFilter) Allows(name string) bool { + if f == nil { + return true + } + + if len(f.included) > 0 { + matched := false + for _, pattern := range f.included { + if pattern.re.MatchString(name) { + matched = true + break + } + } + if !matched { + return false + } + } + + for _, pattern := range f.excluded { + if pattern.re.MatchString(name) { + return false + } + } + + return true +} + +// NewModelFilterFromSource composes a ModelFilter using the source-level configuration and any legacy additions. +func NewModelFilterFromSource(source *Source, extraIncluded, extraExcluded []string) (*ModelFilter, error) { + if source == nil { + return nil, fmt.Errorf("source cannot be nil when building filters") + } + + included := append([]string{}, source.IncludedModels...) + if len(extraIncluded) > 0 { + included = append(included, extraIncluded...) + } + + excluded := append([]string{}, source.ExcludedModels...) + if len(extraExcluded) > 0 { + excluded = append(excluded, extraExcluded...) + } + + filter, err := NewModelFilter(included, excluded) + if err != nil { + return nil, fmt.Errorf("invalid include/exclude configuration for source %s: %w", source.Id, err) + } + + return filter, nil +} diff --git a/catalog/internal/catalog/model_filter_test.go b/catalog/internal/catalog/model_filter_test.go new file mode 100644 index 0000000000..567b5415e0 --- /dev/null +++ b/catalog/internal/catalog/model_filter_test.go @@ -0,0 +1,69 @@ +package catalog + +import ( + "testing" + + apimodels "github.com/kubeflow/model-registry/catalog/pkg/openapi" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestModelFilterAllows(t *testing.T) { + filter, err := NewModelFilter([]string{"Granite/*"}, []string{"Granite/beta-*"}) + require.NoError(t, err) + + assert.True(t, filter.Allows("Granite/3-1-instruct")) + assert.False(t, filter.Allows("Granite/beta-release")) + assert.False(t, filter.Allows("Other/model")) + + allowAll, err := NewModelFilter([]string{"*"}, nil) + require.NoError(t, err) + assert.True(t, allowAll.Allows("anything/goes")) +} + +func TestModelFilterConflictsAndValidation(t *testing.T) { + _, err := NewModelFilter([]string{"Granite/*"}, []string{"Granite/*"}) + require.Error(t, err) + assert.Contains(t, err.Error(), "pattern \"Granite/*\"") + + _, err = NewModelFilter([]string{""}, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "pattern cannot be empty") +} + +func TestNewModelFilterFromSourceMergesLegacy(t *testing.T) { + source := &Source{ + CatalogSource: apimodels.CatalogSource{ + Id: "test", + Name: "Test source", + Labels: []string{}, + IncludedModels: []string{"Granite/*"}, + }, + } + + filter, err := NewModelFilterFromSource(source, nil, []string{"Legacy/*"}) + require.NoError(t, err) + + assert.True(t, filter.Allows("Granite/model")) + assert.False(t, filter.Allows("Legacy/model")) +} + +func TestModelFilterWithWildcardInMiddle(t *testing.T) { + // Test that wildcards match across the entire name + filter, err := NewModelFilter(nil, []string{"*deprecated*", "*old*"}) + require.NoError(t, err) + + assert.True(t, filter.Allows("Granite/empty-stable")) + assert.False(t, filter.Allows("Mistral/empty-deprecated")) + assert.False(t, filter.Allows("DeepSeek/empty-old-v1")) + assert.False(t, filter.Allows("Foo/old")) + assert.False(t, filter.Allows("Bar/deprecated")) + + // Test that */pattern* requires the pattern immediately after / + filter2, err := NewModelFilter(nil, []string{"*/deprecated", "*/old*"}) + require.NoError(t, err) + + assert.True(t, filter2.Allows("Mistral/empty-deprecated")) // doesn't match */deprecated (no immediate match after /) + assert.False(t, filter2.Allows("Foo/deprecated")) // matches */deprecated + assert.False(t, filter2.Allows("Bar/old-model")) // matches */old* +} diff --git a/catalog/internal/catalog/yaml_catalog.go b/catalog/internal/catalog/yaml_catalog.go index c1a73580e7..3f7f781bb0 100644 --- a/catalog/internal/catalog/yaml_catalog.go +++ b/catalog/internal/catalog/yaml_catalog.go @@ -7,7 +7,6 @@ import ( "os" "path/filepath" "strconv" - "strings" "k8s.io/apimachinery/pkg/util/yaml" @@ -299,22 +298,9 @@ type yamlCatalog struct { Models []yamlModel `yaml:"models"` } -func isModelExcluded(modelName string, patterns []string) bool { - for _, pattern := range patterns { - if strings.HasSuffix(pattern, "*") { - if strings.HasPrefix(modelName, strings.TrimSuffix(pattern, "*")) { - return true - } - } else if modelName == pattern { - return true - } - } - return false -} - type yamlModelProvider struct { - path string - excludedModels map[string]struct{} + path string + filter *ModelFilter } func (p *yamlModelProvider) Models(ctx context.Context) (<-chan ModelProviderRecord, error) { @@ -378,7 +364,7 @@ func (p *yamlModelProvider) read() (*yamlCatalog, error) { func (p *yamlModelProvider) emit(ctx context.Context, catalog *yamlCatalog, out chan<- ModelProviderRecord) { done := ctx.Done() for _, model := range catalog.Models { - if _, excluded := p.excludedModels[model.Name]; excluded { + if p.filter != nil && !p.filter.Allows(model.Name) { continue } @@ -404,22 +390,28 @@ func newYamlModelProvider(ctx context.Context, source *Source, reldir string) (< p.path = filepath.Join(reldir, path) } - // Excluded models is an optional source property. - if _, exists := source.Properties[excludedModelsKey]; exists { - excludedModels, ok := source.Properties[excludedModelsKey].([]any) + var legacyExcluded []string + if raw, exists := source.Properties[excludedModelsKey]; exists { + values, ok := raw.([]any) if !ok { return nil, fmt.Errorf("%q property should be a list", excludedModelsKey) } - p.excludedModels = make(map[string]struct{}, len(excludedModels)) - for i, name := range excludedModels { - nameStr, ok := name.(string) + legacyExcluded = make([]string, len(values)) + for i, value := range values { + nameStr, ok := value.(string) if !ok { - return nil, fmt.Errorf("%s: invalid list: index %d: wanted string, got %T", name, i, name) + return nil, fmt.Errorf("%s: invalid list: index %d: wanted string, got %T", excludedModelsKey, i, value) } - p.excludedModels[nameStr] = struct{}{} + legacyExcluded[i] = nameStr } } + filter, err := NewModelFilterFromSource(source, nil, legacyExcluded) + if err != nil { + return nil, err + } + p.filter = filter + return p.Models(ctx) } diff --git a/catalog/internal/catalog/yaml_catalog_test.go b/catalog/internal/catalog/yaml_catalog_test.go index 79deda594e..3d7c0590fd 100644 --- a/catalog/internal/catalog/yaml_catalog_test.go +++ b/catalog/internal/catalog/yaml_catalog_test.go @@ -1,11 +1,15 @@ package catalog import ( + "context" "encoding/json" + "fmt" "os" "path/filepath" "slices" + "strings" "testing" + "time" model "github.com/kubeflow/model-registry/catalog/pkg/openapi" "github.com/kubeflow/model-registry/internal/apiutils" @@ -492,3 +496,152 @@ models: require.NoError(t, err, "Relative paths should work correctly") }) } + +func TestYamlModelProviderFiltersApplied(t *testing.T) { + catalogPath := writeMiniCatalog(t, []string{"Granite/alpha", "Granite/beta-release", "DeepSeek/v1"}) + + source := &Source{ + CatalogSource: model.CatalogSource{ + Id: "test", + Name: "Test source", + Labels: []string{}, + IncludedModels: []string{"Granite/*"}, + ExcludedModels: []string{"Granite/beta-*"}, + }, + } + + filter, err := NewModelFilterFromSource(source, nil, nil) + require.NoError(t, err) + + names := collectNamesWithFilter(t, catalogPath, filter) + assert.ElementsMatch(t, []string{"Granite/alpha"}, names) +} + +func TestYamlModelProviderLegacyExcludesMerged(t *testing.T) { + catalogPath := writeMiniCatalog(t, []string{"Granite/alpha", "Granite/beta-release", "DeepSeek/v1"}) + + source := &Source{ + CatalogSource: model.CatalogSource{ + Id: "test", + Name: "Test source", + Labels: []string{}, + ExcludedModels: []string{"Granite/beta-*"}, + }, + Properties: map[string]any{ + yamlCatalogPathKey: catalogPath, + excludedModelsKey: []any{"DeepSeek/v1"}, + }, + } + + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + records, err := newYamlModelProvider(ctx, source, filepath.Dir(catalogPath)) + require.NoError(t, err) + + names := collectRecordsFromChannel(t, records, 1, cancel) + assert.Equal(t, []string{"Granite/alpha"}, names) +} + +func TestYamlModelProviderInvalidPattern(t *testing.T) { + catalogPath := writeMiniCatalog(t, []string{"Granite/alpha"}) + + source := &Source{ + CatalogSource: model.CatalogSource{ + Id: "test", + Name: "Test source", + Labels: []string{}, + IncludedModels: []string{""}, + }, + Properties: map[string]any{ + yamlCatalogPathKey: catalogPath, + }, + } + + _, err := newYamlModelProvider(context.Background(), source, filepath.Dir(catalogPath)) + require.Error(t, err) + assert.Contains(t, err.Error(), "pattern cannot be empty") +} + +func writeMiniCatalog(t *testing.T, modelNames []string) string { + t.Helper() + + dir := t.TempDir() + path := filepath.Join(dir, "catalog.yaml") + + var b strings.Builder + b.WriteString("source: Test\nmodels:\n") + for _, name := range modelNames { + b.WriteString(fmt.Sprintf(" - name: %s\n", name)) + } + + err := os.WriteFile(path, []byte(b.String()), 0o644) + require.NoError(t, err) + + return path +} + +func collectNamesWithFilter(t *testing.T, catalogPath string, filter *ModelFilter) []string { + t.Helper() + + provider := &yamlModelProvider{ + path: catalogPath, + filter: filter, + } + + catalog, err := provider.read() + require.NoError(t, err) + + out := make(chan ModelProviderRecord, len(catalog.Models)) + provider.emit(context.Background(), catalog, out) + close(out) + + var names []string + for record := range out { + names = append(names, modelNameFromRecord(t, record)) + } + + return names +} + +func collectRecordsFromChannel(t *testing.T, records <-chan ModelProviderRecord, expected int, cancel context.CancelFunc) []string { + t.Helper() + + names := make([]string, 0, expected) + timeout := time.After(2 * time.Second) + + for len(names) < expected { + select { + case record, ok := <-records: + if !ok { + t.Fatalf("channel closed before receiving %d records", expected) + } + names = append(names, modelNameFromRecord(t, record)) + case <-timeout: + t.Fatalf("timed out waiting for %d records", expected) + } + } + + cancel() + + select { + case _, ok := <-records: + if ok { + t.Fatalf("received more than %d records", expected) + } + case <-time.After(500 * time.Millisecond): + t.Fatalf("channel did not close after cancellation") + } + + return names +} + +func modelNameFromRecord(t *testing.T, record ModelProviderRecord) string { + t.Helper() + + require.NotNil(t, record.Model) + attrs := record.Model.GetAttributes() + require.NotNil(t, attrs) + require.NotNil(t, attrs.Name) + return *attrs.Name +} diff --git a/catalog/pkg/openapi/model_catalog_source.go b/catalog/pkg/openapi/model_catalog_source.go index 1602f8cc71..3dcca33e77 100644 --- a/catalog/pkg/openapi/model_catalog_source.go +++ b/catalog/pkg/openapi/model_catalog_source.go @@ -27,6 +27,10 @@ type CatalogSource struct { Enabled *bool `json:"enabled,omitempty"` // Labels for the catalog source. Labels []string `json:"labels"` + // Optional allow-list of models that are eligible for this source. Entries can be exact model names or patterns that use `*` as a wildcard. When provided, only models matching at least one pattern are considered. + IncludedModels []string `json:"includedModels,omitempty"` + // Optional block-list of models that should be removed from the catalog even if they match `includedModels`. Patterns support the `*` wildcard. + ExcludedModels []string `json:"excludedModels,omitempty"` } type _CatalogSource CatalogSource @@ -159,6 +163,70 @@ func (o *CatalogSource) SetLabels(v []string) { o.Labels = v } +// GetIncludedModels returns the IncludedModels field value if set, zero value otherwise. +func (o *CatalogSource) GetIncludedModels() []string { + if o == nil || IsNil(o.IncludedModels) { + var ret []string + return ret + } + return o.IncludedModels +} + +// GetIncludedModelsOk returns a tuple with the IncludedModels field value if set, nil otherwise +// and a boolean to check if the value has been set. +func (o *CatalogSource) GetIncludedModelsOk() ([]string, bool) { + if o == nil || IsNil(o.IncludedModels) { + return nil, false + } + return o.IncludedModels, true +} + +// HasIncludedModels returns a boolean if a field has been set. +func (o *CatalogSource) HasIncludedModels() bool { + if o != nil && !IsNil(o.IncludedModels) { + return true + } + + return false +} + +// SetIncludedModels gets a reference to the given []string and assigns it to the IncludedModels field. +func (o *CatalogSource) SetIncludedModels(v []string) { + o.IncludedModels = v +} + +// GetExcludedModels returns the ExcludedModels field value if set, zero value otherwise. +func (o *CatalogSource) GetExcludedModels() []string { + if o == nil || IsNil(o.ExcludedModels) { + var ret []string + return ret + } + return o.ExcludedModels +} + +// GetExcludedModelsOk returns a tuple with the ExcludedModels field value if set, nil otherwise +// and a boolean to check if the value has been set. +func (o *CatalogSource) GetExcludedModelsOk() ([]string, bool) { + if o == nil || IsNil(o.ExcludedModels) { + return nil, false + } + return o.ExcludedModels, true +} + +// HasExcludedModels returns a boolean if a field has been set. +func (o *CatalogSource) HasExcludedModels() bool { + if o != nil && !IsNil(o.ExcludedModels) { + return true + } + + return false +} + +// SetExcludedModels gets a reference to the given []string and assigns it to the ExcludedModels field. +func (o *CatalogSource) SetExcludedModels(v []string) { + o.ExcludedModels = v +} + func (o CatalogSource) MarshalJSON() ([]byte, error) { toSerialize, err := o.ToMap() if err != nil { @@ -175,6 +243,12 @@ func (o CatalogSource) ToMap() (map[string]interface{}, error) { toSerialize["enabled"] = o.Enabled } toSerialize["labels"] = o.Labels + if !IsNil(o.IncludedModels) { + toSerialize["includedModels"] = o.IncludedModels + } + if !IsNil(o.ExcludedModels) { + toSerialize["excludedModels"] = o.ExcludedModels + } return toSerialize, nil } From cd6a61429a999f12fcac83df05f8d0be5c84712f Mon Sep 17 00:00:00 2001 From: Alessio Pragliola Date: Thu, 20 Nov 2025 11:24:33 +0100 Subject: [PATCH 2/6] chore: refactor source filter validation Signed-off-by: Alessio Pragliola --- catalog/internal/catalog/loader.go | 3 +- catalog/internal/catalog/model_filter.go | 25 ++++++ catalog/internal/catalog/model_filter_test.go | 83 ++++++++++++++++++- 3 files changed, 106 insertions(+), 5 deletions(-) diff --git a/catalog/internal/catalog/loader.go b/catalog/internal/catalog/loader.go index faca6e2a06..082b1663c5 100644 --- a/catalog/internal/catalog/loader.go +++ b/catalog/internal/catalog/loader.go @@ -194,7 +194,8 @@ func (l *Loader) updateSources(path string, config *sourceConfig) error { return fmt.Errorf("invalid source: duplicate id %s", id) } - if _, err := NewModelFilterFromSource(&source, nil, nil); err != nil { + // Validate includedModels/excludedModels patterns early + if err := ValidateSourceFilters(&source); err != nil { return fmt.Errorf("invalid source %s: %w", id, err) } diff --git a/catalog/internal/catalog/model_filter.go b/catalog/internal/catalog/model_filter.go index e84decd5e5..18ed8ba42a 100644 --- a/catalog/internal/catalog/model_filter.go +++ b/catalog/internal/catalog/model_filter.go @@ -136,6 +136,31 @@ func (f *ModelFilter) Allows(name string) bool { return true } +// ValidateSourceFilters checks that the includedModels and excludedModels patterns +// in a source are valid (non-empty, compilable, non-conflicting) without constructing +// the full ModelFilter. This is useful for early validation at configuration load time. +func ValidateSourceFilters(source *Source) error { + if source == nil { + return fmt.Errorf("source cannot be nil") + } + + // Check for conflicting patterns + if err := detectConflictingPatterns(source.IncludedModels, source.ExcludedModels); err != nil { + return fmt.Errorf("source %s: %w", source.Id, err) + } + + // Validate that all patterns compile successfully + if _, err := compilePatterns("includedModels", source.IncludedModels); err != nil { + return fmt.Errorf("source %s: %w", source.Id, err) + } + + if _, err := compilePatterns("excludedModels", source.ExcludedModels); err != nil { + return fmt.Errorf("source %s: %w", source.Id, err) + } + + return nil +} + // NewModelFilterFromSource composes a ModelFilter using the source-level configuration and any legacy additions. func NewModelFilterFromSource(source *Source, extraIncluded, extraExcluded []string) (*ModelFilter, error) { if source == nil { diff --git a/catalog/internal/catalog/model_filter_test.go b/catalog/internal/catalog/model_filter_test.go index 567b5415e0..b7dd896d2c 100644 --- a/catalog/internal/catalog/model_filter_test.go +++ b/catalog/internal/catalog/model_filter_test.go @@ -58,12 +58,87 @@ func TestModelFilterWithWildcardInMiddle(t *testing.T) { assert.False(t, filter.Allows("DeepSeek/empty-old-v1")) assert.False(t, filter.Allows("Foo/old")) assert.False(t, filter.Allows("Bar/deprecated")) - + // Test that */pattern* requires the pattern immediately after / filter2, err := NewModelFilter(nil, []string{"*/deprecated", "*/old*"}) require.NoError(t, err) - + assert.True(t, filter2.Allows("Mistral/empty-deprecated")) // doesn't match */deprecated (no immediate match after /) - assert.False(t, filter2.Allows("Foo/deprecated")) // matches */deprecated - assert.False(t, filter2.Allows("Bar/old-model")) // matches */old* + assert.False(t, filter2.Allows("Foo/deprecated")) // matches */deprecated + assert.False(t, filter2.Allows("Bar/old-model")) // matches */old* +} + +func TestValidateSourceFilters(t *testing.T) { + t.Run("valid source with no filters", func(t *testing.T) { + source := &Source{ + CatalogSource: apimodels.CatalogSource{ + Id: "test", + Name: "Test source", + }, + } + err := ValidateSourceFilters(source) + assert.NoError(t, err) + }) + + t.Run("valid source with patterns", func(t *testing.T) { + source := &Source{ + CatalogSource: apimodels.CatalogSource{ + Id: "test", + Name: "Test source", + IncludedModels: []string{"Granite/*", "Meta/*"}, + ExcludedModels: []string{"*-beta"}, + }, + } + err := ValidateSourceFilters(source) + assert.NoError(t, err) + }) + + t.Run("conflicting patterns", func(t *testing.T) { + source := &Source{ + CatalogSource: apimodels.CatalogSource{ + Id: "test", + Name: "Test source", + IncludedModels: []string{"Granite/*"}, + ExcludedModels: []string{"Granite/*"}, + }, + } + err := ValidateSourceFilters(source) + require.Error(t, err) + assert.Contains(t, err.Error(), "source test") + assert.Contains(t, err.Error(), "Granite/*") + }) + + t.Run("empty pattern in includedModels", func(t *testing.T) { + source := &Source{ + CatalogSource: apimodels.CatalogSource{ + Id: "test", + Name: "Test source", + IncludedModels: []string{"Granite/*", ""}, + }, + } + err := ValidateSourceFilters(source) + require.Error(t, err) + assert.Contains(t, err.Error(), "source test") + assert.Contains(t, err.Error(), "pattern cannot be empty") + }) + + t.Run("invalid regex pattern", func(t *testing.T) { + // This shouldn't happen with our glob-to-regex conversion, + // but validates the error path exists + source := &Source{ + CatalogSource: apimodels.CatalogSource{ + Id: "test", + Name: "Test source", + IncludedModels: []string{"valid/*"}, + }, + } + err := ValidateSourceFilters(source) + assert.NoError(t, err) // Our conversion always produces valid regex + }) + + t.Run("nil source", func(t *testing.T) { + err := ValidateSourceFilters(nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "source cannot be nil") + }) } From cb3c16e47b33811e5a09ea52b4741979f424f46e Mon Sep 17 00:00:00 2001 From: Alessio Pragliola Date: Thu, 20 Nov 2025 17:36:53 +0100 Subject: [PATCH 3/6] chore: refactor validation function Signed-off-by: Alessio Pragliola --- catalog/internal/catalog/loader.go | 2 +- catalog/internal/catalog/model_filter.go | 46 ++++++------- catalog/internal/catalog/model_filter_test.go | 66 ++++--------------- 3 files changed, 34 insertions(+), 80 deletions(-) diff --git a/catalog/internal/catalog/loader.go b/catalog/internal/catalog/loader.go index 082b1663c5..3cb5b56634 100644 --- a/catalog/internal/catalog/loader.go +++ b/catalog/internal/catalog/loader.go @@ -195,7 +195,7 @@ func (l *Loader) updateSources(path string, config *sourceConfig) error { } // Validate includedModels/excludedModels patterns early - if err := ValidateSourceFilters(&source); err != nil { + if err := ValidateSourceFilters(source.IncludedModels, source.ExcludedModels); err != nil { return fmt.Errorf("invalid source %s: %w", id, err) } diff --git a/catalog/internal/catalog/model_filter.go b/catalog/internal/catalog/model_filter.go index 18ed8ba42a..c2cb7a63af 100644 --- a/catalog/internal/catalog/model_filter.go +++ b/catalog/internal/catalog/model_filter.go @@ -62,9 +62,28 @@ func compilePatterns(field string, patterns []string) ([]*compiledPattern, error return compiled, nil } +// ValidateSourceFilters validates that the includedModels and excludedModels patterns +// are valid (non-empty, compilable, non-conflicting). This is useful for early validation +// at configuration load time without constructing the full ModelFilter. +func ValidateSourceFilters(included, excluded []string) error { + if err := detectConflictingPatterns(included, excluded); err != nil { + return err + } + + if _, err := compilePatterns("includedModels", included); err != nil { + return err + } + + if _, err := compilePatterns("excludedModels", excluded); err != nil { + return err + } + + return nil +} + // NewModelFilter builds a ModelFilter from the provided include/exclude pattern lists. func NewModelFilter(included, excluded []string) (*ModelFilter, error) { - if err := detectConflictingPatterns(included, excluded); err != nil { + if err := ValidateSourceFilters(included, excluded); err != nil { return nil, err } @@ -136,31 +155,6 @@ func (f *ModelFilter) Allows(name string) bool { return true } -// ValidateSourceFilters checks that the includedModels and excludedModels patterns -// in a source are valid (non-empty, compilable, non-conflicting) without constructing -// the full ModelFilter. This is useful for early validation at configuration load time. -func ValidateSourceFilters(source *Source) error { - if source == nil { - return fmt.Errorf("source cannot be nil") - } - - // Check for conflicting patterns - if err := detectConflictingPatterns(source.IncludedModels, source.ExcludedModels); err != nil { - return fmt.Errorf("source %s: %w", source.Id, err) - } - - // Validate that all patterns compile successfully - if _, err := compilePatterns("includedModels", source.IncludedModels); err != nil { - return fmt.Errorf("source %s: %w", source.Id, err) - } - - if _, err := compilePatterns("excludedModels", source.ExcludedModels); err != nil { - return fmt.Errorf("source %s: %w", source.Id, err) - } - - return nil -} - // NewModelFilterFromSource composes a ModelFilter using the source-level configuration and any legacy additions. func NewModelFilterFromSource(source *Source, extraIncluded, extraExcluded []string) (*ModelFilter, error) { if source == nil { diff --git a/catalog/internal/catalog/model_filter_test.go b/catalog/internal/catalog/model_filter_test.go index b7dd896d2c..deab0b6770 100644 --- a/catalog/internal/catalog/model_filter_test.go +++ b/catalog/internal/catalog/model_filter_test.go @@ -69,76 +69,36 @@ func TestModelFilterWithWildcardInMiddle(t *testing.T) { } func TestValidateSourceFilters(t *testing.T) { - t.Run("valid source with no filters", func(t *testing.T) { - source := &Source{ - CatalogSource: apimodels.CatalogSource{ - Id: "test", - Name: "Test source", - }, - } - err := ValidateSourceFilters(source) + t.Run("no filters", func(t *testing.T) { + err := ValidateSourceFilters(nil, nil) assert.NoError(t, err) }) - t.Run("valid source with patterns", func(t *testing.T) { - source := &Source{ - CatalogSource: apimodels.CatalogSource{ - Id: "test", - Name: "Test source", - IncludedModels: []string{"Granite/*", "Meta/*"}, - ExcludedModels: []string{"*-beta"}, - }, - } - err := ValidateSourceFilters(source) + t.Run("valid patterns", func(t *testing.T) { + err := ValidateSourceFilters([]string{"Granite/*", "Meta/*"}, []string{"*-beta"}) assert.NoError(t, err) }) t.Run("conflicting patterns", func(t *testing.T) { - source := &Source{ - CatalogSource: apimodels.CatalogSource{ - Id: "test", - Name: "Test source", - IncludedModels: []string{"Granite/*"}, - ExcludedModels: []string{"Granite/*"}, - }, - } - err := ValidateSourceFilters(source) + err := ValidateSourceFilters([]string{"Granite/*"}, []string{"Granite/*"}) require.Error(t, err) - assert.Contains(t, err.Error(), "source test") assert.Contains(t, err.Error(), "Granite/*") }) t.Run("empty pattern in includedModels", func(t *testing.T) { - source := &Source{ - CatalogSource: apimodels.CatalogSource{ - Id: "test", - Name: "Test source", - IncludedModels: []string{"Granite/*", ""}, - }, - } - err := ValidateSourceFilters(source) + err := ValidateSourceFilters([]string{"Granite/*", ""}, nil) require.Error(t, err) - assert.Contains(t, err.Error(), "source test") assert.Contains(t, err.Error(), "pattern cannot be empty") }) - t.Run("invalid regex pattern", func(t *testing.T) { - // This shouldn't happen with our glob-to-regex conversion, - // but validates the error path exists - source := &Source{ - CatalogSource: apimodels.CatalogSource{ - Id: "test", - Name: "Test source", - IncludedModels: []string{"valid/*"}, - }, - } - err := ValidateSourceFilters(source) - assert.NoError(t, err) // Our conversion always produces valid regex + t.Run("whitespace-only pattern", func(t *testing.T) { + err := ValidateSourceFilters([]string{" "}, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "pattern cannot be empty") }) - t.Run("nil source", func(t *testing.T) { - err := ValidateSourceFilters(nil) - require.Error(t, err) - assert.Contains(t, err.Error(), "source cannot be nil") + t.Run("valid glob patterns", func(t *testing.T) { + err := ValidateSourceFilters([]string{"valid/*"}, nil) + assert.NoError(t, err) // Our conversion always produces valid regex }) } From e6be598d98eef5c4a716d668fd21f394f5456989 Mon Sep 17 00:00:00 2001 From: Alessio Pragliola <83355398+Al-Pragliola@users.noreply.github.com> Date: Thu, 20 Nov 2025 20:20:38 +0100 Subject: [PATCH 4/6] feat: apply suggestions from code review Co-authored-by: Paul Boyd Signed-off-by: Alessio Pragliola <83355398+Al-Pragliola@users.noreply.github.com> --- catalog/internal/catalog/model_filter.go | 2 +- catalog/internal/catalog/yaml_catalog.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/catalog/internal/catalog/model_filter.go b/catalog/internal/catalog/model_filter.go index c2cb7a63af..ba485e7474 100644 --- a/catalog/internal/catalog/model_filter.go +++ b/catalog/internal/catalog/model_filter.go @@ -25,7 +25,7 @@ func newCompiledPattern(field string, idx int, raw string) (*compiledPattern, er // Convert a simple glob (only supporting '*') into a regexp. var b strings.Builder - b.WriteString("^") + b.WriteString("(?i)^") for _, r := range value { if r == '*' { b.WriteString(".*") diff --git a/catalog/internal/catalog/yaml_catalog.go b/catalog/internal/catalog/yaml_catalog.go index 3f7f781bb0..cf1da5223b 100644 --- a/catalog/internal/catalog/yaml_catalog.go +++ b/catalog/internal/catalog/yaml_catalog.go @@ -364,7 +364,7 @@ func (p *yamlModelProvider) read() (*yamlCatalog, error) { func (p *yamlModelProvider) emit(ctx context.Context, catalog *yamlCatalog, out chan<- ModelProviderRecord) { done := ctx.Done() for _, model := range catalog.Models { - if p.filter != nil && !p.filter.Allows(model.Name) { + if !p.filter.Allows(model.Name) { continue } From 3958a5b84a54a11b73ee2b6c8a0228770a6c80e5 Mon Sep 17 00:00:00 2001 From: Alessio Pragliola Date: Thu, 20 Nov 2025 20:22:28 +0100 Subject: [PATCH 5/6] chore: add case insensitive test Signed-off-by: Alessio Pragliola --- catalog/internal/catalog/model_filter_test.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/catalog/internal/catalog/model_filter_test.go b/catalog/internal/catalog/model_filter_test.go index deab0b6770..d3d32e9205 100644 --- a/catalog/internal/catalog/model_filter_test.go +++ b/catalog/internal/catalog/model_filter_test.go @@ -16,6 +16,11 @@ func TestModelFilterAllows(t *testing.T) { assert.False(t, filter.Allows("Granite/beta-release")) assert.False(t, filter.Allows("Other/model")) + // Test case-insensitive matching + assert.True(t, filter.Allows("granite/3-1-instruct")) + assert.True(t, filter.Allows("GRANITE/3-1-instruct")) + assert.False(t, filter.Allows("granite/beta-release")) + allowAll, err := NewModelFilter([]string{"*"}, nil) require.NoError(t, err) assert.True(t, allowAll.Allows("anything/goes")) From c049854306a645bde670251e03ae179cd5b780c5 Mon Sep 17 00:00:00 2001 From: Alessio Pragliola Date: Thu, 20 Nov 2025 20:37:30 +0100 Subject: [PATCH 6/6] feat: add case-insensitive description Signed-off-by: Alessio Pragliola --- api/openapi/catalog.yaml | 6 ++++++ api/openapi/src/catalog.yaml | 6 ++++++ catalog/pkg/openapi/model_catalog_source.go | 4 ++-- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/api/openapi/catalog.yaml b/api/openapi/catalog.yaml index d6df4d3777..6479d1012f 100644 --- a/api/openapi/catalog.yaml +++ b/api/openapi/catalog.yaml @@ -507,6 +507,9 @@ components: Optional allow-list of models that are eligible for this source. Entries can be exact model names or patterns that use `*` as a wildcard. When provided, only models matching at least one pattern are considered. + + Pattern matching is case-insensitive, so `Granite/*` will match `granite/model`, + `Granite/model`, and `GRANITE/model`. type: array items: type: string @@ -514,6 +517,9 @@ components: description: |- Optional block-list of models that should be removed from the catalog even if they match `includedModels`. Patterns support the `*` wildcard. + + Pattern matching is case-insensitive, so `*-beta` will match `Model-Beta`, + `model-beta`, and `MODEL-BETA`. type: array items: type: string diff --git a/api/openapi/src/catalog.yaml b/api/openapi/src/catalog.yaml index a1098ace3d..6b38af4a9d 100644 --- a/api/openapi/src/catalog.yaml +++ b/api/openapi/src/catalog.yaml @@ -390,6 +390,9 @@ components: Optional allow-list of models that are eligible for this source. Entries can be exact model names or patterns that use `*` as a wildcard. When provided, only models matching at least one pattern are considered. + + Pattern matching is case-insensitive, so `Granite/*` will match `granite/model`, + `Granite/model`, and `GRANITE/model`. type: array items: type: string @@ -397,6 +400,9 @@ components: description: |- Optional block-list of models that should be removed from the catalog even if they match `includedModels`. Patterns support the `*` wildcard. + + Pattern matching is case-insensitive, so `*-beta` will match `Model-Beta`, + `model-beta`, and `MODEL-BETA`. type: array items: type: string diff --git a/catalog/pkg/openapi/model_catalog_source.go b/catalog/pkg/openapi/model_catalog_source.go index 3dcca33e77..2e35fc98db 100644 --- a/catalog/pkg/openapi/model_catalog_source.go +++ b/catalog/pkg/openapi/model_catalog_source.go @@ -27,9 +27,9 @@ type CatalogSource struct { Enabled *bool `json:"enabled,omitempty"` // Labels for the catalog source. Labels []string `json:"labels"` - // Optional allow-list of models that are eligible for this source. Entries can be exact model names or patterns that use `*` as a wildcard. When provided, only models matching at least one pattern are considered. + // Optional allow-list of models that are eligible for this source. Entries can be exact model names or patterns that use `*` as a wildcard. When provided, only models matching at least one pattern are considered. Pattern matching is case-insensitive, so `Granite/_*` will match `granite/model`, `Granite/model`, and `GRANITE/model`. IncludedModels []string `json:"includedModels,omitempty"` - // Optional block-list of models that should be removed from the catalog even if they match `includedModels`. Patterns support the `*` wildcard. + // Optional block-list of models that should be removed from the catalog even if they match `includedModels`. Patterns support the `*` wildcard. Pattern matching is case-insensitive, so `*-beta` will match `Model-Beta`, `model-beta`, and `MODEL-BETA`. ExcludedModels []string `json:"excludedModels,omitempty"` }