diff --git a/catalog/README.md b/catalog/README.md index 3864a0344b..6041247c85 100644 --- a/catalog/README.md +++ b/catalog/README.md @@ -320,33 +320,113 @@ catalogs: enabled: true # Required: List of model identifiers to include # Format: "organization/model-name" or "username/model-name" + # Supports wildcard patterns: "organization/*" or "organization/prefix*" includedModels: - "meta-llama/Llama-3.1-8B-Instruct" - - "ibm-granite/granite-4.0-h-small" - "microsoft/phi-2" - + - "microsoft/phi-3*" # All models starting with "phi-3" + # Optional: Exclude specific models or patterns # Supports exact matches or patterns ending with "*" excludedModels: - "some-org/unwanted-model" - "another-org/test-*" # Excludes all models starting with "test-" - + # Optional: Configure a custom environment variable name for the API key # Defaults to "HF_API_KEY" if not specified properties: apiKeyEnvVar: "MY_CUSTOM_API_KEY_VAR" ``` +#### Organization-Restricted Sources + +You can restrict a source to only fetch models from a specific organization using the `allowedOrganization` property. 
This automatically prefixes all model patterns with the organization name: + +```yaml +catalogs: + - name: "Meta LLaMA Models" + id: "meta-llama-models" + type: "hf" + enabled: true + properties: + allowedOrganization: "meta-llama" + apiKeyEnvVar: "HF_API_KEY" + includedModels: + # These patterns are automatically prefixed with "meta-llama/" + - "*" # Expands to: meta-llama/* + - "Llama-3*" # Expands to: meta-llama/Llama-3* + - "CodeLlama-*" # Expands to: meta-llama/CodeLlama-* + excludedModels: + - "*-4bit" # Excludes: meta-llama/*-4bit + - "*-GGUF" # Excludes: meta-llama/*-GGUF +``` + +**Benefits of organization-restricted sources:** +- **Simplified configuration**: No need to repeat organization name in every pattern +- **Security**: Prevents accidental inclusion of models from other organizations +- **Convenience**: Use `"*"` to get all models from an organization +- **Performance**: Optimized API calls when fetching from a single organization + #### Model Filtering Both `includedModels` and `excludedModels` are top-level properties (not nested under `properties`): -- **`includedModels`** (required): List of model identifiers to fetch from Hugging Face. Format: `"organization/model-name"` or `"username/model-name"` +- **`includedModels`** (required): List of model identifiers to fetch from Hugging Face - **`excludedModels`** (optional): List of models or patterns to exclude from the results -The `excludedModels` property supports: +#### Supported Pattern Types + +**Exact Model Names:** +```yaml +includedModels: + - "meta-llama/Llama-3.1-8B-Instruct" # Specific model + - "microsoft/phi-2" # Specific model +``` + +**Wildcard Patterns:** + +In `includedModels`, wildcards can match model names by a prefix. 
+ +```yaml +includedModels: + - "microsoft/phi-*" # All models starting with "phi-" + - "meta-llama/Llama-3*" # All models starting with "Llama-3" + - "huggingface/*" # All models from huggingface organization +``` + +**Organization-Only Patterns (with `allowedOrganization`):** +```yaml +properties: + allowedOrganization: "meta-llama" +includedModels: + - "*" # All models from meta-llama organization + - "Llama-3*" # All meta-llama models starting with "Llama-3" + - "CodeLlama-*" # All meta-llama models starting with "CodeLlama-" +``` + +#### Pattern Validation + +**Valid patterns:** +- `"org/model"` - Exact model name +- `"org/prefix*"` - Models starting with prefix +- `"org/*"` - All models from organization +- `"*"` - All models (only when using `allowedOrganization`) + +**Invalid patterns (will be rejected):** +- `"*"` - Global wildcard (without `allowedOrganization`) +- `"*/*"` - Global organization wildcard +- `"org*"` - Wildcard in organization name +- `"org/"` - Empty model name +- `"*prefix*"` - Multiple wildcards + +#### Exclusion Patterns + +The `excludedModels` property supports prefixes like `includedModels` and also suffixes and mid-name wildcards: - **Exact matches**: `"meta-llama/Llama-3.1-8B-Instruct"` - excludes this specific model -- **Pattern matching**: `"test-*"` - excludes all models starting with "test-" +- **Pattern matching**: + - `"*-draft"` - excludes all models ending with "-draft" + - `"Llama-3.*-Instruct"` - excludes all Llama 3.x models ending with "-Instruct" +- **Organization patterns**: `"test-org/*"` - excludes all models from test-org ## Development diff --git a/catalog/internal/catalog/hf_catalog.go b/catalog/internal/catalog/hf_catalog.go index fb55115995..1f0713c7ad 100644 --- a/catalog/internal/catalog/hf_catalog.go +++ b/catalog/internal/catalog/hf_catalog.go @@ -26,6 +26,7 @@ const ( apiKeyEnvVarKey = "apiKeyEnvVar" maxModelsKey = "maxModels" syncIntervalKey = "syncInterval" + allowedOrgKey = "allowedOrganization" // 
defaultMaxModels is the default limit for models fetched PER PATTERN. // This limit is applied independently to each pattern in includedModels @@ -360,13 +361,72 @@ func (p *hfModelProvider) Models(ctx context.Context) (<-chan ModelProviderRecor return ch, nil } +// expandModelNames takes a list of model identifiers (which may include wildcards) +// and returns a list of concrete model names by expanding any wildcard patterns. +// Uses the same logic as FetchModelNamesForPreview. +func (p *hfModelProvider) expandModelNames(ctx context.Context, modelIdentifiers []string) ([]string, error) { + var allNames []string + var failedPatterns []string + var wildcardPatterns []string + + for _, pattern := range modelIdentifiers { + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + + patternType, org, searchPrefix := parseModelPattern(pattern) + + switch patternType { + case PatternInvalid: + return nil, fmt.Errorf("wildcard pattern %q is not supported - Hugging Face requires a specific organization (e.g., 'ibm-granite/*' or 'meta-llama/Llama-2-*')", pattern) + + case PatternOrgAll, PatternOrgPrefix: + wildcardPatterns = append(wildcardPatterns, pattern) + glog.Infof("Expanding wildcard pattern: %s (org=%s, prefix=%s)", pattern, org, searchPrefix) + models, err := p.listModelsByAuthor(ctx, org, searchPrefix) + if err != nil { + failedPatterns = append(failedPatterns, pattern) + glog.Warningf("Failed to expand wildcard pattern %s: %v", pattern, err) + continue + } + allNames = append(allNames, models...) 
+ + case PatternExact: + // Direct model name - no expansion needed + allNames = append(allNames, pattern) + } + } + + // Check error conditions for wildcard pattern failures + if len(wildcardPatterns) > 0 && len(allNames) == 0 { + // All wildcard patterns failed AND no results from exact patterns - this is an error + if len(failedPatterns) > 0 { + return nil, fmt.Errorf("no models found: %v", failedPatterns) + } else { + return nil, fmt.Errorf("no models found") + } + } else if len(failedPatterns) > 0 { + // Some patterns failed but we have results - log warning and continue with partial results + glog.Warningf("Some wildcard patterns failed to expand and were skipped: %v", failedPatterns) + } + + return allNames, nil +} + func (p *hfModelProvider) getModelsFromHF(ctx context.Context) ([]ModelProviderRecord, error) { - var records []ModelProviderRecord + // First expand any wildcard patterns to concrete model names + expandedModels, err := p.expandModelNames(ctx, p.includedModels) + if err != nil { + return nil, fmt.Errorf("failed to expand model patterns: %w", err) + } + var records []ModelProviderRecord currentTime := time.Now().UnixMilli() lastSyncedStr := strconv.FormatInt(currentTime, 10) - for _, modelName := range p.includedModels { + for _, modelName := range expandedModels { // Skip if excluded - check before fetching to avoid unnecessary API calls if !p.filter.Allows(modelName) { glog.V(2).Infof("Skipping excluded model: %s", modelName) @@ -726,6 +786,9 @@ func newHFModelProvider(ctx context.Context, source *Source, reldir string) (<-c p.baseURL = strings.TrimSuffix(url, "/") } + allowedOrg, _ := source.Properties[allowedOrgKey].(string) + restrictToOrg(allowedOrg, &source.IncludedModels, &source.ExcludedModels) + // Parse sync interval (optional, defaults to 24 hours) // This can be configured as a duration string (e.g., "1s", "10s", "1m", "24h"). // For testing, a shorter interval can be used to speed up tests. 
// restrictToOrg rewrites the included and excluded model lists in place so
// that every entry is scoped to the organization org, i.e. starts with
// "<org>/". This lets a source list bare patterns such as "*" or "Llama-3*"
// and prevents entries from resolving to any other organization.
//
// Behavior:
//   - If org is empty, nothing is changed.
//   - If included points to an empty (or nil) slice, it is replaced with the
//     single pattern "<org>/*", meaning every model of the organization.
//   - Otherwise each entry of *included, and each entry of *excluded, is
//     prefixed with "<org>/".
//   - Entries that already start with "<org>/" are left untouched, so calling
//     the function again on the same lists (or writing the organization
//     explicitly in the configuration) does not produce "<org>/<org>/...".
//   - A nil included or excluded pointer is ignored.
//
// Existing slice elements are overwritten in place, so the caller's backing
// array is modified.
func restrictToOrg(org string, included *[]string, excluded *[]string) {
	if org == "" {
		// No organization restriction configured.
		return
	}

	prefix := org + "/"

	// scope prefixes every pattern that is not already scoped to org.
	scope := func(patterns []string) {
		for i, pattern := range patterns {
			if !strings.HasPrefix(pattern, prefix) {
				patterns[i] = prefix + pattern
			}
		}
	}

	if included != nil {
		if len(*included) == 0 {
			// Default to every model from the organization.
			*included = []string{prefix + "*"}
		} else {
			scope(*included)
		}
	}

	if excluded != nil {
		scope(*excluded)
	}
}
// Limit to 50 models }, + IncludedModels: []string{"test-org/*"}, } provider, err := NewHFPreviewProvider(config) @@ -791,6 +801,7 @@ func TestListModelsByAuthor(t *testing.T) { Properties: map[string]any{ "url": server.URL, }, + IncludedModels: []string{"test-org/*"}, } provider, err := NewHFPreviewProvider(config) @@ -808,6 +819,7 @@ func TestListModelsByAuthor(t *testing.T) { "url": server.URL, "maxModels": 0, // No limit }, + IncludedModels: []string{"test-org/*"}, } provider, err := NewHFPreviewProvider(config) @@ -1144,3 +1156,251 @@ func TestConvertHFModelToRecord_ArtifactTimestamps(t *testing.T) { assert.Equal(t, *record.Model.GetAttributes().CreateTimeSinceEpoch, *attrs.CreateTimeSinceEpoch) assert.Equal(t, *record.Model.GetAttributes().LastUpdateTimeSinceEpoch, *attrs.LastUpdateTimeSinceEpoch) } + +func TestHfModelProvider_Models_WithWildcardPattern(t *testing.T) { + // Mock server that handles both list API and individual model API + mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case strings.Contains(r.URL.RawQuery, "author=test-org"): + // Mock response for list API (used by listModelsByAuthor) + // HF API returns an array of models directly + models := []map[string]interface{}{ + {"id": "test-org/model-1", "author": "test-org"}, + {"id": "test-org/model-2", "author": "test-org"}, + } + json.NewEncoder(w).Encode(models) + case strings.HasPrefix(r.URL.Path, "/api/models/test-org/model-"): + // Mock response for individual model API (used by fetchModelInfo) + modelInfo := hfModelInfo{ + ID: strings.TrimPrefix(r.URL.Path, "/api/models/"), + Author: "test-org", + } + json.NewEncoder(w).Encode(modelInfo) + default: + http.Error(w, "Not found", http.StatusNotFound) + } + })) + defer mockServer.Close() + + filter, err := NewModelFilter([]string{}, []string{}) + require.NoError(t, err) + + provider := &hfModelProvider{ + baseURL: mockServer.URL, + includedModels: []string{"test-org/*"}, + filter: 
filter, + client: &http.Client{}, + syncInterval: 24 * time.Hour, + } + + // Use a context with cancel to stop the goroutine after receiving records + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + ch, err := provider.Models(ctx) + assert.NoError(t, err) + + var records []ModelProviderRecord + for record := range ch { + // Skip empty records (batch markers) + if record.Model == nil { + // After receiving the empty record marker, we're done with this batch + cancel() + continue + } + records = append(records, record) + } + + // Should have expanded wildcard to 2 concrete models + assert.Len(t, records, 2) + assert.Contains(t, getModelNames(records), "test-org/model-1") + assert.Contains(t, getModelNames(records), "test-org/model-2") +} + +func getModelNames(records []ModelProviderRecord) []string { + var names []string + for _, record := range records { + if record.Model.GetAttributes() != nil && record.Model.GetAttributes().Name != nil { + names = append(names, *record.Model.GetAttributes().Name) + } + } + return names +} + +func TestHfModelProvider_Models_WithMixedPatterns(t *testing.T) { + mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case strings.Contains(r.URL.RawQuery, "author=test-org"): + models := []map[string]interface{}{ + {"id": "test-org/wildcard-model", "author": "test-org"}, + } + json.NewEncoder(w).Encode(models) + case strings.Contains(r.URL.Path, "/api/models/exact-model"): + modelInfo := hfModelInfo{ + ID: "exact-model", + Author: "exact-author", + } + json.NewEncoder(w).Encode(modelInfo) + case strings.Contains(r.URL.Path, "/api/models/test-org/wildcard-model"): + modelInfo := hfModelInfo{ + ID: "test-org/wildcard-model", + Author: "test-org", + } + json.NewEncoder(w).Encode(modelInfo) + default: + http.Error(w, "Not found", http.StatusNotFound) + } + })) + defer mockServer.Close() + + filter, err := NewModelFilter([]string{}, []string{}) + 
require.NoError(t, err) + + provider := &hfModelProvider{ + baseURL: mockServer.URL, + includedModels: []string{"exact-model", "test-org/*"}, + filter: filter, + client: &http.Client{}, + syncInterval: 24 * time.Hour, + } + + // Use a context with cancel to stop the goroutine after receiving records + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + ch, err := provider.Models(ctx) + assert.NoError(t, err) + + var records []ModelProviderRecord + for record := range ch { + // Skip empty records (batch markers) + if record.Model == nil { + // After receiving the empty record marker, we're done with this batch + cancel() + continue + } + records = append(records, record) + } + + // Should have both exact model and wildcard-expanded model + assert.Len(t, records, 2) + modelNames := getModelNames(records) + assert.Contains(t, modelNames, "exact-model") + assert.Contains(t, modelNames, "test-org/wildcard-model") +} + +func TestHfModelProvider_Models_WithInvalidWildcardPattern(t *testing.T) { + filter, err := NewModelFilter([]string{}, []string{}) + require.NoError(t, err) + + provider := &hfModelProvider{ + baseURL: "https://huggingface.co", + includedModels: []string{"*"}, // Invalid global wildcard + filter: filter, + client: &http.Client{}, + } + + ch, err := provider.Models(context.Background()) + + // Should return error for invalid wildcard pattern + assert.Error(t, err) + assert.Contains(t, err.Error(), "wildcard pattern \"*\" is not supported") + assert.Nil(t, ch) +} + +func TestHfModelProvider_expandModelNames_AllWildcardPatternsFail(t *testing.T) { + // Test that when ALL wildcard patterns fail, an error is returned + mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Return 401 for unauthorized requests + w.WriteHeader(http.StatusUnauthorized) + w.Write([]byte(`{"error": "Invalid API key"}`)) + })) + defer mockServer.Close() + + filter, err := NewModelFilter([]string{}, []string{}) + 
require.NoError(t, err) + + provider := &hfModelProvider{ + baseURL: mockServer.URL, + includedModels: []string{"test-org/*"}, + filter: filter, + client: &http.Client{}, + apiKey: "invalid-key", + } + + models, err := provider.expandModelNames(context.Background(), []string{"test-org/*"}) + + // Should return an error when all wildcard patterns fail (improved error reporting) + assert.Error(t, err) + assert.Contains(t, err.Error(), "no models found") + assert.Empty(t, models) +} + +func TestHfModelProvider_expandModelNames_PartialWildcardFailure(t *testing.T) { + // Test that when some but not all wildcard patterns fail, operation continues with partial results + mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case strings.Contains(r.URL.RawQuery, "author=good-org"): + // Mock successful response for good-org + models := []map[string]interface{}{ + {"id": "good-org/model-1", "author": "good-org"}, + } + json.NewEncoder(w).Encode(models) + case strings.Contains(r.URL.RawQuery, "author=bad-org"): + // Mock failure for bad-org (unauthorized) + w.WriteHeader(http.StatusUnauthorized) + w.Write([]byte(`{"error": "Unauthorized"}`)) + default: + http.Error(w, "Not found", http.StatusNotFound) + } + })) + defer mockServer.Close() + + filter, err := NewModelFilter([]string{}, []string{}) + require.NoError(t, err) + + provider := &hfModelProvider{ + baseURL: mockServer.URL, + filter: filter, + client: &http.Client{}, + } + + // Test with mixed patterns: one succeeds, one fails + patterns := []string{"good-org/*", "bad-org/*"} + models, err := provider.expandModelNames(context.Background(), patterns) + + // Should NOT return error when only some patterns fail + assert.NoError(t, err) + // Should return partial results from successful patterns + assert.Len(t, models, 1) + assert.Equal(t, "good-org/model-1", models[0]) +} + +func TestHfModelProvider_expandModelNames_ExactPatternsWithWildcardFailures(t *testing.T) { + 
// Test that when wildcard patterns fail but exact patterns exist, operation continues + mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // All wildcard requests fail + w.WriteHeader(http.StatusUnauthorized) + w.Write([]byte(`{"error": "Unauthorized"}`)) + })) + defer mockServer.Close() + + filter, err := NewModelFilter([]string{}, []string{}) + require.NoError(t, err) + + provider := &hfModelProvider{ + baseURL: mockServer.URL, + filter: filter, + client: &http.Client{}, + } + + // Test with mixed patterns: exact models and wildcards + patterns := []string{"exact-org/exact-model", "wildcard-org/*"} + models, err := provider.expandModelNames(context.Background(), patterns) + + // Should NOT return error when exact patterns exist even if wildcards fail + assert.NoError(t, err) + // Should return the exact patterns that don't require API calls + assert.Len(t, models, 1) + assert.Equal(t, "exact-org/exact-model", models[0]) +} diff --git a/catalog/internal/catalog/preview.go b/catalog/internal/catalog/preview.go index 397f11e3b1..fcf2388ff2 100644 --- a/catalog/internal/catalog/preview.go +++ b/catalog/internal/catalog/preview.go @@ -44,18 +44,18 @@ func ParsePreviewConfig(configBytes []byte) (*PreviewConfig, error) { // preview results showing which models would be included or excluded. // If catalogDataBytes is provided, it will be used directly instead of reading from yamlCatalogPath. 
func PreviewSourceModels(ctx context.Context, config *PreviewConfig, catalogDataBytes []byte) ([]model.ModelPreviewResult, error) { - // Create a ModelFilter from the config - filter, err := NewModelFilter(config.IncludedModels, config.ExcludedModels) - if err != nil { - return nil, fmt.Errorf("invalid filter configuration: %w", err) - } - // Load all model names from the source (without filtering) modelNames, err := loadModelNamesFromSource(ctx, config, catalogDataBytes) if err != nil { return nil, err } + // Create a ModelFilter from the config + filter, err := NewModelFilter(config.IncludedModels, config.ExcludedModels) + if err != nil { + return nil, fmt.Errorf("invalid filter configuration: %w", err) + } + // Create preview results for each model results := make([]model.ModelPreviewResult, 0, len(modelNames)) for _, name := range modelNames { @@ -86,10 +86,6 @@ func loadModelNamesFromSource(ctx context.Context, config *PreviewConfig, catalo // For HF sources, includedModels specifies which models to fetch from HuggingFace. // This function calls the HF API to validate models exist and get their actual names. func loadHFModelNames(ctx context.Context, config *PreviewConfig) ([]string, error) { - if len(config.IncludedModels) == 0 { - return nil, fmt.Errorf("includedModels is required for HuggingFace source preview (specifies which models to fetch from HuggingFace)") - } - // SECURITY: Override the URL property to prevent SSRF attacks. // An attacker could otherwise set a custom URL to leak the HF API key // to an attacker-controlled domain. 
diff --git a/manifests/kustomize/options/catalog/base/hf-sources-example.yaml b/manifests/kustomize/options/catalog/base/hf-sources-example.yaml index ca3eadce1c..dc3fb7af1e 100644 --- a/manifests/kustomize/options/catalog/base/hf-sources-example.yaml +++ b/manifests/kustomize/options/catalog/base/hf-sources-example.yaml @@ -5,8 +5,9 @@ catalogs: enabled: true properties: yamlCatalogPath: sample-catalog.yaml - - name: Hugging Face Hub - id: huggingface + # Basic Hugging Face Hub configuration with specific models + - name: Hugging Face Hub - Specific Models + id: hf-specific type: hf enabled: true properties: @@ -15,9 +16,60 @@ catalogs: # exposed as an environment variable in the pod/deployment configuration. # Default environment variable name is "HF_API_KEY" apiKeyEnvVar: "HF_API_KEY" - includedModels: + includedModels: - "meta-llama/Llama-3.2-1B" - - "ibm-granite/granite-4.0-h-small" - "microsoft/phi-2" + - "huggingface/CodeBERTa-small-v1" excludedModels: - - "some-org/*" \ No newline at end of file + - "some-org/*" + + # Example using allowedOrganization to restrict to Meta LLaMA models + # This automatically prefixes model patterns with "meta-llama/" + - name: Meta LLaMA Models Only + id: hf-meta-llama + type: hf + enabled: false # disabled by default + properties: + apiKeyEnvVar: "HF_API_KEY" + allowedOrganization: "meta-llama" + includedModels: + # These patterns will be automatically prefixed with "meta-llama/" + - "*" # Expands to all models: meta-llama/* + - "Llama-3*" # Expands to: meta-llama/Llama-3* + - "CodeLlama-*" # Expands to: meta-llama/CodeLlama-* + excludedModels: + - "*-4bit" # Excludes: meta-llama/*-4bit + - "*-GGUF" # Excludes: meta-llama/*-GGUF + + # Example using wildcard patterns without organization restriction + - name: Microsoft Models + id: hf-microsoft + type: hf + enabled: false # disabled by default + properties: + apiKeyEnvVar: "HF_API_KEY" + includedModels: + # Full wildcard patterns (organization/model) + - "microsoft/phi-*" # 
All phi variants + - "microsoft/DialoGPT-*" # All DialoGPT variants + excludedModels: + - "*-base" # Exclude base models + - "*-uncensored" # Exclude uncensored variants + + # Example combining specific models with wildcard patterns + - name: Mixed Model Selection + id: hf-mixed + type: hf + enabled: false # disabled by default + properties: + apiKeyEnvVar: "HF_API_KEY" + includedModels: + # Specific models + - "huggingface/CodeBERTa-small-v1" + - "microsoft/DialoGPT-medium" + # Wildcard patterns + - "microsoft/phi-3*" # All phi-3 variants + - "huggingface/CodeBERTa-*" # All CodeBERTa variants + excludedModels: + - "*-base" # Exclude base models + - "*-draft" # Exclude draft models \ No newline at end of file