feat: add hf maxModels safeguard

Al-Pragliola · Al-Pragliola · commit f33d223f8289 · 2025-12-05T16:45:06.000+01:00
Signed-off-by: Alessio Pragliola <seth.pro@gmail.com>
diff --git a/catalog/internal/catalog/hf_catalog.go b/catalog/internal/catalog/hf_catalog.go
@@ -24,6 +24,16 @@ const (
 	defaultAPIKeyEnvVar   = "HF_API_KEY"
 	urlKey                = "url"
 	apiKeyEnvVarKey       = "apiKeyEnvVar"
+	maxModelsKey          = "maxModels"
+
+	// defaultMaxModels is the default limit for models fetched PER PATTERN.
+	// This limit is applied independently to each pattern in includedModels
+	// (e.g., "ibm-granite/*", "meta-llama/*") to prevent overloading the
+	// HuggingFace API with too many requests and to respect rate limiting.
+	//
+	// Example: with maxModels=100 and 3 patterns, up to 300 models total may be fetched.
+	// Set the maxModels property to 0 to disable the limit (not recommended for large organizations).
+	defaultMaxModels = 500
 )
 
 // gatedString is a custom type that can unmarshal both boolean and string values from JSON
@@ -71,6 +81,10 @@ type hfModelProvider struct {
 	baseURL        string
 	includedModels []string
 	filter         *ModelFilter
+	// maxModels limits how many models to fetch PER PATTERN (e.g., per "org/*").
+	// This is applied independently to each pattern to respect HuggingFace API rate limits.
+	// A value of 0 means no limit.
+	maxModels int
 }
 
 // hfModelInfo represents the structure of HuggingFace API model information
@@ -665,8 +679,9 @@ func init() {
 // It initializes the provider from a PreviewConfig without starting the full model loading.
 func NewHFPreviewProvider(config *PreviewConfig) (*hfModelProvider, error) {
 	p := &hfModelProvider{
-		client:  &http.Client{Timeout: 30 * time.Second},
-		baseURL: defaultHuggingFaceURL,
+		client:    &http.Client{Timeout: 30 * time.Second},
+		baseURL:   defaultHuggingFaceURL,
+		maxModels: defaultMaxModels,
 	}
 
 	// Parse API key from environment variable
@@ -685,6 +700,21 @@ func NewHFPreviewProvider(config *PreviewConfig) (*hfModelProvider, error) {
 		p.baseURL = strings.TrimSuffix(url, "/")
 	}
 
+	// Parse maxModels limit (optional, defaults to 500)
+	// This limit is applied PER PATTERN (e.g., each "org/*" pattern gets its own limit)
+	// to prevent overloading the HuggingFace API and respect rate limiting.
+	// Set to 0 to disable the limit.
+	if maxModels, ok := config.Properties[maxModelsKey]; ok {
+		switch v := maxModels.(type) {
+		case int:
+			p.maxModels = v
+		case int64:
+			p.maxModels = int(v)
+		case float64:
+			p.maxModels = int(v)
+		}
+	}
+
 	return p, nil
 }
 
@@ -767,6 +797,13 @@ func (p *hfModelProvider) listModelsByAuthor(ctx context.Context, author string,
 		default:
 		}
 
+		// Check if we've reached the maxModels limit for this pattern
+		// (maxModels is applied per-pattern to respect HF API rate limits)
+		if p.maxModels > 0 && len(allModels) >= p.maxModels {
+			glog.Warningf("Reached maxModels limit (%d) for pattern author=%s, stopping pagination", p.maxModels, author)
+			break
+		}
+
 		// Build API URL
 		apiURL := fmt.Sprintf("%s/api/models?author=%s&limit=%d", p.baseURL, author, limit)
 		if searchPrefix != "" {
@@ -808,6 +845,11 @@ func (p *hfModelProvider) listModelsByAuthor(ctx context.Context, author string,
 
 		// Extract model IDs
 		for _, m := range models {
+			// Check limit before adding each model
+			if p.maxModels > 0 && len(allModels) >= p.maxModels {
+				break
+			}
+
 			modelID := m.ID
 			if modelID == "" {
 				modelID = m.ModelID
@@ -842,7 +884,7 @@ func (p *hfModelProvider) listModelsByAuthor(ctx context.Context, author string,
 		cursor = nextCursor
 	}
 
-	glog.Infof("Listed %d models from author %s", len(allModels), author)
+	glog.Infof("Listed %d models from author %s (maxModels: %d)", len(allModels), author, p.maxModels)
 	return allModels, nil
 }
 
diff --git a/catalog/internal/catalog/hf_catalog_test.go b/catalog/internal/catalog/hf_catalog_test.go
@@ -747,6 +747,69 @@ func TestListModelsByAuthor(t *testing.T) {
 		// "other-model" should be filtered out
 		assert.NotContains(t, models, "search-org/other-model")
 	})
+
+	t.Run("respects maxModels limit", func(t *testing.T) {
+		callCount = 0
+		config := &PreviewConfig{
+			Type: "hf",
+			Properties: map[string]any{
+				"url":       server.URL,
+				"maxModels": 50, // Limit to 50 models
+			},
+		}
+
+		provider, err := NewHFPreviewProvider(config)
+		require.NoError(t, err)
+		assert.Equal(t, 50, provider.maxModels)
+
+		models, err := provider.listModelsByAuthor(context.Background(), "test-org", "")
+		require.NoError(t, err)
+
+		// Should stop at 50 models (first page has 100, but we limit to 50)
+		assert.Len(t, models, 50)
+
+		// Should have only made 1 API call (stopped before second page)
+		assert.Equal(t, 1, callCount)
+	})
+
+	t.Run("uses default maxModels when not specified", func(t *testing.T) {
+		config := &PreviewConfig{
+			Type: "hf",
+			Properties: map[string]any{
+				"url": server.URL,
+			},
+		}
+
+		provider, err := NewHFPreviewProvider(config)
+		require.NoError(t, err)
+
+		// Should use default (500)
+		assert.Equal(t, 500, provider.maxModels)
+	})
+
+	t.Run("maxModels 0 means no limit", func(t *testing.T) {
+		callCount = 0
+		config := &PreviewConfig{
+			Type: "hf",
+			Properties: map[string]any{
+				"url":       server.URL,
+				"maxModels": 0, // No limit
+			},
+		}
+
+		provider, err := NewHFPreviewProvider(config)
+		require.NoError(t, err)
+		assert.Equal(t, 0, provider.maxModels)
+
+		models, err := provider.listModelsByAuthor(context.Background(), "test-org", "")
+		require.NoError(t, err)
+
+		// Should get all 102 models (100 from page 1 + 2 from page 2)
+		assert.Len(t, models, 102)
+
+		// Should have made 2 API calls
+		assert.Equal(t, 2, callCount)
+	})
 }
 
 func TestFetchModelNamesForPreviewWithPatterns(t *testing.T) {