opendatahub-io · openshift-merge-bot · May 14, 2026 · May 4, 2026 · May 5, 2026 · May 5, 2026
diff --git a/catalog/internal/catalog/modelcatalog/loader.go b/catalog/internal/catalog/modelcatalog/loader.go
@@ -329,9 +329,11 @@ func (l *ModelLoader) updateDatabase(ctx context.Context) error {
 					}
 				}
 
-				for _, handler := range l.handlers {
-					handler(ctx, record)
+			for _, handler := range l.handlers {
+				if err := handler(ctx, record); err != nil {
+					glog.Errorf("%s: event handler error: %v", *attr.Name, err)
 				}
+			}
 			}()
 		}
 	}()

diff --git a/catalog/internal/catalog/modelcatalog/performance_metrics.go b/catalog/internal/catalog/modelcatalog/performance_metrics.go
@@ -107,6 +107,11 @@ func (pr *performanceRecord) UnmarshalJSON(data []byte) error {
 	if id, ok := raw["id"].(string); ok {
 		pr.ID = id
 	}
+	if pr.ID == "" {
+		if configID, ok := raw["config_id"].(string); ok && configID != "" {
+			pr.ID = configID
+		}
+	}
 	if modelID, ok := raw["model_id"].(string); ok {
 		pr.ModelID = modelID
 	}
@@ -505,19 +510,28 @@ func createAccuracyMetricsArtifact(evalRecords []evaluationRecord, modelID int32
 	// Properties can be empty or contain general metadata
 	properties := []models.Properties{}
 
-	// Create custom properties - simple mapping of benchmark_name to score_value
-	customProperties := []models.Properties{}
-
+	// Create custom properties - simple mapping of benchmark_name to score_value.
+	// Deduplicate by benchmark name: if multiple evaluation records share the same
+	// benchmark, keep the last score encountered. This prevents DB constraint violations
+	// on the (artifact_id, name, is_custom_property) composite primary key.
+	benchmarkScores := make(map[string]float64, len(evalRecords))
 	for _, evalRecord := range evalRecords {
-		// Add the benchmark score as a named property (e.g., "aime24": 63.3333)
 		if score, ok := evalRecord.CustomProperties["score"].(float64); ok {
-			customProperties = append(customProperties, models.Properties{
-				Name:        evalRecord.Benchmark,
-				DoubleValue: &score,
-			})
+			if _, duplicate := benchmarkScores[evalRecord.Benchmark]; duplicate {
+				glog.Warningf("Duplicate benchmark %q for model %d, using latest score", evalRecord.Benchmark, modelID)
+			}
+			benchmarkScores[evalRecord.Benchmark] = score
 		}
 	}
 
+	customProperties := make([]models.Properties, 0, len(benchmarkScores)+1)
+	for benchmark, score := range benchmarkScores {
+		customProperties = append(customProperties, models.Properties{
+			Name:        benchmark,
+			DoubleValue: &score,
+		})
+	}
+
 	// Add overall_average custom property from metadata.json overall_accuracy field
 	if overallAccuracy != nil {
 		customProperties = append(customProperties, models.Properties{

diff --git a/catalog/internal/catalog/modelcatalog/performance_metrics_test.go b/catalog/internal/catalog/modelcatalog/performance_metrics_test.go
@@ -330,6 +330,88 @@ func TestOverallAccuracyToOverallAverage(t *testing.T) {
 	})
 }
 
+// TestCreateAccuracyMetricsArtifact_DuplicateBenchmarks verifies that evaluation
+// records sharing a benchmark name yield one custom property per benchmark, and
+// that records with distinct benchmark names are all kept.
+func TestCreateAccuracyMetricsArtifact_DuplicateBenchmarks(t *testing.T) {
+	// rec builds an evaluation record carrying only a benchmark name and a score.
+	rec := func(benchmark string, score float64) evaluationRecord {
+		return evaluationRecord{
+			Benchmark:        benchmark,
+			CustomProperties: map[string]any{"score": score},
+		}
+	}
+
+	t.Run("duplicate benchmarks are deduplicated using last score", func(t *testing.T) {
+		// "mmlu" is listed twice with different scores; "aime24" only once.
+		records := []evaluationRecord{rec("mmlu", 80.0), rec("aime24", 63.3), rec("mmlu", 85.0)}
+		got := createAccuracyMetricsArtifact(records, 1, 100, nil, nil, nil)
+
+		// Tally how often each property name occurs and remember its double value.
+		seen := make(map[string]int)
+		values := make(map[string]float64)
+		for _, p := range *got.CustomProperties {
+			seen[p.Name]++
+			if v := p.DoubleValue; v != nil {
+				values[p.Name] = *v
+			}
+		}
+
+		// The duplicated benchmark must collapse to one entry holding the later score.
+		if n := seen["mmlu"]; n != 1 {
+			t.Errorf("expected mmlu to appear once, got %d", n)
+		}
+		if v := values["mmlu"]; v != 85.0 {
+			t.Errorf("expected mmlu score 85.0, got %v", v)
+		}
+
+		// The benchmark that was never duplicated must pass through unchanged.
+		if n := seen["aime24"]; n != 1 {
+			t.Errorf("expected aime24 to appear once, got %d", n)
+		}
+		if v := values["aime24"]; v != 63.3 {
+			t.Errorf("expected aime24 score 63.3, got %v", v)
+		}
+	})
+
+	t.Run("no duplicates produces all benchmarks", func(t *testing.T) {
+		// Three distinct benchmarks, each listed exactly once.
+		records := []evaluationRecord{rec("mmlu", 90.0), rec("aime24", 63.3), rec("gpqa", 72.5)}
+		got := createAccuracyMetricsArtifact(records, 1, 100, nil, nil, nil)
+
+		// Collect the set of property names present on the artifact.
+		present := make(map[string]struct{})
+		for _, p := range *got.CustomProperties {
+			present[p.Name] = struct{}{}
+		}
+
+		for _, want := range [...]string{"mmlu", "aime24", "gpqa"} {
+			if _, ok := present[want]; !ok {
+				t.Errorf("expected benchmark %q not found in custom properties", want)
+			}
+		}
+	})
+
+	t.Run("all records with same benchmark produce single property", func(t *testing.T) {
+		// Every record names the same benchmark.
+		records := []evaluationRecord{rec("mmlu", 80.0), rec("mmlu", 82.0), rec("mmlu", 85.0)}
+		got := createAccuracyMetricsArtifact(records, 1, 100, nil, nil, nil)
+
+		// Count only the entries named after the repeated benchmark.
+		hits := 0
+		for _, p := range *got.CustomProperties {
+			if p.Name != "mmlu" {
+				continue
+			}
+			hits++
+		}
+
+		if hits != 1 {
+			t.Errorf("expected exactly 1 mmlu property, got %d", hits)
+		}
+	})
+}
+
 func TestEvaluationRecordUnmarshalJSON(t *testing.T) {
 	tests := []struct {
 		name             string

diff --git a/catalog/internal/plugin/config.go b/catalog/internal/plugin/config.go
@@ -0,0 +1,27 @@
+package plugin
+
+import (
+	"fmt"
+
+	"github.com/kubeflow/hub/catalog/internal/catalog/basecatalog"
+)
+
+// LoadConfig parses the sources.yaml file at path by delegating to basecatalog.ReadSourceConfig.
+func LoadConfig(path string) (*basecatalog.SourceConfig, error) {
+	return basecatalog.ReadSourceConfig(path)
+}
+
+// LoadConfigs reads every sources.yaml file named in paths, in order, and
+// returns the parsed configs as separate values; merging is left to the caller.
+// The first path that fails to load aborts with a nil slice and a wrapped error.
+func LoadConfigs(paths []string) ([]*basecatalog.SourceConfig, error) {
+	loaded := make([]*basecatalog.SourceConfig, 0, len(paths))
+	for i := range paths {
+		parsed, loadErr := LoadConfig(paths[i])
+		if loadErr != nil {
+			return nil, fmt.Errorf("loading config %s: %w", paths[i], loadErr)
+		}
+		loaded = append(loaded, parsed)
+	}
+	return loaded, nil
+}