Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
0857d87
build(deps): bump pydantic from 2.13.0 to 2.13.3 in /jobs/async-uploa…
dependabot[bot] May 4, 2026
9157329
fix(ui): fix flaky selectNamespace race condition in cypress test (#2…
jonburdo May 5, 2026
0740a4a
feat(ui): register and display model_type from catalog and model regi…
Taj010 May 5, 2026
7c36b70
build(deps): bump huggingface-hub from 1.12.0 to 1.13.0 in /clients/p…
dependabot[bot] May 5, 2026
8fa8dd3
build(deps-dev): bump ruff from 0.15.7 to 0.15.12 in /clients/python …
dependabot[bot] May 5, 2026
c589331
build(deps): bump boto3 from 1.42.96 to 1.43.2 in /clients/python (#2…
dependabot[bot] May 5, 2026
ac33b4a
build(deps): bump github.com/go-sql-driver/mysql from 1.9.3 to 1.10.0…
dependabot[bot] May 5, 2026
17b7835
Model validation err handling (#2666)
adysenrothman May 5, 2026
367aa17
Merge pull request #1738 from kubeflow/main
openshift-merge-bot[bot] May 5, 2026
b2d4092
build(deps-dev): bump axios from 1.15.0 to 1.16.0 in /clients/ui/fron…
dependabot[bot] May 6, 2026
323626d
fix to collapse consecutive dashes, strip edge dashes, and cap k8s na…
Taj010 May 6, 2026
e5e9088
feat(catalog): add plugin infrastructure for extensible catalog types…
Al-Pragliola May 6, 2026
303a4e1
fix(ci): quote output in repeat_cmd_until to prevent flaky E2E deploy…
manaswinidas May 6, 2026
5f20dc3
Merge pull request #1740 from kubeflow/main
openshift-merge-bot[bot] May 6, 2026
2397794
add fix for hide category toggle (#2662)
Philip-Carneiro May 7, 2026
8f14aaf
Fix the model type selector when the model type is unknown (#2676)
ppadti May 7, 2026
c3087b2
fix(ui): use NotReadyError in useRegisteredModels hook (#2684)
manaswinidas May 8, 2026
edf953d
Add feature flag for Tool calling configuration (#2681)
ppadti May 8, 2026
870c754
Fix model transfer jobs to support gateway-based registry URLs (#2679)
ppadti May 8, 2026
c860cea
Merge pull request #1741 from kubeflow/main
openshift-merge-bot[bot] May 8, 2026
a319dfe
Remove redundant whitespace in Model Catalog (#2682)
Philip-Carneiro May 8, 2026
bc8c64d
fix(ci): exclude terminating pods from deploy image check (#2680)
jonburdo May 8, 2026
f0c5d63
Merge pull request #1742 from kubeflow/main
openshift-merge-bot[bot] May 8, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions catalog/internal/catalog/modelcatalog/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,9 +329,11 @@ func (l *ModelLoader) updateDatabase(ctx context.Context) error {
}
}

for _, handler := range l.handlers {
handler(ctx, record)
for _, handler := range l.handlers {
if err := handler(ctx, record); err != nil {
glog.Errorf("%s: event handler error: %v", *attr.Name, err)
}
}
}()
}
}()
Expand Down
30 changes: 22 additions & 8 deletions catalog/internal/catalog/modelcatalog/performance_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@ func (pr *performanceRecord) UnmarshalJSON(data []byte) error {
if id, ok := raw["id"].(string); ok {
pr.ID = id
}
if pr.ID == "" {
if configID, ok := raw["config_id"].(string); ok && configID != "" {
pr.ID = configID
}
}
if modelID, ok := raw["model_id"].(string); ok {
pr.ModelID = modelID
}
Expand Down Expand Up @@ -505,19 +510,28 @@ func createAccuracyMetricsArtifact(evalRecords []evaluationRecord, modelID int32
// Properties can be empty or contain general metadata
properties := []models.Properties{}

// Create custom properties - simple mapping of benchmark_name to score_value
customProperties := []models.Properties{}

// Create custom properties - simple mapping of benchmark_name to score_value.
// Deduplicate by benchmark name: if multiple evaluation records share the same
// benchmark, keep the last score encountered. This prevents DB constraint violations
// on the (artifact_id, name, is_custom_property) composite primary key.
benchmarkScores := make(map[string]float64, len(evalRecords))
for _, evalRecord := range evalRecords {
// Add the benchmark score as a named property (e.g., "aime24": 63.3333)
if score, ok := evalRecord.CustomProperties["score"].(float64); ok {
customProperties = append(customProperties, models.Properties{
Name: evalRecord.Benchmark,
DoubleValue: &score,
})
if _, duplicate := benchmarkScores[evalRecord.Benchmark]; duplicate {
glog.Warningf("Duplicate benchmark %q for model %d, using latest score", evalRecord.Benchmark, modelID)
}
benchmarkScores[evalRecord.Benchmark] = score
}
}

customProperties := make([]models.Properties, 0, len(benchmarkScores)+1)
for benchmark, score := range benchmarkScores {
customProperties = append(customProperties, models.Properties{
Name: benchmark,
DoubleValue: &score,
})
}

// Add overall_average custom property from metadata.json overall_accuracy field
if overallAccuracy != nil {
customProperties = append(customProperties, models.Properties{
Expand Down
82 changes: 82 additions & 0 deletions catalog/internal/catalog/modelcatalog/performance_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,88 @@ func TestOverallAccuracyToOverallAverage(t *testing.T) {
})
}

// TestCreateAccuracyMetricsArtifact_DuplicateBenchmarks checks how
// createAccuracyMetricsArtifact treats evaluation records that share a
// benchmark name: each name must surface as a single custom property, and
// where scores are asserted, the score of the last record for that name wins.
func TestCreateAccuracyMetricsArtifact_DuplicateBenchmarks(t *testing.T) {
	// record builds an evaluation record that carries only a benchmark name
	// and a "score" custom property, which is all these cases need.
	record := func(benchmark string, score float64) evaluationRecord {
		return evaluationRecord{
			Benchmark:        benchmark,
			CustomProperties: map[string]any{"score": score},
		}
	}

	t.Run("duplicate benchmarks are deduplicated using last score", func(t *testing.T) {
		artifact := createAccuracyMetricsArtifact([]evaluationRecord{
			record("mmlu", 80.0),
			record("aime24", 63.3),
			record("mmlu", 85.0),
		}, 1, 100, nil, nil, nil)

		// Tally how often each property name occurs and remember its value.
		occurrences := make(map[string]int)
		values := make(map[string]float64)
		for _, p := range *artifact.CustomProperties {
			occurrences[p.Name]++
			if p.DoubleValue == nil {
				continue
			}
			values[p.Name] = *p.DoubleValue
		}

		// The repeated benchmark collapses to one property...
		if got := occurrences["mmlu"]; got != 1 {
			t.Errorf("expected mmlu to appear once, got %d", got)
		}
		// ...holding the score from the final record.
		if got := values["mmlu"]; got != 85.0 {
			t.Errorf("expected mmlu score 85.0, got %v", got)
		}

		// The unrelated benchmark is left untouched.
		if got := occurrences["aime24"]; got != 1 {
			t.Errorf("expected aime24 to appear once, got %d", got)
		}
		if got := values["aime24"]; got != 63.3 {
			t.Errorf("expected aime24 score 63.3, got %v", got)
		}
	})

	t.Run("no duplicates produces all benchmarks", func(t *testing.T) {
		artifact := createAccuracyMetricsArtifact([]evaluationRecord{
			record("mmlu", 90.0),
			record("aime24", 63.3),
			record("gpqa", 72.5),
		}, 1, 100, nil, nil, nil)

		// Collect the set of property names that were emitted.
		present := make(map[string]struct{})
		for _, p := range *artifact.CustomProperties {
			present[p.Name] = struct{}{}
		}

		for _, want := range []string{"mmlu", "aime24", "gpqa"} {
			if _, ok := present[want]; !ok {
				t.Errorf("expected benchmark %q not found in custom properties", want)
			}
		}
	})

	t.Run("all records with same benchmark produce single property", func(t *testing.T) {
		artifact := createAccuracyMetricsArtifact([]evaluationRecord{
			record("mmlu", 80.0),
			record("mmlu", 82.0),
			record("mmlu", 85.0),
		}, 1, 100, nil, nil, nil)

		var mmluProps int
		for _, p := range *artifact.CustomProperties {
			if p.Name != "mmlu" {
				continue
			}
			mmluProps++
		}

		if mmluProps != 1 {
			t.Errorf("expected exactly 1 mmlu property, got %d", mmluProps)
		}
	})
}

func TestEvaluationRecordUnmarshalJSON(t *testing.T) {
tests := []struct {
name string
Expand Down
27 changes: 27 additions & 0 deletions catalog/internal/plugin/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package plugin

import (
"fmt"

"github.com/kubeflow/hub/catalog/internal/catalog/basecatalog"
)

// LoadConfig reads the sources.yaml file at path and returns its parsed form.
// Parsing is delegated entirely to basecatalog.ReadSourceConfig; both the
// config and the error it produces are handed back unchanged.
func LoadConfig(path string) (*basecatalog.SourceConfig, error) {
	cfg, err := basecatalog.ReadSourceConfig(path)
	return cfg, err
}

// LoadConfigs reads each sources.yaml file named in paths, in order, and
// returns the parsed configs as separate, unmerged values; the result has one
// entry per input path. Any merging is left to the caller (e.g., basecatalog's
// SourceCollection handles field-level merging).
//
// Loading stops at the first file that fails: the returned slice is nil and
// the error wraps the underlying failure along with the offending path.
func LoadConfigs(paths []string) ([]*basecatalog.SourceConfig, error) {
	loaded := make([]*basecatalog.SourceConfig, len(paths))
	for i := range paths {
		cfg, err := LoadConfig(paths[i])
		if err != nil {
			return nil, fmt.Errorf("loading config %s: %w", paths[i], err)
		}
		loaded[i] = cfg
	}
	return loaded, nil
}
Loading
Loading