Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions catalog/internal/catalog/modelcatalog/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,9 +329,11 @@ func (l *ModelLoader) updateDatabase(ctx context.Context) error {
}
}

for _, handler := range l.handlers {
handler(ctx, record)
for _, handler := range l.handlers {
if err := handler(ctx, record); err != nil {
glog.Errorf("%s: event handler error: %v", *attr.Name, err)
}
}
}()
}
}()
Expand Down
30 changes: 22 additions & 8 deletions catalog/internal/catalog/modelcatalog/performance_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@ func (pr *performanceRecord) UnmarshalJSON(data []byte) error {
if id, ok := raw["id"].(string); ok {
pr.ID = id
}
if pr.ID == "" {
if configID, ok := raw["config_id"].(string); ok && configID != "" {
pr.ID = configID
}
}
if modelID, ok := raw["model_id"].(string); ok {
pr.ModelID = modelID
}
Expand Down Expand Up @@ -505,19 +510,28 @@ func createAccuracyMetricsArtifact(evalRecords []evaluationRecord, modelID int32
// Properties can be empty or contain general metadata
properties := []models.Properties{}

// Create custom properties - simple mapping of benchmark_name to score_value
customProperties := []models.Properties{}

// Create custom properties - simple mapping of benchmark_name to score_value.
// Deduplicate by benchmark name: if multiple evaluation records share the same
// benchmark, keep the last score encountered. This prevents DB constraint violations
// on the (artifact_id, name, is_custom_property) composite primary key.
benchmarkScores := make(map[string]float64, len(evalRecords))
for _, evalRecord := range evalRecords {
// Add the benchmark score as a named property (e.g., "aime24": 63.3333)
if score, ok := evalRecord.CustomProperties["score"].(float64); ok {
customProperties = append(customProperties, models.Properties{
Name: evalRecord.Benchmark,
DoubleValue: &score,
})
if _, duplicate := benchmarkScores[evalRecord.Benchmark]; duplicate {
glog.Warningf("Duplicate benchmark %q for model %d, using latest score", evalRecord.Benchmark, modelID)
}
benchmarkScores[evalRecord.Benchmark] = score
}
}

customProperties := make([]models.Properties, 0, len(benchmarkScores)+1)
for benchmark, score := range benchmarkScores {
customProperties = append(customProperties, models.Properties{
Name: benchmark,
DoubleValue: &score,
})
}

// Add overall_average custom property from metadata.json overall_accuracy field
if overallAccuracy != nil {
customProperties = append(customProperties, models.Properties{
Expand Down
82 changes: 82 additions & 0 deletions catalog/internal/catalog/modelcatalog/performance_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,88 @@ func TestOverallAccuracyToOverallAverage(t *testing.T) {
})
}

// TestCreateAccuracyMetricsArtifact_DuplicateBenchmarks exercises
// createAccuracyMetricsArtifact with evaluation records whose benchmark names
// repeat. It asserts that each benchmark name ends up as exactly one custom
// property and, where scores are checked, that the last score in input order
// is the one kept.
func TestCreateAccuracyMetricsArtifact_DuplicateBenchmarks(t *testing.T) {
	t.Run("duplicate benchmarks are deduplicated using last score", func(t *testing.T) {
		records := []evaluationRecord{
			{Benchmark: "mmlu", CustomProperties: map[string]any{"score": 80.0}},
			{Benchmark: "aime24", CustomProperties: map[string]any{"score": 63.3}},
			{Benchmark: "mmlu", CustomProperties: map[string]any{"score": 85.0}},
		}

		props := *createAccuracyMetricsArtifact(records, 1, 100, nil, nil, nil).CustomProperties

		// Tally how often every property name occurs and remember its value.
		seen := make(map[string]int)
		scoreOf := make(map[string]float64)
		for i := range props {
			name := props[i].Name
			seen[name]++
			if v := props[i].DoubleValue; v != nil {
				scoreOf[name] = *v
			}
		}

		// The repeated benchmark must collapse into a single property...
		if got := seen["mmlu"]; got != 1 {
			t.Errorf("expected mmlu to appear once, got %d", got)
		}
		// ...carrying the score of the final record for that benchmark.
		if got := scoreOf["mmlu"]; got != 85.0 {
			t.Errorf("expected mmlu score 85.0, got %v", got)
		}

		// The non-repeated benchmark must be unaffected by the deduplication.
		if got := seen["aime24"]; got != 1 {
			t.Errorf("expected aime24 to appear once, got %d", got)
		}
		if got := scoreOf["aime24"]; got != 63.3 {
			t.Errorf("expected aime24 score 63.3, got %v", got)
		}
	})

	t.Run("no duplicates produces all benchmarks", func(t *testing.T) {
		records := []evaluationRecord{
			{Benchmark: "mmlu", CustomProperties: map[string]any{"score": 90.0}},
			{Benchmark: "aime24", CustomProperties: map[string]any{"score": 63.3}},
			{Benchmark: "gpqa", CustomProperties: map[string]any{"score": 72.5}},
		}

		props := *createAccuracyMetricsArtifact(records, 1, 100, nil, nil, nil).CustomProperties

		// Collect the set of property names that were emitted.
		present := make(map[string]struct{}, len(props))
		for i := range props {
			present[props[i].Name] = struct{}{}
		}

		for _, want := range [...]string{"mmlu", "aime24", "gpqa"} {
			if _, ok := present[want]; !ok {
				t.Errorf("expected benchmark %q not found in custom properties", want)
			}
		}
	})

	t.Run("all records with same benchmark produce single property", func(t *testing.T) {
		records := []evaluationRecord{
			{Benchmark: "mmlu", CustomProperties: map[string]any{"score": 80.0}},
			{Benchmark: "mmlu", CustomProperties: map[string]any{"score": 82.0}},
			{Benchmark: "mmlu", CustomProperties: map[string]any{"score": 85.0}},
		}

		props := *createAccuracyMetricsArtifact(records, 1, 100, nil, nil, nil).CustomProperties

		var mmluCount int
		for i := range props {
			if props[i].Name != "mmlu" {
				continue
			}
			mmluCount++
		}

		if mmluCount != 1 {
			t.Errorf("expected exactly 1 mmlu property, got %d", mmluCount)
		}
	})
}

func TestEvaluationRecordUnmarshalJSON(t *testing.T) {
tests := []struct {
name string
Expand Down
78 changes: 39 additions & 39 deletions clients/python/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion clients/python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ aiohttp-retry = "^2.8.3"
# allows for reentrant event loops (used for sync client) - Python 3.14 compatible
nest-asyncio2 = "^1.7.1"

huggingface-hub = { version = ">=0.20.1,<1.13.0", optional = true }
huggingface-hub = { version = ">=0.20.1,<1.14.0", optional = true }
olot = { version = "^0.1.17", optional = true }
boto3 = { version = "^1.37.34", optional = true }
rh-model-signing = { version = "1.0.1", optional = true }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ class RegisterAndStoreFields {
}

/**
 * Opens the namespace combobox and selects the option whose accessible name
 * matches `name`.
 *
 * @param name accessible name of the namespace option to pick
 */
selectNamespace(name: string) {
  // Wait until the combobox is enabled so a regular (non-forced) click is
  // actionable; a forced click followed by a second click would toggle the
  // dropdown closed again.
  this.findNamespaceSelectCombobox().should('not.be.disabled');
  this.findNamespaceSelectCombobox().scrollIntoView().click();
  // Ensure the option list is rendered before choosing from it.
  cy.findByRole('listbox').should('be.visible');
  cy.findByRole('option', { name }).click();
}

Expand All @@ -66,11 +68,12 @@ class RegisterAndStoreFields {
}

/**
 * Asserts that every namespace in `namespaces` is offered as an option of the
 * namespace combobox. Clicks the combobox once to open it before the check
 * and once more afterwards.
 *
 * @param namespaces accessible names of the options that must exist
 * @returns this page object, to allow call chaining
 */
shouldHaveNamespaceOptions(namespaces: string[]) {
  // Wait until the combobox is enabled so a regular (non-forced) click is
  // actionable; exactly one click opens the dropdown.
  this.findNamespaceSelectCombobox().should('not.be.disabled');
  this.findNamespaceSelectCombobox().scrollIntoView().click();
  namespaces.forEach((namespace) => {
    cy.findByRole('option', { name: namespace }).should('exist');
  });
  // A single second click toggles the dropdown closed again.
  this.findNamespaceSelectCombobox().scrollIntoView().click();
  return this;
}

Expand Down Expand Up @@ -301,9 +304,18 @@ class RegisterAndStoreFields {
this.findSourceS3SecretAccessKeyInput().type(secretAccessKey);
}

/**
 * Chooses a model type on the register page, where the field is required.
 *
 * @param optionName accessible name of the option to select; defaults to
 *   'Predictive Model'
 */
selectModelType(
  optionName: 'Predictive Model' | 'Generative AI model (Example, LLM)' = 'Predictive Model',
) {
  // Open the model-type dropdown via its toggle, then pick the requested entry.
  const typeToggleSelector = '#register-model-type-toggle';
  cy.get(typeToggleSelector).click();
  cy.findByRole('option', { name: optionName }).click();
}

// Convenience method to fill all required fields for submission
fillAllRequiredFields() {
this.fillModelName('test-model');
this.selectModelType();
this.fillVersionName('v1.0.0');
this.fillJobName('my-transfer-job');
this.fillSourceEndpoint('https://s3.amazonaws.com');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,7 @@ describe('Register and Store Fields - Credential Validation', () => {
it('Should have submit button disabled when S3 access key ID is missing', () => {
// Fill all fields except S3 access key ID
registerAndStoreFields.fillModelName('test-model');
registerAndStoreFields.selectModelType();
registerAndStoreFields.fillVersionName('v1.0.0');
registerAndStoreFields.fillJobName('my-transfer-job');
registerAndStoreFields.fillSourceEndpoint('https://s3.amazonaws.com');
Expand All @@ -349,6 +350,7 @@ describe('Register and Store Fields - Credential Validation', () => {
it('Should have submit button disabled when S3 secret access key is missing', () => {
// Fill all fields except S3 secret access key
registerAndStoreFields.fillModelName('test-model');
registerAndStoreFields.selectModelType();
registerAndStoreFields.fillVersionName('v1.0.0');
registerAndStoreFields.fillJobName('my-transfer-job');
registerAndStoreFields.fillSourceEndpoint('https://s3.amazonaws.com');
Expand All @@ -367,6 +369,7 @@ describe('Register and Store Fields - Credential Validation', () => {
it('Should have submit button disabled when OCI username is missing', () => {
// Fill all fields except OCI username
registerAndStoreFields.fillModelName('test-model');
registerAndStoreFields.selectModelType();
registerAndStoreFields.fillVersionName('v1.0.0');
registerAndStoreFields.fillJobName('my-transfer-job');
registerAndStoreFields.fillSourceEndpoint('https://s3.amazonaws.com');
Expand All @@ -385,6 +388,7 @@ describe('Register and Store Fields - Credential Validation', () => {
it('Should have submit button disabled when OCI password is missing', () => {
// Fill all fields except OCI password
registerAndStoreFields.fillModelName('test-model');
registerAndStoreFields.selectModelType();
registerAndStoreFields.fillVersionName('v1.0.0');
registerAndStoreFields.fillJobName('my-transfer-job');
registerAndStoreFields.fillSourceEndpoint('https://s3.amazonaws.com');
Expand Down
Loading
Loading