feat: add overall_average custom property when loading perf data (#1930)

Al-Pragliola · web-flow · commit 06fc02d56766 · 2025-12-01T14:46:32.000Z
* feat: add overall_average custom property when loading perf data

Signed-off-by: Alessio Pragliola <seth.pro@gmail.com>

* chore: improve test coverage

Signed-off-by: Alessio Pragliola <seth.pro@gmail.com>

---------

Signed-off-by: Alessio Pragliola <seth.pro@gmail.com>
diff --git a/catalog/internal/catalog/performance_metrics.go b/catalog/internal/catalog/performance_metrics.go
@@ -21,7 +21,8 @@ import (
 // metadataJSON represents the minimal structure needed from metadata.json files
 // Only the ID field is needed to look up existing models
 type metadataJSON struct {
-	ID string `json:"id"` // Maps to model name for lookup
+	ID              string   `json:"id"`               // Maps to model name for lookup
+	OverallAccuracy *float64 `json:"overall_accuracy"` // Optional; nil when absent or null, otherwise emitted as the overall_average custom property
 }
 
 // parseMetadataJSON parses JSON data into metadataJSON struct, extracting only the ID field
@@ -298,12 +299,12 @@ func processModelDirectory(dirPath string, modelRepo dbmodels.CatalogModelReposi
 	glog.V(2).Infof("Found existing model %s with ID %d, processing metrics", metadata.ID, modelID)
 
 	// Use batch processing for all artifacts
-	return processModelArtifactsBatch(dirPath, modelID, metadata.ID, metricsArtifactRepo, metricsArtifactTypeID)
+	return processModelArtifactsBatch(dirPath, modelID, metadata.ID, metadata.OverallAccuracy, metricsArtifactRepo, metricsArtifactTypeID)
 }
 
 // processModelArtifactsBatch processes all metric artifacts for a model in batch
 // This reduces DB overhead by parsing, checking, and inserting in optimized phases
-func processModelArtifactsBatch(dirPath string, modelID int32, modelName string, metricsArtifactRepo dbmodels.CatalogMetricsArtifactRepository, metricsArtifactTypeID int32) (int, error) {
+func processModelArtifactsBatch(dirPath string, modelID int32, modelName string, overallAccuracy *float64, metricsArtifactRepo dbmodels.CatalogMetricsArtifactRepository, metricsArtifactTypeID int32) (int, error) {
 	// Parse all metrics files
 	var evaluationRecords []evaluationRecord
 	var performanceRecords []performanceRecord
@@ -359,7 +360,7 @@ func processModelArtifactsBatch(dirPath string, modelID int32, modelName string,
 	if len(evaluationRecords) > 0 {
 		externalID := fmt.Sprintf("accuracy-metrics-model-%d", modelID)
 		if !existingArtifactsMap[externalID] {
-			artifact := createAccuracyMetricsArtifact(evaluationRecords, modelID, metricsArtifactTypeID, nil, nil)
+			artifact := createAccuracyMetricsArtifact(evaluationRecords, modelID, metricsArtifactTypeID, overallAccuracy, nil, nil)
 			artifactsToInsert = append(artifactsToInsert, artifact)
 		} else {
 			glog.V(2).Infof("Accuracy metrics artifact already exists, skipping")
@@ -463,7 +464,7 @@ func parsePerformanceFile(filePath string) ([]performanceRecord, error) {
 }
 
 // createAccuracyMetricsArtifact creates a single metrics artifact from all evaluation records
-func createAccuracyMetricsArtifact(evalRecords []evaluationRecord, modelID int32, typeID int32, existingID *int32, existingCreateTime *int64) *dbmodels.CatalogMetricsArtifactImpl {
+func createAccuracyMetricsArtifact(evalRecords []evaluationRecord, modelID int32, typeID int32, overallAccuracy *float64, existingID *int32, existingCreateTime *int64) *dbmodels.CatalogMetricsArtifactImpl {
 	artifactName := fmt.Sprintf("accuracy-metrics-model-%d", modelID)
 	externalID := fmt.Sprintf("accuracy-metrics-model-%d", modelID)
 
@@ -506,6 +507,14 @@ func createAccuracyMetricsArtifact(evalRecords []evaluationRecord, modelID int32
 		}
 	}
 
+	// Add overall_average custom property from metadata.json overall_accuracy field
+	if overallAccuracy != nil {
+		customProperties = append(customProperties, models.Properties{
+			Name:        "overall_average",
+			DoubleValue: overallAccuracy,
+		})
+	}
+
 	// Create the metrics artifact with metricsType set to accuracy-metrics
 	metricsArtifact := &dbmodels.CatalogMetricsArtifactImpl{
 		ID:     existingID, // Use existing ID if updating
diff --git a/catalog/internal/catalog/performance_metrics_test.go b/catalog/internal/catalog/performance_metrics_test.go
@@ -235,6 +235,98 @@ func TestParseMetadataJSON_OnlyIDMatters(t *testing.T) {
 	}
 }
 
+func TestOverallAccuracyToOverallAverage(t *testing.T) { // Covers parsing overall_accuracy from metadata and its emission as the overall_average custom property
+	t.Run("parse overall_accuracy from metadata", func(t *testing.T) { // Table-driven: present, zero, null, and missing values
+		tests := []struct {
+			name      string
+			jsonData  string
+			wantNil   bool
+			wantValue float64
+		}{
+			{
+				name:      "overall_accuracy present",
+				jsonData:  `{"id": "model-1", "overall_accuracy": 85.5}`,
+				wantNil:   false,
+				wantValue: 85.5,
+			},
+			{
+				name:      "overall_accuracy is zero",
+				jsonData:  `{"id": "model-2", "overall_accuracy": 0}`, // An explicit zero must parse to a non-nil pointer, distinct from "absent"
+				wantNil:   false,
+				wantValue: 0.0,
+			},
+			{
+				name:     "overall_accuracy is null",
+				jsonData: `{"id": "model-3", "overall_accuracy": null}`,
+				wantNil:  true,
+			},
+			{
+				name:     "overall_accuracy missing",
+				jsonData: `{"id": "model-4"}`,
+				wantNil:  true,
+			},
+		}
+
+		for _, tt := range tests {
+			t.Run(tt.name, func(t *testing.T) {
+				metadata, err := parseMetadataJSON([]byte(tt.jsonData))
+				if err != nil {
+					t.Fatalf("parseMetadataJSON() error = %v", err)
+				}
+
+				if tt.wantNil {
+					if metadata.OverallAccuracy != nil {
+						t.Errorf("OverallAccuracy = %v, want nil", *metadata.OverallAccuracy)
+					}
+				} else {
+					if metadata.OverallAccuracy == nil {
+						t.Errorf("OverallAccuracy = nil, want %v", tt.wantValue)
+					} else if *metadata.OverallAccuracy != tt.wantValue {
+						t.Errorf("OverallAccuracy = %v, want %v", *metadata.OverallAccuracy, tt.wantValue)
+					}
+				}
+			})
+		}
+	})
+
+	t.Run("artifact has overall_average when overall_accuracy provided", func(t *testing.T) { // A non-nil input must surface as an overall_average property carrying the same value
+		overallAccuracy := 87.5
+		evalRecords := []evaluationRecord{
+			{Benchmark: "mmlu", CustomProperties: map[string]interface{}{"score": 90.0}},
+		}
+
+		artifact := createAccuracyMetricsArtifact(evalRecords, 1, 100, &overallAccuracy, nil, nil)
+
+		found := false
+		for _, prop := range *artifact.CustomProperties {
+			if prop.Name == "overall_average" && prop.DoubleValue != nil { // A matching name with a nil DoubleValue does not count as found
+				if *prop.DoubleValue != overallAccuracy {
+					t.Errorf("overall_average = %v, want %v", *prop.DoubleValue, overallAccuracy)
+				}
+				found = true
+				break
+			}
+		}
+		if !found {
+			t.Error("overall_average custom property not found in artifact")
+		}
+	})
+
+	t.Run("artifact has no overall_average when overall_accuracy is nil", func(t *testing.T) { // A nil input must not add the property at all
+		evalRecords := []evaluationRecord{
+			{Benchmark: "mmlu", CustomProperties: map[string]interface{}{"score": 90.0}},
+		}
+
+		artifact := createAccuracyMetricsArtifact(evalRecords, 1, 100, nil, nil, nil)
+
+		for _, prop := range *artifact.CustomProperties {
+			if prop.Name == "overall_average" {
+				t.Error("overall_average should not exist when overall_accuracy is nil")
+			}
+		}
+	})
+}
+
 func TestEvaluationRecordUnmarshalJSON(t *testing.T) {
 	tests := []struct {
 		name             string