Add AutoEvaluationMetricAggregate & AutoEvaluationMetricRun

davidgisbey · davidgisbey · commit c4bdb23ca002 · 2025-12-16T16:46:32.000Z
This adds a migration to create the AutoEvaluationMetricAggregate
& AutoEvaluationMetricRun tables, along with their models and factories.

These will be used to store the results of automatic evaluations of answers.
I've added a unique index on the aggregate's metric_name and answer_id to
ensure there's only ever 1 aggregate record per metric per answer.
diff --git a/app/models/answer.rb b/app/models/answer.rb
@@ -55,6 +55,7 @@ def self.response_for_question_routing_label(label)
   has_many :sources, -> { order(relevancy: :asc) }, class_name: "AnswerSource"
   has_one :feedback, class_name: "AnswerFeedback"
   has_one :topics, class_name: "AnswerTopics"
+  has_many :metric_aggregates, class_name: "AutoEvaluationMetricAggregate"
 
   enum :status,
        {
diff --git a/app/models/auto_evaluation_metric_aggregate.rb b/app/models/auto_evaluation_metric_aggregate.rb
@@ -0,0 +1,4 @@
+class AutoEvaluationMetricAggregate < ApplicationRecord # aggregate score for one metric on one answer
+  belongs_to :answer
+  has_many :runs, class_name: "AutoEvaluationMetricRun", inverse_of: :metric_aggregate # inverse is not auto-detected because the belongs_to side sets :foreign_key
+end
diff --git a/app/models/auto_evaluation_metric_run.rb b/app/models/auto_evaluation_metric_run.rb
@@ -0,0 +1,5 @@
+class AutoEvaluationMetricRun < ApplicationRecord # a single evaluation run belonging to a metric aggregate
+  belongs_to :metric_aggregate, # association name is shorter than the class name, so class and key are spelled out below
+             class_name: "AutoEvaluationMetricAggregate",
+             foreign_key: :auto_evaluation_metric_aggregate_id
+end
diff --git a/db/migrate/20251216092915_add_auto_evaluation_metric_and_auto_evaluation_run.rb b/db/migrate/20251216092915_add_auto_evaluation_metric_and_auto_evaluation_run.rb
@@ -0,0 +1,25 @@
+class AddAutoEvaluationMetricAndAutoEvaluationRun < ActiveRecord::Migration[8.0]
+  def change
+    create_enum :auto_evaluation_metric_name, %w[answer_relevancy answer_coherence context_relevancy faithfulness]
+
+    create_table :auto_evaluation_metric_aggregates, id: :uuid do |t|
+      t.enum :metric_name, enum_type: "auto_evaluation_metric_name", null: false
+      t.float :score, null: false
+      t.references :answer, type: :uuid, null: false, foreign_key: { on_delete: :cascade }, index: true # non-unique: an answer has one aggregate per metric
+      t.timestamps
+    end
+
+    add_index :auto_evaluation_metric_aggregates, %i[metric_name answer_id], # enforces one aggregate per metric per answer
+              unique: true,
+              name: "index_metric_aggregate_on_metric_name_and_answer_id"
+
+    create_table :auto_evaluation_metric_runs, id: :uuid do |t|
+      t.float :score, null: false
+      t.string :reason, null: false
+      t.jsonb :llm_responses
+      t.jsonb :metrics
+      t.references :auto_evaluation_metric_aggregate, type: :uuid, null: false, foreign_key: { on_delete: :cascade }
+      t.timestamps
+    end
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema[8.0].define(version: 2025_12_15_161508) do
+ActiveRecord::Schema[8.0].define(version: 2025_12_16_092915) do
   # These are extensions that must be enabled in order to support this database
   enable_extension "citext"
   enable_extension "pg_catalog.plpgsql"
@@ -20,6 +20,7 @@
   # Note that some types may not work with other database engines. Be careful if changing database.
   create_enum "answer_completeness", ["complete", "partial", "no_information"]
   create_enum "answer_status", ["answered", "clarification", "error_answer_guardrails", "error_answer_service_error", "error_jailbreak_guardrails", "error_non_specific", "error_question_routing_guardrails", "error_timeout", "guardrails_answer", "guardrails_forbidden_terms", "guardrails_jailbreak", "guardrails_question_routing", "unanswerable_llm_cannot_answer", "unanswerable_no_govuk_content", "unanswerable_question_routing"]
+  create_enum "auto_evaluation_metric_name", ["answer_relevancy", "answer_coherence", "context_relevancy", "faithfulness"]
   create_enum "conversation_source", ["web", "api"]
   create_enum "guardrails_status", ["pass", "fail", "error"]
   create_enum "question_routing_label", ["about_mps", "advice_opinions_predictions", "character_fun", "genuine_rag", "gov_transparency", "greetings", "harmful_vulgar_controversy", "multi_questions", "negative_acknowledgement", "non_english", "personal_info", "positive_acknowledgement", "vague_acronym_grammar", "unclear_intent", "requires_account_data", "about_chat"]
@@ -103,6 +104,27 @@
     t.index ["question_id"], name: "index_answers_on_question_id", unique: true
   end
 
+  create_table "auto_evaluation_metric_aggregates", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t|
+    t.enum "metric_name", null: false, enum_type: "auto_evaluation_metric_name"
+    t.float "score", null: false
+    t.uuid "answer_id", null: false
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+    t.index ["answer_id"], name: "index_auto_evaluation_metric_aggregates_on_answer_id"
+    t.index ["metric_name", "answer_id"], name: "index_metric_aggregate_on_metric_name_and_answer_id", unique: true
+  end
+
+  create_table "auto_evaluation_metric_runs", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t|
+    t.float "score", null: false
+    t.string "reason", null: false
+    t.jsonb "llm_responses"
+    t.jsonb "metrics"
+    t.uuid "auto_evaluation_metric_aggregate_id", null: false
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+    t.index ["auto_evaluation_metric_aggregate_id"], name: "idx_on_auto_evaluation_metric_aggregate_id_c079941d7e"
+  end
+
   create_table "base_path_versions", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t|
     t.string "base_path", null: false
     t.bigint "payload_version", default: 0, null: false
@@ -172,11 +194,23 @@
     t.datetime "updated_at", null: false
   end
 
+  create_table "solid_cable_messages", force: :cascade do |t|
+    t.binary "channel", null: false
+    t.binary "payload", null: false
+    t.datetime "created_at", null: false
+    t.bigint "channel_hash", null: false
+    t.index ["channel"], name: "index_solid_cable_messages_on_channel"
+    t.index ["channel_hash"], name: "index_solid_cable_messages_on_channel_hash"
+    t.index ["created_at"], name: "index_solid_cable_messages_on_created_at"
+  end
+
   add_foreign_key "answer_feedback", "answers", on_delete: :cascade
   add_foreign_key "answer_sources", "answer_source_chunks", on_delete: :restrict
   add_foreign_key "answer_sources", "answers", on_delete: :cascade
   add_foreign_key "answer_topics", "answers", on_delete: :cascade
   add_foreign_key "answers", "questions", on_delete: :cascade
+  add_foreign_key "auto_evaluation_metric_aggregates", "answers", on_delete: :cascade
+  add_foreign_key "auto_evaluation_metric_runs", "auto_evaluation_metric_aggregates", on_delete: :cascade
   add_foreign_key "conversations", "signon_users", on_delete: :restrict
   add_foreign_key "questions", "conversations"
   add_foreign_key "settings_audits", "signon_users", column: "user_id", on_delete: :nullify
diff --git a/spec/factories/auto_evaluation_metric_aggregate.rb b/spec/factories/auto_evaluation_metric_aggregate.rb
@@ -0,0 +1,7 @@
+FactoryBot.define do
+  factory :auto_evaluation_metric_aggregate do
+    answer
+    metric_name { "answer_relevancy" } # column is metric_name (NOT NULL); value must be in the auto_evaluation_metric_name enum
+    score { 0.5 }
+  end
+end
diff --git a/spec/factories/auto_evaluation_metric_run.rb b/spec/factories/auto_evaluation_metric_run.rb
@@ -0,0 +1,7 @@
+FactoryBot.define do
+  factory :auto_evaluation_metric_run do
+    association :metric_aggregate, factory: :auto_evaluation_metric_aggregate # association name differs from the factory name, so the factory is given explicitly
+    score { 0.5 }
+    reason { "The answer was okay." }
+  end
+end

Original file line number	Diff line number	Diff line change
`@@ -55,6 +55,7 @@ def self.response_for_question_routing_label(label)`
`55`	`55`	`has_many :sources, -> { order(relevancy: :asc) }, class_name: "AnswerSource"`
`56`	`56`	`has_one :feedback, class_name: "AnswerFeedback"`
`57`	`57`	`has_one :topics, class_name: "AnswerTopics"`
	`58`	`+ has_many :metric_aggregates, class_name: "AutoEvaluationMetricAggregate"`
`58`	`59`
`59`	`60`	`enum :status,`
`60`	`61`	`{`