Skip to content

Commit c4bdb23

Browse files
committed
Add AutoEvaluationMetricAggregate & AutoEvaluationMetricRun
This adds a migration that creates the AutoEvaluationMetricAggregate & AutoEvaluationMetricRun tables, along with their models and factories. These will be used to store the results of automatic evaluations of answers. The aggregate table has a unique index on metric_name and answer_id to ensure there's only ever 1 aggregate record per metric per answer.
1 parent eeef206 commit c4bdb23

7 files changed

Lines changed: 84 additions & 1 deletion

app/models/answer.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ def self.response_for_question_routing_label(label)
5555
has_many :sources, -> { order(relevancy: :asc) }, class_name: "AnswerSource"
5656
has_one :feedback, class_name: "AnswerFeedback"
5757
has_one :topics, class_name: "AnswerTopics"
58+
# Automatic-evaluation aggregate records stored for this answer.
has_many :metric_aggregates, class_name: "AutoEvaluationMetricAggregate"
5859

5960
enum :status,
6061
{
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Aggregated automatic-evaluation score for a single metric of an answer.
#
# Belongs to one Answer and owns the individual AutoEvaluationMetricRun
# records linked to it through auto_evaluation_metric_aggregate_id.
class AutoEvaluationMetricAggregate < ApplicationRecord
  belongs_to :answer

  # inverse_of is explicit: the run side names its association
  # :metric_aggregate, so Rails cannot infer the inverse from the model name
  # and would otherwise reload the aggregate for every run.
  has_many :runs,
           class_name: "AutoEvaluationMetricRun",
           inverse_of: :metric_aggregate
end
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# A single automatic-evaluation run recorded against a metric aggregate.
class AutoEvaluationMetricRun < ApplicationRecord
  # The association name differs from the target model name, so the class,
  # foreign key and inverse are all spelled out instead of being inferred.
  belongs_to :metric_aggregate,
             class_name: "AutoEvaluationMetricAggregate",
             foreign_key: :auto_evaluation_metric_aggregate_id,
             inverse_of: :runs
end
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Creates the tables that store automatic evaluation results for answers:
# one aggregate row per metric per answer, plus the individual runs that
# belong to each aggregate.
class AddAutoEvaluationMetricAndAutoEvaluationRun < ActiveRecord::Migration[8.0]
  def change
    create_enum :auto_evaluation_metric_name, %w[answer_relevancy answer_coherence context_relevancy faithfulness]

    create_table :auto_evaluation_metric_aggregates, id: :uuid do |t|
      t.enum :metric_name, enum_type: "auto_evaluation_metric_name", null: false
      t.float :score, null: false
      # The answer_id index must NOT be unique: an answer has one aggregate per
      # metric, so a unique index on answer_id alone would reject the second
      # metric. Uniqueness is enforced by the composite index below; this plain
      # index serves lookups and cascading deletes by answer.
      t.references :answer, type: :uuid, null: false, foreign_key: { on_delete: :cascade }, index: true
      t.timestamps
    end

    # Guarantees at most one aggregate per metric per answer.
    add_index :auto_evaluation_metric_aggregates, %i[metric_name answer_id],
              unique: true,
              name: "index_metric_aggregate_on_metric_name_and_answer_id"

    create_table :auto_evaluation_metric_runs, id: :uuid do |t|
      t.float :score, null: false
      t.string :reason, null: false
      t.jsonb :llm_responses
      t.jsonb :metrics
      t.references :auto_evaluation_metric_aggregate, type: :uuid, null: false, foreign_key: { on_delete: :cascade }
      t.timestamps
    end
  end
end

db/schema.rb

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#
1111
# It's strongly recommended that you check this file into your version control system.
1212

13-
ActiveRecord::Schema[8.0].define(version: 2025_12_15_161508) do
13+
ActiveRecord::Schema[8.0].define(version: 2025_12_16_092915) do
1414
# These are extensions that must be enabled in order to support this database
1515
enable_extension "citext"
1616
enable_extension "pg_catalog.plpgsql"
@@ -20,6 +20,7 @@
2020
# Note that some types may not work with other database engines. Be careful if changing database.
2121
create_enum "answer_completeness", ["complete", "partial", "no_information"]
2222
create_enum "answer_status", ["answered", "clarification", "error_answer_guardrails", "error_answer_service_error", "error_jailbreak_guardrails", "error_non_specific", "error_question_routing_guardrails", "error_timeout", "guardrails_answer", "guardrails_forbidden_terms", "guardrails_jailbreak", "guardrails_question_routing", "unanswerable_llm_cannot_answer", "unanswerable_no_govuk_content", "unanswerable_question_routing"]
23+
create_enum "auto_evaluation_metric_name", ["answer_relevancy", "answer_coherence", "context_relevancy", "faithfulness"]
2324
create_enum "conversation_source", ["web", "api"]
2425
create_enum "guardrails_status", ["pass", "fail", "error"]
2526
create_enum "question_routing_label", ["about_mps", "advice_opinions_predictions", "character_fun", "genuine_rag", "gov_transparency", "greetings", "harmful_vulgar_controversy", "multi_questions", "negative_acknowledgement", "non_english", "personal_info", "positive_acknowledgement", "vague_acronym_grammar", "unclear_intent", "requires_account_data", "about_chat"]
@@ -103,6 +104,27 @@
103104
t.index ["question_id"], name: "index_answers_on_question_id", unique: true
104105
end
105106

107+
# answer_id is indexed without uniqueness; the composite index on
# (metric_name, answer_id) enforces one aggregate per metric per answer.
create_table "auto_evaluation_metric_aggregates", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t|
  t.enum "metric_name", null: false, enum_type: "auto_evaluation_metric_name"
  t.float "score", null: false
  t.uuid "answer_id", null: false
  t.datetime "created_at", null: false
  t.datetime "updated_at", null: false
  t.index ["answer_id"], name: "index_auto_evaluation_metric_aggregates_on_answer_id"
  t.index ["metric_name", "answer_id"], name: "index_metric_aggregate_on_metric_name_and_answer_id", unique: true
end
116+
117+
# Individual evaluation runs; each row references its aggregate through
# auto_evaluation_metric_aggregate_id (non-null, indexed).
create_table "auto_evaluation_metric_runs", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t|
118+
t.float "score", null: false
119+
t.string "reason", null: false
120+
t.jsonb "llm_responses"
121+
t.jsonb "metrics"
122+
t.uuid "auto_evaluation_metric_aggregate_id", null: false
123+
t.datetime "created_at", null: false
124+
t.datetime "updated_at", null: false
125+
t.index ["auto_evaluation_metric_aggregate_id"], name: "idx_on_auto_evaluation_metric_aggregate_id_c079941d7e"
126+
end
127+
106128
create_table "base_path_versions", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t|
107129
t.string "base_path", null: false
108130
t.bigint "payload_version", default: 0, null: false
@@ -172,11 +194,23 @@
172194
t.datetime "updated_at", null: false
173195
end
174196

197+
# NOTE(review): this table is not created by the
# AddAutoEvaluationMetricAndAutoEvaluationRun migration in this change —
# confirm it was meant to be part of this schema dump and did not leak in
# from local database state.
create_table "solid_cable_messages", force: :cascade do |t|
198+
t.binary "channel", null: false
199+
t.binary "payload", null: false
200+
t.datetime "created_at", null: false
201+
t.bigint "channel_hash", null: false
202+
t.index ["channel"], name: "index_solid_cable_messages_on_channel"
203+
t.index ["channel_hash"], name: "index_solid_cable_messages_on_channel_hash"
204+
t.index ["created_at"], name: "index_solid_cable_messages_on_created_at"
205+
end
206+
175207
add_foreign_key "answer_feedback", "answers", on_delete: :cascade
176208
add_foreign_key "answer_sources", "answer_source_chunks", on_delete: :restrict
177209
add_foreign_key "answer_sources", "answers", on_delete: :cascade
178210
add_foreign_key "answer_topics", "answers", on_delete: :cascade
179211
add_foreign_key "answers", "questions", on_delete: :cascade
212+
add_foreign_key "auto_evaluation_metric_aggregates", "answers", on_delete: :cascade
213+
add_foreign_key "auto_evaluation_metric_runs", "auto_evaluation_metric_aggregates", on_delete: :cascade
180214
add_foreign_key "conversations", "signon_users", on_delete: :restrict
181215
add_foreign_key "questions", "conversations"
182216
add_foreign_key "settings_audits", "signon_users", column: "user_id", on_delete: :nullify
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Factory for an aggregate evaluation score attached to an answer.
FactoryBot.define do
  factory :auto_evaluation_metric_aggregate do
    answer
    # The column is metric_name (a NOT NULL enum); the model has no
    # metric_type attribute, so setting that would raise NoMethodError.
    metric_name { "answer_relevancy" }
    score { 0.5 }
  end
end
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Factory for a single evaluation run; its aggregate is built with the
# :auto_evaluation_metric_aggregate factory.
FactoryBot.define do
  factory :auto_evaluation_metric_run do
    metric_aggregate factory: :auto_evaluation_metric_aggregate
    reason { "The answer was okay." }
    score { 0.5 }
  end
end

0 commit comments

Comments
 (0)