Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion app/controllers/admin/questions_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def index
def show
question_scope = Question.includes(
conversation: :signon_user,
answer: [{ sources: :chunk }, :feedback, :topics],
answer: [{ sources: :chunk }, :feedback, :topics, { answer_relevancy_aggregate: :runs }],
)

@question = question_scope.find(params[:id])
Expand Down
24 changes: 24 additions & 0 deletions app/jobs/answer_analysis/answer_relevancy_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
module AnswerAnalysis
  # Background job that scores how relevant an answer is to its question.
  #
  # The evaluation is run NUMBER_OF_RUNS times and the results are stored as a
  # single aggregate (with one run record per evaluation) via
  # AnswerRelevancyAggregate.create_mean_aggregate_and_score_runs.
  class AnswerRelevancyJob < BaseJob
    # @param answer_id the id of the Answer to evaluate
    #
    # Does nothing when the answer is not eligible for analysis, and only logs
    # a warning when an aggregate already exists for the answer.
    def perform(answer_id)
      return unless eligible_for_answer_analysis?(answer_id)

      answer = Answer.includes(:question, :answer_relevancy_aggregate).find(answer_id)

      if answer.answer_relevancy_aggregate.present?
        return logger.warn(aggregate_exists_warn_message(answer.id))
      end

      results = Array.new(NUMBER_OF_RUNS) { AutoEvaluation::AnswerRelevancy.call(answer) }
      store_results(answer, results)
    end

    private

    # Persists the aggregate and its runs. A RecordNotUnique raised while
    # saving is logged as "already evaluated" instead of propagating.
    def store_results(answer, results)
      AnswerAnalysis::AnswerRelevancyAggregate.create_mean_aggregate_and_score_runs(answer, results)
    rescue ActiveRecord::RecordNotUnique
      logger.warn(aggregate_exists_warn_message(answer.id))
    end

    # Log message used whenever an aggregate is found to exist already.
    def aggregate_exists_warn_message(answer_id)
      "Answer #{answer_id} has already been evaluated for relevancy"
    end
  end
end
19 changes: 19 additions & 0 deletions app/jobs/answer_analysis/base_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
module AnswerAnalysis
  # Common behaviour for answer analysis jobs: shared constants, a retry
  # policy for AWS service errors and an eligibility check for answers.
  class BaseJob < ApplicationJob
    # How many times a subclass may repeat an evaluation for one answer.
    NUMBER_OF_RUNS = 3
    # Upper bound on attempts when an AWS service error is raised.
    MAX_RETRIES = 5
    retry_on Aws::Errors::ServiceError, wait: 1.minute, attempts: MAX_RETRIES

    private

    # Returns true when an answer with the given id exists in the
    # `status_answered` scope; otherwise logs a warning and returns false.
    def eligible_for_answer_analysis?(answer_id)
      return true if Answer.status_answered.exists?(id: answer_id)

      logger.warn("Couldn't find an answer #{answer_id} that was eligible for auto-evaluation")
      false
    end
  end
end
2 changes: 1 addition & 1 deletion app/jobs/answer_analysis/tag_topics_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def perform(answer_id)
return logger.info("Answer #{answer_id} is not eligible for topic analysis")
end

result = AutoEvaluation::TopicTagger.call(answer.rephrased_question || answer.question.message)
result = AutoEvaluation::TopicTagger.call(answer.question_used)

topics = answer.build_topics(
primary_topic: result.primary_topic,
Expand Down
7 changes: 6 additions & 1 deletion app/jobs/compose_answer_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ def perform(question_id)
logger.warn("Already an answer created for #{question_id}")
end

AnswerAnalysis::TagTopicsJob.perform_later(answer.id) if answer.persisted?
if answer.persisted?
Comment thread
davidgisbey marked this conversation as resolved.
# TODO: Once we've added a few metrics we should move these to a single job that
# kicks off all analysis jobs.
AnswerAnalysis::TagTopicsJob.perform_later(answer.id)
AnswerAnalysis::AnswerRelevancyJob.perform_later(answer.id)
end
end
end
9 changes: 9 additions & 0 deletions app/models/answer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def self.response_for_question_routing_label(label)
has_many :sources, -> { order(relevancy: :asc) }, class_name: "AnswerSource"
has_one :feedback, class_name: "AnswerFeedback"
has_one :topics, class_name: "AnswerAnalysis::Topics"
has_one :answer_relevancy_aggregate, class_name: "AnswerAnalysis::AnswerRelevancyAggregate"

enum :status,
{
Expand Down Expand Up @@ -196,4 +197,12 @@ def group_used_answer_sources_by_base_path
}
end
end

# True when at least one kind of analysis is attached to this answer:
# either the topics record or the answer relevancy aggregate is present.
# The aggregate is only consulted when topics is not present.
def has_analysis?
  return true if topics.present?

  answer_relevancy_aggregate.present?
end

# The question text this answer was produced for: the rephrased question
# when one is set, otherwise the message of the associated question.
def question_used
  rephrased = rephrased_question
  return rephrased if rephrased

  question.message
end
end
13 changes: 13 additions & 0 deletions app/models/answer_analysis/answer_relevancy_aggregate.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
module AnswerAnalysis
  # Aggregate answer relevancy result for a single answer.
  #
  # Records are created through
  # AutoEvaluationResultsCreatable.create_mean_aggregate_and_score_runs, which
  # sets the mean score and builds the individual runs.
  class AnswerRelevancyAggregate < ApplicationRecord
    include AutoEvaluationResultsCreatable

    self.table_name = "answer_analysis_answer_relevancy_aggregates"

    belongs_to :answer

    # Individual evaluation runs, oldest first. The foreign key is spelled out
    # because it is named after the full table name rather than the class.
    has_many :runs,
             -> { order(:created_at) },
             class_name: "AnswerAnalysis::AnswerRelevancyRun",
             foreign_key: :answer_analysis_answer_relevancy_aggregate_id
  end
end
11 changes: 11 additions & 0 deletions app/models/answer_analysis/answer_relevancy_run.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
module AnswerAnalysis
  # One evaluation run belonging to an AnswerRelevancyAggregate.
  class AnswerRelevancyRun < ApplicationRecord
    include LlmCallsRecordable

    self.table_name = "answer_analysis_answer_relevancy_runs"

    # The parent aggregate; class and foreign key are given explicitly because
    # neither can be derived from the association name `aggregate`.
    belongs_to(
      :aggregate,
      class_name: "AnswerAnalysis::AnswerRelevancyAggregate",
      foreign_key: :answer_analysis_answer_relevancy_aggregate_id,
    )
  end
end
27 changes: 27 additions & 0 deletions app/models/concerns/auto_evaluation_results_creatable.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Adds a class-level factory to aggregate models that stores the mean of
# several auto-evaluation results together with one run record per result.
module AutoEvaluationResultsCreatable
  extend ActiveSupport::Concern

  class_methods do
    # Builds an aggregate for `answer` whose mean_score is the mean of the
    # result scores (converted with `to_d`), builds one run per result with its
    # score, reason, LLM responses and metrics, then saves everything with
    # `save!`.
    #
    # @param answer the answer the aggregate belongs to
    # @param results objects responding to score, reason, llm_responses and metrics
    # @return the result of `save!` on the aggregate
    def create_mean_aggregate_and_score_runs(answer, results)
      scores = results.map { |result| result.score.to_d }
      aggregate = new(answer:, mean_score: scores.sum / results.size)

      results.each do |result|
        run = aggregate.runs.build(aggregate:, score: result.score, reason: result.reason)

        # Keys are stringified before being handed to the run.
        result.llm_responses.stringify_keys.each_pair do |name, llm_response|
          run.assign_llm_response(name, llm_response)
        end

        result.metrics.stringify_keys.each_pair do |name, metrics|
          run.assign_metrics(name, metrics)
        end
      end

      aggregate.save!
    end
  end
end
80 changes: 45 additions & 35 deletions app/views/admin/questions/_analysis_tab.html.erb
Original file line number Diff line number Diff line change
@@ -1,41 +1,51 @@
<%= render "govuk_publishing_components/components/summary_list", {
items: [
{
field: "Primary topic",
value: topics.primary_topic&.humanize,
},
{
field: "Secondary topic",
value: topics.secondary_topic&.humanize,
},
],
} %>
<% if topics.present? %>
<%= render "govuk_publishing_components/components/summary_list", {
title: "Topics",
heading_size: "l",
heading_level: 2,
margin_bottom: 4,
items: [
{
field: "Primary topic",
value: topics.primary_topic.humanize,
},
{
field: "Secondary topic",
value: topics.secondary_topic&.humanize,
},
],
} %>

<% if topics.llm_responses.present? %>
<%= render "govuk_publishing_components/components/details", {
title: "LLM responses",
} do %>
<% topics.llm_responses.each do |namespace, response| %>
<h3 class="govuk-heading-m"><%= namespace %></h3>
<p class="govuk-body">
<%= render("components/code_snippet", content: JSON.pretty_generate(response)) %>
</p>
<% if topics.llm_responses.present? %>
<%= render "govuk_publishing_components/components/details", {
title: "LLM responses",
} do %>
<% topics.llm_responses.each do |namespace, response| %>
<h3 class="govuk-heading-m"><%= namespace %></h3>
<p class="govuk-body">
<%= render("components/code_snippet", content: JSON.pretty_generate(response)) %>
</p>
<% end %>
<% end %>
<% end %>
<% end %>

<% if topics.metrics.present? %>
<%= render "govuk_publishing_components/components/details", {
title: "Metrics",
} do %>
<%= render "govuk_publishing_components/components/summary_list", {
items: topics.metrics.map do |metric, value|
{
field: metric,
value: value,
}
end,
borderless: true,
} %>
<% if topics.metrics.present? %>
<%= render "govuk_publishing_components/components/details", {
title: "Metrics",
} do %>
<%= render "govuk_publishing_components/components/summary_list", {
items: topics.metrics.map do |metric, value|
{
field: metric,
value: value,
}
end,
borderless: true,
} %>
<% end %>
<% end %>
<% end %>

<% if answer_relevancy_aggregate.present? %>
<%= render "generic_aggregate_auto_evaluation", aggregate: answer_relevancy_aggregate, title: "Answer relevancy" %>
<% end %>
Comment thread
davidgisbey marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
<%
  # Shared summary of an aggregate auto-evaluation.
  # Locals used: `aggregate` (read for mean_score and runs) and `title`
  # (heading of the summary list).
  items = [
    {
      field: "Mean score",
      value: aggregate.mean_score,
    },
  ]

  # Each run contributes a score row and a reason row, numbered from 1.
  items += aggregate.runs.flat_map.with_index(1) do |run, index|
    [
      { field: "Run #{index} score", value: run.score },
      { field: "Run #{index} reason", value: run.reason },
    ]
  end
%>

<%= render "govuk_publishing_components/components/summary_list", {
  title:,
  heading_level: 2,
  margin_bottom: 4,
  heading_size: "l",
  items: items,
} %>

<%= render "govuk_publishing_components/components/details", {
  title: "LLM responses",
} do %>
  <% aggregate.runs.each.with_index(1) do |run, index| %>
    <%= render "govuk_publishing_components/components/heading", {
      text: "Run #{index}",
      font_size: "m",
      heading_level: 2,
      margin_bottom: 4,
    } %>

    <%# llm_responses is a nullable column; to_h turns nil into {} so the loop is skipped instead of raising. %>
    <% run.llm_responses.to_h.each do |namespace, response| %>
      <%= render "govuk_publishing_components/components/heading", {
        text: namespace.capitalize,
        font_size: "s",
        heading_level: 3,
      } %>

      <p class="govuk-body">
        <%= render("components/code_snippet", content: JSON.pretty_generate(response)) %>
      </p>
    <% end %>
  <% end %>
<% end %>

<%= render "govuk_publishing_components/components/details", {
  title: "Metrics",
} do %>
  <% aggregate.runs.each.with_index(1) do |run, index| %>
    <%= render "govuk_publishing_components/components/heading", {
      text: "Run #{index}",
      font_size: "m",
      heading_level: 2,
    } %>

    <%# metrics is a nullable column; to_h turns nil into {} so sort/each do not raise. %>
    <% run.metrics.to_h.sort.each do |namespace, metrics| %>
      <%= render "govuk_publishing_components/components/summary_list", {
        title: namespace.capitalize,
        items: metrics.map do |metric, value|
          {
            field: metric,
            value: value,
          }
        end,
        borderless: true,
        heading_size: "s",
        margin_bottom: 6,
      } %>
    <% end %>
  <% end %>
<% end %>
3 changes: 2 additions & 1 deletion app/views/admin/questions/show.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,9 @@ content_for(:active_navigation_item, admin_questions_path)
content: render(
"analysis_tab",
topics: @answer.topics,
answer_relevancy_aggregate: @answer.answer_relevancy_aggregate,
),
} if @answer&.topics.present?
} if @answer&.has_analysis?
%>

<div class="govuk-grid-row">
Expand Down
18 changes: 18 additions & 0 deletions db/migrate/20251216092915_add_answer_relevancy_tables.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Creates the two tables backing answer relevancy auto-evaluation: one
# aggregate row per answer and the individual runs belonging to an aggregate.
class AddAnswerRelevancyTables < ActiveRecord::Migration[8.0]
  def change
    create_table :answer_analysis_answer_relevancy_aggregates, id: :uuid do |table|
      table.decimal :mean_score, null: false
      # The unique index allows at most one aggregate per answer; rows are
      # removed with their answer.
      table.references :answer,
                       type: :uuid,
                       null: false,
                       foreign_key: { on_delete: :cascade },
                       index: { unique: true }
      table.timestamps
    end

    create_table :answer_analysis_answer_relevancy_runs, id: :uuid do |table|
      table.decimal :score, null: false
      table.string :reason, null: false
      table.jsonb :llm_responses
      table.jsonb :metrics
      # Runs are removed with their aggregate.
      table.references :answer_analysis_answer_relevancy_aggregate,
                       type: :uuid,
                       null: false,
                       foreign_key: { on_delete: :cascade }
      table.timestamps
    end
  end
end
23 changes: 22 additions & 1 deletion db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema[8.0].define(version: 2025_12_15_161508) do
ActiveRecord::Schema[8.0].define(version: 2025_12_16_092915) do
# These are extensions that must be enabled in order to support this database
enable_extension "citext"
enable_extension "pg_catalog.plpgsql"
Expand All @@ -24,6 +24,25 @@
create_enum "guardrails_status", ["pass", "fail", "error"]
create_enum "question_routing_label", ["about_mps", "advice_opinions_predictions", "character_fun", "genuine_rag", "gov_transparency", "greetings", "harmful_vulgar_controversy", "multi_questions", "negative_acknowledgement", "non_english", "personal_info", "positive_acknowledgement", "vague_acronym_grammar", "unclear_intent", "requires_account_data", "about_chat"]

create_table "answer_analysis_answer_relevancy_aggregates", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t|
t.decimal "mean_score", null: false
t.uuid "answer_id", null: false
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["answer_id"], name: "index_answer_analysis_answer_relevancy_aggregates_on_answer_id", unique: true
end

create_table "answer_analysis_answer_relevancy_runs", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t|
t.decimal "score", null: false
t.string "reason", null: false
t.jsonb "llm_responses"
t.jsonb "metrics"
t.uuid "answer_analysis_answer_relevancy_aggregate_id", null: false
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["answer_analysis_answer_relevancy_aggregate_id"], name: "idx_on_answer_analysis_answer_relevancy_aggregate_i_d9d79a637a"
end

create_table "answer_analysis_topics", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t|
t.string "primary_topic"
t.string "secondary_topic"
Expand Down Expand Up @@ -170,6 +189,8 @@
t.datetime "updated_at", null: false
end

add_foreign_key "answer_analysis_answer_relevancy_aggregates", "answers", on_delete: :cascade
add_foreign_key "answer_analysis_answer_relevancy_runs", "answer_analysis_answer_relevancy_aggregates", on_delete: :cascade
add_foreign_key "answer_analysis_topics", "answers", on_delete: :cascade
add_foreign_key "answer_feedback", "answers", on_delete: :cascade
add_foreign_key "answer_sources", "answer_source_chunks", on_delete: :restrict
Expand Down
Loading