Skip to content

Commit f66f79e

Browse files
authored
Merge pull request #713 from alphagov/add-metrics-data-models-and-integrate-into-workflow
Add answer relevancy models and integrate into analysis workflow
2 parents 95df9b2 + 4cabe13 commit f66f79e

39 files changed

Lines changed: 677 additions & 138 deletions

app/controllers/admin/questions_controller.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ def index
77
def show
88
question_scope = Question.includes(
99
conversation: :signon_user,
10-
answer: [{ sources: :chunk }, :feedback, :topics],
10+
answer: [{ sources: :chunk }, :feedback, :topics, { answer_relevancy_aggregate: :runs }],
1111
)
1212

1313
@question = question_scope.find(params[:id])
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
module AnswerAnalysis
  # Runs AutoEvaluation::AnswerRelevancy against an answer NUMBER_OF_RUNS times
  # and hands the collected results to AnswerRelevancyAggregate for persistence.
  class AnswerRelevancyJob < BaseJob
    # Does nothing (beyond logging a warning) when the answer is not eligible
    # for analysis or already has a relevancy aggregate.
    def perform(answer_id)
      return unless eligible_for_answer_analysis?(answer_id)

      answer = Answer.includes(:question, :answer_relevancy_aggregate).find(answer_id)

      if answer.answer_relevancy_aggregate.present?
        return logger.warn(aggregate_exists_warn_message(answer.id))
      end

      results = Array.new(NUMBER_OF_RUNS) { AutoEvaluation::AnswerRelevancy.call(answer) }

      store_results(answer, results)
    end

  private

    # Persists the aggregate and its runs. A RecordNotUnique here is logged
    # rather than raised.
    # NOTE(review): presumably this covers a concurrent job having created the
    # aggregate after the check in #perform - confirm a unique index exists.
    def store_results(answer, results)
      AnswerAnalysis::AnswerRelevancyAggregate.create_mean_aggregate_and_score_runs(answer, results)
    rescue ActiveRecord::RecordNotUnique
      logger.warn(aggregate_exists_warn_message(answer.id))
    end

    def aggregate_exists_warn_message(answer_id)
      "Answer #{answer_id} has already been evaluated for relevancy"
    end
  end
end
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
module AnswerAnalysis
  # Common parent for answer analysis jobs: defines how many evaluation runs to
  # perform, the retry policy for AWS service errors, and an eligibility check.
  class BaseJob < ApplicationJob
    NUMBER_OF_RUNS = 3
    MAX_RETRIES = 5

    retry_on Aws::Errors::ServiceError, wait: 1.minute, attempts: MAX_RETRIES

  private

    # True when an Answer with this id exists within the `status_answered`
    # scope; otherwise logs a warning and returns false.
    def eligible_for_answer_analysis?(answer_id)
      return true if Answer.status_answered.exists?(id: answer_id)

      logger.warn("Couldn't find an answer #{answer_id} that was eligible for auto-evaluation")
      false
    end
  end
end

app/jobs/answer_analysis/tag_topics_job.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def perform(answer_id)
1212
return logger.info("Answer #{answer_id} is not eligible for topic analysis")
1313
end
1414

15-
result = AutoEvaluation::TopicTagger.call(answer.rephrased_question || answer.question.message)
15+
result = AutoEvaluation::TopicTagger.call(answer.question_used)
1616

1717
topics = answer.build_topics(
1818
primary_topic: result.primary_topic,

app/jobs/compose_answer_job.rb

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@ def perform(question_id)
1414
logger.warn("Already an answer created for #{question_id}")
1515
end
1616

17-
AnswerAnalysis::TagTopicsJob.perform_later(answer.id) if answer.persisted?
17+
if answer.persisted?
18+
# TODO: Once we've added a few metrics we should move these to a single job that
19+
# kicks off all analysis jobs.
20+
AnswerAnalysis::TagTopicsJob.perform_later(answer.id)
21+
AnswerAnalysis::AnswerRelevancyJob.perform_later(answer.id)
22+
end
1823
end
1924
end

app/models/answer.rb

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ def self.response_for_question_routing_label(label)
5555
has_many :sources, -> { order(relevancy: :asc) }, class_name: "AnswerSource"
5656
has_one :feedback, class_name: "AnswerFeedback"
5757
has_one :topics, class_name: "AnswerAnalysis::Topics"
58+
has_one :answer_relevancy_aggregate, class_name: "AnswerAnalysis::AnswerRelevancyAggregate"
5859

5960
enum :status,
6061
{
@@ -196,4 +197,12 @@ def group_used_answer_sources_by_base_path
196197
}
197198
end
198199
end
200+
201+
# Whether any analysis record (topics or an answer relevancy aggregate) is
# present for this answer.
def has_analysis?
  return true if topics.present?

  answer_relevancy_aggregate.present?
end
204+
205+
# The rephrased question when one is set, otherwise the message of the
# associated question.
def question_used
  return rephrased_question if rephrased_question

  question.message
end
199208
end
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
module AnswerAnalysis
  # Aggregate record for the answer relevancy evaluation of a single answer.
  # It owns the individual evaluation runs, returned oldest first.
  class AnswerRelevancyAggregate < ApplicationRecord
    include AutoEvaluationResultsCreatable

    self.table_name = "answer_analysis_answer_relevancy_aggregates"

    belongs_to :answer

    has_many :runs,
             -> { order(created_at: :asc) },
             class_name: "AnswerAnalysis::AnswerRelevancyRun",
             foreign_key: :answer_analysis_answer_relevancy_aggregate_id
  end
end
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
module AnswerAnalysis
  # One answer relevancy evaluation run, belonging to an
  # AnswerRelevancyAggregate.
  class AnswerRelevancyRun < ApplicationRecord
    include LlmCallsRecordable

    self.table_name = "answer_analysis_answer_relevancy_runs"

    belongs_to(
      :aggregate,
      class_name: "AnswerAnalysis::AnswerRelevancyAggregate",
      foreign_key: :answer_analysis_answer_relevancy_aggregate_id,
    )
  end
end
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Adds a class-level factory to aggregate models that store the mean score of
# several auto-evaluation results alongside one "run" record per result.
module AutoEvaluationResultsCreatable
  extend ActiveSupport::Concern

  class_methods do
    # Builds an aggregate for +answer+ holding the mean of the result scores,
    # builds one run per result (score, reason, LLM responses and metrics) and
    # saves everything with a single `save!`.
    #
    # @param answer the record the aggregate belongs to
    # @param results [Array] objects responding to `score`, `reason`,
    #   `llm_responses` and `metrics`
    # @return the return value of `save!`
    # @raise [ArgumentError] when +results+ is empty, since no mean can be
    #   computed (previously this surfaced as a ZeroDivisionError)
    def create_mean_aggregate_and_score_runs(answer, results)
      raise ArgumentError, "results must contain at least one evaluation result" if results.empty?

      mean_score = results.sum { |result| result.score.to_d } / results.size
      aggregate = new(answer:, mean_score:)

      results.each do |result|
        # NOTE(review): `aggregate:` is passed explicitly, presumably because
        # the including model's scoped/custom-foreign-key `runs` association
        # gets no automatic inverse - confirm before removing.
        run = aggregate.runs.build(aggregate:, score: result.score, reason: result.reason)

        result.llm_responses.stringify_keys.each do |name, llm_response|
          run.assign_llm_response(name, llm_response)
        end

        result.metrics.stringify_keys.each do |name, metrics|
          run.assign_metrics(name, metrics)
        end
      end

      aggregate.save!
    end
  end
end
Lines changed: 45 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,51 @@
1-
<%= render "govuk_publishing_components/components/summary_list", {
2-
items: [
3-
{
4-
field: "Primary topic",
5-
value: topics.primary_topic&.humanize,
6-
},
7-
{
8-
field: "Secondary topic",
9-
value: topics.secondary_topic&.humanize,
10-
},
11-
],
12-
} %>
1+
<% if topics.present? %>
2+
<%= render "govuk_publishing_components/components/summary_list", {
3+
title: "Topics",
4+
heading_size: "l",
5+
heading_level: 2,
6+
margin_bottom: 4,
7+
items: [
8+
{
9+
field: "Primary topic",
10+
value: topics.primary_topic.humanize,
11+
},
12+
{
13+
field: "Secondary topic",
14+
value: topics.secondary_topic&.humanize,
15+
},
16+
],
17+
} %>
1318

14-
<% if topics.llm_responses.present? %>
15-
<%= render "govuk_publishing_components/components/details", {
16-
title: "LLM responses",
17-
} do %>
18-
<% topics.llm_responses.each do |namespace, response| %>
19-
<h3 class="govuk-heading-m"><%= namespace %></h3>
20-
<p class="govuk-body">
21-
<%= render("components/code_snippet", content: JSON.pretty_generate(response)) %>
22-
</p>
19+
<% if topics.llm_responses.present? %>
20+
<%= render "govuk_publishing_components/components/details", {
21+
title: "LLM responses",
22+
} do %>
23+
<% topics.llm_responses.each do |namespace, response| %>
24+
<h3 class="govuk-heading-m"><%= namespace %></h3>
25+
<p class="govuk-body">
26+
<%= render("components/code_snippet", content: JSON.pretty_generate(response)) %>
27+
</p>
28+
<% end %>
2329
<% end %>
2430
<% end %>
25-
<% end %>
2631

27-
<% if topics.metrics.present? %>
28-
<%= render "govuk_publishing_components/components/details", {
29-
title: "Metrics",
30-
} do %>
31-
<%= render "govuk_publishing_components/components/summary_list", {
32-
items: topics.metrics.map do |metric, value|
33-
{
34-
field: metric,
35-
value: value,
36-
}
37-
end,
38-
borderless: true,
39-
} %>
32+
<% if topics.metrics.present? %>
33+
<%= render "govuk_publishing_components/components/details", {
34+
title: "Metrics",
35+
} do %>
36+
<%= render "govuk_publishing_components/components/summary_list", {
37+
items: topics.metrics.map do |metric, value|
38+
{
39+
field: metric,
40+
value: value,
41+
}
42+
end,
43+
borderless: true,
44+
} %>
45+
<% end %>
4046
<% end %>
4147
<% end %>
48+
49+
<% if answer_relevancy_aggregate.present? %>
50+
<%= render "generic_aggregate_auto_evaluation", aggregate: answer_relevancy_aggregate, title: "Answer relevancy" %>
51+
<% end %>

0 commit comments

Comments
 (0)