Skip to content

Commit 18af825

Browse files
committed
Expose answer relevancy metrics in admin UI
I've added an additional tab for answer relevancy metrics in the admin interface on the question show page. My thinking is that if we don't split the metrics out into their own tabs, the page will get incredibly noisy; separate tabs make it easier to navigate. Because of this, I've renamed the analysis tab to topics.
1 parent 1d59eb5 commit 18af825

8 files changed

Lines changed: 203 additions & 39 deletions

File tree

app/controllers/admin/questions_controller.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ def index
77
def show
88
question_scope = Question.includes(
99
conversation: :signon_user,
10-
answer: [{ sources: :chunk }, :feedback, :topics],
10+
answer: [{ sources: :chunk }, :feedback, :topics, { answer_relevancy_aggregate: :runs }],
1111
)
1212

1313
@question = question_scope.find(params[:id])

app/models/answer.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,4 +197,8 @@ def group_used_answer_sources_by_base_path
197197
}
198198
end
199199
end
200+
201+
def has_analysis?
202+
topics.present? || answer_relevancy_aggregate.present?
203+
end
200204
end
Lines changed: 45 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,51 @@
1-
<%= render "govuk_publishing_components/components/summary_list", {
2-
items: [
3-
{
4-
field: "Primary topic",
5-
value: topics.primary_topic&.humanize,
6-
},
7-
{
8-
field: "Secondary topic",
9-
value: topics.secondary_topic&.humanize,
10-
},
11-
],
12-
} %>
1+
<% if topics.present? %>
2+
<%= render "govuk_publishing_components/components/summary_list", {
3+
title: "Topics",
4+
heading_size: "l",
5+
heading_level: 2,
6+
margin_bottom: 4,
7+
items: [
8+
{
9+
field: "Primary topic",
10+
value: topics.primary_topic.humanize,
11+
},
12+
{
13+
field: "Secondary topic",
14+
value: topics.secondary_topic&.humanize,
15+
},
16+
],
17+
} %>
1318

14-
<% if topics.llm_responses.present? %>
15-
<%= render "govuk_publishing_components/components/details", {
16-
title: "LLM responses",
17-
} do %>
18-
<% topics.llm_responses.each do |namespace, response| %>
19-
<h3 class="govuk-heading-m"><%= namespace %></h3>
20-
<p class="govuk-body">
21-
<%= render("components/code_snippet", content: JSON.pretty_generate(response)) %>
22-
</p>
19+
<% if topics.llm_responses.present? %>
20+
<%= render "govuk_publishing_components/components/details", {
21+
title: "LLM responses",
22+
} do %>
23+
<% topics.llm_responses.each do |namespace, response| %>
24+
<h3 class="govuk-heading-m"><%= namespace %></h3>
25+
<p class="govuk-body">
26+
<%= render("components/code_snippet", content: JSON.pretty_generate(response)) %>
27+
</p>
28+
<% end %>
2329
<% end %>
2430
<% end %>
25-
<% end %>
2631

27-
<% if topics.metrics.present? %>
28-
<%= render "govuk_publishing_components/components/details", {
29-
title: "Metrics",
30-
} do %>
31-
<%= render "govuk_publishing_components/components/summary_list", {
32-
items: topics.metrics.map do |metric, value|
33-
{
34-
field: metric,
35-
value: value,
36-
}
37-
end,
38-
borderless: true,
39-
} %>
32+
<% if topics.metrics.present? %>
33+
<%= render "govuk_publishing_components/components/details", {
34+
title: "Metrics",
35+
} do %>
36+
<%= render "govuk_publishing_components/components/summary_list", {
37+
items: topics.metrics.map do |metric, value|
38+
{
39+
field: metric,
40+
value: value,
41+
}
42+
end,
43+
borderless: true,
44+
} %>
45+
<% end %>
4046
<% end %>
4147
<% end %>
48+
49+
<% if answer_relevancy_aggregate.present? %>
50+
<%= render "generic_aggregate_auto_evaluation", aggregate: answer_relevancy_aggregate, title: "Answer relevancy" %>
51+
<% end %>
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
<%
2+
items = [
3+
{
4+
field: "Mean score",
5+
value: aggregate.mean_score,
6+
},
7+
]
8+
9+
items += aggregate.runs.flat_map.with_index(1) do |run, index|
10+
[
11+
{ field: "Run #{index} score", value: run.score },
12+
{ field: "Run #{index} reason", value: run.reason },
13+
]
14+
end
15+
%>
16+
17+
<%= render "govuk_publishing_components/components/summary_list", {
18+
title:,
19+
heading_level: 2,
20+
margin_bottom: 4,
21+
heading_size: "l",
22+
items: items,
23+
} %>
24+
25+
<%= render "govuk_publishing_components/components/details", {
26+
title: "LLM responses",
27+
} do %>
28+
<% aggregate.runs.each.with_index(1) do |run, index| %>
29+
<%= render "govuk_publishing_components/components/heading", {
30+
text: "Run #{index}",
31+
font_size: "m",
32+
heading_level: 2,
33+
margin_bottom: 4,
34+
} %>
35+
36+
<% run.llm_responses.each do |namespace, response| %>
37+
<%= render "govuk_publishing_components/components/heading", {
38+
text: namespace.capitalize,
39+
font_size: "s",
40+
heading_level: 3,
41+
} %>
42+
43+
<p class="govuk-body">
44+
<%= render("components/code_snippet", content: JSON.pretty_generate(response)) %>
45+
</p>
46+
<% end %>
47+
<% end %>
48+
<% end %>
49+
50+
<%= render "govuk_publishing_components/components/details", {
51+
title: "Metrics",
52+
} do %>
53+
<% aggregate.runs.each.with_index(1) do |run, index| %>
54+
<%= render "govuk_publishing_components/components/heading", {
55+
text: "Run #{index}",
56+
font_size: "m",
57+
heading_level: 2,
58+
} %>
59+
60+
<% run.metrics.sort.each do |namespace, metrics| %>
61+
<%= render "govuk_publishing_components/components/summary_list", {
62+
title: namespace.capitalize,
63+
items: metrics.map do |metric, value|
64+
{
65+
field: metric,
66+
value: value,
67+
}
68+
end,
69+
borderless: true,
70+
heading_size: "s",
71+
margin_bottom: 6,
72+
} %>
73+
<% end %>
74+
<% end %>
75+
<% end %>

app/views/admin/questions/show.html.erb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,9 @@ content_for(:active_navigation_item, admin_questions_path)
3939
content: render(
4040
"analysis_tab",
4141
topics: @answer.topics,
42+
answer_relevancy_aggregate: @answer.answer_relevancy_aggregate,
4243
),
43-
} if @answer&.topics.present?
44+
} if @answer&.has_analysis?
4445
%>
4546

4647
<div class="govuk-grid-row">

spec/models/answer_spec.rb

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,4 +353,23 @@
353353
end
354354
end
355355
end
356+
357+
describe "#has_analysis?" do
358+
it "returns true if topics are present" do
359+
answer = build(:answer, :with_topics)
360+
expect(answer.has_analysis?).to be(true)
361+
end
362+
363+
it "returns true if answer_relevancy_aggregate is present" do
364+
answer = build(
365+
:answer, answer_relevancy_aggregate: build(:answer_relevancy_aggregate)
366+
)
367+
expect(answer.has_analysis?).to be(true)
368+
end
369+
370+
it "returns false if no analysis is present" do
371+
answer = build(:answer)
372+
expect(answer.has_analysis?).to be(false)
373+
end
374+
end
356375
end

spec/requests/admin/questions_spec.rb

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@
284284
.and have_content('"id": "call_dqGpbb39drQDafLsjDLtnbGD"')
285285
end
286286

287-
it "doesn't render the tabs component when there is no analysis" do
287+
it "doesn't render the tabs component when there are no topics or auto-eval aggregate data" do
288288
question = create(:question, :with_answer)
289289
get admin_show_question_path(question)
290290

@@ -353,14 +353,60 @@
353353
.to have_selector("#details-tab", text: question.message)
354354
end
355355

356-
it "renders the topics in the analysis tab" do
356+
it "renders the topics in the topics tab" do
357357
get admin_show_question_path(question)
358358

359359
expect(response.body)
360360
.to have_selector("#analysis-tab", text: topics.primary_topic.capitalize)
361361
.and have_selector("#analysis-tab", text: topics.secondary_topic.capitalize)
362362
end
363363
end
364+
365+
context "when answer relevancy aggregate data is present" do
366+
let(:run) do
367+
create(
368+
:answer_relevancy_run,
369+
score: 0.85,
370+
reason: "The answer is relevant to the question.",
371+
llm_responses: {
372+
"statements" => { "statements" => ["The answer is relevant."] },
373+
"verdicts" => { "verdicts" => [{ "verdict" => "yes" }] },
374+
},
375+
metrics: {
376+
"statements" => { duration: 1.55556 },
377+
"verdicts" => { duration: 1.44445 },
378+
},
379+
)
380+
end
381+
let!(:aggregate) do
382+
create(
383+
:answer_relevancy_aggregate,
384+
runs: [run],
385+
)
386+
end
387+
let(:question) { aggregate.answer.question }
388+
389+
it "renders the answer relevancy metrics and LLM responses" do
390+
get admin_show_question_path(question)
391+
392+
expect(response.body.squish)
393+
.to have_content("Answer relevancy")
394+
.and have_content("Run 1 score")
395+
.and have_content("0.85")
396+
.and have_content("Run 1 reason")
397+
.and have_content("The answer is relevant to the question.")
398+
399+
expect(response.body.squish)
400+
.to have_content("Statements")
401+
.and have_content(/duration.*1\.55556/)
402+
.and have_content("Verdicts")
403+
.and have_content(/duration.*1\.44445/)
404+
405+
expect(response.body.squish)
406+
.to have_content('{ "statements": [ "The answer is relevant." ] }')
407+
.and have_content('{ "verdicts": [ { "verdict": "yes" } ] }')
408+
end
409+
end
364410
end
365411

366412
def expect_unprocessable_content_with_date_errors

spec/system/user_conversation_activity_is_shown_in_admin_spec.rb

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
when_i_click_the_analysis_tab
2020
then_i_see_the_topics_have_been_tagged
21+
and_i_see_the_answer_relevancy_statistics
2122
and_i_dont_see_the_answer
2223
end
2324

@@ -105,4 +106,12 @@ def then_i_see_the_topics_have_been_tagged
105106
def and_i_dont_see_the_answer
106107
expect(page).not_to have_content(@answer)
107108
end
109+
110+
def when_i_click_the_answer_relevancy_tab
111+
click_link "Answer relevancy"
112+
end
113+
114+
def and_i_see_the_answer_relevancy_statistics
115+
expect(page).to have_content(/Mean score.*1.0/)
116+
end
108117
end

0 commit comments

Comments
 (0)