Skip to content

Commit 5791f27

Browse files
committed
Expose answer relevancy metrics in admin UI
I've added an additional tab for answer relevancy metrics to the question show page in the admin interface. My thinking is that if we don't split the metrics out into their own tabs, the page will get incredibly noisy; separate tabs make it easier to navigate. Because of this, I've renamed the analysis tab to topics.
1 parent b0372c2 commit 5791f27

6 files changed

Lines changed: 180 additions & 39 deletions

File tree

app/controllers/admin/questions_controller.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ def index
77
def show
88
question_scope = Question.includes(
99
conversation: :signon_user,
10-
answer: [{ sources: :chunk }, :feedback, :topics],
10+
answer: [{ sources: :chunk }, :feedback, :topics, { answer_relevancy_aggregate: :runs }],
1111
)
1212

1313
@question = question_scope.find(params[:id])
Lines changed: 45 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,50 @@
1-
<%= render "govuk_publishing_components/components/summary_list", {
2-
items: [
3-
{
4-
field: "Primary topic",
5-
value: topics.primary_topic&.humanize,
6-
},
7-
{
8-
field: "Secondary topic",
9-
value: topics.secondary_topic&.humanize,
10-
},
11-
],
12-
} %>
1+
<% if topics.present? %>
2+
<%= render "govuk_publishing_components/components/summary_list", {
3+
title: "Topics",
4+
heading_size: "l",
5+
heading_level: 2,
6+
margin_bottom: 4,
7+
items: [
8+
{
9+
field: "Primary topic",
10+
value: topics.primary_topic.humanize,
11+
},
12+
{
13+
field: "Secondary topic",
14+
value: topics.secondary_topic&.humanize,
15+
},
16+
],
17+
} %>
1318

14-
<% if topics.llm_response.present? %>
15-
<%= render "govuk_publishing_components/components/details", {
16-
title: "LLM responses",
17-
} do %>
18-
<h3 class="govuk-heading-m">Topic tagger</h3>
19-
<p class="govuk-body">
20-
<%= render("components/code_snippet", content: JSON.pretty_generate(topics.llm_response)) %>
21-
</p>
19+
<% if topics.llm_response.present? %>
20+
<%= render "govuk_publishing_components/components/details", {
21+
title: "LLM responses",
22+
} do %>
23+
<h3 class="govuk-heading-m">Topic tagger</h3>
24+
<p class="govuk-body">
25+
<%= render("components/code_snippet", content: JSON.pretty_generate(topics.llm_response)) %>
26+
</p>
27+
<% end %>
2228
<% end %>
23-
<% end %>
2429

25-
<% if topics.metrics.present? %>
26-
<%= render "govuk_publishing_components/components/details", {
27-
title: "Metrics",
28-
} do %>
29-
<%= render "govuk_publishing_components/components/summary_list", {
30-
title: "Topic tagger",
31-
items: topics.metrics.map do |metric, value|
32-
{
33-
field: metric,
34-
value: value,
35-
}
36-
end,
37-
borderless: true,
38-
} %>
30+
<% if topics.metrics.present? %>
31+
<%= render "govuk_publishing_components/components/details", {
32+
title: "Metrics",
33+
} do %>
34+
<%= render "govuk_publishing_components/components/summary_list", {
35+
title: "Topic tagger",
36+
items: topics.metrics.map do |metric, value|
37+
{
38+
field: metric,
39+
value: value,
40+
}
41+
end,
42+
borderless: true,
43+
} %>
44+
<% end %>
3945
<% end %>
4046
<% end %>
47+
48+
<% if answer_relevancy_aggregate.present? %>
49+
<%= render "metric", aggregate: answer_relevancy_aggregate, title: "Answer relevancy" %>
50+
<% end %>
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
<%
2+
items = [
3+
{
4+
field: "Mean score",
5+
value: aggregate.mean_score,
6+
},
7+
]
8+
9+
items += aggregate.runs.flat_map.with_index(1) do |run, index|
10+
[
11+
{ field: "Run #{index} score", value: run.score },
12+
{ field: "Run #{index} reason", value: run.reason },
13+
]
14+
end
15+
%>
16+
17+
<%= render "govuk_publishing_components/components/summary_list", {
18+
title:,
19+
heading_level: 2,
20+
margin_bottom: 4,
21+
heading_size: "l",
22+
items: items,
23+
} %>
24+
25+
<%= render "govuk_publishing_components/components/details", {
26+
title: "LLM responses",
27+
} do %>
28+
<% aggregate.runs.each.with_index(1) do |run, index| %>
29+
<%= render "govuk_publishing_components/components/heading", {
30+
text: "Run #{index}",
31+
font_size: "m",
32+
heading_level: 2,
33+
margin_bottom: 4,
34+
} %>
35+
36+
<% run.llm_responses.each do |namespace, response| %>
37+
<%= render "govuk_publishing_components/components/heading", {
38+
text: namespace.capitalize,
39+
font_size: "s",
40+
heading_level: 3,
41+
} %>
42+
43+
<p class="govuk-body">
44+
<%= render("components/code_snippet", content: JSON.pretty_generate(response)) %>
45+
</p>
46+
<% end %>
47+
<% end %>
48+
<% end %>
49+
50+
<%= render "govuk_publishing_components/components/details", {
51+
title: "Metrics",
52+
} do %>
53+
<% aggregate.runs.each.with_index(1) do |run, index| %>
54+
<%= render "govuk_publishing_components/components/heading", {
55+
text: "Run #{index}",
56+
font_size: "m",
57+
heading_level: 2,
58+
} %>
59+
60+
<% run.metrics.sort.each do |namespace, metrics| %>
61+
<%= render "govuk_publishing_components/components/summary_list", {
62+
title: namespace.capitalize,
63+
items: metrics.map do |metric, value|
64+
{
65+
field: metric,
66+
value: value,
67+
}
68+
end,
69+
borderless: true,
70+
heading_size: "s",
71+
margin_bottom: 6,
72+
} %>
73+
<% end %>
74+
<% end %>
75+
<% end %>

app/views/admin/questions/show.html.erb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,9 @@ content_for(:active_navigation_item, admin_questions_path)
3939
content: render(
4040
"analysis_tab",
4141
topics: @answer.topics,
42+
answer_relevancy_aggregate: @answer.answer_relevancy_aggregate,
4243
),
43-
} if @answer&.topics.present?
44+
} if @answer&.topics.present? || @answer&.answer_relevancy_aggregate.present?
4445
%>
4546

4647
<div class="govuk-grid-row">

spec/requests/admin/questions_spec.rb

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@
284284
.and have_content('"id": "call_dqGpbb39drQDafLsjDLtnbGD"')
285285
end
286286

287-
it "doesn't render the tabs component when there is no analysis" do
287+
it "doesn't render the tabs component when there are no topics or auto-eval aggregate data" do
288288
question = create(:question, :with_answer)
289289
get admin_show_question_path(question)
290290

@@ -349,14 +349,60 @@
349349
.to have_selector("#details-tab", text: question.message)
350350
end
351351

352-
it "renders the topics in the analysis tab" do
352+
it "renders the topics in the topics tab" do
353353
get admin_show_question_path(question)
354354

355355
expect(response.body)
356356
.to have_selector("#analysis-tab", text: topics.primary_topic.capitalize)
357357
.and have_selector("#analysis-tab", text: topics.secondary_topic.capitalize)
358358
end
359359
end
360+
361+
context "when answer relevancy aggregate data is present" do
362+
let(:run) do
363+
create(
364+
:answer_relevancy_run,
365+
score: 0.85,
366+
reason: "The answer is relevant to the question.",
367+
llm_responses: {
368+
"statements" => { "statements" => ["The answer is relevant."] },
369+
"verdicts" => { "verdicts" => [{ "verdict" => "yes" }] },
370+
},
371+
metrics: {
372+
"statements" => { duration: 1.55556 },
373+
"verdicts" => { duration: 1.44445 },
374+
},
375+
)
376+
end
377+
let!(:aggregate) do
378+
create(
379+
:answer_relevancy_aggregate,
380+
runs: [run],
381+
)
382+
end
383+
let(:question) { aggregate.answer.question }
384+
385+
it "renders the answer relevancy metrics and LLM responses" do
386+
get admin_show_question_path(question)
387+
388+
expect(response.body.squish)
389+
.to have_content("Answer relevancy")
390+
.and have_content("Run 1 score")
391+
.and have_content("0.85")
392+
.and have_content("Run 1 reason")
393+
.and have_content("The answer is relevant to the question.")
394+
395+
expect(response.body.squish)
396+
.to have_content("Statements")
397+
.and have_content(/duration.*1\.55556/)
398+
.and have_content("Verdicts")
399+
.and have_content(/duration.*1\.44445/)
400+
401+
expect(response.body.squish)
402+
.to have_content('{ "statements": [ "The answer is relevant." ] }')
403+
.and have_content('{ "verdicts": [ { "verdict": "yes" } ] }')
404+
end
405+
end
360406
end
361407

362408
def expect_unprocessable_content_with_date_errors

spec/system/user_conversation_activity_is_shown_in_admin_spec.rb

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
when_i_click_the_analysis_tab
2020
then_i_see_the_topics_have_been_tagged
21+
and_i_see_the_answer_relevancy_statistics
2122
and_i_dont_see_the_answer
2223
end
2324

@@ -105,4 +106,12 @@ def then_i_see_the_topics_have_been_tagged
105106
def and_i_dont_see_the_answer
106107
expect(page).not_to have_content(@answer)
107108
end
109+
110+
def when_i_click_the_answer_relevancy_tab
111+
click_link "Answer relevancy"
112+
end
113+
114+
def and_i_see_the_answer_relevancy_statistics
115+
expect(page).to have_content(/Mean score.*1.0/)
116+
end
108117
end

0 commit comments

Comments
 (0)