Skip to content

Commit 7cdcb58

Browse files
committed
Move TopicTagger into AutoEvaluation module
I know this isn't strictly part of AutoEvaluation, but we will be using it as part of our analysis for auto-evaluation, as we will be aggregating metric scores across topics. For that reason I think it makes sense to move it into the AutoEvaluation module, particularly as we've grouped topics with the other metrics in jobs and models.
1 parent e4eb0db commit 7cdcb58

7 files changed

Lines changed: 101 additions & 99 deletions

File tree

app/jobs/answer_analysis/answer_topics_job.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def perform(answer_id)
1212
end
1313
return if quota_limit_reached?
1414

15-
result = TopicTagger.call(answer.rephrased_question || answer.question.message)
15+
result = AutoEvaluation::TopicTagger.call(answer.rephrased_question || answer.question.message)
1616
topics = answer.build_topics(
1717
primary_topic: result.primary_topic,
1818
secondary_topic: result.secondary_topic,
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
module AutoEvaluation
2+
class TopicTagger
3+
Result = Data.define(:primary_topic,
4+
:secondary_topic,
5+
:metrics,
6+
:llm_response)
7+
8+
def self.call(...) = new(...).call
9+
10+
def initialize(user_question)
11+
@user_question = user_question
12+
end
13+
14+
def call
15+
start_time = Clock.monotonic_time
16+
response = anthropic_bedrock_client.messages.create(
17+
messages:,
18+
system: [
19+
{ type: "text", text: system_prompt, cache_control: { type: "ephemeral" } },
20+
],
21+
model: BedrockModels.model_id(:claude_sonnet),
22+
tools:,
23+
tool_choice: { type: "tool", name: tools.first[:name] },
24+
**inference_config,
25+
)
26+
27+
Result.new(
28+
primary_topic: response[:content][0][:input][:primary_topic],
29+
secondary_topic: response[:content][0][:input][:secondary_topic],
30+
metrics: build_metrics(response, start_time),
31+
llm_response: response.to_h,
32+
)
33+
end
34+
35+
private
36+
37+
attr_reader :user_question
38+
39+
def anthropic_bedrock_client
40+
@anthropic_bedrock_client ||= Anthropic::BedrockClient.new(
41+
aws_region: ENV["CLAUDE_AWS_REGION"],
42+
)
43+
end
44+
45+
def build_metrics(response, start_time)
46+
{
47+
duration: Clock.monotonic_time - start_time,
48+
llm_prompt_tokens: BedrockModels.claude_total_prompt_tokens(response[:usage]),
49+
llm_completion_tokens: response[:usage][:output_tokens],
50+
llm_cached_tokens: response[:usage][:cache_read_input_tokens],
51+
model: response[:model],
52+
}
53+
end
54+
55+
def system_prompt
56+
topic_tagger_config["system_prompt"]
57+
end
58+
59+
def messages
60+
[
61+
{
62+
role: "user",
63+
content: user_question,
64+
},
65+
]
66+
end
67+
68+
def topic_tagger_config
69+
Rails.configuration.govuk_chat_private.llm_prompts.claude.topic_tagger
70+
end
71+
72+
def inference_config
73+
{
74+
max_tokens: 4096,
75+
temperature: 0.0,
76+
}
77+
end
78+
79+
def tools
80+
[topic_tagger_config["tool_spec"]]
81+
end
82+
end
83+
end

lib/tasks/evaluation.rake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ namespace :evaluation do
119119
task generate_topics_for_question: :environment do
120120
raise "Requires an INPUT env var" if ENV["INPUT"].blank?
121121

122-
result = TopicTagger.call(ENV["INPUT"])
122+
result = AutoEvaluation::TopicTagger.call(ENV["INPUT"])
123123

124124
puts(result.to_json)
125125
end

lib/topic_tagger.rb

Lines changed: 0 additions & 81 deletions
This file was deleted.

spec/jobs/answer_analysis/answer_topics_job_spec.rb

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
let(:answer) { create(:answer) }
44
let(:question) { answer.question }
55
let(:topic_tagger_result) do
6-
TopicTagger::Result.new(
6+
AutoEvaluation::TopicTagger::Result.new(
77
primary_topic: "business",
88
secondary_topic: "benefits",
99
metrics: {
@@ -16,21 +16,21 @@
1616
)
1717
end
1818

19-
before { allow(TopicTagger).to receive(:call).and_return(topic_tagger_result) }
19+
before { allow(AutoEvaluation::TopicTagger).to receive(:call).and_return(topic_tagger_result) }
2020

2121
it_behaves_like "a job in queue", "default"
22-
it_behaves_like "a job that adheres to the metric quota", TopicTagger
22+
it_behaves_like "a job that adheres to the metric quota", AutoEvaluation::TopicTagger
2323
it_behaves_like "a job that retries on service errors", Anthropic::Errors::APIError do
2424
before do
25-
allow(TopicTagger).to receive(:call)
25+
allow(AutoEvaluation::TopicTagger).to receive(:call)
2626
.and_raise(Anthropic::Errors::APIError.new(url: "url"))
2727
end
2828
end
2929

3030
describe "#perform" do
31-
it "calls the TopicTagger with the answer message" do
31+
it "calls the AutoEvaluation::TopicTagger with the answer message" do
3232
described_class.new.perform(answer.id)
33-
expect(TopicTagger).to have_received(:call).with(question.message)
33+
expect(AutoEvaluation::TopicTagger).to have_received(:call).with(question.message)
3434
end
3535

3636
it "creates topics for the answer based of the returned result" do
@@ -49,10 +49,10 @@
4949
context "when the answer has a rephrased_question" do
5050
let(:rephrased_question) { "This is a rephrased_question" }
5151

52-
it "calls the TopicTagger with the rephrased question" do
52+
it "calls the AutoEvaluation::TopicTagger with the rephrased question" do
5353
answer = create(:answer, rephrased_question: rephrased_question)
5454
described_class.new.perform(answer.id)
55-
expect(TopicTagger).to have_received(:call).with(rephrased_question)
55+
expect(AutoEvaluation::TopicTagger).to have_received(:call).with(rephrased_question)
5656
end
5757
end
5858

@@ -67,9 +67,9 @@
6767
described_class.new.perform(answer_id)
6868
end
6969

70-
it "doesn't call the TopicTagger" do
70+
it "doesn't call the AutoEvaluation::TopicTagger" do
7171
described_class.new.perform(answer_id)
72-
expect(TopicTagger).not_to have_received(:call)
72+
expect(AutoEvaluation::TopicTagger).not_to have_received(:call)
7373
end
7474
end
7575

@@ -96,8 +96,8 @@
9696
described_class.new.perform(answer.id)
9797
end
9898

99-
it "does not call the TopicTagger" do
100-
expect(TopicTagger).not_to receive(:call)
99+
it "does not call the AutoEvaluation::TopicTagger" do
100+
expect(AutoEvaluation::TopicTagger).not_to receive(:call)
101101
described_class.new.perform(answer.id)
102102
end
103103
end

spec/lib/topic_tagger_spec.rb renamed to spec/lib/auto_evaluation/topic_tagger_spec.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
RSpec.describe TopicTagger, :aws_credentials_stubbed do
1+
RSpec.describe AutoEvaluation::TopicTagger, :aws_credentials_stubbed do
22
describe ".call" do
33
let(:message) { "This is a test message." }
44

@@ -7,7 +7,7 @@
77
it "returns a results object with the expected topics" do
88
result = described_class.call(message)
99
expect(result)
10-
.to be_a(TopicTagger::Result)
10+
.to be_a(AutoEvaluation::TopicTagger::Result)
1111
.and have_attributes(
1212
primary_topic: "business",
1313
secondary_topic: "benefits",

spec/lib/tasks/evaluation_spec.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -384,13 +384,13 @@
384384

385385
it "outputs the response as JSON to stdout" do
386386
ClimateControl.modify(INPUT: input) do
387-
result = TopicTagger::Result.new(
387+
result = AutoEvaluation::TopicTagger::Result.new(
388388
primary_topic: "tax",
389389
secondary_topic: "benefits",
390390
metrics: {},
391391
llm_response: {},
392392
)
393-
allow(TopicTagger).to receive(:call).with(input).and_return(result)
393+
allow(AutoEvaluation::TopicTagger).to receive(:call).with(input).and_return(result)
394394

395395
expect { Rake::Task[task_name].invoke }
396396
.to output("#{result.to_json}\n").to_stdout

0 commit comments

Comments
 (0)