Skip to content

Commit 0133128

Browse files
committed
Update Faithfulness rake task to use shared class
Replace the inline implementation with a call to AutoEvaluation::EvaluateAnswerFromQuestionMessage, matching the pattern used by the Coherence and AnswerRelevancy tasks. This also updates the spec to use the shared examples "a task that returns a ScoreResult".
1 parent 550fbfa commit 0133128

2 files changed

Lines changed: 10 additions & 39 deletions

File tree

lib/tasks/evaluation.rake

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -209,25 +209,15 @@ namespace :evaluation do
209209
task generate_faithfulness_evaluation: :environment do
210210
raise "Requires an INPUT env var" if ENV["INPUT"].blank?
211211

212-
question = Question.new(message: ENV["INPUT"], conversation: Conversation.new)
213-
214-
answer = AnswerComposition::PipelineRunner.call(question:, pipeline: [
215-
AnswerComposition::Pipeline::SearchResultFetcher,
216-
AnswerComposition::Pipeline::Claude::StructuredAnswerComposer,
217-
])
212+
begin
213+
result = AutoEvaluation::EvaluateAnswerFromQuestionMessage.call(
214+
evaluation_class: AutoEvaluation::Faithfulness,
215+
question_message: ENV["INPUT"],
216+
)
218217

219-
if answer.status =~ /^error/
220-
warn "Warning: answer has an error status: #{answer.status}"
221-
abort(answer.error_message)
218+
puts result.to_json
219+
rescue AutoEvaluation::EvaluateAnswerFromQuestionMessage::TaskFailedError => e
220+
abort e.message
222221
end
223-
224-
retrieval_context = answer.sources.used.map(&:plain_content).join("\n\n")
225-
226-
result = AutoEvaluation::Faithfulness.call(
227-
answer_message: answer.message,
228-
retrieval_context:,
229-
)
230-
231-
puts(result.to_json)
232222
end
233223
end

spec/lib/tasks/evaluation_spec.rb

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -590,28 +590,9 @@
590590
end
591591

592592
describe "generate_faithfulness_evaluation" do
593-
it_behaves_like "an auto-evaluation generate task" do
594-
let(:question_message) { "What is the current VAT rate?" }
593+
it_behaves_like "a task that returns a ScoreResult" do
595594
let(:task_name) { "evaluation:generate_faithfulness_evaluation" }
596-
let(:used_sources) do
597-
[
598-
build(:answer_source, used: true, chunk: build(:answer_source_chunk, plain_content: "Source 1 content")),
599-
build(:answer_source, used: true, chunk: build(:answer_source_chunk, plain_content: "Source 2 content")),
600-
]
601-
end
602-
let(:retrieval_context) { "Source 1 content\n\nSource 2 content" }
603-
604-
before do
605-
allow(answer.sources).to receive(:used).and_return(used_sources)
606-
607-
allow(AutoEvaluation::Faithfulness)
608-
.to receive(:call)
609-
.with(
610-
answer_message: answer.message,
611-
retrieval_context:,
612-
)
613-
.and_return(evaluation_result)
614-
end
595+
let(:evaluation_class) { AutoEvaluation::Faithfulness }
615596
end
616597
end
617598
end

0 commit comments

Comments
 (0)