Skip to content

Commit 6520aca

Browse files
committed
Send all retrieved sources during faithfulness evaluation
Previously, we've only been sending the used sources to the faithfulness evaluation, which is out of line with the evaluation repo. Part of what we want to evaluate is whether the model is correctly identifying which sources are relevant, so we should be sending all retrieved sources.
1 parent df53aa6 commit 6520aca

2 files changed

Lines changed: 8 additions & 10 deletions

File tree

lib/auto_evaluation/faithfulness.rb

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,18 +49,14 @@ def answer_message
4949
end
5050

5151
def retrieval_context
52-
used_sources.map(&:plain_content).join("\n\n")
52+
answer.sources.map(&:plain_content).join("\n\n")
5353
end
5454

5555
def calculate_score(verdicts)
5656
faithful_count = verdicts.count { |verdict| verdict["verdict"].strip.downcase != "no" }
5757
faithful_count.to_d / verdicts.count
5858
end
5959

60-
def used_sources
61-
answer.sources.select(&:used)
62-
end
63-
6460
def build_error_result(error_message)
6561
AutoEvaluation::Result.new(
6662
status: "error",

spec/lib/auto_evaluation/faithfulness_spec.rb

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
RSpec.describe AutoEvaluation::Faithfulness, :aws_credentials_stubbed do
22
describe ".call" do
33
let(:answer_message) { "Einstein won the Nobel Prize in 1968 for the photoelectric effect." }
4-
let(:retrieval_context) { "Einstein won the Nobel Prize in 1921 for the photoelectric effect." }
4+
let(:used_chunk_conext) { "Einstein won the Nobel Prize in 1921 for the photoelectric effect." }
55
let(:question) { build(:question, message: "When did Einstein win the Nobel Prize?") }
6-
let(:chunk) { build(:answer_source_chunk, plain_content: retrieval_context) }
7-
let(:used_source) { build(:answer_source, used: true, chunk:) }
8-
let(:answer) { build(:answer, question:, message: answer_message, sources: [used_source]) }
9-
6+
let(:used_chunk) { build(:answer_source_chunk, plain_content: used_chunk_conext) }
7+
let(:unused_chunk) { build(:answer_source_chunk, plain_content: "Some other context.") }
8+
let(:used_source) { build(:answer_source, used: true, chunk: used_chunk) }
9+
let(:unused_source) { build(:answer_source, used: false, chunk: unused_chunk) }
10+
let(:answer) { build(:answer, question:, message: answer_message, sources: [used_source, unused_source]) }
11+
let(:retrieval_context) { "#{used_chunk_conext}\n\n#{unused_chunk.plain_content}" }
1012
let(:truths) { ["Einstein won the Nobel Prize in 1921.", "Einstein won the Nobel Prize for the photoelectric effect."] }
1113
let(:claims) { ["Einstein won the Nobel Prize in 1968.", "Einstein won the Nobel Prize for the photoelectric effect."] }
1214
let(:verdicts) do

0 commit comments

Comments
 (0)