Skip to content

Commit 550fbfa

Browse files
committed
Update Faithfulness to accept answer object
Change the Faithfulness interface from keyword arguments (answer_message:, retrieval_context:) to a single positional argument (answer), matching the pattern used by Coherence and AnswerRelevancy. The class now derives both values from the answer object via private methods: answer_message returns answer.message, and retrieval_context joins the plain_content of the answer's used sources, separated by blank lines.
1 parent 9459208 commit 550fbfa

2 files changed

Lines changed: 25 additions & 32 deletions

File tree

lib/auto_evaluation/faithfulness.rb

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,8 @@ class AutoEvaluation::Faithfulness
33

44
def self.call(...) = new(...).call
55

6-
def initialize(answer_message:, retrieval_context:)
7-
@answer_message = answer_message
8-
@retrieval_context = retrieval_context
6+
def initialize(answer)
7+
@answer = answer
98
@llm_responses = {}
109
@metrics = {}
1110
end
@@ -68,9 +67,17 @@ def call
6867

6968
private
7069

71-
attr_reader :answer_message, :retrieval_context
70+
attr_reader :answer
7271
attr_accessor :llm_responses, :metrics
7372

73+
def answer_message
74+
answer.message
75+
end
76+
77+
def retrieval_context
78+
answer.sources.select(&:used?).map(&:plain_content).join("\n\n")
79+
end
80+
7481
def calculate_score(verdicts)
7582
verdict_count = verdicts.count
7683
return 1.0 if verdict_count.zero?

spec/lib/auto_evaluation/faithfulness_spec.rb

Lines changed: 14 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,13 @@
33
let(:prompts) { AutoEvaluation::Prompts.config.faithfulness }
44
let(:answer_message) { "Einstein won the Nobel Prize in 1968 for the photoelectric effect." }
55
let(:retrieval_context) { "Einstein won the Nobel Prize in 1921 for the photoelectric effect." }
6+
let(:question) { build(:question, message: "When did Einstein win the Nobel Prize?") }
7+
let(:used_source) { double(plain_content: retrieval_context, used?: true) }
8+
let(:answer) do
9+
build(:answer, question:, message: answer_message).tap do |a|
10+
allow(a).to receive(:sources).and_return([used_source])
11+
end
12+
end
613

714
let(:truths) { ["Einstein won the Nobel Prize in 1921.", "Einstein won the Nobel Prize for the photoelectric effect."] }
815
let(:truths_json) { { truths: }.to_json }
@@ -83,10 +90,7 @@
8390
allow(Clock).to receive(:monotonic_time)
8491
.and_return(200.0, 202.0, 204.0, 206.0, 208.0, 210.0, 212.0, 214.0)
8592

86-
result = described_class.call(
87-
answer_message:,
88-
retrieval_context:,
89-
)
93+
result = described_class.call(answer)
9094

9195
expected_llm_responses = {
9296
truths: JSON.parse(truths_stub.response.body),
@@ -127,10 +131,7 @@
127131
end
128132

129133
it "treats 'idk' verdicts as positive in the score" do
130-
result = described_class.call(
131-
answer_message:,
132-
retrieval_context:,
133-
)
134+
result = described_class.call(answer)
134135

135136
expect(result.score).to eq(0.5)
136137
end
@@ -142,10 +143,7 @@
142143
it "returns a result object with the expected attributes" do
143144
allow(Clock).to receive(:monotonic_time).and_return(200.0, 202.0, 204.0, 206.0)
144145

145-
result = described_class.call(
146-
answer_message:,
147-
retrieval_context:,
148-
)
146+
result = described_class.call(answer)
149147

150148
expect(result)
151149
.to be_a(AutoEvaluation::ScoreResult)
@@ -165,10 +163,7 @@
165163
it "returns a result object with the expected attributes" do
166164
allow(Clock).to receive(:monotonic_time).and_return(200.0, 202.0, 204.0, 206.0)
167165

168-
result = described_class.call(
169-
answer_message:,
170-
retrieval_context:,
171-
)
166+
result = described_class.call(answer)
172167

173168
expect(result)
174169
.to be_a(AutoEvaluation::ScoreResult)
@@ -189,10 +184,7 @@
189184
allow(Clock).to receive(:monotonic_time)
190185
.and_return(200.0, 202.0, 204.0, 206.0, 208.0, 210.0)
191186

192-
result = described_class.call(
193-
answer_message:,
194-
retrieval_context:,
195-
)
187+
result = described_class.call(answer)
196188

197189
expect(result)
198190
.to be_a(AutoEvaluation::ScoreResult)
@@ -220,10 +212,7 @@
220212
it "returns a result object with the expected attributes" do
221213
allow(Clock).to receive(:monotonic_time).and_return(200.0, 202.0, 204.0, 206.0, 208.0, 210.0)
222214

223-
result = described_class.call(
224-
answer_message:,
225-
retrieval_context:,
226-
)
215+
result = described_class.call(answer)
227216

228217
expect(result)
229218
.to be_a(AutoEvaluation::ScoreResult)
@@ -262,10 +251,7 @@
262251
end
263252

264253
it "returns success: false" do
265-
result = described_class.call(
266-
answer_message:,
267-
retrieval_context:,
268-
)
254+
result = described_class.call(answer)
269255

270256
expect(result.success).to be false
271257
expect(result.score).to be_within(0.01).of(0.33)

0 commit comments

Comments
 (0)