Update Faithfulness to accept answer object

chaecramb · chaecramb · commit 550fbfa9d465 · 2026-01-06T15:24:19.000Z
Change the Faithfulness interface from keyword arguments
(answer_message:, retrieval_context:) to a single positional
argument (answer), matching the pattern used by Coherence and
AnswerRelevancy.

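The class now derives both values from the answer object via
private methods: answer_message is answer.message, and
retrieval_context is the plain_content of the answer's used
sources, joined by blank lines.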
diff --git a/lib/auto_evaluation/faithfulness.rb b/lib/auto_evaluation/faithfulness.rb
@@ -3,9 +3,8 @@ class AutoEvaluation::Faithfulness
 
   def self.call(...) = new(...).call
 
-  def initialize(answer_message:, retrieval_context:)
-    @answer_message = answer_message
-    @retrieval_context = retrieval_context
+  def initialize(answer)
+    @answer = answer
     @llm_responses = {}
     @metrics = {}
   end
@@ -68,9 +67,17 @@ def call
 
 private
 
-  attr_reader :answer_message, :retrieval_context
+  attr_reader :answer
   attr_accessor :llm_responses, :metrics
 
+  def answer_message
+    answer.message
+  end
+
+  def retrieval_context
+    answer.sources.select(&:used?).map(&:plain_content).join("\n\n")
+  end
+
   def calculate_score(verdicts)
     verdict_count = verdicts.count
     return 1.0 if verdict_count.zero?
diff --git a/spec/lib/auto_evaluation/faithfulness_spec.rb b/spec/lib/auto_evaluation/faithfulness_spec.rb
@@ -3,6 +3,13 @@
     let(:prompts) { AutoEvaluation::Prompts.config.faithfulness }
     let(:answer_message) { "Einstein won the Nobel Prize in 1968 for the photoelectric effect." }
     let(:retrieval_context) { "Einstein won the Nobel Prize in 1921 for the photoelectric effect." }
+    let(:question) { build(:question, message: "When did Einstein win the Nobel Prize?") }
+    let(:used_source) { double(plain_content: retrieval_context, used?: true) }
+    let(:answer) do
+      build(:answer, question:, message: answer_message).tap do |a|
+        allow(a).to receive(:sources).and_return([used_source])
+      end
+    end
 
     let(:truths) { ["Einstein won the Nobel Prize in 1921.", "Einstein won the Nobel Prize for the photoelectric effect."] }
     let(:truths_json) { { truths: }.to_json }
@@ -83,10 +90,7 @@
       allow(Clock).to receive(:monotonic_time)
                   .and_return(200.0, 202.0, 204.0, 206.0, 208.0, 210.0, 212.0, 214.0)
 
-      result = described_class.call(
-        answer_message:,
-        retrieval_context:,
-      )
+      result = described_class.call(answer)
 
       expected_llm_responses = {
         truths: JSON.parse(truths_stub.response.body),
@@ -127,10 +131,7 @@
       end
 
       it "treats 'idk' verdicts as positive in the score" do
-        result = described_class.call(
-          answer_message:,
-          retrieval_context:,
-        )
+        result = described_class.call(answer)
 
         expect(result.score).to eq(0.5)
       end
@@ -142,10 +143,7 @@
       it "returns a result object with the expected attributes" do
         allow(Clock).to receive(:monotonic_time).and_return(200.0, 202.0, 204.0, 206.0)
 
-        result = described_class.call(
-          answer_message:,
-          retrieval_context:,
-        )
+        result = described_class.call(answer)
 
         expect(result)
           .to be_a(AutoEvaluation::ScoreResult)
@@ -165,10 +163,7 @@
       it "returns a result object with the expected attributes" do
         allow(Clock).to receive(:monotonic_time).and_return(200.0, 202.0, 204.0, 206.0)
 
-        result = described_class.call(
-          answer_message:,
-          retrieval_context:,
-        )
+        result = described_class.call(answer)
 
         expect(result)
           .to be_a(AutoEvaluation::ScoreResult)
@@ -189,10 +184,7 @@
         allow(Clock).to receive(:monotonic_time)
                     .and_return(200.0, 202.0, 204.0, 206.0, 208.0, 210.0)
 
-        result = described_class.call(
-          answer_message:,
-          retrieval_context:,
-        )
+        result = described_class.call(answer)
 
         expect(result)
           .to be_a(AutoEvaluation::ScoreResult)
@@ -220,10 +212,7 @@
       it "returns a result object with the expected attributes" do
         allow(Clock).to receive(:monotonic_time).and_return(200.0, 202.0, 204.0, 206.0, 208.0, 210.0)
 
-        result = described_class.call(
-          answer_message:,
-          retrieval_context:,
-        )
+        result = described_class.call(answer)
 
         expect(result)
           .to be_a(AutoEvaluation::ScoreResult)
@@ -262,10 +251,7 @@
       end
 
       it "returns success: false" do
-        result = described_class.call(
-          answer_message:,
-          retrieval_context:,
-        )
+        result = described_class.call(answer)
 
         expect(result.success).to be false
         expect(result.score).to be_within(0.01).of(0.33)