Ensure we pass chunks in the correct format for faithfulness

davidgisbey · davidgisbey · commit 5a66d7de927a · 2026-05-21T14:53:45.000+01:00
We were sending the plain content of the chunk to the LLM, which meant
that the LLM wasn't actually getting the full context of the sources.

The reason for this is that the plain_content method on the chunk strips
out inline links. This updates the code to use the same format as the
evaluation repo, which sends the HTML content of the chunk to the LLM.
diff --git a/lib/auto_evaluation/faithfulness.rb b/lib/auto_evaluation/faithfulness.rb
@@ -49,7 +49,7 @@ def answer_message
   end
 
   def retrieval_context
-    answer.sources.map(&:plain_content).join("\n\n")
+    answer.sources.map { |source| format_chunk_for_evaluation(source.chunk) }.join("\n")
   end
 
   def calculate_score(verdicts)
@@ -75,4 +75,16 @@ def build_result_with_score(score, reason)
       metrics:,
     )
   end
+
+  def format_chunk_for_evaluation(chunk)
+    <<~STRING
+      Context:
+      Page Title: #{chunk.title}
+      Page description: #{chunk.description}
+      Headings: #{chunk.heading_hierarchy.join(' > ')}
+
+      Content:
+      #{chunk.html_content}
+    STRING
+  end
 end
diff --git a/spec/lib/auto_evaluation/faithfulness_spec.rb b/spec/lib/auto_evaluation/faithfulness_spec.rb
@@ -8,7 +8,25 @@
     let(:used_source) { build(:answer_source, used: true, chunk: used_chunk) }
     let(:unused_source) { build(:answer_source, used: false, chunk: unused_chunk) }
     let(:answer) { build(:answer, question:, message: answer_message, sources: [used_source, unused_source]) }
-    let(:retrieval_context) { "#{used_chunk_conext}\n\n#{unused_chunk.plain_content}" }
+    let(:retrieval_context) do
+      <<~STRING
+        Context:
+        Page Title: #{used_chunk.title}
+        Page description: #{used_chunk.description}
+        Headings: #{used_chunk.heading_hierarchy.join(' > ')}
+
+        Content:
+        #{used_chunk.html_content}
+
+        Context:
+        Page Title: #{unused_chunk.title}
+        Page description: #{unused_chunk.description}
+        Headings: #{unused_chunk.heading_hierarchy.join(' > ')}
+
+        Content:
+        #{unused_chunk.html_content}
+      STRING
+    end
     let(:truths) { ["Einstein won the Nobel Prize in 1921.", "Einstein won the Nobel Prize for the photoelectric effect."] }
     let(:claims) { ["Einstein won the Nobel Prize in 1968.", "Einstein won the Nobel Prize for the photoelectric effect."] }
     let(:verdicts) do
diff --git a/spec/support/system_spec_helpers.rb b/spec/support/system_spec_helpers.rb
@@ -60,8 +60,19 @@ def stubs_for_mock_answer(question,
       question_message: question,
       answer_message: answer,
     )
+
+    retrieval_context = <<~STRING
+      Context:
+      Page Title: Title
+      Page description: Description
+      Headings: Heading 1 > Heading 2
+
+      Content:
+      <p>Some content</p>
+    STRING
+
     stub_bedrock_invoke_model_openai_oss_faithfulness(
-      retrieval_context: "Some content",
+      retrieval_context: retrieval_context,
       answer_message: answer,
     )
     stub_bedrock_invoke_model_openai_oss_coherence(