Skip to content

Commit 37b2971

Browse files
committed
Ensure we pass chunks in the correct format for faithfulness
We were sending the plain content of each chunk to the LLM, which meant the LLM wasn't actually getting the full context of the sources. This happened because the plain_content method strips out inline links from the chunk. This commit updates the code to use the same format as the evaluation repo, which sends the HTML content of the chunk to the LLM.
1 parent 6520aca commit 37b2971

2 files changed

Lines changed: 32 additions & 2 deletions

File tree

lib/auto_evaluation/faithfulness.rb

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def answer_message
4949
end
5050

5151
def retrieval_context
52-
answer.sources.map(&:plain_content).join("\n\n")
52+
answer.sources.map { |source| format_chunk_for_evaluation(source.chunk) }.join("\n")
5353
end
5454

5555
def calculate_score(verdicts)
@@ -75,4 +75,16 @@ def build_result_with_score(score, reason)
7575
metrics:,
7676
)
7777
end
78+
79+
def format_chunk_for_evaluation(chunk)
80+
<<~STRING
81+
Context:
82+
Page Title: #{chunk.title}
83+
Page description: #{chunk.description}
84+
Headings: #{chunk.heading_hierarchy.join(' > ')}
85+
86+
Content:
87+
#{chunk.html_content}
88+
STRING
89+
end
7890
end

spec/lib/auto_evaluation/faithfulness_spec.rb

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,25 @@
88
let(:used_source) { build(:answer_source, used: true, chunk: used_chunk) }
99
let(:unused_source) { build(:answer_source, used: false, chunk: unused_chunk) }
1010
let(:answer) { build(:answer, question:, message: answer_message, sources: [used_source, unused_source]) }
11-
let(:retrieval_context) { "#{used_chunk_conext}\n\n#{unused_chunk.plain_content}" }
11+
let(:retrieval_context) do
12+
<<~STRING
13+
Context:
14+
Page Title: #{used_chunk.title}
15+
Page description: #{used_chunk.description}
16+
Headings: #{used_chunk.heading_hierarchy.join(' > ')}
17+
18+
Content:
19+
#{used_chunk.html_content}
20+
21+
Context:
22+
Page Title: #{unused_chunk.title}
23+
Page description: #{unused_chunk.description}
24+
Headings: #{unused_chunk.heading_hierarchy.join(' > ')}
25+
26+
Content:
27+
#{unused_chunk.html_content}
28+
STRING
29+
end
1230
let(:truths) { ["Einstein won the Nobel Prize in 1921.", "Einstein won the Nobel Prize for the photoelectric effect."] }
1331
let(:claims) { ["Einstein won the Nobel Prize in 1968.", "Einstein won the Nobel Prize for the photoelectric effect."] }
1432
let(:verdicts) do

0 commit comments

Comments
 (0)