|
3 | 3 | let(:prompts) { AutoEvaluation::Prompts.config.faithfulness } |
4 | 4 | let(:answer_message) { "Einstein won the Nobel Prize in 1968 for the photoelectric effect." } |
5 | 5 | let(:retrieval_context) { "Einstein won the Nobel Prize in 1921 for the photoelectric effect." } |
| 6 | + let(:question) { build(:question, message: "When did Einstein win the Nobel Prize?") } |
| 7 | + let(:used_source) { double(plain_content: retrieval_context, used?: true) } |
| 8 | + let(:answer) do |
| 9 | + build(:answer, question:, message: answer_message).tap do |a| |
| 10 | + allow(a).to receive(:sources).and_return([used_source]) |
| 11 | + end |
| 12 | + end |
6 | 13 |
|
7 | 14 | let(:truths) { ["Einstein won the Nobel Prize in 1921.", "Einstein won the Nobel Prize for the photoelectric effect."] } |
8 | 15 | let(:truths_json) { { truths: }.to_json } |
|
83 | 90 | allow(Clock).to receive(:monotonic_time) |
84 | 91 | .and_return(200.0, 202.0, 204.0, 206.0, 208.0, 210.0, 212.0, 214.0) |
85 | 92 |
|
86 | | - result = described_class.call( |
87 | | - answer_message:, |
88 | | - retrieval_context:, |
89 | | - ) |
| 93 | + result = described_class.call(answer) |
90 | 94 |
|
91 | 95 | expected_llm_responses = { |
92 | 96 | truths: JSON.parse(truths_stub.response.body), |
|
127 | 131 | end |
128 | 132 |
|
129 | 133 | it "treats 'idk' verdicts as positive in the score" do |
130 | | - result = described_class.call( |
131 | | - answer_message:, |
132 | | - retrieval_context:, |
133 | | - ) |
| 134 | + result = described_class.call(answer) |
134 | 135 |
|
135 | 136 | expect(result.score).to eq(0.5) |
136 | 137 | end |
|
142 | 143 | it "returns a result object with the expected attributes" do |
143 | 144 | allow(Clock).to receive(:monotonic_time).and_return(200.0, 202.0, 204.0, 206.0) |
144 | 145 |
|
145 | | - result = described_class.call( |
146 | | - answer_message:, |
147 | | - retrieval_context:, |
148 | | - ) |
| 146 | + result = described_class.call(answer) |
149 | 147 |
|
150 | 148 | expect(result) |
151 | 149 | .to be_a(AutoEvaluation::ScoreResult) |
|
165 | 163 | it "returns a result object with the expected attributes" do |
166 | 164 | allow(Clock).to receive(:monotonic_time).and_return(200.0, 202.0, 204.0, 206.0) |
167 | 165 |
|
168 | | - result = described_class.call( |
169 | | - answer_message:, |
170 | | - retrieval_context:, |
171 | | - ) |
| 166 | + result = described_class.call(answer) |
172 | 167 |
|
173 | 168 | expect(result) |
174 | 169 | .to be_a(AutoEvaluation::ScoreResult) |
|
189 | 184 | allow(Clock).to receive(:monotonic_time) |
190 | 185 | .and_return(200.0, 202.0, 204.0, 206.0, 208.0, 210.0) |
191 | 186 |
|
192 | | - result = described_class.call( |
193 | | - answer_message:, |
194 | | - retrieval_context:, |
195 | | - ) |
| 187 | + result = described_class.call(answer) |
196 | 188 |
|
197 | 189 | expect(result) |
198 | 190 | .to be_a(AutoEvaluation::ScoreResult) |
|
220 | 212 | it "returns a result object with the expected attributes" do |
221 | 213 | allow(Clock).to receive(:monotonic_time).and_return(200.0, 202.0, 204.0, 206.0, 208.0, 210.0) |
222 | 214 |
|
223 | | - result = described_class.call( |
224 | | - answer_message:, |
225 | | - retrieval_context:, |
226 | | - ) |
| 215 | + result = described_class.call(answer) |
227 | 216 |
|
228 | 217 | expect(result) |
229 | 218 | .to be_a(AutoEvaluation::ScoreResult) |
|
262 | 251 | end |
263 | 252 |
|
264 | 253 | it "returns success: false" do |
265 | | - result = described_class.call( |
266 | | - answer_message:, |
267 | | - retrieval_context:, |
268 | | - ) |
| 254 | + result = described_class.call(answer) |
269 | 255 |
|
270 | 256 | expect(result.success).to be false |
271 | 257 | expect(result.score).to be_within(0.01).of(0.33) |
|
0 commit comments