-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathanswer_relevancy.rb
More file actions
81 lines (66 loc) · 2.02 KB
/
answer_relevancy.rb
File metadata and controls
81 lines (66 loc) · 2.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Scores how relevant an answer is to the question it responds to.
#
# The evaluation runs in up to three steps, each delegated to a collaborator:
# StatementGenerator extracts statements from the answer, VerdictsGenerator
# produces a verdict per statement, and ReasonGenerator produces the reason
# for the final score. Every collaborator returns a triple of
# (value, llm_response, metrics); the last two are collected under the
# :statements, :verdicts and :reason keys and passed on to the ScoreResult.
#
# The score is the fraction of verdicts that are not "no". When there are no
# statements, no verdicts, or no "no" verdicts, the maximum score (1.0) is
# returned immediately with a fixed reason and later steps are skipped.
class AutoEvaluation::AnswerRelevancy
  # Minimum score at which the result is reported as successful.
  THRESHOLD = 0.5

  # Normalised verdict value that marks a statement as irrelevant.
  IRRELEVANT_VERDICT = "no".freeze
  private_constant :IRRELEVANT_VERDICT

  # Convenience entry point: builds an instance and runs it.
  def self.call(...) = new(...).call

  # @param question_message the question being answered; forwarded as-is to
  #   VerdictsGenerator and ReasonGenerator
  # @param answer_message the answer under evaluation; forwarded as-is to
  #   StatementGenerator
  def initialize(question_message:, answer_message:)
    @question_message = question_message
    @answer_message = answer_message
    @llm_responses = {}
    @metrics = {}
  end

  # Runs the evaluation.
  #
  # @return [AutoEvaluation::ScoreResult] carrying the score, the reason,
  #   whether the score reached THRESHOLD, and the collected LLM responses
  #   and metrics of every step that ran
  def call
    statements, llm_responses[:statements], metrics[:statements] =
      StatementGenerator.call(answer_message:)
    if statements.empty?
      return build_maximum_score_result(reason: "No statements were extracted from the answer.")
    end

    verdicts, llm_responses[:verdicts], metrics[:verdicts] =
      VerdictsGenerator.call(question_message:, statements:)
    if verdicts.empty?
      return build_maximum_score_result(reason: "No verdicts were generated for the extracted statements.")
    end

    # Nothing was judged irrelevant, so the ReasonGenerator call is skipped
    # and a fixed reason is used instead.
    if verdicts.none? { |verdict| irrelevant?(verdict) }
      return build_maximum_score_result(
        reason: "The response fully addressed the input with no irrelevant statements.",
      )
    end

    score = calculate_score(verdicts)
    reason, llm_responses[:reason], metrics[:reason] =
      ReasonGenerator.call(question_message:, verdicts:, score:)

    AutoEvaluation::ScoreResult.new(
      score:,
      reason:,
      success: score >= THRESHOLD,
      llm_responses:,
      metrics:,
    )
  end

  private

  # The two hashes are only ever mutated in place, so readers are sufficient.
  attr_reader :question_message, :answer_message, :llm_responses, :metrics

  # Whether a single verdict marks its statement as irrelevant. Surrounding
  # whitespace and letter case are ignored; any value other than "no" counts
  # as relevant.
  #
  # NOTE(review): raises NoMethodError when verdict["verdict"] is nil.
  # Presumably VerdictsGenerator guarantees a string here - confirm.
  def irrelevant?(verdict)
    verdict["verdict"].strip.downcase == IRRELEVANT_VERDICT
  end

  # Fraction of verdicts that are not irrelevant, as a Float in 0.0..1.0.
  # An empty collection scores 1.0, which also avoids dividing by zero.
  def calculate_score(verdicts)
    verdict_count = verdicts.count
    return 1.0 if verdict_count.zero?

    relevant_count = verdicts.count { |verdict| !irrelevant?(verdict) }
    relevant_count.to_f / verdict_count
  end

  # Builds a successful result with the maximum score of 1.0, carrying
  # whatever LLM responses and metrics have been collected so far.
  def build_maximum_score_result(reason:)
    AutoEvaluation::ScoreResult.new(
      score: 1.0,
      reason:,
      success: true,
      llm_responses:,
      metrics:,
    )
  end
end