Skip to content

Commit 0b90024

Browse files
committed
Get basic implementation working
1 parent c4bdb23 commit 0b90024

4 files changed

Lines changed: 90 additions & 1 deletion

File tree

app/jobs/answer_relevancy_job.rb

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Background job that evaluates an answer with AutoEvaluation::AnswerRelevancy
# and stores the outcome under the "answer_relevancy" metric name.
#
# Lookup, eligibility checks and persistence are delegated to the
# find_answer / log_warnings / save_metric helpers of AutoEvaluationMetricJob.
class AnswerRelevancyJob < AutoEvaluationMetricJob
  METRIC_NAME = "answer_relevancy".freeze

  # Runs the relevancy evaluation three times for one answer and saves the
  # collected results.
  #
  # @param answer_id [Object] id used to look the answer up via find_answer
  # @return [void] returns early (doing nothing) when log_warnings reports
  #   that the answer should not be evaluated
  def perform(answer_id)
    # Remember the id so attribute_warn_message can interpolate it; without
    # this assignment the private reader below returns nil and the warning
    # is logged with an empty id.
    @answer_id = answer_id

    answer = find_answer(answer_id)
    return if log_warnings(METRIC_NAME, answer)

    # One evaluator call per element; the question text prefers the
    # rephrased question and falls back to the original question message.
    results = Array.new(3) do
      AutoEvaluation::AnswerRelevancy.call(
        question_message: answer.rephrased_question || answer.question.message,
        answer_message: answer.message,
      )
    end

    save_metric(answer, results, METRIC_NAME)
  end

  private

  attr_reader :answer_id

  # Message logged by the base class when this metric already exists for
  # the answer.
  def attribute_warn_message
    "Answer #{answer_id} has already been evaluated for relevancy"
  end

  # Name of the score attribute associated with this metric.
  # NOTE(review): not referenced by any code visible in this commit —
  # confirm where it is consumed.
  def score_attribute
    :answer_relevancy_score
  end
end
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Shared base for jobs that compute an auto-evaluation metric for an answer.
#
# Subclasses call the private helpers below from their own #perform and must
# implement #attribute_warn_message, which supplies the text logged when the
# metric has already been recorded.
#
# Any StandardError raised by a job is retried after one minute, up to
# MAX_RETRIES attempts.
class AutoEvaluationMetricJob < ApplicationJob
  MAX_RETRIES = 5
  retry_on StandardError, wait: 1.minute, attempts: MAX_RETRIES

  private

  # Id most recently passed to find_answer (nil until find_answer is called).
  attr_reader :answer_id

  # Looks up the answer, eager-loading its topics, question and conversation.
  #
  # @param answer_id [Object] primary key of the answer
  # @return [Answer, nil] nil when no record has that id
  def find_answer(answer_id)
    # Keep the requested id so log_warnings can still report it when the
    # lookup returns nil.
    @answer_id = answer_id
    Answer.includes(:topics, question: :conversation).find_by(id: answer_id)
  end

  # Decides whether evaluation should be skipped, logging the reason.
  #
  # @param metric [String] metric name to look for among existing aggregates
  # @param answer [Answer, nil] record returned by find_answer
  # @return [Boolean] true when the job should stop, false when it may proceed
  def log_warnings(metric, answer)
    unless answer
      # answer is nil in this branch, so the id has to come from the value
      # remembered by find_answer; calling answer.id here would raise
      # NoMethodError and send a missing record through the retry cycle.
      logger.warn("No answer found for #{answer_id}")
      return true
    end

    if answer.metric_aggregates.exists?(metric_name: metric)
      logger.warn(attribute_warn_message)
      return true
    end

    unless answer.status == "answered"
      logger.info("Answer #{answer.id} is not eligible for auto evaluation")
      return true
    end

    false
  end

  # Persists one aggregate (mean score) plus one run per evaluation result.
  #
  # @param answer [Answer] answer the metric belongs to
  # @param results [Array] evaluation results responding to score, reason,
  #   llm_responses and metrics
  # @param metric [String] metric name stored on the aggregate
  # @raise [ActiveRecord::RecordInvalid] from save! when the aggregate is invalid
  def save_metric(answer, results, metric)
    # Re-check at save time: the metric may have been recorded while the
    # evaluations were running.
    return logger.warn(attribute_warn_message) if answer.metric_aggregates.exists?(metric_name: metric)

    mean_score = results.sum(&:score) / results.size.to_f

    # Write the aggregate and all of its runs atomically. Otherwise a failure
    # part-way through leaves an aggregate with missing runs, and the exists?
    # guards above make every retry skip it.
    Answer.transaction do
      aggregate_metric = answer.metric_aggregates.build(score: mean_score, metric_name: metric)

      aggregate_metric.save!

      results.each do |result|
        run = aggregate_metric.runs.build(
          score: result.score,
          reason: result.reason,
        )

        result.llm_responses.stringify_keys.each do |name, llm_response|
          run.assign_llm_response(name, llm_response)
        end
        result.metrics.stringify_keys.each do |name, metrics|
          run.assign_metrics(name, metrics)
        end

        aggregate_metric.runs << run
      end
    end
  end
end

app/jobs/compose_answer_job.rb

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ def perform(question_id)
1414
logger.warn("Already an answer created for #{question_id}")
1515
end
1616

17-
AnswerTopicsJob.perform_later(answer.id) if answer.persisted?
17+
if answer.persisted?
18+
AnswerTopicsJob.perform_later(answer.id)
19+
AnswerRelevancyJob.perform_later(answer.id)
20+
end
1821
end
1922
end

app/models/auto_evaluation_metric_run.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
class AutoEvaluationMetricRun < ApplicationRecord
2+
include LlmCallsRecordable
3+
24
belongs_to :metric_aggregate,
35
class_name: "AutoEvaluationMetricAggregate",
46
foreign_key: :auto_evaluation_metric_aggregate_id

0 commit comments

Comments
 (0)