Skip to content

Commit c05b0a6

Browse files
committed
Add schema validation to BedrockOpenAIOssInvoke
This updates the BedrockOpenAIOssInvoke class to include JSON schema validation for the structured output received from the LLM. It uses the first tool's schema, since all of our auto-eval metrics use a single tool. If this changes, we could consider passing the schema in via the method parameters down the line. While doing this, I noticed that I had not been consistent in adhering to the schemas in some test cases, so I've updated those.
1 parent 3254bf0 commit c05b0a6

5 files changed

Lines changed: 49 additions & 24 deletions

File tree

lib/auto_evaluation/bedrock_openai_oss_invoke.rb

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
module AutoEvaluation
22
class BedrockOpenAIOssInvoke
3+
class InvalidToolCallSchemaError < StandardError; end
34
Result = Data.define(
45
:evaluation_data,
56
:llm_response,
@@ -33,12 +34,14 @@ def call
3334
}.to_json,
3435
)
3536
parsed_response = JSON.parse(response.body.read)
36-
parsed_structured_output = JSON.parse(
37+
parsed_tool_output = JSON.parse(
3738
parsed_response["choices"][0]["message"]["tool_calls"][0]["function"]["arguments"],
3839
)
3940

41+
validate_tool_output_against_schema(parsed_tool_output)
42+
4043
Result.new(
41-
evaluation_data: parsed_structured_output,
44+
evaluation_data: parsed_tool_output,
4245
llm_response: parsed_response,
4346
metrics: build_metrics(start_time, parsed_response),
4447
)
@@ -57,5 +60,12 @@ def build_metrics(start_time, response)
5760
model: response["model"],
5861
}
5962
end
63+
64+
def validate_tool_output_against_schema(tool_output)
65+
schema = tools.dig(0, "function", "parameters")
66+
JSON::Validator.validate!(schema, tool_output)
67+
rescue JSON::Schema::ValidationError => e
68+
raise InvalidToolCallSchemaError, "Tool call response does not match schema: #{e.message}"
69+
end
6070
end
6171
end

spec/lib/auto_evaluation/answer_relevancy/verdicts_generator_spec.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
let(:statements) { ["Statement one.", "Statement two."] }
55
let(:verdicts) do
66
[
7-
{ "verdict" => "Yes" },
8-
{ "verdict" => "No", "reason" => "The statement is irrelevant." },
7+
{ "verdict" => "yes" },
8+
{ "verdict" => "no", "reason" => "The statement is irrelevant." },
99
]
1010
end
1111
let(:verdicts_json) do

spec/lib/auto_evaluation/answer_relevancy_spec.rb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323

2424
let(:verdicts) do
2525
[
26-
{ "verdict" => "Yes" },
27-
{ "verdict" => "No", "reason" => "The statement is irrelevant." },
26+
{ "verdict" => "yes" },
27+
{ "verdict" => "no", "reason" => "The statement is irrelevant." },
2828
]
2929
end
3030
let(:verdicts_json) { { verdicts: }.to_json }
@@ -104,7 +104,7 @@
104104
let(:verdicts) do
105105
[
106106
{ "verdict" => "idk", "reason" => "Cannot determine relevance." },
107-
{ "verdict" => "No", "reason" => "The statement is irrelevant." },
107+
{ "verdict" => "no", "reason" => "The statement is irrelevant." },
108108
]
109109
end
110110

@@ -172,7 +172,7 @@
172172
end
173173

174174
context "when verdicts are generated and none have a 'no' verdict" do
175-
let(:verdicts_json) { { verdicts: [{ "verdict" => "Yes" }, { "verdict" => "Yes" }] }.to_json }
175+
let(:verdicts_json) { { verdicts: [{ "verdict" => "yes" }, { "verdict" => "yes" }] }.to_json }
176176

177177
it "returns a result object with the expected attributes" do
178178
allow(Clock).to receive(:monotonic_time).and_return(200.0, 202.0, 204.0, 206.0)

spec/lib/auto_evaluation/bedrock_openai_oss_invoke_spec.rb

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,18 @@
44
let(:tools) do
55
[
66
{
7-
type: "function",
8-
function: {
9-
name: "test_schema",
10-
description: "A test JSON schema",
11-
schema: {
12-
type: "object",
13-
properties: {
14-
response: { type: "string" },
7+
"type" => "function",
8+
"function" => {
9+
"name" => "test_schema",
10+
"description" => "A test JSON schema",
11+
"parameters" => {
12+
"type" => "object",
13+
"properties" => {
14+
"response" => { "type" => "string" },
1515
},
16-
required: %w[response],
16+
"required" => %w[response],
1717
},
18-
strict: true,
18+
"strict" => true,
1919
},
2020
},
2121
]
@@ -55,5 +55,20 @@
5555
},
5656
)
5757
end
58+
59+
it "raises an error if the response does not conform to the schema" do
60+
bedrock_invoke_model_openai_oss_tool_call(
61+
user_message,
62+
tools,
63+
{ "invalid_key" => "This does not conform to the schema." }.to_json,
64+
)
65+
66+
expect {
67+
described_class.call(user_message, tools)
68+
}.to raise_error(
69+
described_class::InvalidToolCallSchemaError,
70+
/The property '#\/' did not contain a required property of 'response'/,
71+
)
72+
end
5873
end
5974
end

spec/lib/auto_evaluation/coherence_spec.rb

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
let(:question_message) { "This is a test question message." }
55
let(:answer_message) { "This is a test answer message." }
66
let(:reason) { "This is the reason for the score." }
7-
let(:response_json) { { score: 3.0, reason: }.to_json }
7+
let(:response_json) { { score: 3, reason: }.to_json }
88
let(:user_prompt) do
99
sprintf(
1010
prompts.fetch(:user_prompt),
@@ -49,11 +49,11 @@
4949

5050
it "returns the correct score and success for each rubric score" do
5151
{
52-
1.0 => 0.0,
53-
2.0 => 0.25,
54-
3.0 => 0.5,
55-
4.0 => 0.75,
56-
5.0 => 1.0,
52+
1 => 0.0,
53+
2 => 0.25,
54+
3 => 0.5,
55+
4 => 0.75,
56+
5 => 1.0,
5757
}.each do |rubric_score, expected_score|
5858
response_json = { score: rubric_score, reason: }.to_json
5959
bedrock_invoke_model_openai_oss_tool_call(

0 commit comments

Comments
 (0)