Skip to content

Commit eef5292

Browse files
authored
Add schema verification step (#955)
* step files, empty notebook * missing type hint * fix test * post review fixes * fix test and cover new argument * add copyright
1 parent bd8ce7d commit eef5292

File tree

5 files changed

+159
-0
lines changed

5 files changed

+159
-0
lines changed

steps/src/.gitkeep

Whitespace-only changes.

steps/src/verify_schema/item.yaml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
apiVersion: v1
2+
categories:
3+
- data-preparation
4+
- model-serving
5+
- utilities
6+
description: Verifies the event is aligned with the provided schema
7+
example: verify_schema.ipynb
8+
generationDate: 2025-12-29:11-59
9+
hidden: false
10+
labels:
11+
author: Iguazio
12+
mlrunVersion: 1.10.0
13+
name: verify_schema
14+
className: VerifySchema
15+
defaultHandler:
16+
spec:
17+
filename: verify_schema.py
18+
image: mlrun/mlrun
19+
requirements:
20+
version: 1.0.0
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Copyright 2025 Iguazio
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
16+
from verify_schema import VerifySchema
17+
18+
class TestVerifySchema:
    """Unit tests for the ``VerifySchema`` step."""

    def test_verify_schema(self):
        """Strict mode: a matching event passes, missing or extra keys raise ``KeyError``."""
        schema = ["id", "name", "active"]
        verifier = VerifySchema(schema=schema, allow_unexpected_keys=False)

        # Test with valid event
        event = {
            "id": 1,
            "name": "Test Event",
            "active": True,
        }
        result = verifier.do(event)
        assert result == event

        # Test with missing key
        event_missing_key = {
            "id": 1,
            "name": "Test Event",
        }
        try:
            verifier.do(event_missing_key)
        except KeyError as e:
            assert "missing keys {'active'} in event" in str(e)
        else:
            # Without this branch the test would pass silently when no error is raised.
            raise AssertionError("expected KeyError for an event with a missing key")

        # Test with unexpected key
        event_unexpected_key = {
            "id": 1,
            "name": "Test Event",
            "active": True,
            "extra": "unexpected",
        }
        try:
            verifier.do(event_unexpected_key)
        except KeyError as e:
            assert "unexpected keys {'extra'} in event" in str(e)
        else:
            # Same reasoning as above: the absence of an exception must fail the test.
            raise AssertionError("expected KeyError for an event with an unexpected key")

    def test_verify_schema_allow_unexpected(self):
        """Lenient mode: keys outside the schema are accepted and the event is returned."""
        schema = ["id", "name", "active"]
        verifier = VerifySchema(schema=schema, allow_unexpected_keys=True)

        # Test with valid event and unexpected key
        event = {
            "id": 1,
            "name": "Test Event",
            "active": True,
            "extra": "unexpected",
        }
        result = verifier.do(event)
        assert result == event
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
{
2+
"cells": [
3+
{
4+
"metadata": {},
5+
"cell_type": "code",
6+
"outputs": [],
7+
"execution_count": null,
8+
"source": "",
9+
"id": "556b36b9b89d0515"
10+
}
11+
],
12+
"metadata": {
13+
"kernelspec": {
14+
"display_name": "Python 3",
15+
"language": "python",
16+
"name": "python3"
17+
},
18+
"language_info": {
19+
"codemirror_mode": {
20+
"name": "ipython",
21+
"version": 2
22+
},
23+
"file_extension": ".py",
24+
"mimetype": "text/x-python",
25+
"name": "python",
26+
"nbconvert_exporter": "python",
27+
"pygments_lexer": "ipython2",
28+
"version": "2.7.6"
29+
}
30+
},
31+
"nbformat": 4,
32+
"nbformat_minor": 5
33+
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Copyright 2025 Iguazio
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
16+
class VerifySchema:
    """
    Step that checks the keys of an event against a list of expected keys.

    Every key listed in ``schema`` must be present in the event, otherwise a KeyError is raised.
    Keys that are not listed in ``schema`` also raise a KeyError, unless the step was created
    with ``allow_unexpected_keys=True``.
    """

    def __init__(self, schema: list, allow_unexpected_keys: bool = False):
        """
        :param schema:                The keys every event is required to contain.
        :param allow_unexpected_keys: When True, keys outside ``schema`` are tolerated.
        """
        self.allow_unexpected_keys = allow_unexpected_keys
        self.schema = schema

    def do(self, event: dict):
        """
        Validate the keys of ``event`` and hand the event back untouched.

        :param event: The event to validate.

        :returns: The same ``event`` object that was passed in.

        :raises KeyError: If a schema key is absent from the event, or if the event carries a key
                          outside the schema while unexpected keys are not allowed.
        """
        # Required keys are checked first, regardless of the unexpected-keys setting
        absent = set(self.schema).difference(event)
        if absent:
            raise KeyError(f"Schema verification failed: missing keys {absent} in event: {event}")

        # Keys outside the schema are only an error in strict mode
        if not self.allow_unexpected_keys:
            extra = set(event).difference(self.schema)
            if extra:
                raise KeyError(f"Schema verification failed: unexpected keys {extra} in event: {event}")

        return event

0 commit comments

Comments
 (0)