Skip to content

Commit 9f69b5f

Browse files
authored
fix: support legacy metadata (#777)
* fix: support legacy metadata * Update unit coverage badge --------- Co-authored-by: leoguillaume <leoguillaume@users.noreply.github.com>
1 parent 56a20be commit 9f69b5f

3 files changed

Lines changed: 41 additions & 2 deletions

File tree

.github/badges/coverage.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"schemaVersion":1,"label":"coverage","message":"51.01%","color":"red"}
1+
{"schemaVersion":1,"label":"coverage","message":"50.18%","color":"red"}

api/schemas/chunks.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from datetime import datetime
22
from typing import Annotated, Literal
33

4-
from pydantic import Field, StringConstraints, field_serializer, model_validator
4+
from pydantic import Field, StringConstraints, field_serializer, field_validator, model_validator
55

66
from api.schemas import BaseModel
77

@@ -43,6 +43,25 @@ class Chunk(BaseModel):
4343
metadata: Annotated[ChunkMetadata | None, Field(default=None, description="Metadata of the chunk")]
4444
created: Annotated[datetime, Field(default=datetime.now(), description="The date of the chunk creation.")]
4545

46+
@field_validator("metadata", mode="before")
47+
@classmethod
48+
def support_legacy_metadata(cls, metadata: dict | None) -> dict | None:
49+
if metadata is None:
50+
return None
51+
52+
normalized_metadata = {}
53+
for key, value in metadata.items():
54+
if value is None:
55+
continue
56+
if isinstance(value, list):
57+
normalized_value = ",".join(str(item).strip() for item in value if str(item).strip())
58+
if normalized_value:
59+
normalized_metadata[key] = normalized_value
60+
continue
61+
normalized_metadata[key] = value
62+
63+
return normalized_metadata
64+
4665
@field_serializer("created")
4766
def serialize_created(self, created: datetime) -> int:
4867
return int(created.timestamp())

api/tests/unit/test_legacy.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from datetime import datetime
2+
3+
from api.schemas.chunks import Chunk
4+
5+
6+
def test_pydantic_chunck_model_support_legacy_metadata():
    """Check that Chunk accepts legacy metadata holding None values and lists: None entries are dropped and lists become one comma-joined string."""
    # Given
    legacy_metadata = {"list_metadata": ["tag1", "tag2"], "none_metadata": None}
    payload = {
        "id": 1,
        "collection_id": 1,
        "document_id": 1,
        "content": "test",
        "created": datetime.now(),
        "metadata": legacy_metadata,
    }
    # When
    chunk = Chunk(**payload)
    # Then
    expected_metadata = {"list_metadata": "tag1,tag2"}
    assert chunk.metadata == expected_metadata

0 commit comments

Comments
 (0)