1+ import numpy as np
2+
13try :
24 import chromadb # noqa: F401
35except ImportError :
1921)
2022
2123from chromadb .api .types import Embedding
22- from pydantic import BaseModel , Field
24+ from pydantic import BaseModel , Field , ConfigDict
2325
2426C = TypeVar ("C" )
2527
@@ -31,17 +33,22 @@ class ResourceFeature(BaseModel, Generic[C]):
3133
3234Metadata = Dict [str , Union [str , int , float , bool ]]
3335
36+ EmbeddingWrapper = Union [Embedding , np .ndarray ]
37+
3438
3539class EmbeddableResource (BaseModel ):
40+ model_config = ConfigDict (arbitrary_types_allowed = True )
3641 id : Optional [str ] = Field (None , description = "Document ID" )
3742 metadata : Optional [Metadata ] = Field (None , description = "Document metadata" )
38- embedding : Optional [Embedding ] = Field (None , description = "Document embedding" )
43+ embedding : Optional [EmbeddingWrapper ] = Field (
44+ None , description = "Document embedding"
45+ )
3946
4047 @staticmethod
4148 def resource_features () -> Sequence [ResourceFeature ]:
4249 return [
43- ResourceFeature [Embedding ](
44- feature_name = "embedding" , feature_type = Embedding
50+ ResourceFeature [EmbeddingWrapper ](
51+ feature_name = "embedding" , feature_type = EmbeddingWrapper
4552 ),
4653 ResourceFeature [Metadata ](feature_name = "metadata" , feature_type = Metadata ),
4754 ResourceFeature [str ](feature_name = "id" , feature_type = str ),
@@ -58,6 +65,13 @@ def resource_features() -> Sequence[ResourceFeature]:
5865 * super ().resource_features (),
5966 ]
6067
def model_dump(self, **kwargs):
    """Dump the model to a dict, converting a NumPy embedding to a list.

    All keyword arguments are forwarded unchanged to the parent class's
    ``model_dump``. If the resulting dict holds an ``"embedding"`` entry
    that is an ``np.ndarray``, that entry is replaced by the result of
    ``ndarray.tolist()``; any other value is left as-is.

    Returns:
        The dict produced by the parent ``model_dump``, with the
        embedding converted when applicable.
    """
    data = super().model_dump(**kwargs)
    # The "embedding" key can be missing when the caller narrows the dump
    # through the forwarded kwargs (e.g. include/exclude), so look it up
    # without raising KeyError.
    embedding = data.get("embedding")
    if isinstance(embedding, np.ndarray):
        data["embedding"] = embedding.tolist()
    return data
74+
6175
6276D = TypeVar ("D" , bound = EmbeddableResource , contravariant = True )
6377
0 commit comments