From c0e8ba697d4941f21974839b37672f8c4d3c1a95 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Thu, 12 Sep 2024 10:40:46 +0300 Subject: [PATCH] fix: HF batch metadata keys --- chroma_dp/huggingface/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chroma_dp/huggingface/__init__.py b/chroma_dp/huggingface/__init__.py index e2ae498..510e269 100644 --- a/chroma_dp/huggingface/__init__.py +++ b/chroma_dp/huggingface/__init__.py @@ -339,6 +339,7 @@ def hf_export( for key in doc.metadata.keys(): if f"metadata.{key}" not in features: features[f"metadata.{key}"] = _infer_hf_type(doc.metadata[key]) + _batch[f"metadata.{key}"] = [] _batch[f"metadata.{key}"].append(doc.metadata[key]) if len(_batch["document"]) >= _batch_size: @@ -394,7 +395,7 @@ def hf_export( custom_metadata = { "license": "mit", "language": "en", - "pretty_name": f"Chroma export of collection N/A", + "pretty_name": "Chroma export of collection N/A", "size_categories": ["n<1K"], "x-chroma": { "description": "Chroma Dataset",