Skip to content

[BUG] Raise an error when the configuration JSON from the server can't be deserialized, lazily load the embedding function (ef) on CollectionModel, warn on api_key #4471

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions chromadb/api/collection_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,24 @@ def load_collection_configuration_from_json(
ef = None
else:
try:
ef = known_embedding_functions[ef_config["name"]]
ef = ef.build_from_config(ef_config["config"]) # type: ignore
ef_name = ef_config["name"]
except KeyError:
raise ValueError(
f"Embedding function name not found in config: {ef_config}"
)
try:
ef = known_embedding_functions[ef_name]
except KeyError:
raise ValueError(
f"Embedding function {ef_config['name']} not found. Add @register_embedding_function decorator to the class definition."
f"Embedding function {ef_name} not found. Add @register_embedding_function decorator to the class definition."
)
try:
ef = ef.build_from_config(ef_config["config"]) # type: ignore
except Exception as e:
raise ValueError(
f"Could not build embedding function {ef_config['name']} from config {ef_config['config']}: {e}"
)

else:
ef = None

Expand Down
20 changes: 8 additions & 12 deletions chromadb/api/models/CollectionCommon.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,18 +129,7 @@ def __init__(
if embedding_function is not None:
validate_embedding_function(embedding_function)

config_ef = self.configuration.get("embedding_function")
if config_ef is not None:
if embedding_function is not None and not isinstance(
embedding_function, ef.DefaultEmbeddingFunction
):
if embedding_function.name() is not config_ef.name():
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we want this validation?

raise ValueError(
f"Embedding function name mismatch: {embedding_function.name()} != {config_ef.name()}"
)
self._embedding_function = config_ef
else:
self._embedding_function = embedding_function
self._embedding_function = embedding_function
self._data_loader = data_loader

# Expose the model properties as read-only properties on the Collection class
Expand Down Expand Up @@ -567,6 +556,13 @@ def _embed_record_set(
)

def _embed(self, input: Any) -> Embeddings:
if self._embedding_function is not None and not isinstance(
self._embedding_function, ef.DefaultEmbeddingFunction
):
return self._embedding_function(input=input)
config_ef = self.configuration.get("embedding_function")
if config_ef is not None:
return config_ef(input=input)
if self._embedding_function is None:
raise ValueError(
"You must provide an embedding function to compute embeddings."
Expand Down
13 changes: 3 additions & 10 deletions chromadb/api/rust.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
UpdateCollectionConfiguration,
create_collection_configuration_to_json_str,
update_collection_configuration_to_json_str,
load_collection_configuration_from_json,
)
from chromadb.auth import UserIdentity
from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT, Settings, System
Expand Down Expand Up @@ -191,9 +190,7 @@ def list_collections(
CollectionModel(
id=collection.id,
name=collection.name,
configuration=load_collection_configuration_from_json(
collection.configuration
),
configuration_json=collection.configuration,
metadata=collection.metadata,
dimension=collection.dimension,
tenant=collection.tenant,
Expand Down Expand Up @@ -233,9 +230,7 @@ def create_collection(
collection_model = CollectionModel(
id=collection.id,
name=collection.name,
configuration=load_collection_configuration_from_json(
collection.configuration
),
configuration_json=collection.configuration,
metadata=collection.metadata,
dimension=collection.dimension,
tenant=collection.tenant,
Expand All @@ -254,9 +249,7 @@ def get_collection(
return CollectionModel(
id=collection.id,
name=collection.name,
configuration=load_collection_configuration_from_json(
collection.configuration
),
configuration_json=collection.configuration,
metadata=collection.metadata,
dimension=collection.dimension,
tenant=collection.tenant,
Expand Down
8 changes: 5 additions & 3 deletions chromadb/api/segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from chromadb.api.collection_configuration import (
CreateCollectionConfiguration,
UpdateCollectionConfiguration,
load_collection_configuration_from_create_collection_configuration,
create_collection_configuration_to_json,
)
from chromadb.auth import UserIdentity
from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT, Settings, System
Expand Down Expand Up @@ -235,7 +235,7 @@ def create_collection(
id=id,
name=name,
metadata=metadata,
configuration=load_collection_configuration_from_create_collection_configuration(
configuration_json=create_collection_configuration_to_json(
configuration or CreateCollectionConfiguration()
),
tenant=tenant,
Expand Down Expand Up @@ -413,7 +413,9 @@ def _fork(
tenant: str = DEFAULT_TENANT,
database: str = DEFAULT_DATABASE,
) -> CollectionModel:
raise NotImplementedError("Collection forking is not implemented for SegmentAPI")
raise NotImplementedError(
"Collection forking is not implemented for SegmentAPI"
)

@trace_method("SegmentAPI.delete_collection", OpenTelemetryGranularity.OPERATION)
@override
Expand Down
11 changes: 4 additions & 7 deletions chromadb/db/impl/grpc/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
from typing import Any, Dict, List, cast
from uuid import UUID
from overrides import overrides
from chromadb.api.collection_configuration import (
load_collection_configuration_from_json_str,
)
import json

from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT, Component, System
from chromadb.proto.convert import (
from_proto_metadata,
Expand Down Expand Up @@ -286,15 +285,13 @@ def CreateCollection(
f"Collection {collection_name} already exists",
)

configuration = load_collection_configuration_from_json_str(
request.configuration_json_str
)
configuration_json = json.loads(request.configuration_json_str)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you care that this can raise an error? Should it be handled more gracefully?


id = UUID(hex=request.id)
new_collection = Collection(
id=id,
name=request.name,
configuration=configuration,
configuration_json=configuration_json,
metadata=from_proto_metadata(request.metadata),
dimension=request.dimension,
database=database,
Expand Down
11 changes: 6 additions & 5 deletions chromadb/db/mixins/sysdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
create_collection_configuration_to_json_str,
load_collection_configuration_from_json_str,
CollectionConfiguration,
load_collection_configuration_from_create_collection_configuration,
create_collection_configuration_to_json,
collection_configuration_to_json,
collection_configuration_to_json_str,
overwrite_collection_configuration,
update_collection_configuration_from_legacy_update_metadata,
Expand Down Expand Up @@ -310,9 +311,7 @@ def create_collection(
collection = Collection(
id=id,
name=name,
configuration=load_collection_configuration_from_create_collection_configuration(
configuration
),
configuration_json=create_collection_configuration_to_json(configuration),
metadata=metadata,
dimension=dimension,
tenant=tenant,
Expand Down Expand Up @@ -541,7 +540,9 @@ def get_collections(
Collection(
id=cast(UUID, id),
name=name,
configuration=configuration,
configuration_json=collection_configuration_to_json(
configuration
),
metadata=metadata,
dimension=dimension,
tenant=str(rows[0][5]),
Expand Down
6 changes: 2 additions & 4 deletions chromadb/proto/convert.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from typing import Dict, Optional, Sequence, Tuple, TypedDict, Union, cast
from uuid import UUID
import json

import numpy as np
from numpy.typing import NDArray

import chromadb.proto.chroma_pb2 as chroma_pb
import chromadb.proto.query_executor_pb2 as query_pb
from chromadb.api.collection_configuration import (
load_collection_configuration_from_json_str,
collection_configuration_to_json_str,
)
from chromadb.api.types import Embedding, Where, WhereDocument
Expand Down Expand Up @@ -239,9 +239,7 @@ def from_proto_collection(collection: chroma_pb.Collection) -> Collection:
return Collection(
id=UUID(hex=collection.id),
name=collection.name,
configuration=load_collection_configuration_from_json_str(
collection.configuration_json_str
),
configuration_json=json.loads(collection.configuration_json_str),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you care if this raises an error?

metadata=from_proto_metadata(collection.metadata)
if collection.HasField("metadata")
else None,
Expand Down
Loading
Loading