diff --git a/chromadb/api/__init__.py b/chromadb/api/__init__.py
index e70be476b11..40662d82a09 100644
--- a/chromadb/api/__init__.py
+++ b/chromadb/api/__init__.py
@@ -636,6 +636,16 @@ def _modify(
     ) -> None:
         pass
 
+    @abstractmethod
+    def _fork(
+        self,
+        collection_id: UUID,
+        new_name: str,
+        tenant: str = DEFAULT_TENANT,
+        database: str = DEFAULT_DATABASE,
+    ) -> CollectionModel:
+        pass
+
     @abstractmethod
     @override
     def _count(
diff --git a/chromadb/api/async_api.py b/chromadb/api/async_api.py
index 429bb3db688..8561661fca9 100644
--- a/chromadb/api/async_api.py
+++ b/chromadb/api/async_api.py
@@ -630,6 +630,16 @@ async def _modify(
     ) -> None:
         pass
 
+    @abstractmethod
+    async def _fork(
+        self,
+        collection_id: UUID,
+        new_name: str,
+        tenant: str = DEFAULT_TENANT,
+        database: str = DEFAULT_DATABASE,
+    ) -> CollectionModel:
+        pass
+
     @abstractmethod
     @override
     async def _count(
diff --git a/chromadb/api/async_fastapi.py b/chromadb/api/async_fastapi.py
index d827992f90b..afa753deb28 100644
--- a/chromadb/api/async_fastapi.py
+++ b/chromadb/api/async_fastapi.py
@@ -376,6 +376,24 @@ async def _modify(
             },
         )
 
+    @trace_method("AsyncFastAPI._fork", OpenTelemetryGranularity.OPERATION)
+    @override
+    async def _fork(
+        self,
+        collection_id: UUID,
+        new_name: str,
+        tenant: str = DEFAULT_TENANT,
+        database: str = DEFAULT_DATABASE,
+    ) -> CollectionModel:
+        """Forks a collection"""
+        resp_json = await self._make_request(
+            "post",
+            f"/tenants/{tenant}/databases/{database}/collections/{collection_id}/fork",
+            json={"new_name": new_name},
+        )
+        model = CollectionModel.from_json(resp_json)
+        return model
+
     @trace_method("AsyncFastAPI.delete_collection", OpenTelemetryGranularity.OPERATION)
     @override
     async def delete_collection(
diff --git a/chromadb/api/fastapi.py b/chromadb/api/fastapi.py
index a5a5246e5c8..5ddf348c3d1 100644
--- a/chromadb/api/fastapi.py
+++ b/chromadb/api/fastapi.py
@@ -330,6 +330,24 @@ def _modify(
             },
         )
 
+    @trace_method("FastAPI._fork", OpenTelemetryGranularity.OPERATION)
+    @override
+    def _fork(
+        self,
+        collection_id: UUID,
+        new_name: str,
+        tenant: str = DEFAULT_TENANT,
+        database: str = DEFAULT_DATABASE,
+    ) -> CollectionModel:
+        """Forks a collection"""
+        resp_json = self._make_request(
+            "post",
+            f"/tenants/{tenant}/databases/{database}/collections/{collection_id}/fork",
+            json={"new_name": new_name},
+        )
+        model = CollectionModel.from_json(resp_json)
+        return model
+
     @trace_method("FastAPI.delete_collection", OpenTelemetryGranularity.OPERATION)
     @override
     def delete_collection(
diff --git a/chromadb/api/models/AsyncCollection.py b/chromadb/api/models/AsyncCollection.py
index 1147149e776..7713de48385 100644
--- a/chromadb/api/models/AsyncCollection.py
+++ b/chromadb/api/models/AsyncCollection.py
@@ -256,6 +256,32 @@ async def modify(
 
         self._update_model_after_modify_success(name, metadata, configuration)
 
+    async def fork(
+        self,
+        new_name: str,
+    ) -> "AsyncCollection":
+        """Fork the current collection under a new name. The returned collection should contain identical data to the current collection.
+        This is an experimental API that only works for Hosted Chroma for now.
+
+        Args:
+            new_name: The name of the new collection.
+
+        Returns:
+            AsyncCollection: A new collection with the specified name and containing identical data to the current collection.
+        """
+        model = await self._client._fork(
+            collection_id=self.id,
+            new_name=new_name,
+            tenant=self.tenant,
+            database=self.database,
+        )
+        return AsyncCollection(
+            client=self._client,
+            model=model,
+            embedding_function=self._embedding_function,
+            data_loader=self._data_loader,
+        )
+
     async def update(
         self,
         ids: OneOrMany[ID],
diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py
index e822c5a5cab..9a9b8f1c163 100644
--- a/chromadb/api/models/Collection.py
+++ b/chromadb/api/models/Collection.py
@@ -262,6 +262,32 @@ def modify(
 
         self._update_model_after_modify_success(name, metadata, configuration)
 
+    def fork(
+        self,
+        new_name: str,
+    ) -> "Collection":
+        """Fork the current collection under a new name. The returned collection should contain identical data to the current collection.
+        This is an experimental API that only works for Hosted Chroma for now.
+
+        Args:
+            new_name: The name of the new collection.
+
+        Returns:
+            Collection: A new collection with the specified name and containing identical data to the current collection.
+        """
+        model = self._client._fork(
+            collection_id=self.id,
+            new_name=new_name,
+            tenant=self.tenant,
+            database=self.database,
+        )
+        return Collection(
+            client=self._client,
+            model=model,
+            embedding_function=self._embedding_function,
+            data_loader=self._data_loader,
+        )
+
     def update(
         self,
         ids: OneOrMany[ID],
diff --git a/chromadb/api/rust.py b/chromadb/api/rust.py
index 46b2b304e3f..18a2a6037bd 100644
--- a/chromadb/api/rust.py
+++ b/chromadb/api/rust.py
@@ -305,6 +305,16 @@ def _modify(
             str(id), new_name, new_metadata, new_configuration_json_str
         )
 
+    @override
+    def _fork(
+        self,
+        collection_id: UUID,
+        new_name: str,
+        tenant: str = DEFAULT_TENANT,
+        database: str = DEFAULT_DATABASE,
+    ) -> CollectionModel:
+        raise NotImplementedError("Collection forking is not implemented for Local Chroma")
+
     @override
     def _count(
         self,
diff --git a/chromadb/api/segment.py b/chromadb/api/segment.py
index e69292a34be..e0acb1ef892 100644
--- a/chromadb/api/segment.py
+++ b/chromadb/api/segment.py
@@ -405,6 +405,16 @@ def _modify(
         elif new_configuration:
             self._sysdb.update_collection(id, configuration=new_configuration)
 
+    @override
+    def _fork(
+        self,
+        collection_id: UUID,
+        new_name: str,
+        tenant: str = DEFAULT_TENANT,
+        database: str = DEFAULT_DATABASE,
+    ) -> CollectionModel:
+        raise NotImplementedError("Collection forking is not implemented for SegmentAPI")
+
     @trace_method("SegmentAPI.delete_collection", OpenTelemetryGranularity.OPERATION)
     @override
     @rate_limit