MaartenGr · MaartenGr · Mar 25, 2025 · Mar 20, 2025 · Mar 20, 2025 · Mar 20, 2025
diff --git a/bertopic/backend/__init__.py b/bertopic/backend/__init__.py
@@ -31,6 +31,12 @@
     msg = "`pip install model2vec` \n\n"
     Model2VecBackend = NotInstalled("Model2Vec", "Model2Vec", custom_msg=msg)
 
+# FastEmbed Embeddings
+try:
+    from bertopic.backend._fastembed import FastEmbedBackend
+except ModuleNotFoundError:
+    msg = "`pip install fastembed` \n\n"
+    FastEmbedBackend = NotInstalled("FastEmbed", "FastEmbed", custom_msg=msg)
 
 __all__ = [
     "BaseEmbedder",
@@ -39,5 +45,6 @@
     "CohereBackend",
     "Model2VecBackend",
     "MultiModalBackend",
+    "FastEmbedBackend",
     "languages",
 ]
diff --git a/bertopic/backend/_fastembed.py b/bertopic/backend/_fastembed.py
@@ -0,0 +1,54 @@
+import numpy as np
+from typing import List
+from fastembed import TextEmbedding
+
+from bertopic.backend import BaseEmbedder
+
+
+class FastEmbedBackend(BaseEmbedder):
+    """FastEmbed embedding model.
+
+    Wraps a FastEmbed `TextEmbedding` model to generate sentence embeddings.
+
+    Arguments:
+        embedding_model: The name of a supported FastEmbed model, or an
+                         already-initialized `TextEmbedding` instance
+
+    Examples:
+    To create a model, pass a string naming a supported FastEmbed model:
+
+    ```python
+    from bertopic.backend import FastEmbedBackend
+
+    sentence_model = FastEmbedBackend("BAAI/bge-small-en-v1.5")
+    ```
+    """
+
+    def __init__(self, embedding_model: str = "BAAI/bge-small-en-v1.5"):
+        super().__init__()
+
+        # `select_backend` forwards existing FastEmbed model objects here; accept them as-is
+        if isinstance(embedding_model, TextEmbedding):
+            self.embedding_model = embedding_model
+        elif isinstance(embedding_model, str) and any(
+            m["model"] == embedding_model for m in TextEmbedding.list_supported_models()
+        ):
+            self.embedding_model = TextEmbedding(model_name=embedding_model)
+        else:
+            raise ValueError(
+                "Please select a correct FastEmbed model: \n"
+                "the model must be a TextEmbedding instance or a supported model name. \n"
+                "The supported TextEmbedding model list is here: https://qdrant.github.io/fastembed/examples/Supported_Models/"
+            )
+
+    def embed(self, documents: List[str], verbose: bool = False) -> np.ndarray:
+        """Embed a list of n documents/words into an (n, m) matrix.
+
+        Arguments:
+            documents: A list of documents or words to be embedded
+            verbose: Controls the verbosity of the process
+
+        Returns:
+            Array of shape (n, m): one embedding of size `m` per document/word
+        """
+        return np.array(list(self.embedding_model.embed(documents, show_progress_bar=verbose)))
diff --git a/bertopic/backend/_utils.py b/bertopic/backend/_utils.py
@@ -130,6 +130,12 @@ def select_backend(embedding_model, language: str = None, verbose: bool = False)
 
         return Model2VecBackend(embedding_model)
 
+    # FastEmbed word embeddings
+    if "fastembed" in str(type(embedding_model)):
+        from bertopic.backend._fastembed import FastEmbedBackend
+
+        return FastEmbedBackend(embedding_model)
+
     # Select embedding model based on language
     if language:
         try:

diff --git a/docs/getting_started/embeddings/embeddings.md b/docs/getting_started/embeddings/embeddings.md
@@ -279,6 +279,22 @@ embedding_model = CohereBackend(client)
 topic_model = BERTopic(embedding_model=embedding_model)
 ```
 
+## **FastEmbed**
+[FastEmbed](https://qdrant.tech/documentation/fastembed/) is a lightweight Python library for embedding generation
+and it supports popular embedding models.
+You can easily use it as in the example below:
+
+```python
+from bertopic.backend import FastEmbedBackend
+
+embedding_model = FastEmbedBackend("BAAI/bge-small-en-v1.5")
+topic_model = BERTopic(embedding_model=embedding_model)
+```
+
+!!! tip "Tip!"
+    Before you start, check the supported FastEmbed text embedding models [here](https://qdrant.github.io/fastembed/examples/Supported_Models/).
+
+
 ## **Multimodal**
 To create embeddings for both text and images in the same vector space, we can use the `MultiModalBackend`.
 This model uses a clip-vit based model that is capable of embedding text, images, or both:

diff --git a/pyproject.toml b/pyproject.toml
@@ -61,6 +61,9 @@ docs = [
     "mkdocstrings-python==1.10.0",
     "mkdocstrings==0.24.3",
 ]
+fastembed = [
+    "fastembed>=0.6.0",
+]
 flair = [
     "flair>=0.7",
     "torch>=1.4.0",