Skip to content

Commit 7f881ec

Browse files
committed
release: 0.5.2
1 parent 256fca6 commit 7f881ec

File tree

4 files changed

+12
-7
lines changed

4 files changed

+12
-7
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ Welcome to `rerankers`! Our goal is to provide users with a simple API to use an
1414

1515
## Updates
1616

17+
- v0.5.2: Minor ColBERT fixes
1718
- v0.5.1: Minor change making RankedResults subscriptable, meaning results[0] will return the first result, etc.
1819
- v0.5.0: Added support for the current state-of-the-art rerankers, BAAI's series of `BGE` layerwise LLM rerankers, based on [Gemma](https://huggingface.co/BAAI/bge-reranker-v2.5-gemma2-lightweight) and MiniCPM. These are different from RankGPT, as they're not listwise: the models are repurposed as "cross-encoders", and do output logit scores.
1920
- v0.4.0: ColBERT performance improvement! It should now be faster and produce stronger results following implementation of the JaColBERTv2.5 dynamic query length method. This version also now supports HuggingFace's Text-Embedding-Server (TEI) inference as an API reranker option, thanks to [@srisudarsan](https://github.com/srisudarsan).

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ packages = [
1414
name = "rerankers"
1515

1616

17-
version = "0.5.1"
17+
version = "0.5.2"
1818

1919
description = "A unified API for various document re-ranking models."
2020

rerankers/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
from rerankers.documents import Document
33

44
__all__ = ["Reranker", "Document"]
5-
__version__ = "0.5.1"
5+
__version__ = "0.5.2"

rerankers/models/colbert_ranker.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -224,8 +224,12 @@ def __init__(
224224
self.verbose,
225225
)
226226
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
227-
self.model = AutoModel.from_pretrained(model_name, torch_dtype=dtype).to(
228-
self.device
227+
self.model = (
228+
ColBERTModel.from_pretrained(
229+
model_name,
230+
)
231+
.to(self.device)
232+
.to(self.dtype)
229233
)
230234
self.model.eval()
231235
self.query_max_length = 32 # Lower bound
@@ -335,10 +339,10 @@ def _encode(
335339
)
336340

337341
# Calculate QLEN dynamically for each query
338-
if original_length % 32 <= 8:
342+
if original_length % 16 <= 8:
339343
QLEN = original_length + 8
340344
else:
341-
QLEN = ceil(original_length / 32) * 32
345+
QLEN = ceil(original_length / 16) * 16
342346

343347
if original_length < QLEN:
344348
pad_length = QLEN - original_length
@@ -372,7 +376,7 @@ def _to_embs(self, encoding) -> torch.Tensor:
372376
batch_encoding = {
373377
key: val[i : i + self.batch_size] for key, val in encoding.items()
374378
}
375-
batch_embs = self.model(**batch_encoding).last_hidden_state.squeeze(1)
379+
batch_embs = self.model(**batch_encoding)
376380
batched_embs.append(batch_embs)
377381
embs = torch.cat(batched_embs, dim=0)
378382
if self.normalize:

0 commit comments

Comments
 (0)