Skip to content

Commit 5048559

Browse files
authored
Jina reranking (#3788)
1 parent f052403 commit 5048559

File tree

6 files changed

+449
-0
lines changed

6 files changed

+449
-0
lines changed

camel/retrievers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from .auto_retriever import AutoRetriever
1616
from .base import BaseRetriever
1717
from .bm25_retriever import BM25Retriever
18+
from .jina_rerank_retriever import JinaRerankRetriever
1819
from .cohere_rerank_retriever import CohereRerankRetriever
1920
from .vector_retriever import VectorRetriever
2021
from .hybrid_retrival import HybridRetriever
@@ -24,6 +25,7 @@
2425
'VectorRetriever',
2526
'AutoRetriever',
2627
'BM25Retriever',
28+
'JinaRerankRetriever',
2729
'CohereRerankRetriever',
2830
'HybridRetriever',
2931
]
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
# ========= Copyright 2023-2026 @ CAMEL-AI.org. All Rights Reserved. =========
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
# ========= Copyright 2023-2026 @ CAMEL-AI.org. All Rights Reserved. =========
14+
import os
15+
from typing import Any, Dict, List, Union
16+
17+
import requests
18+
19+
from camel.retrievers import BaseRetriever
20+
from camel.types.enums import JinaRerankerModelType
21+
22+
DEFAULT_TOP_K_RESULTS = 1
23+
JINA_RERANK_API_URL = "https://api.jina.ai/v1/rerank"
24+
25+
26+
class JinaRerankRetriever(BaseRetriever):
    r"""An implementation of the `BaseRetriever` using the `Jina AI Reranker`
    model.

    This retriever sends already-retrieved documents to Jina AI's reranking
    API and returns them re-ordered by their relevance to the query.

    Attributes:
        model_name (str): The model name sent to the API for re-ranking.
        api_key (str): The API key used to authenticate with the Jina AI
            service.

    References:
        https://jina.ai/reranker/
    """

    def __init__(
        self,
        model_name: Union[JinaRerankerModelType, str] = (
            JinaRerankerModelType.JINA_RERANKER_V2_BASE_MULTILINGUAL
        ),
        api_key: str | None = None,
    ) -> None:
        r"""Initializes an instance of the JinaRerankRetriever.

        Args:
            model_name (Union[JinaRerankerModelType, str]): The name of the
                model to be used for re-ranking. Can be a
                `JinaRerankerModelType` enum value or a string. Defaults to
                `JinaRerankerModelType.JINA_RERANKER_V2_BASE_MULTILINGUAL`.
            api_key (Optional[str]): The API key for authenticating requests
                to the Jina AI API. If not provided, the key is read from
                the environment variable 'JINA_API_KEY'.

        Raises:
            ValueError: If the API key is neither passed as an argument nor
                set in the environment variable.
        """
        self.api_key = api_key or os.environ.get("JINA_API_KEY")
        if not self.api_key:
            raise ValueError(
                "Must pass in Jina API key or specify via JINA_API_KEY"
                " environment variable."
            )
        # Store the plain model string regardless of whether an enum member
        # or a raw string was supplied.
        if isinstance(model_name, JinaRerankerModelType):
            self.model_name = model_name.value
        else:
            self.model_name = model_name

    @staticmethod
    def _extract_text(item: Any) -> str:
        r"""Returns the text sent to the reranker for one retrieved item.

        Dict items use the first truthy value of the 'text' or 'content'
        keys; anything else (including dicts without those keys) falls back
        to its string representation.
        """
        if isinstance(item, dict):
            return item.get('text') or item.get('content') or str(item)
        return str(item)

    def query(
        self,
        query: str,
        retrieved_result: List[Dict[str, Any]],
        top_k: int = DEFAULT_TOP_K_RESULTS,
    ) -> List[Dict[str, Any]]:
        r"""Queries and compiles results using the Jina AI re-ranking model.

        Args:
            query (str): Query string for information retrieval.
            retrieved_result (List[Dict[str, Any]]): The content to be
                re-ranked, should be the output from `BaseRetriever` like
                `VectorRetriever`. Each dict should have a 'text' key
                containing the document text.
            top_k (int, optional): The number of top results to return during
                retrieval. Must be a positive integer. Defaults to
                `DEFAULT_TOP_K_RESULTS`.

        Returns:
            List[Dict[str, Any]]: The re-ranked results in the order given
                by the API, each a copy of the original item plus a
                'similarity score' entry. Empty when `retrieved_result` is
                empty.

        Raises:
            ValueError: If `top_k` is not a positive integer.
            requests.exceptions.RequestException: If the API request fails.
        """
        if top_k < 1:
            raise ValueError("top_k must be a positive integer.")
        # Nothing to rank: skip the network round trip entirely.
        if not retrieved_result:
            return []

        documents = [self._extract_text(item) for item in retrieved_result]

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.model_name,
            "query": query,
            "documents": documents,
            "top_n": top_k,
        }

        response = requests.post(
            JINA_RERANK_API_URL,
            headers=headers,
            json=payload,
            timeout=30,
        )
        response.raise_for_status()

        formatted_results: List[Dict[str, Any]] = []
        for result in response.json().get("results", []):
            index = result.get("index")
            # Skip entries that cannot be mapped back to an input document
            # instead of silently attributing their score to document 0 or
            # raising IndexError.
            if not isinstance(index, int) or not (
                0 <= index < len(retrieved_result)
            ):
                continue

            original = retrieved_result[index]
            # Mirror `_extract_text`: non-dict inputs are accepted when
            # building the request, so they must be accepted here as well.
            if isinstance(original, dict):
                selected_chunk = original.copy()
            else:
                selected_chunk = {'text': str(original)}
            selected_chunk['similarity score'] = result.get(
                "relevance_score", 0.0
            )
            formatted_results.append(selected_chunk)

        return formatted_results

camel/types/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
EmbeddingModelType,
1717
GeminiEmbeddingTaskType,
1818
HuggingFaceRepoType,
19+
JinaRerankerModelType,
1920
ModelPlatformType,
2021
ModelType,
2122
OpenAIBackendRole,
@@ -83,6 +84,7 @@
8384
'ParsedChatCompletion',
8485
'HuggingFaceRepoType',
8586
'GeminiEmbeddingTaskType',
87+
'JinaRerankerModelType',
8688
'NOT_GIVEN',
8789
'NotGiven',
8890
'BaseMCPRegistryConfig',

camel/types/enums.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2106,6 +2106,32 @@ class JinaReturnFormat(Enum):
21062106
TEXT = "text"
21072107

21082108

2109+
class JinaRerankerModelType(str, Enum):
    r"""Names of the reranking models offered by the Jina AI Reranker API.

    Every member is a string whose value is the model identifier; the
    models re-rank documents by their relevance to a query.

    Reference:
        https://jina.ai/reranker/
    """

    # Multilingual reranker model supporting 100+ languages. (Default)
    JINA_RERANKER_V2_BASE_MULTILINGUAL = "jina-reranker-v2-base-multilingual"

    # English base reranker model.
    JINA_RERANKER_V1_BASE_EN = "jina-reranker-v1-base-en"

    # Lightweight English reranker model for faster inference.
    JINA_RERANKER_V1_TINY_EN = "jina-reranker-v1-tiny-en"

    # High-performance English reranker model.
    JINA_RERANKER_V1_TURBO_EN = "jina-reranker-v1-turbo-en"

    # ColBERT-based reranker for token-level matching.
    JINA_COLBERT_V2 = "jina-colbert-v2"
2133+
2134+
21092135
class HuggingFaceRepoType(str, Enum):
21102136
DATASET = "dataset"
21112137
MODEL = "model"
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# ========= Copyright 2023-2026 @ CAMEL-AI.org. All Rights Reserved. =========
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
# ========= Copyright 2023-2026 @ CAMEL-AI.org. All Rights Reserved. =========
14+
"""
15+
Example demonstrating the use of JinaRerankRetriever for re-ranking
16+
retrieved documents using Jina AI's reranker model.
17+
18+
Prerequisites:
19+
- Set `JINA_API_KEY` environment variable or pass it directly
20+
- Get your API key from https://jina.ai/
21+
22+
Usage:
23+
python examples/retrievers/jina_rerank_example.py
24+
"""
25+
26+
from camel.retrievers import JinaRerankRetriever
27+
28+
29+
def main() -> None:
    r"""Runs the Jina reranking demo.

    Prints a fixed set of sample documents in their original order, asks
    `JinaRerankRetriever` for the top 3 documents for a sample query, and
    prints the re-ranked results with their scores. Requires the
    `JINA_API_KEY` environment variable to be set.
    """
    # Sample documents to demonstrate reranking. The texts are written as
    # single-line strings so that no source-code newlines or indentation
    # end up in the documents sent to the API or in the printed previews.
    sample_documents = [
        {
            'text': (
                "CAMEL is an open-source library for building "
                "communicative AI agents. It enables multi-agent "
                "collaboration through role-playing and task "
                "decomposition."
            ),
            'metadata': {'source': 'camel_docs', 'page': 1},
        },
        {
            'text': (
                "The weather forecast shows sunny skies with "
                "temperatures reaching 75°F. A perfect day for outdoor "
                "activities and picnics."
            ),
            'metadata': {'source': 'weather', 'page': 1},
        },
        {
            'text': (
                "AI alignment research focuses on ensuring that "
                "artificial intelligence systems behave in accordance "
                "with human values and intentions. This is crucial for "
                "safe AI development."
            ),
            'metadata': {'source': 'ai_safety', 'page': 1},
        },
        {
            'text': (
                "Large language models have revolutionized natural "
                "language processing. They can understand and generate "
                "human-like text across many domains."
            ),
            'metadata': {'source': 'llm_overview', 'page': 1},
        },
        {
            'text': (
                "Multi-agent systems allow multiple AI entities to "
                "work together on complex tasks. CAMEL provides tools "
                "for building such collaborative agent frameworks."
            ),
            'metadata': {'source': 'multi_agent', 'page': 1},
        },
    ]

    # Initialize the Jina Reranker
    # API key will be read from JINA_API_KEY environment variable
    jina_reranker = JinaRerankRetriever(
        model_name="jina-reranker-v2-base-multilingual"
    )

    # Query to search for
    query = "How do AI agents collaborate in CAMEL framework?"

    print(f"Query: {query}\n")
    print("=" * 60)

    # Before reranking - show original order
    print("\nOriginal document order (by initial retrieval):")
    for position, doc in enumerate(sample_documents, start=1):
        print(f" {position}. {doc['text'][:80]}...")

    # Rerank the documents using Jina AI
    print("\n" + "=" * 60)
    print("\nReranked results (by Jina AI relevance):")

    reranked_results = jina_reranker.query(
        query=query,
        retrieved_result=sample_documents,
        top_k=3,  # Return top 3 most relevant
    )

    for rank, result in enumerate(reranked_results, start=1):
        score = result['similarity score']
        print(f"\n Rank {rank} (Score: {score:.4f}):")
        print(f" Source: {result['metadata']['source']}")
        print(f" Text: {result['text'][:100]}...")

    print("\n" + "=" * 60)
    print("\nThe reranker has reordered documents by relevance to the query.")
    # The trailing space keeps the two concatenated literals from fusing
    # into "multi-agentsystems".
    print(
        "Notice how documents about CAMEL and multi-agent "
        "systems are ranked higher."
    )


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)