Changes from 52 commits
60 commits
60d5e7f
First step update - during create_prompt, use selected_index to subst…
XKTZ Aug 13, 2024
74118c4
Allow selected index to be variable on each step
XKTZ Aug 13, 2024
36bbcdc
Added basic _get_model_function - it is wrong now, we want to return …
XKTZ Aug 13, 2024
6d2094a
Renamed indexes to indices, [indices] is renamed to indices_batch
XKTZ Aug 14, 2024
cc78522
Renamed ReorderExecutor to Reorder Policy
XKTZ Aug 14, 2024
4936c8c
Moved LiT5Distill to policy
XKTZ Aug 15, 2024
95f065a
Transition rank listwise os llm to using reorder policy
XKTZ Aug 18, 2024
438c8bd
Added reorder policy for rank listwise os and fid score
XKTZ Aug 18, 2024
2aa8644
Revised bug in OS LLM, add Rank GPT, deprecated old functions
XKTZ Aug 18, 2024
7a19b87
Finish the tournament sort node
XKTZ Aug 25, 2024
c8734f0
Finish tournament sort
XKTZ Aug 26, 2024
7981c06
Finish reorganize of parameters, move window_size to ListwiseRankLLM
XKTZ Aug 30, 2024
bbbdc31
Added window size back
XKTZ Sep 5, 2024
bb0ad2b
Fix Rerankers
XKTZ Sep 5, 2024
62785bc
Merge pull request #4 from castorini/main
XKTZ Sep 5, 2024
9b1972a
Merge branch 'reorder' into main
XKTZ Sep 5, 2024
06b9f1f
Merge pull request #5 from XKTZ/main
XKTZ Sep 5, 2024
28e3a23
Added r parameter
XKTZ Sep 5, 2024
0e9edce
Some bug fix
XKTZ Sep 6, 2024
3005455
Reformatted
XKTZ Sep 6, 2024
3795308
Added top down
XKTZ Sep 12, 2024
edd1ff9
Merge pull request #6 from castorini/main
XKTZ Sep 12, 2024
89bab7c
Merge pull request #7 from XKTZ/main
XKTZ Sep 12, 2024
65d3508
Added final some stuff
XKTZ Sep 15, 2024
69e3d65
Updated readme
XKTZ Sep 15, 2024
8f4f729
Clean the README
XKTZ Sep 15, 2024
cc22c95
Changed filename
XKTZ Sep 16, 2024
26beeed
Updated silence
XKTZ Sep 16, 2024
36beed1
Fixed bug when batch size > 32, which is in rank_listwise_os_llm's cr…
XKTZ Sep 20, 2024
406fca7
Updated for removing use_tqdm=False
XKTZ Sep 25, 2024
9e7f846
Update parallel for topdown
XKTZ Sep 27, 2024
329d6eb
Reformat
XKTZ Sep 27, 2024
4f561e8
Added vllm_chunked_prefill
XKTZ Oct 3, 2024
8470d3d
Updated README
XKTZ Oct 19, 2024
9caa5fb
Put back step size for maintaining backward compatibility
XKTZ Oct 19, 2024
d7ecc8b
Support early stop in topdown
XKTZ Nov 18, 2024
891ca8f
Reformat
XKTZ Nov 21, 2024
db3a509
temp .gitignore
XKTZ Nov 21, 2024
743b5af
Add parameter include padding,
XKTZ Dec 15, 2024
cca845e
Merge remote-tracking branch 'origin/main' into reorder
XKTZ Dec 24, 2024
66c8742
Format
XKTZ Dec 24, 2024
acf8012
Add more fix
XKTZ Dec 24, 2024
f595054
Fix bug
XKTZ Jan 3, 2025
94a5aa5
Merge remote-tracking branch 'rankllm/main' into reorder
XKTZ Jan 3, 2025
8d0ea17
update
XKTZ Jan 4, 2025
ba2c3ef
Merge remote-tracking branch 'rankllm/main' into reorder
XKTZ Jan 4, 2025
ca0a1f0
Fix TensorRT
XKTZ Jan 4, 2025
2efdbd5
Fix gitignore
XKTZ Jan 4, 2025
dc9fdbf
Fix import
XKTZ Jan 4, 2025
4669a3f
Added Readme
XKTZ Jan 4, 2025
2a38464
MonoT5 type def
XKTZ Jan 4, 2025
75c4a74
Let RankFID able to fit the type def
XKTZ Jan 4, 2025
0b0426d
Remove Tournament sort method with ListT5
XKTZ Jan 17, 2025
fbb163c
Merge remote-tracking branch 'rankllm/main' into reorder
XKTZ Jan 17, 2025
e5842a2
Add a comment
XKTZ Jan 17, 2025
70f3914
lint
XKTZ Jan 24, 2025
4944927
Update rankfid
XKTZ Jan 29, 2025
0d5c5e7
Add some comments
XKTZ Jan 29, 2025
8b0f963
Merge branch 'reorder' of github.com:XKTZ/rank_llm into reorder
XKTZ Jan 29, 2025
1115247
Updated
XKTZ Jan 29, 2025
2 changes: 1 addition & 1 deletion .gitignore
@@ -170,4 +170,4 @@ token_counts/
retrieve_results/
ranking_execution_summary/
repro/
demo_outputs/
demo_outputs/
40 changes: 40 additions & 0 deletions README.md
@@ -147,6 +147,18 @@ Omit `--use_logits` if you wish to perform traditional listwise reranking.

If you would like to contribute to the project, please refer to the [contribution guidelines](CONTRIBUTING.md).

### Run end to end - Rank Zephyr with [Tournament Sort](https://arxiv.org/abs/2402.15838) (`top_k = 10, r = 1`)

* Like the other LLM reranking methods, tournament sort is selected via the `--reorder_policy` flag, for example:

```
python src/rank_llm/scripts/run_rank_llm.py --model_path=castorini/rank_zephyr_7b_v1_full --top_k_candidates=100 --dataset=dl20 \
--retrieval_method=SPLADE++_EnsembleDistil_ONNX --prompt_mode=rank_GPT --context_size=4096 --variable_passages \
--reorder_policy="tournament_sort:{top_k: 10, r: 1}"
```

### Run end to end - Rank Zephyr with [Top Down](https://arxiv.org/abs/2405.14589)
@XKTZ what is this?


## 🦙🐧 Model Zoo

The following is a table of the listwise models our repository was primarily built to handle (with the models hosted on HuggingFace):
@@ -283,6 +295,34 @@ If you would like to cite the FIRST methodology, please consider citing:
}
```

If you would like to cite ListT5's tournament sort methodology, please consider citing:
I don't think this is the first instance of tournament sort, something has probably been done before, maybe we drop it for now


[[2402.15838] ListT5: Listwise Reranking with Fusion-in-Decoder Improves Zero-shot Retrieval](https://arxiv.org/abs/2402.15838)

```
@ARTICLE{yoon2024listt5,
  title   = {ListT5: Listwise Reranking with Fusion-in-Decoder Improves Zero-shot Retrieval},
  author  = {Yoon, Soyoung and Choi, Eunbi and Kim, Jiyeon and Yun, Hyeongu and Kim, Yireun and Hwang, Seung-won},
  journal = {arXiv preprint arXiv:2402.15838},
  year    = {2024}
}
```

If you would like to cite the Top-Down Partitioning methodology, please consider citing:

[[2405.14589] Top-Down Partitioning for Efficient List-Wise Ranking](https://arxiv.org/abs/2405.14589)

```
@ARTICLE{parry2024top,
  title   = {Top-Down Partitioning for Efficient List-Wise Ranking},
  author  = {Parry, Andrew and MacAvaney, Sean and Ganguly, Debasis},
  journal = {arXiv preprint arXiv:2405.14589},
  year    = {2024}
}
```

Comment on lines 206 to 311
Remove

## 🙏 Acknowledgments

This research is supported in part by the Natural Sciences and Engineering Research Council (NSERC) of Canada.
3 changes: 2 additions & 1 deletion requirements.txt
@@ -7,4 +7,5 @@ python-dotenv>=1.0.1
faiss-cpu>=1.8.0
ftfy>=6.2.0
dacite>=1.8.1
fschat[model_worker]>=0.2.36
fschat[model_worker]>=0.2.36
json-repair
211 changes: 205 additions & 6 deletions src/rank_llm/rerank/listwise/listwise_rankllm.py
@@ -1,22 +1,50 @@
import copy
import json
import logging
import random
import re
from abc import ABC
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, List, Tuple
from typing import Any, Dict, List, Optional, Tuple, Union

import json_repair
from ftfy import fix_text
from tqdm import tqdm

from rank_llm.data import RankingExecInfo, Request, Result
from rank_llm.rerank import PromptMode, RankLLM
from rank_llm.rerank.listwise.reorder.reorder_policy import (
ModelFunction,
ReorderPolicy,
SlidingWindowReorderPolicy,
)
from rank_llm.rerank.listwise.reorder.top_down_reorder_policy import (
TopDownReorderPolicy,
)
from rank_llm.rerank.listwise.reorder.tournament_sort_reorder_policy import (
TournamentSortReorderPolicy,
)

logger = logging.getLogger(__name__)

ALPH_START_IDX = ord("A") - 1


SUPPORT_REORDER_POLICIES = [
SlidingWindowReorderPolicy,
TournamentSortReorderPolicy,
TopDownReorderPolicy,
]


@dataclass
class RerankConsumption:
consumption_reference_by_batch: int
consumption_reference_by_item: int


class ListwiseRankLLM(RankLLM, ABC):
"""
All children of ListwiseRankLLM must implement these functions:
@@ -32,18 +60,77 @@ class ListwiseRankLLM(RankLLM, ABC):

def __init__(
self,
reorder_policy: Optional[ReorderPolicy],
model: str,
context_size: int,
window_size: int,
prompt_mode: PromptMode,
num_few_shot_examples: int,
window_size: int,
use_alpha: bool = False,
) -> None:
super().__init__(model, context_size, prompt_mode)
self._num_few_shot_examples = num_few_shot_examples

self.reorder_policy = reorder_policy or SlidingWindowReorderPolicy()
self._window_size = window_size
self._use_alpha = use_alpha

def rerank_batch(
self,
requests: List[Request],
rank_start: int = 0,
rank_end: int = 100,
shuffle_candidates: bool = False,
logging: bool = False,
batched: bool = False,
**kwargs: Any,
) -> List[Result]:
populate_exec_summary: bool = kwargs.get("populate_exec_summary", False)

batch_size = kwargs.get("batch_size") or len(requests)

if not batched:
batch_size = 1

reorder_policy = self.reorder_policy
model_functions, consumption = self._get_model_function(batched, **kwargs)

# reranking using batched mode
if batched and len(set([len(req.candidates) for req in requests])) != 1:
raise ValueError("Batched requests must have the same number of candidates")

result: list[Result] = []

with tqdm(range(0, len(requests)), leave=False) as bar:
for i in range(0, len(requests), batch_size):
batch = requests[i : min(i + batch_size, len(requests))]
batch_result = reorder_policy.reorder(
requests=[
Result(
query=copy.deepcopy(request.query),
candidates=copy.deepcopy(request.candidates),
ranking_exec_summary=[],
)
for request in batch
],
rank_start=max(rank_start, 0),
rank_end=min(
rank_end, len(requests[0].candidates)
), # TODO: Fails arbitrary hit sizes
model=model_functions,
shuffle_candidates=shuffle_candidates,
logging=logging,
populate_exec_summary=populate_exec_summary,
)
result.extend(batch_result)
bar.update(len(batch))

logger.info(
f"\n\nAverage consumption per request: {consumption.consumption_reference_by_item / len(requests) : .2f}\n\n"
)

return result
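The batching loop above is the standard chunking pattern; a minimal standalone sketch, with generic items standing in for the real `Request` objects:

```python
from typing import Iterator, List, TypeVar

T = TypeVar("T")


def chunked(items: List[T], batch_size: int) -> Iterator[List[T]]:
    """Yield consecutive slices of at most batch_size items."""
    for i in range(0, len(items), batch_size):
        yield items[i : i + batch_size]
```

The slice `items[i : i + batch_size]` is already clamped by Python slicing, so the final partial batch needs no special case.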

def get_output_filename(
self,
top_k_candidates: int,
@@ -61,6 +148,7 @@ def get_output_filename(
name = f"{name}_{dataset_name}"
if self._num_few_shot_examples > 0:
name += f"_{self._num_few_shot_examples}_shot"
name += f"_{self.reorder_policy.param_name()}"
return (
f"{name}_shuffled_{datetime.isoformat(datetime.now())}"
if shuffle_candidates
@@ -76,6 +164,7 @@ def max_tokens(self) -> int:
"""
return self._context_size

# @deprecated("old sliding window pipeline is deprecated. please use reorder policy")
def permutation_pipeline_batched(
self,
results: List[Result],
@@ -99,7 +188,9 @@ def permutation_pipeline_batched(
prompts = []
logger.info("Loading prompts.")
prompts = self.create_prompt_batched(
results, rank_start, rank_end, batch_size=32
results,
[list(range(rank_start, rank_end)) for _ in range(len(results))],
batch_size=32,
)
if logging:
for prompt in prompts:
@@ -126,6 +217,7 @@ def permutation_pipeline_batched(

return results

# @deprecated("old sliding window pipeline is deprecated. please use reorder policy")
def permutation_pipeline(
self,
result: Result,
@@ -146,7 +238,9 @@ def permutation_pipeline(
Returns:
Result: The processed result object after applying permutation.
"""
prompt, in_token_count = self.create_prompt(result, rank_start, rank_end)
prompt, in_token_count = self.create_prompt(
result, list(range(rank_start, rank_end))
)
if logging:
logger.info(f"Prompt: {prompt}\n")
permutation, out_token_count = self.run_llm(
@@ -184,6 +278,7 @@ def shuffle_and_rescore(
cand["score"] = 1.0 / (i + 1)
cand["rank"] = i + 1

# @deprecated("old sliding window pipeline is deprecated. please use reorder policy")
def sliding_windows_batched(
self,
requests: List[Request],
@@ -234,6 +329,7 @@ def sliding_windows_batched(
start_pos = start_pos - step
return rerank_results

# @deprecated("old sliding window pipeline is deprecated. please use reorder policy")
def sliding_windows(
self,
request: Request,
@@ -342,7 +438,7 @@ def get_ranking_cost(
start_pos = rank_end - window_size
while start_pos >= rank_start:
start_pos = max(start_pos, rank_start)
prompt, _ = self.create_prompt(result, start_pos, end_pos)
prompt, _ = self.create_prompt(result, list(range(start_pos, end_pos)))
input_token_count += self.get_num_tokens(prompt)
end_pos = end_pos - step
start_pos = start_pos - step
@@ -369,7 +465,7 @@ def _clean_response(self, response: str) -> str:
else:
new_response += c
new_response = new_response.strip()

new_response = re.sub(r"\s+", " ", new_response)
return new_response

def _remove_duplicate(self, response: List[int]) -> List[int]:
@@ -452,3 +548,106 @@ def convert_doc_to_prompt_content(
# For Japanese should cut by character: content = content[:int(max_length)]
content = " ".join(content.split()[: int(max_length)])
return self._replace_number(content)

def _permutation_to_rank(self, perm_string: str, selected_indices: List[int]):
perm = [
int(x) - 1 for x in self._clean_response(perm_string).strip().split(" ")
]
perm = [
int(x)
for x in self._remove_duplicate(perm)
if 0 <= int(x) < len(selected_indices)
]
perm = perm + [i for i in range(len(selected_indices)) if i not in perm]
return perm
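The parsing above can be sketched in isolation (a hypothetical standalone helper; the real code routes through `_clean_response` and `_remove_duplicate`):

```python
def permutation_to_rank(perm_string: str, num_selected: int) -> list[int]:
    """Turn a model's 1-based permutation string into a 0-based rank list.

    Out-of-range ids are dropped, duplicates keep their first occurrence,
    and any indices the model omitted are appended in ascending order.
    """
    raw = [int(tok) - 1 for tok in perm_string.split() if tok.isdigit()]
    seen: set[int] = set()
    perm: list[int] = []
    for idx in raw:
        if 0 <= idx < num_selected and idx not in seen:
            seen.add(idx)
            perm.append(idx)
    return perm + [i for i in range(num_selected) if i not in seen]
```

The fallback append is what makes the result a total permutation even when the model repeats or skips ids.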

def _get_model_function(
self, batched: bool = False, silence: bool = False, **kwargs
) -> Tuple[ModelFunction, RerankConsumption]:
# [(Request, SelectIndex)] -> [Prompt]

consumption = RerankConsumption(0, 0)

if batched:

def create_prompt(batch: List[Tuple[Result, List[int]]]):
return [
prompt
for prompt, _ in self.create_prompt_batched(
[result for result, selected_indices in batch],
[selected_indices for result, selected_indices in batch],
32,
)
]

def execute(
batch: List[Union[str, Dict[str, str]]],
selected_indices_batch: List[List[int]],
):
consumption.consumption_reference_by_batch += 1
consumption.consumption_reference_by_item += len(batch)

return [
self._permutation_to_rank(s, selected_indices)
for (s, _), selected_indices in zip(
self.run_llm_batched(batch, silence=silence, **kwargs),
selected_indices_batch,
)
]

else:

def create_prompt(batch: List[Tuple[Result, List[int]]]):
return [
self.create_prompt(result, selected_indices)[0]
for result, selected_indices in batch
]

def execute(
batch: List[Union[str, Dict[str, str]]],
selected_indices_batch: List[List[int]],
):
consumption.consumption_reference_by_batch += 1
consumption.consumption_reference_by_item += len(batch)

return [
self._permutation_to_rank(
self.run_llm(x, silence=silence, **kwargs)[0], selected_indices
)
for x, selected_indices in zip(batch, selected_indices_batch)
]

return (
ModelFunction(
create_prompt=create_prompt,
execute=execute,
window_size=self._window_size,
),
consumption,
)
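The shared `RerankConsumption` record works because both closures mutate the same object the caller holds; a minimal sketch of that pattern (the `run` callable is a hypothetical stand-in for `run_llm`):

```python
from dataclasses import dataclass
from typing import Callable, List, Tuple


@dataclass
class Consumption:
    batches: int = 0
    items: int = 0


def make_counting_execute(
    run: Callable[[str], str],
) -> Tuple[Callable[[List[str]], List[str]], Consumption]:
    """Wrap run() in an execute() that tallies usage in a shared record."""
    consumption = Consumption()

    def execute(batch: List[str]) -> List[str]:
        # Mutating the captured dataclass keeps the caller's view current.
        consumption.batches += 1
        consumption.items += len(batch)
        return [run(x) for x in batch]

    return execute, consumption
```

Returning the record alongside the closure is what lets `rerank_batch` report average consumption after the reorder policy has driven all the calls.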

@staticmethod
def get_reorder_policy(reorder_policy: str, **kwargs) -> ReorderPolicy:
for policy in SUPPORT_REORDER_POLICIES:
if reorder_policy.startswith(policy.name()):
reorder_params = reorder_policy[len(policy.name()) :]
reorder_params = reorder_params.strip()
if len(reorder_params) <= 1:
return policy()
else:
assert reorder_params[0] == ":" and reorder_params[1] == "{"
reorder_params = reorder_params[1:]
try:
reorder_param_dict = json_repair.repair_json(reorder_params)
if isinstance(reorder_param_dict, str):
reorder_param_dict = json.loads(reorder_param_dict)
if not isinstance(reorder_param_dict, dict):
raise Exception(
f"Failed to parse reorder parameters into a dict; got {reorder_param_dict!r} of type {type(reorder_param_dict)}"
)
except Exception as e:
raise Exception(f"Cannot load reorder policy {reorder_policy}") from e
return policy(**reorder_param_dict, extra_args=dict(**kwargs))

raise Exception(f"Cannot find reorder policy {reorder_policy}")
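The spec format `name:{param: value, ...}` can also be parsed with the stdlib alone; a simplified stand-in for the `json_repair`-based path above (the regex key-quoting is an assumption of this sketch, not the library's behavior):

```python
import json
import re
from typing import Any, Dict, Tuple


def parse_policy_spec(spec: str) -> Tuple[str, Dict[str, Any]]:
    """Split 'name:{k: v, ...}' into (name, params).

    Bare object keys such as {top_k: 10} are quoted so json.loads
    accepts them; an empty or missing parameter block yields {}.
    """
    name, sep, params = spec.partition(":")
    params = params.strip()
    if not sep or not params:
        return name, {}
    # Quote unquoted object keys before handing the string to json.loads.
    repaired = re.sub(r'([{,]\s*)(\w+)(\s*:)', r'\1"\2"\3', params)
    return name, json.loads(repaired)
```

Usage mirrors the CLI flag, e.g. `parse_policy_spec("tournament_sort:{top_k: 10, r: 1}")` yields the policy name plus a keyword dict for the policy constructor.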