Prompt Abstraction #138

Draft · wants to merge 7 commits into base: main
8 changes: 4 additions & 4 deletions src/rank_llm/api/server.py
@@ -4,7 +4,7 @@
 from flask import Flask, jsonify, request
 
 from rank_llm import retrieve_and_rerank
-from rank_llm.rerank import PromptMode, get_azure_openai_args, get_openai_api_key
+from rank_llm.rerank import Prompt, get_azure_openai_args, get_openai_api_key
 from rank_llm.rerank.listwise import RankListwiseOSLLM, SafeOpenai
 from rank_llm.retrieve import RetrievalMethod, RetrievalMode
 
@@ -31,7 +31,7 @@ def create_app(model, port, use_azure_openai=False):
             model=f"castorini/{model}_7b_v1_full",
             name=model,
             context_size=4096,
-            prompt_mode=PromptMode.RANK_GPT,
+            prompt_mode=Prompt.RANK_GPT,
             num_few_shot_examples=0,
             device="cuda",
             num_gpus=1,
@@ -45,7 +45,7 @@ def create_app(model, port, use_azure_openai=False):
             model=f"castorini/{model}_7b_v1",
             name=model,
             context_size=4096,
-            prompt_mode=PromptMode.RANK_GPT,
+            prompt_mode=Prompt.RANK_GPT,
             num_few_shot_examples=0,
             device="cuda",
             num_gpus=1,
@@ -59,7 +59,7 @@ def create_app(model, port, use_azure_openai=False):
         default_agent = SafeOpenai(
             model=model,
             context_size=8192,
-            prompt_mode=PromptMode.RANK_GPT,
+            prompt_mode=Prompt.RANK_GPT,
             num_few_shot_examples=0,
             keys=openai_keys,
             **(get_azure_openai_args() if use_azure_openai else {}),
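For downstream code, the migration implied by these hunks is mechanical: import `Prompt` instead of `PromptMode` and swap the enum name at each call site. A minimal sketch of a caller after this change (arguments mirror the diff above; the sketch is illustrative, not part of the PR):

```python
from rank_llm.rerank import Prompt
from rank_llm.rerank.listwise import RankListwiseOSLLM

# Identical to the pre-PR call except that PromptMode.RANK_GPT is now
# spelled Prompt.RANK_GPT.
agent = RankListwiseOSLLM(
    model="castorini/rank_zephyr_7b_v1_full",
    name="rank_zephyr",
    context_size=4096,
    prompt_mode=Prompt.RANK_GPT,
    num_few_shot_examples=0,
    device="cuda",
    num_gpus=1,
)
```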
5 changes: 3 additions & 2 deletions src/rank_llm/rerank/__init__.py
@@ -2,7 +2,8 @@
 
 from .api_keys import get_azure_openai_args, get_openai_api_key
 from .identity_reranker import IdentityReranker
-from .rankllm import PromptMode, RankLLM
+from .prompt import Prompt
+from .rankllm import RankLLM
 from .reranker import Reranker
 
 logging.basicConfig(
@@ -14,6 +15,6 @@
     "RankLLM",
     "get_azure_openai_args",
     "get_openai_api_key",
-    "PromptMode",
+    "Prompt",
     "Reranker",
 ]
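The new `src/rank_llm/rerank/prompt.py` module re-exported here is not among the hunks shown in this diff. Judging from its call sites (`prompt_mode.prefix(query, num)` and `prompt_mode.suffix(query, num)` in the files below) and from the helper bodies deleted in `rank_gpt.py`, a minimal sketch of the abstraction could look like the following; the member values, the member set, and the dispatch details are assumptions:

```python
# Hypothetical sketch of prompt.py; the actual module is not shown in this PR's diff.
from enum import Enum
from typing import Dict, List, Union


class Prompt(Enum):
    RANK_GPT = "rank_GPT"  # member values assumed
    RANK_GPT_APEER = "rank_GPT_APEER"
    LRL = "LRL"
    LiT5 = "LiT5"

    def prefix(self, query: str, num: int) -> Union[str, List[Dict[str, str]]]:
        # Instruction block that precedes the candidate passages; the RANK_GPT
        # wording is taken from the helper deleted from rank_gpt.py below.
        if self is Prompt.RANK_GPT:
            return [
                {
                    "role": "system",
                    "content": "You are RankGPT, an intelligent assistant that can rank passages based on their relevancy to the query.",
                },
                {
                    "role": "user",
                    "content": f"I will provide you with {num} passages, each indicated by number identifier []. \nRank the passages based on their relevance to query: {query}.",
                },
                {"role": "assistant", "content": "Okay, please provide the passages."},
            ]
        raise NotImplementedError(f"prefix not sketched for {self}")

    def suffix(self, query: str, num: int) -> str:
        # Closing instruction appended after the passages; again taken from
        # the deleted RANK_GPT helper.
        if self is Prompt.RANK_GPT:
            return (
                f"Search Query: {query}. \nRank the {num} passages above based on "
                "their relevance to the search query. The passages should be listed "
                "in descending order using identifiers. The most relevant passages "
                "should be listed first. The output format should be [] > [], e.g., "
                "[1] > [2]. Only response the ranking results, do not say any word "
                "or explain."
            )
        raise NotImplementedError(f"suffix not sketched for {self}")
```

Centralizing the prompt text on the enum is what lets the rerankers below drop their private `_get_*`/`_add_*` prompt helpers and per-mode branching.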
4 changes: 2 additions & 2 deletions src/rank_llm/rerank/listwise/listwise_rankllm.py
@@ -10,7 +10,7 @@
 from tqdm import tqdm
 
 from rank_llm.data import RankingExecInfo, Request, Result
-from rank_llm.rerank import PromptMode, RankLLM
+from rank_llm.rerank import Prompt, RankLLM
 
 logger = logging.getLogger(__name__)
 
@@ -32,7 +32,7 @@ def __init__(
         self,
         model: str,
         context_size: int,
-        prompt_mode: PromptMode,
+        prompt_mode: Prompt,
         num_few_shot_examples: int,
         window_size: int,
     ) -> None:
6 changes: 3 additions & 3 deletions src/rank_llm/rerank/listwise/lit5_reranker.py
@@ -1,6 +1,6 @@
 from rank_llm.data import Request, Result
+from rank_llm.rerank import Prompt
 from rank_llm.rerank.listwise.rank_fid import RankFiDDistill, RankFiDScore
-from rank_llm.rerank.rankllm import PromptMode
 from rank_llm.rerank.reranker import Reranker
 
 
@@ -9,7 +9,7 @@ def __init__(
         self,
         model_path: str = "castorini/LiT5-Distill-base",
         context_size: int = 300,
-        prompt_mode: PromptMode = PromptMode.LiT5,
+        prompt_mode: Prompt = Prompt.LiT5,
         window_size: int = 20,
     ) -> None:
         agent = RankFiDDistill(
@@ -64,7 +64,7 @@ def __init__(
         self,
         model_path: str = "castorini/LiT5-Score-base",
         context_size: int = 300,
-        prompt_mode: PromptMode = PromptMode.LiT5,
+        prompt_mode: Prompt = Prompt.LiT5,
         window_size: int = 20,
         runfile_path: str = "runs/run.${topics}_${firststage}_${model//\//}",
     ) -> None:
6 changes: 3 additions & 3 deletions src/rank_llm/rerank/listwise/rank_fid.py
@@ -5,9 +5,9 @@
 from transformers import T5Tokenizer
 
 from rank_llm.data import Request, Result
+from rank_llm.rerank import Prompt
 from rank_llm.rerank.listwise.listwise_rankllm import ListwiseRankLLM
 from rank_llm.rerank.listwise.lit5.model import FiD, FiDCrossAttentionScore
-from rank_llm.rerank.rankllm import PromptMode
 
 
 class RankFiDDistill(ListwiseRankLLM):
@@ -32,7 +32,7 @@ def __init__(
         self,
         model: str,
         context_size: int = 150,
-        prompt_mode: PromptMode = PromptMode.LiT5,  # Placeholder for actual mode
+        prompt_mode: Prompt = Prompt.LiT5,  # Placeholder for actual mode
         num_few_shot_examples: int = 0,
         window_size: int = 20,
         step_size: int = 10,
@@ -294,7 +294,7 @@ def __init__(
         self,
         model: str,
         context_size: int = 150,
-        prompt_mode: PromptMode = PromptMode.LiT5,  # Placeholder for actual mode
+        prompt_mode: Prompt = Prompt.LiT5,  # Placeholder for actual mode
         num_few_shot_examples: int = 0,
         window_size: int = 20,
         step_size: int = 10,
71 changes: 13 additions & 58 deletions src/rank_llm/rerank/listwise/rank_gpt.py
@@ -7,7 +7,7 @@
 from tqdm import tqdm
 
 from rank_llm.data import Request, Result
-from rank_llm.rerank import PromptMode
+from rank_llm.rerank import Prompt
 
 from .listwise_rankllm import ListwiseRankLLM
 
@@ -17,7 +17,7 @@ def __init__(
         self,
         model: str,
         context_size: int,
-        prompt_mode: PromptMode = PromptMode.RANK_GPT,
+        prompt_mode: Prompt = Prompt.RANK_GPT,
         num_few_shot_examples: int = 0,
         window_size: int = 20,
         keys=None,
@@ -34,7 +34,7 @@ def __init__(
         Parameters:
         - model (str): The model identifier for the LLM (model identifier information can be found via OpenAI's model lists).
        - context_size (int): The maximum number of tokens that the model can handle in a single request.
-        - prompt_mode (PromptMode, optional): Specifies the mode of prompt generation, with the default set to RANK_GPT,
+        - prompt_mode (Prompt, optional): Specifies the mode of prompt generation, with the default set to RANK_GPT,
         indicating that this class is designed primarily for listwise ranking tasks following the RANK_GPT methodology.
         - num_few_shot_examples (int, optional): Number of few-shot learning examples to include in the prompt, allowing for
         the integration of example-based learning to improve model performance. Defaults to 0, indicating no few-shot examples
@@ -62,12 +62,12 @@ def __init__(
         if not keys:
             raise ValueError("Please provide OpenAI Keys.")
         if prompt_mode not in [
-            PromptMode.RANK_GPT,
-            PromptMode.RANK_GPT_APEER,
-            PromptMode.LRL,
+            Prompt.RANK_GPT,
+            Prompt.RANK_GPT_APEER,
+            Prompt.LRL,
         ]:
             raise ValueError(
-                f"unsupported prompt mode for GPT models: {prompt_mode}, expected {PromptMode.RANK_GPT}, {PromptMode.RANK_GPT_APEER} or {PromptMode.LRL}."
+                f"unsupported prompt mode for GPT models: {prompt_mode}, expected {Prompt.RANK_GPT}, {Prompt.RANK_GPT_APEER} or {Prompt.LRL}."
             )
 
         self._output_token_estimate = None
@@ -178,42 +178,6 @@ def run_llm(
         encoding = tiktoken.get_encoding("cl100k_base")
         return response, len(encoding.encode(response))
 
-    def _get_prefix_for_rank_gpt_prompt(
-        self, query: str, num: int
-    ) -> List[Dict[str, str]]:
-        return [
-            {
-                "role": "system",
-                "content": "You are RankGPT, an intelligent assistant that can rank passages based on their relevancy to the query.",
-            },
-            {
-                "role": "user",
-                "content": f"I will provide you with {num} passages, each indicated by number identifier []. \nRank the passages based on their relevance to query: {query}.",
-            },
-            {"role": "assistant", "content": "Okay, please provide the passages."},
-        ]
-
-    def _get_suffix_for_rank_gpt_prompt(self, query: str, num: int) -> str:
-        return f"Search Query: {query}. \nRank the {num} passages above based on their relevance to the search query. The passages should be listed in descending order using identifiers. The most relevant passages should be listed first. The output format should be [] > [], e.g., [1] > [2]. Only response the ranking results, do not say any word or explain."
-
-    def _get_prefix_for_rank_gpt_apeer_prompt(
-        self, query: str, num: int
-    ) -> List[Dict[str, str]]:
-        # APEER
-        return [
-            {
-                "role": "system",
-                "content": "As RankGPT, your task is to evaluate and rank unique passages based on their relevance and accuracy to a given query. Prioritize passages that directly address the query and provide detailed, correct answers. Ignore factors such as length, complexity, or writing style unless they seriously hinder readability.",
-            },
-            {
-                "role": "user",
-                "content": f"In response to the query: [querystart] {query} [queryend], rank the passages. Ignore aspects like length, complexity, or writing style, and concentrate on passages that provide a comprehensive understanding of the query. Take into account any inaccuracies or vagueness in the passages when determining their relevance.",
-            },
-        ]
-
-    def _get_suffix_for_rank_gpt_apeer_prompt(self, query: str, num: int) -> str:
-        return f"Given the query: [querystart] {query} [queryend], produce a succinct and clear ranking of all passages, from most to least relevant, using their identifiers. The format should be [rankstart] [most relevant passage ID] > [next most relevant passage ID] > ... > [least relevant passage ID] [rankend]. Refrain from including any additional commentary or explanations in your ranking."
-
     def num_output_tokens(self, current_window_size: Optional[int] = None) -> int:
         if current_window_size is None:
             current_window_size = self._window_size
@@ -249,7 +213,7 @@ def run_llm_batched(self):
     def create_prompt(
         self, result: Result, rank_start: int, rank_end: int
     ) -> Tuple[List[Dict[str, str]], int]:
-        if self._prompt_mode in [PromptMode.RANK_GPT, PromptMode.RANK_GPT_APEER]:
+        if self._prompt_mode in [Prompt.RANK_GPT, Prompt.RANK_GPT_APEER]:
             return self.create_rank_gpt_prompt(result, rank_start, rank_end)
         else:
             return self.create_LRL_prompt(result, rank_start, rank_end)
@@ -262,16 +226,12 @@ def create_rank_gpt_prompt(
 
         max_length = 300 * (self._window_size / (rank_end - rank_start))
         while True:
-            messages = (
-                self._get_prefix_for_rank_gpt_apeer_prompt(query, num)
-                if self._prompt_mode == PromptMode.RANK_GPT_APEER
-                else self._get_prefix_for_rank_gpt_prompt(query, num)
-            )
+            messages = self._prompt_mode.prefix(query, num)
             rank = 0
             for cand in result.candidates[rank_start:rank_end]:
                 rank += 1
                 content = self.convert_doc_to_prompt_content(cand.doc, max_length)
-                if self._prompt_mode == PromptMode.RANK_GPT_APEER:
+                if self._prompt_mode == Prompt.RANK_GPT_APEER:
                     messages[-1][
                         "content"
                     ] += f"\n[{rank}] {self._replace_number(content)}"
@@ -285,16 +245,11 @@ def create_rank_gpt_prompt(
                 messages.append(
                     {"role": "assistant", "content": f"Received passage [{rank}]."}
                 )
-            if self._prompt_mode == PromptMode.RANK_GPT_APEER:
-                messages[-1][
-                    "content"
-                ] += f"\n{self._get_suffix_for_rank_gpt_apeer_prompt(query, num)}"
+            if self._prompt_mode == Prompt.RANK_GPT_APEER:
+                messages[-1]["content"] += f"\n{self._prompt_mode.suffix(query, num)}"
             else:
                 messages.append(
-                    {
-                        "role": "user",
-                        "content": self._get_suffix_for_rank_gpt_prompt(query, num),
-                    }
+                    {"role": "user", "content": self._prompt_mode.suffix(query, num)}
                 )
             num_tokens = self.get_num_tokens(messages)
             if num_tokens <= self.max_tokens() - self.num_output_tokens():
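The net effect in this file: four private `_get_{prefix,suffix}_for_*` helpers are deleted, and `create_rank_gpt_prompt` asks the configured `Prompt` member for its scaffolding instead of branching on the mode. A usage sketch, assuming `Prompt.RANK_GPT.prefix`/`.suffix` reproduce the deleted helpers' output:

```python
from rank_llm.rerank import Prompt

query, num = "what causes ocean tides?", 3

# Chat-message prefix, as consumed by create_rank_gpt_prompt above: a system
# message introducing RankGPT, a user message announcing the passages, and an
# assistant acknowledgement (per the deleted _get_prefix_for_rank_gpt_prompt).
messages = Prompt.RANK_GPT.prefix(query, num)

# Closing instruction appended as the final user turn (per the deleted
# _get_suffix_for_rank_gpt_prompt), e.g. "Search Query: ... [1] > [2] ...".
messages.append({"role": "user", "content": Prompt.RANK_GPT.suffix(query, num)})
```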
21 changes: 7 additions & 14 deletions src/rank_llm/rerank/listwise/rank_listwise_os_llm.py
@@ -12,7 +12,7 @@
 from transformers.generation import GenerationConfig
 
 from rank_llm.data import Request, Result
-from rank_llm.rerank import PromptMode
+from rank_llm.rerank import Prompt
 
 from .listwise_rankllm import ListwiseRankLLM
 
@@ -31,7 +31,7 @@ def __init__(
         model: str,
         name: str = "",
         context_size: int = 4096,
-        prompt_mode: PromptMode = PromptMode.RANK_GPT,
+        prompt_mode: Prompt = Prompt.RANK_GPT,
         num_few_shot_examples: int = 0,
         device: str = "cuda",
         num_gpus: int = 1,
@@ -47,7 +47,7 @@ def __init__(
         Parameters:
         - model (str): Identifier for the language model to be used for ranking tasks.
         - context_size (int, optional): Maximum number of tokens that can be handled in a single prompt. Defaults to 4096.
-        - prompt_mode (PromptMode, optional): Specifies the mode of prompt generation, with the default set to RANK_GPT,
+        - prompt_mode (Prompt, optional): Specifies the mode of prompt generation, with the default set to RANK_GPT,
         indicating that this class is designed primarily for listwise ranking tasks following the RANK_GPT methodology.
         - num_few_shot_examples (int, optional): Number of few-shot learning examples to include in the prompt, allowing for
         the integration of example-based learning to improve model performance. Defaults to 0, indicating no few-shot examples
@@ -86,9 +86,9 @@ def __init__(
         if self._device == "cuda":
             assert torch.cuda.is_available()
 
-        if prompt_mode != PromptMode.RANK_GPT:
+        if prompt_mode != Prompt.RANK_GPT:
             raise ValueError(
-                f"Unsupported prompt mode: {prompt_mode}. The only prompt mode currently supported is a slight variation of {PromptMode.RANK_GPT} prompt."
+                f"Unsupported prompt mode: {prompt_mode}. The only prompt mode currently supported is a slight variation of {Prompt.RANK_GPT} prompt."
             )
         if vllm_batched and LLM is None:
             raise ImportError(
@@ -220,13 +220,6 @@ def num_output_tokens(self, current_window_size: Optional[int] = None) -> int:
             self._output_token_estimate = _output_token_estimate
         return _output_token_estimate
 
-    def _add_prefix_prompt(self, query: str, num: int) -> str:
-        return f"I will provide you with {num} passages, each indicated by a numerical identifier []. Rank the passages based on their relevance to the search query: {query}.\n"
-
-    def _add_post_prompt(self, query: str, num: int) -> str:
-        example_ordering = "[2] > [1]" if self._variable_passages else "[4] > [2]"
-        return f"Search Query: {query}.\nRank the {num} passages above based on their relevance to the search query. All the passages should be included and listed using identifiers, in descending order of relevance. The output format should be [] > [], e.g., {example_ordering}, Only respond with the ranking results, do not say any word or explain."
-
     def _add_few_shot_examples(self, conv):
         for _ in range(self._num_few_shot_examples):
             ex = random.choice(self._examples)
@@ -249,7 +242,7 @@ def create_prompt(
         if self._system_message:
             conv.set_system_message(self._system_message)
         conv = self._add_few_shot_examples(conv)
-        prefix = self._add_prefix_prompt(query, num)
+        prefix = self._prompt_mode.prefix(query, num)
         rank = 0
         input_context = f"{prefix}\n"
         for cand in result.candidates[rank_start:rank_end]:
@@ -258,7 +251,7 @@ def create_prompt(
             content = self.convert_doc_to_prompt_content(cand.doc, max_length)
             input_context += f"[{rank}] {self._replace_number(content)}\n"
 
-        input_context += self._add_post_prompt(query, num)
+        input_context += self._prompt_mode.suffix(query, num)
         conv.append_message(conv.roles[0], input_context)
         conv.append_message(conv.roles[1], None)
         prompt = conv.get_prompt()
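Note that this file assembles its prompt as a flat string (`input_context = f"{prefix}\n"`, then `input_context += self._prompt_mode.suffix(query, num)`), so `prefix`/`suffix` are consumed as strings here rather than as the chat-message lists used in `rank_gpt.py`; how the `Prompt` abstraction reconciles the two forms is not visible in this diff. For reference, string-valued equivalents of the deleted helpers would look like this (the standalone function names are hypothetical, and `example_ordering` depended on `self._variable_passages`, which lives on the reranker rather than the enum):

```python
def rank_gpt_prefix_str(query: str, num: int) -> str:
    # Body copied from the _add_prefix_prompt helper deleted above.
    return (
        f"I will provide you with {num} passages, each indicated by a numerical "
        f"identifier []. Rank the passages based on their relevance to the "
        f"search query: {query}.\n"
    )


def rank_gpt_suffix_str(query: str, num: int, variable_passages: bool) -> str:
    # Body copied from the _add_post_prompt helper deleted above; the
    # variable_passages flag must now be threaded in explicitly, since the
    # enum has no access to the reranker's state.
    example_ordering = "[2] > [1]" if variable_passages else "[4] > [2]"
    return (
        f"Search Query: {query}.\nRank the {num} passages above based on their "
        "relevance to the search query. All the passages should be included and "
        "listed using identifiers, in descending order of relevance. The output "
        f"format should be [] > [], e.g., {example_ordering}, Only respond with "
        "the ranking results, do not say any word or explain."
    )
```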
4 changes: 2 additions & 2 deletions src/rank_llm/rerank/listwise/vicuna_reranker.py
@@ -1,7 +1,7 @@
 from typing import List
 
 from rank_llm.data import Request, Result
-from rank_llm.rerank import PromptMode
+from rank_llm.rerank import Prompt
 from rank_llm.rerank.listwise import RankListwiseOSLLM
 
 
@@ -10,7 +10,7 @@ def __init__(
         self,
         model_path: str = "castorini/rank_vicuna_7b_v1",
         context_size: int = 4096,
-        prompt_mode: PromptMode = PromptMode.RANK_GPT,
+        prompt_mode: Prompt = Prompt.RANK_GPT,
         num_few_shot_examples: int = 0,
         device: str = "cuda",
         num_gpus: int = 1,
4 changes: 2 additions & 2 deletions src/rank_llm/rerank/listwise/zephyr_reranker.py
@@ -1,7 +1,7 @@
 from typing import List
 
 from rank_llm.data import Request, Result
-from rank_llm.rerank import PromptMode
+from rank_llm.rerank import Prompt
 from rank_llm.rerank.listwise import RankListwiseOSLLM
 
 
@@ -10,7 +10,7 @@ def __init__(
         self,
         model_path: str = "castorini/rank_zephyr_7b_v1_full",
         context_size: int = 4096,
-        prompt_mode: PromptMode = PromptMode.RANK_GPT,
+        prompt_mode: Prompt = Prompt.RANK_GPT,
         num_few_shot_examples: int = 0,
         device: str = "cuda",
         num_gpus: int = 1,
4 changes: 2 additions & 2 deletions src/rank_llm/rerank/pointwise/pointwise_rankllm.py
@@ -10,7 +10,7 @@
 from tqdm import tqdm
 
 from rank_llm.data import Candidate, Request, Result
-from rank_llm.rerank.rankllm import PromptMode, RankLLM
+from rank_llm.rerank import Prompt, RankLLM
 
 logger = logging.getLogger(__name__)
 
@@ -26,7 +26,7 @@ def __init__(
         self,
         model: str,
         context_size: int,
-        prompt_mode: PromptMode,
+        prompt_mode: Prompt,
         device: str = "cuda",
         filename: str = "",
         batch_size: int = 32,