Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add query and candidate #111

Merged
merged 8 commits into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/rank_llm/analysis/response_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
parent = os.path.dirname(parent)
sys.path.append(parent)

from rank_llm.result import Result
from rank_llm.data import Result


class ResponseAnalyzer:
Expand Down Expand Up @@ -80,9 +80,9 @@ def read_saved_responses(self) -> Tuple[List[str], List[int]]:
with open(result) as f:
ranking_exec_summaries = json.load(f)
for summary in ranking_exec_summaries:
for exec_info in summary["ranking_exec_summary"]:
responses.append(exec_info["response"])
num_passage = self._get_num_passages(exec_info["prompt"])
for exec_info in summary.ranking_exec_summary:
responses.append(exec_info.response)
num_passage = self._get_num_passages(exec_info.prompt)
num_passages.append(int(num_passage))
return responses, num_passages

Expand Down
101 changes: 101 additions & 0 deletions src/rank_llm/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import json
from dataclasses import dataclass, field
from typing import Any, Dict, List, Union


from dacite import from_dict


@dataclass
class Query:
    """A query, identified by ``qid``, together with its text."""

    # Text of the query.
    text: str
    # Query identifier; both string and integer ids are accepted.
    # Spelled Union[str, int] rather than Union[str | int]: the latter mixes
    # the two union syntaxes and evaluating `str | int` raises TypeError on
    # interpreters older than 3.10.
    qid: Union[str, int]


@dataclass
class Candidate:
    """A single candidate document attached to a query."""

    # Document identifier; both string and integer ids are accepted.
    # Spelled Union[str, int] rather than Union[str | int]: the latter mixes
    # the two union syntaxes and evaluating `str | int` raises TypeError on
    # interpreters older than 3.10.
    docid: Union[str, int]
    # Score associated with this candidate.
    score: float
    # Document payload as a free-form mapping.
    # NOTE(review): the inline-hits demo stores the passage under a
    # "segment" key; confirm whether other keys are expected.
    doc: Dict[str, Any]


@dataclass
class Request:
    """A query paired with the list of candidates attached to it."""

    # The query the candidates belong to.
    query: Query
    # Candidates for the query; every instance gets its own fresh empty list
    # when none is supplied.
    candidates: List[Candidate] = field(default_factory=list)


@dataclass
class RankingExecInfo:
    """Record of one ranking execution: prompt, response and token counts."""

    # Prompt of the execution; typed as Any, so its shape is not fixed here.
    prompt: Any
    # Response text of the execution.
    response: str
    # Token count of the input.
    input_token_count: int
    # Token count of the output.
    output_token_count: int


@dataclass
class Result:
    """A query, its candidates, and the execution records that go with them."""

    # The query the candidates belong to.
    query: Query
    # Candidates for the query; every instance gets its own fresh empty list
    # when none is supplied.
    candidates: List[Candidate] = field(default_factory=list)
    # One RankingExecInfo per recorded execution. The default must be the
    # bare field(...) call: wrapping it in a tuple, `(field(...),)`, makes the
    # default a shared one-element tuple holding a dataclasses.Field object
    # instead of a per-instance empty list.
    ranking_exec_summary: List[RankingExecInfo] = field(default_factory=list)


def read_requests_from_file(file_path: str) -> List[Request]:
    """Load requests from a ``json`` or ``jsonl`` file.

    A ``jsonl`` file is read line by line: each non-blank line is parsed as
    one JSON object. A ``json`` file is parsed as a whole and is expected to
    hold a JSON array of objects. Every object is converted to a ``Request``
    with ``from_dict``.

    Args:
        file_path: Path of the file to read. The format is taken from the
            text after the last ``.`` and compared case-insensitively, so
            ``requests.JSON`` is accepted as well.

    Returns:
        The parsed requests, in file order.

    Raises:
        ValueError: If the extension is neither ``json`` nor ``jsonl``.
    """
    extension = file_path.rsplit(".", 1)[-1].lower()

    if extension == "jsonl":
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(file_path, "r", encoding="utf-8") as f:
            return [
                from_dict(data_class=Request, data=json.loads(line))
                for line in f
                if line.strip()  # tolerate blank / whitespace-only lines
            ]

    if extension == "json":
        with open(file_path, "r", encoding="utf-8") as f:
            request_dicts = json.load(f)
        return [
            from_dict(data_class=Request, data=request_dict)
            for request_dict in request_dicts
        ]

    raise ValueError(f"Expected json or jsonl file format, got {extension}")


class DataWriter:
    """Write a list of results or requests to disk in several formats.

    Each ``write_*`` method opens ``filename`` in append mode when the writer
    was created with ``append=True`` and in overwrite mode otherwise.

    Note that ``write_ranking_exec_summary`` and ``write_in_json_format`` each
    dump one complete JSON array per call. In append mode on a file that
    already has content, the new array is written after the existing text, so
    the file as a whole is no longer a single JSON document. The line-based
    formats (``jsonl`` and TREC) append cleanly.
    """

    def __init__(self, data: Union[List[Result], List[Request]], append: bool = False):
        """Store the items to write and the file-open policy.

        Args:
            data: Items to serialise. Every item must expose ``query`` and
                ``candidates``; ``write_ranking_exec_summary`` additionally
                reads ``ranking_exec_summary``, which only ``Result`` has.
            append: Append to existing files instead of overwriting them.
        """
        self._data = data
        self._append = append

    def _open(self, filename: str):
        """Open ``filename`` as UTF-8 text for appending or overwriting."""
        return open(filename, "a" if self._append else "w", encoding="utf-8")

    def write_ranking_exec_summary(self, filename: str):
        """Dump each item's query and execution records as one JSON array.

        Args:
            filename: Destination path.
        """
        exec_summary = [
            {
                "query": vars(d.query),
                "ranking_exec_summary": [
                    vars(info) for info in d.ranking_exec_summary
                ],
            }
            for d in self._data
        ]
        with self._open(filename) as f:
            json.dump(exec_summary, f, indent=2)

    def write_in_json_format(self, filename: str):
        """Dump each item's query and candidates as one indented JSON array.

        Args:
            filename: Destination path.
        """
        results = [
            {
                "query": vars(d.query),
                "candidates": [vars(candidate) for candidate in d.candidates],
            }
            for d in self._data
        ]
        with self._open(filename) as f:
            json.dump(results, f, indent=2)

    def write_in_jsonl_format(self, filename: str):
        """Write one JSON object (query plus candidates) per line.

        Args:
            filename: Destination path.
        """
        with self._open(filename) as f:
            for d in self._data:
                candidates = [vars(candidate) for candidate in d.candidates]
                json.dump({"query": vars(d.query), "candidates": candidates}, f)
                f.write("\n")

    def write_in_trec_eval_format(self, filename: str):
        """Write one ``qid Q0 docid rank score rank`` line per candidate.

        Ranks are 1-based and follow the order of each item's ``candidates``
        list; the list is written as is, without sorting by score.

        Args:
            filename: Destination path.
        """
        with self._open(filename) as f:
            for d in self._data:
                qid = d.query.qid
                for rank, cand in enumerate(d.candidates, start=1):
                    f.write(f"{qid} Q0 {cand.docid} {rank} {cand.score} rank\n")
sahel-sh marked this conversation as resolved.
Show resolved Hide resolved
6 changes: 3 additions & 3 deletions src/rank_llm/demo/experimental_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@
rerankers = {"rg": g_reranker, "rv": v_reranker, "rz": z_reranker}

results = {}
for dataset in ["dl19"]: # , "dl20"]:
retrieved_results = Retriever.from_dataset_with_prebuilt_index(dataset)
for dataset in ["dl19", "dl20", "dl21", "dl22"]:
retrieved_results = Retriever.from_dataset_with_prebuilt_index(dataset, k=20)
topics = TOPICS[dataset]
ret_ndcg_10 = EvalFunction.from_results(retrieved_results, topics)
for key, reranker in rerankers.items():
rerank_results = reranker.rerank(retrieved_results)
rerank_results = reranker.rerank_batch(retrieved_results)
rerank_ndcg_10 = EvalFunction.from_results(rerank_results, topics)
analyzer = ResponseAnalyzer.from_inline_results(rerank_results)
error_counts = analyzer.count_errors()
Expand Down
2 changes: 1 addition & 1 deletion src/rank_llm/demo/rerank_dataset_with_custom_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,5 @@
index_type="lucene",
)
reranker = VicunaReranker()
rerank_results = reranker.rerank(retrieved_results)
rerank_results = reranker.rerank_batch(retrieved_results)
print(rerank_results)
8 changes: 4 additions & 4 deletions src/rank_llm/demo/rerank_dataset_with_prebuilt_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

# Rerank
reranker = VicunaReranker()
rerank_results = reranker.rerank(retrieved_results)
rerank_results = reranker.rerank_batch(retrieved_results)

# Analyze response:
analyzer = ResponseAnalyzer.from_inline_results(rerank_results)
Expand All @@ -49,18 +49,18 @@
dataset_name, RetrievalMethod.SPLADE_P_P_ENSEMBLE_DISTIL
)
reranker = VicunaReranker()
rerank_results = reranker.rerank(retrieved_results)
rerank_results = reranker.rerank_batch(retrieved_results)

# Analyze response:
analyzer = ResponseAnalyzer.from_inline_results(rerank_results)
error_counts = analyzer.count_errors(verbose=True)

from pathlib import Path

from rank_llm.result import ResultsWriter
from rank_llm.data import DataWriter

# write rerank results
writer = ResultsWriter(rerank_results)
writer = DataWriter(rerank_results)
Path(f"demo_outputs/").mkdir(parents=True, exist_ok=True)
writer.write_in_json_format(f"demo_outputs/rerank_results.json")
writer.write_in_trec_eval_format(f"demo_outputs/rerank_results.txt")
Expand Down
34 changes: 0 additions & 34 deletions src/rank_llm/demo/rerank_inline_docs.py

This file was deleted.

135 changes: 68 additions & 67 deletions src/rank_llm/demo/rerank_inline_hits.py
Original file line number Diff line number Diff line change
@@ -1,85 +1,86 @@
import os
from pathlib import Path
import sys

from dacite import from_dict
sahel-sh marked this conversation as resolved.
Show resolved Hide resolved

SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
parent = os.path.dirname(SCRIPT_DIR)
parent = os.path.dirname(parent)
sys.path.append(parent)

from rank_llm.data import Request, DataWriter
from rank_llm.rerank.zephyr_reranker import ZephyrReranker
from rank_llm.retrieve.retriever import Retriever

query = "how long is life cycle of flea"
hits = [
{
"content": "The life cycle of a flea can last anywhere from 20 days to an entire year. It depends on how long the flea remains in the dormant stage (eggs, larvae, pupa). Outside influences, such as weather, affect the flea cycle. A female flea can lay around 20 to 25 eggs in one day.",
"qid": 264014,
"docid": "4834547",
"rank": 3,
"score": 14.971799850463867,
},
{
"content": "The life cycle of a flea can last anywhere from 20 days to an entire year. It depends on how long the flea remains in the dormant stage (eggs, larvae, pupa). Outside influences, such as weather, affect the flea cycle. A female flea can lay around 20 to 25 eggs in one day. The flea egg stage is the beginning of the flea cycle. This part of the flea cycle represents a little more than one third of the flea population at any given time. Depending on the temperature and humidity of the environment the egg can take from two to six days to hatch.",
"qid": 264014,
"docid": "6641238",
"rank": 2,
"score": 15.090800285339355,
},
{
"content": "To go to our more detailed flea life cycle and flea control page, click here. 1) The flea life cycle diagram - a complete step-by-step diagram of animal host flea infestation; flea reproduction and environmental flea contamination with juvenile flea life cycle stages (eggs, larvae and pupae).",
"qid": 264014,
"docid": "1610712",
"rank": 8,
"score": 13.455499649047852,
},
{
"content": "Flea Pupa. The flea larvae spin cocoons around themselves in which they move to the last phase of the flea life cycle and become adult fleas. The larvae can remain in the cocoon anywhere from one week to one year. Temperature is one factor that determines how long it will take for the adult flea to emerge from the cocoon.",
"qid": 264014,
"docid": "96852",
"rank": 4,
"score": 14.215100288391113,
},
{
"content": "The cat flea's primary host is the domestic cat, but it is also the primary flea infesting dogs in most of the world. The cat flea can also maintain its life cycle on other carnivores and on omnivores. Humans can be bitten, though a long-term population of cat fleas cannot be sustained and infest people.However, if the female flea is allowed to feed for 12 consecutive hours on a human, it can lay viable eggs.he cat flea's primary host is the domestic cat, but it is also the primary flea infesting dogs in most of the world. The cat flea can also maintain its life cycle on other carnivores and on omnivores. Humans can be bitten, though a long-term population of cat fleas cannot be sustained and infest people.",
"qid": 264014,
"docid": "4239616",
"rank": 6,
"score": 13.947500228881836,
},
{
"content": "5. Cancel. A flea can live up to a year, but its general lifespan depends on its living conditions, such as the availability of hosts. Find out how long a flea's life cycle can last with tips from a pet industry specialist in this free video on fleas and pest control.Part of the Video Series: Flea Control.ancel. A flea can live up to a year, but its general lifespan depends on its living conditions, such as the availability of hosts. Find out how long a flea's life cycle can last with tips from a pet industry specialist in this free video on fleas and pest control. Part of the Video Series: Flea Control.",
"qid": 264014,
"docid": "5611210",
"rank": 1,
"score": 15.780599594116211,
},
{
"content": "2) The fleas life cycle discussed - the flea life cycle diagram explained in full. 2a) Fleas life cycle 1 - The adult flea lays her eggs on the host animal. 2b) Fleas life cycle 2 - The egg falls off the animal's skin and into the local environment of the host animal. 2c) Fleas life cycle 3 - The flea egg hatches, releasing a first stage (stage 1) flea larva.",
"qid": 264014,
"docid": "96854",
"rank": 5,
"score": 13.985199928283691,
},
{
"content": "In appearance, flea larvae can be up to \u00c2\u00bc-inch long and are white (almost see-through) and legless. Larvae make up about 35 percent of the flea population in the average household. If conditions are favorable, the larvae will spin cocoons in about 5-20 days of hatching from their eggs.This leads to the next life stage, called the cocoon or pupae stage.The pupae stage of the flea life cycle accounts for about 10 percent of the flea population in a home.f conditions are favorable, the larvae will spin cocoons in about 5-20 days of hatching from their eggs. This leads to the next life stage, called the cocoon or pupae stage. The pupae stage of the flea life cycle accounts for about 10 percent of the flea population in a home.",
"qid": 264014,
"docid": "5635521",
"rank": 7,
"score": 13.533599853515625,
},
]
request_dict = {
"query": {"text": "how long is life cycle of flea", "qid": "264014"},
"candidates": [
{
"doc": {
"segment": "The life cycle of a flea can last anywhere from 20 days to an entire year. It depends on how long the flea remains in the dormant stage (eggs, larvae, pupa). Outside influences, such as weather, affect the flea cycle. A female flea can lay around 20 to 25 eggs in one day."
},
"docid": "4834547",
"score": 14.971799850463867,
},
{
"doc": {
"segment": "The life cycle of a flea can last anywhere from 20 days to an entire year. It depends on how long the flea remains in the dormant stage (eggs, larvae, pupa). Outside influences, such as weather, affect the flea cycle. A female flea can lay around 20 to 25 eggs in one day. The flea egg stage is the beginning of the flea cycle. This part of the flea cycle represents a little more than one third of the flea population at any given time. Depending on the temperature and humidity of the environment the egg can take from two to six days to hatch."
},
"docid": "6641238",
"score": 15.090800285339355,
},
{
"doc": {
"segment": "To go to our more detailed flea life cycle and flea control page, click here. 1) The flea life cycle diagram - a complete step-by-step diagram of animal host flea infestation; flea reproduction and environmental flea contamination with juvenile flea life cycle stages (eggs, larvae and pupae)."
},
"docid": "1610712",
"score": 13.455499649047852,
},
{
"doc": {
"segment": "Flea Pupa. The flea larvae spin cocoons around themselves in which they move to the last phase of the flea life cycle and become adult fleas. The larvae can remain in the cocoon anywhere from one week to one year. Temperature is one factor that determines how long it will take for the adult flea to emerge from the cocoon."
},
"docid": "96852",
"score": 14.215100288391113,
},
{
"doc": {
"segment": "The cat flea's primary host is the domestic cat, but it is also the primary flea infesting dogs in most of the world. The cat flea can also maintain its life cycle on other carnivores and on omnivores. Humans can be bitten, though a long-term population of cat fleas cannot be sustained and infest people.However, if the female flea is allowed to feed for 12 consecutive hours on a human, it can lay viable eggs.he cat flea's primary host is the domestic cat, but it is also the primary flea infesting dogs in most of the world. The cat flea can also maintain its life cycle on other carnivores and on omnivores. Humans can be bitten, though a long-term population of cat fleas cannot be sustained and infest people."
},
"docid": "4239616",
"score": 13.947500228881836,
},
{
"doc": {
"segment": "5. Cancel. A flea can live up to a year, but its general lifespan depends on its living conditions, such as the availability of hosts. Find out how long a flea's life cycle can last with tips from a pet industry specialist in this free video on fleas and pest control.Part of the Video Series: Flea Control.ancel. A flea can live up to a year, but its general lifespan depends on its living conditions, such as the availability of hosts. Find out how long a flea's life cycle can last with tips from a pet industry specialist in this free video on fleas and pest control. Part of the Video Series: Flea Control."
},
"docid": "5611210",
"score": 15.780599594116211,
},
{
"doc": {
"segment": "2) The fleas life cycle discussed - the flea life cycle diagram explained in full. 2a) Fleas life cycle 1 - The adult flea lays her eggs on the host animal. 2b) Fleas life cycle 2 - The egg falls off the animal's skin and into the local environment of the host animal. 2c) Fleas life cycle 3 - The flea egg hatches, releasing a first stage (stage 1) flea larva."
},
"docid": "96854",
"score": 13.985199928283691,
},
{
"doc": {
"segment": "In appearance, flea larvae can be up to \u00c2\u00bc-inch long and are white (almost see-through) and legless. Larvae make up about 35 percent of the flea population in the average household. If conditions are favorable, the larvae will spin cocoons in about 5-20 days of hatching from their eggs.This leads to the next life stage, called the cocoon or pupae stage.The pupae stage of the flea life cycle accounts for about 10 percent of the flea population in a home.f conditions are favorable, the larvae will spin cocoons in about 5-20 days of hatching from their eggs. This leads to the next life stage, called the cocoon or pupae stage. The pupae stage of the flea life cycle accounts for about 10 percent of the flea population in a home."
},
"docid": "5635521",
"score": 13.533599853515625,
},
],
}

retrieved_results = Retriever.from_inline_hits(query=query, hits=hits)
requests = [from_dict(data_class=Request, data=request_dict)]
reranker = ZephyrReranker()
rerank_results = reranker.rerank(retrieved_results)
rerank_results = reranker.rerank_batch(requests)
print(rerank_results)

from pathlib import Path

from rank_llm.result import ResultsWriter

# write rerank results
writer = ResultsWriter(rerank_results)
writer = DataWriter(rerank_results)
Path(f"demo_outputs/").mkdir(parents=True, exist_ok=True)
writer.write_in_json_format(f"demo_outputs/rerank_results.json")
writer.write_in_trec_eval_format(f"demo_outputs/rerank_results.txt")
Expand Down
6 changes: 3 additions & 3 deletions src/rank_llm/demo/rerank_rank_gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@
retrieved_results = Retriever.from_dataset_with_prebuilt_index(dataset_name)
agent = SafeOpenai("gpt-3.5-turbo", 4096, keys=get_openai_api_key())
reranker = Reranker(agent)
rerank_results = reranker.rerank(retrieved_results)
rerank_results = reranker.rerank_batch(retrieved_results)
print(rerank_results)

from pathlib import Path

from rank_llm.result import ResultsWriter
from rank_llm.data import DataWriter

# write rerank results
writer = ResultsWriter(rerank_results)
writer = DataWriter(rerank_results)
Path(f"demo_outputs/").mkdir(parents=True, exist_ok=True)
writer.write_in_json_format(f"demo_outputs/rerank_results.json")
writer.write_in_trec_eval_format(f"demo_outputs/rerank_results.txt")
Expand Down
Loading