Skip to content

Commit

Permalink
Merge main
Browse files: browse the repository at this point in the history
  • Loading branch information
Nicolas Frank committed Sep 11, 2024
2 parents d7449b0 + 5fa7dd3 commit 3db6019
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changed
- Use OpenAI response_format instead of separators in the prompt.
- Switch to the Cohere reranker v3 (`rerank-english-v3.0`) and change the default to `retriever_k = 500`.

## [v0.0.5]

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies = [
"cohere",
"elasticsearch >= 8.5",
"elasticsearch-dsl",
"fastapi",
"fastapi <= 0.112.0",
"fastapi-pagination",
"httpx",
"python-dotenv",
Expand Down
2 changes: 1 addition & 1 deletion src/scholarag/app/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class PassthroughRequest(BaseModel):
class RetrievalRequest(PassthroughRequest, extra="forbid"):
"""Request for the raw retrieval endpoint."""

retriever_k: int = Field(700, ge=1, le=2000)
retriever_k: int = Field(500, ge=1, le=1000)
use_reranker: bool = True
reranker_k: int = Field(8, ge=1, le=100)

Expand Down
8 changes: 4 additions & 4 deletions src/scholarag/services/cohere_reranker.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,12 @@ def run(
List of dictionaries containing in score order the text and the associated score.
"""
response = self.client.rerank(
model="rerank-english-v2.0",
model="rerank-english-v3.0",
query=query,
documents=contexts,
return_documents=True,
top_n=len(contexts),
max_chunks_per_doc=10_000 // len(contexts),
max_chunks_per_doc=1000 // len(contexts),
)
reranked_contexts = [
{
Expand Down Expand Up @@ -88,12 +88,12 @@ async def arun(
List of dictionaries containing in score order the text and the associated score.
"""
response = await self.async_client.rerank(
model="rerank-english-v2.0",
model="rerank-english-v3.0",
query=query,
documents=contexts,
return_documents=True,
top_n=len(contexts),
max_chunks_per_doc=10_000 // len(contexts),
max_chunks_per_doc=1000 // len(contexts),
)

reranked_contexts = [
Expand Down

0 comments on commit 3db6019

Please sign in to comment.