Skip to content

Commit

Permalink
Adding back padding to query encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
NohTow committed Aug 19, 2024
1 parent 80b3451 commit 1123a1c
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions giga_cherche/models/colbert.py
Original file line number Diff line number Diff line change
Expand Up @@ -875,8 +875,8 @@ def tokenize(
max_length = self.query_length if is_query else self.document_length
self._first_module().max_seq_length = max_length

- # Handle padding for documents if specified
- tokenize_args = {"padding": "max_length"} if pad_document else {}
+ # Pad queries (query expansion) and handle padding for documents if specified
+ tokenize_args = {"padding": "max_length"} if pad_document or is_query else {}

# Tokenize the texts
tokenized_outputs = self._first_module().tokenize(texts, **tokenize_args)
Expand Down

0 comments on commit 1123a1c

Please sign in to comment.