Skip to content

Commit

Permalink
pad images if exception
Browse files Browse the repository at this point in the history
  • Loading branch information
baberabb committed Sep 18, 2024
1 parent 6dc55fb commit 1c24248
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 10 deletions.
33 changes: 23 additions & 10 deletions lm_eval/models/hf_vlms.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
replace_placeholders,
stop_sequences_criteria,
)
from lm_eval.utils import add_padding_if_needed


DEFAULT_IMAGE_PLACEHOLDER = "<image>"
Expand Down Expand Up @@ -266,7 +267,9 @@ def chat_template(self, chat_template: Union[bool, str] = False) -> Optional[str
def tok_batch_multimodal_encode(
self,
strings: List[str], # note that input signature of this fn is different
images: List[List], # TODO: images are pil.Image at the moment, update typehint
images: List[
List["PIL.Image.Image"] # noqa: F821
], # TODO: images are pil.Image at the moment, update typehint
padding_side: str = "left",
left_truncate_len: int = None,
truncation: bool = False,
Expand All @@ -292,15 +295,25 @@ def tok_batch_multimodal_encode(
images = [img[: self.max_images] for img in images]
if self.rgb:
images = [[img.convert("RGB") for img in sublist] for sublist in images]

encoding = self.processor(
images=images,
text=strings,
truncation=truncation,
padding="longest",
return_tensors="pt",
# **add_special_tokens, # TODO: at least some Processors error out when passing this. How do we control whether text gets BOS added?
)
try:
encoding = self.processor(
images=images,
text=strings,
truncation=truncation,
padding="longest",
return_tensors="pt",
# **add_special_tokens, # TODO: at least some Processors error out when passing this. How do we control whether text gets BOS added?
)
# Qwen processor errors out if a dimension is too small (defaults to do_resize=True, and that requires a min dimension)
except Exception:
encoding = self.processor(
images=[add_padding_if_needed(image) for image in images],
text=strings,
truncation=truncation,
padding="longest",
return_tensors="pt",
# **add_special_tokens, # TODO: at least some Processors error out when passing this. How do we control whether text gets BOS added?
)

encoding.to( # TODO: our other tokenization methods in HFLM don't typically move to device. this breaks convention
self.device, self.model.dtype
Expand Down
37 changes: 37 additions & 0 deletions lm_eval/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,3 +499,40 @@ def weighted_f1_score(items):
preds = unzipped_list[1]
fscore = f1_score(golds, preds, average="weighted")
return fscore


def add_padding_if_needed(
    images: List["PIL.Image.Image"],  # noqa: F821
    min_width: int = 50,
    min_height: int = 50,
    color=(255, 255, 255),
) -> List["PIL.Image.Image"]:  # noqa: F821
    """Pad each image so it is at least ``min_width`` x ``min_height`` pixels.

    Images that already meet both minimums are passed through unchanged.
    Smaller images are converted to RGB and centered on an enlarged canvas
    filled with ``color`` (white by default). Used as a fallback when a
    processor (e.g. Qwen's) rejects images below its minimum resize dimension.

    Args:
        images: list of PIL images to check/pad.
        min_width: minimum acceptable width in pixels.
        min_height: minimum acceptable height in pixels.
        color: RGB fill color for the added border.

    Returns:
        A new list of images, in the same order as ``images``.
    """
    # Local import: PIL is an optional dependency of this module.
    from PIL import ImageOps

    res = []
    for image in images:
        width, height = image.size

        if width >= min_width and height >= min_height:
            # BUG FIX: the original `return image` here aborted the whole
            # loop as soon as one image was large enough, returning a single
            # bare image instead of a list and dropping all remaining images.
            res.append(image)
            continue
        image = image.convert("RGB")
        new_width = max(width, min_width)
        new_height = max(height, min_height)

        delta_width = new_width - width
        delta_height = new_height - height

        # Split the padding evenly; any odd pixel goes to the right/bottom
        # so the original content stays centered.
        padding_left = delta_width // 2
        padding_right = delta_width - padding_left
        padding_top = delta_height // 2
        padding_bottom = delta_height - padding_top
        res.append(
            ImageOps.expand(
                image,
                (padding_left, padding_top, padding_right, padding_bottom),
                fill=color,
            )
        )

    return res

0 comments on commit 1c24248

Please sign in to comment.