Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DINOv2 model #334

Merged
merged 35 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
91cce58
Add BitFeaturizer
joelpaulkoch Feb 6, 2024
434b854
Refactor to pipeline
joelpaulkoch Feb 6, 2024
bb016ef
Add test for BitFeaturizer
joelpaulkoch Feb 6, 2024
981920a
Base version of DINOv2, with pooling
joelpaulkoch Feb 7, 2024
e0b635d
Interpolate positional encodings
joelpaulkoch Feb 8, 2024
397f79c
Rename variables
joelpaulkoch Feb 8, 2024
6cb14fe
Return hidden_states as feature map
joelpaulkoch Feb 8, 2024
3fc47e3
Apply layernorm and reshape feature maps
joelpaulkoch Feb 9, 2024
6acadf8
Image classification with dinov2
joelpaulkoch Feb 9, 2024
04e679f
Update tests
joelpaulkoch Feb 9, 2024
f695c04
Remove comments
joelpaulkoch Feb 9, 2024
11514bc
Clean up configuration
joelpaulkoch Feb 9, 2024
49f6133
Make pipeline
joelpaulkoch Feb 9, 2024
f1f36eb
Add mapping for DinoV2Backbone
joelpaulkoch Feb 9, 2024
ede347b
Add swiglu ffn layer
joelpaulkoch Feb 11, 2024
657f950
Extracted scale layer from ffn
joelpaulkoch Feb 14, 2024
8b332db
Refactor ffns
joelpaulkoch Feb 14, 2024
5335d6b
Rename block type
joelpaulkoch Feb 14, 2024
95004b8
Refactor
joelpaulkoch Feb 14, 2024
18d8744
Update docs
joelpaulkoch Feb 14, 2024
3cc1622
Refactor param naming
joelpaulkoch Feb 14, 2024
9b79e82
Floor instead of round in swiglu
joelpaulkoch Feb 14, 2024
84f4d9c
Merge branch 'main' into DinoV2
joelpaulkoch Feb 16, 2024
43d7e12
Use new size representation
joelpaulkoch Feb 16, 2024
4502953
Support interpolation for rectangular input
joelpaulkoch Feb 16, 2024
1874e97
Updates
jonatanklosko Feb 19, 2024
2da7db4
Updates
jonatanklosko Feb 19, 2024
45e36c6
Refactor transformer blocks
jonatanklosko Feb 20, 2024
21c1df0
Support custom block function
jonatanklosko Feb 20, 2024
2e56d04
Refactor ffn
jonatanklosko Feb 20, 2024
f148ed1
Rename test file
jonatanklosko Feb 20, 2024
9e23241
Updates
jonatanklosko Feb 20, 2024
6cb23cc
Update feature maps
jonatanklosko Feb 21, 2024
041b8a7
Naming
jonatanklosko Feb 21, 2024
a46083b
Up
jonatanklosko Feb 21, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion lib/bumblebee.ex
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ defmodule Bumblebee do
{Bumblebee.Vision.Deit, :for_image_classification_with_teacher},
"DeiTForMaskedImageModeling" => {Bumblebee.Vision.Deit, :for_masked_image_modeling},
"DeiTModel" => {Bumblebee.Vision.Deit, :base},
"Dinov2Model" => {Bumblebee.Vision.DinoV2, :base},
"Dinov2Backbone" => {Bumblebee.Vision.DinoV2, :backbone},
"Dinov2ForImageClassification" => {Bumblebee.Vision.DinoV2, :for_image_classification},
"DistilBertModel" => {Bumblebee.Text.Distilbert, :base},
"DistilBertForMaskedLM" => {Bumblebee.Text.Distilbert, :for_masked_language_modeling},
"DistilBertForSequenceClassification" =>
Expand Down Expand Up @@ -203,7 +206,8 @@ defmodule Bumblebee do
}

@transformers_image_processor_type_to_featurizer %{
"BlipImageProcessor" => Bumblebee.Vision.BlipFeaturizer
"BlipImageProcessor" => Bumblebee.Vision.BlipFeaturizer,
"BitImageProcessor" => Bumblebee.Vision.BitFeaturizer
}

@model_type_to_featurizer %{
Expand Down
175 changes: 175 additions & 0 deletions lib/bumblebee/vision/bit_featurizer.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
defmodule Bumblebee.Vision.BitFeaturizer do
  alias Bumblebee.Shared

  options = [
    resize: [
      default: true,
      doc: "whether to resize the input to the given `:size`"
    ],
    size: [
      default: 224,
      doc: """
      the size to resize the input to. A single number, a `{height, width}` tuple, or a
      `%{"shortest_edge" => size}` map specifying the shortest edge. Only has an effect
      if `:resize` is `true`
      """
    ],
    resize_method: [
      default: :bicubic,
      doc:
        "the resizing method, either of `:nearest`, `:bilinear`, `:bicubic`, `:lanczos3`, `:lanczos5`"
    ],
    center_crop: [
      default: true,
      doc: "whether to crop the image at the center to given `:crop_size`"
    ],
    crop_size: [
      default: {224, 224},
      doc: """
      the size to crop the input to. Either a `{height, width}` tuple or a
      `%{"height" => height, "width" => width}` map. Only has an effect if
      `:center_crop` is `true`
      """
    ],
    rescale: [
      default: true,
      doc: "whether to rescale the input by the given `:rescale_factor`"
    ],
    rescale_factor: [
      # 1 / 255, which maps 8-bit pixel values into the [0, 1] range
      default: 0.00392156862745098,
      doc: """
      the factor by which to rescale the input. A single number. Only has an effect
      if `:rescale` is `true`
      """
    ],
    normalize: [
      default: true,
      doc: "whether or not to normalize the input with mean and standard deviation"
    ],
    image_mean: [
      default: [0.5, 0.5, 0.5],
      doc: "the sequence of mean values for each channel, to be used when normalizing images"
    ],
    image_std: [
      default: [0.5, 0.5, 0.5],
      doc:
        "the sequence of standard deviations for each channel, to be used when normalizing images"
    ]
  ]

  @moduledoc """
  BiT featurizer for image data.

  ## Configuration

  #{Shared.options_doc(options)}
  """

  defstruct Shared.option_defaults(options)

  @behaviour Bumblebee.Featurizer
  @behaviour Bumblebee.Configurable

  alias Bumblebee.Utils.Image

  @impl true
  def config(featurizer, opts) do
    Shared.put_config_attrs(featurizer, opts)
  end

  # Converts one image or a list of images into a single batched f32 tensor.
  # Each image is resized, center-cropped and rescaled according to the
  # featurizer options, then all results are concatenated.
  @impl true
  def process_input(featurizer, images) do
    images = List.wrap(images)

    for image <- images do
      image
      |> Image.to_batched_tensor()
      |> Nx.as_type(:f32)
      |> Image.normalize_channels(length(featurizer.image_mean))
      |> maybe_resize(featurizer)
      |> maybe_center_crop(featurizer)
      |> maybe_rescale(featurizer)
    end
    |> Nx.concatenate()
  end

  # Builds a template of the batch returned by `process_input/2`.
  @impl true
  def batch_template(featurizer, batch_size) do
    {height, width} = output_size(featurizer)
    num_channels = length(featurizer.image_mean)
    Nx.template({batch_size, height, width, num_channels}, :f32)
  end

  # Applies the optional mean/std normalization and wraps the images under
  # the "pixel_values" key.
  @impl true
  def process_batch(featurizer, images) do
    images =
      if featurizer.normalize do
        NxImage.normalize(
          images,
          Nx.tensor(featurizer.image_mean),
          Nx.tensor(featurizer.image_std)
        )
      else
        images
      end

    %{"pixel_values" => images}
  end

  defp maybe_resize(images, featurizer) do
    if featurizer.resize do
      resize(images, featurizer)
    else
      images
    end
  end

  # A shortest-edge map resizes via `NxImage.resize_short/3`; any other
  # `:size` value is normalized and passed to `NxImage.resize/3`.
  defp resize(images, featurizer) do
    case featurizer.size do
      %{"shortest_edge" => size} ->
        NxImage.resize_short(images, size, method: featurizer.resize_method)

      size ->
        NxImage.resize(images, Image.normalize_size(size), method: featurizer.resize_method)
    end
  end

  defp maybe_center_crop(images, featurizer) do
    if featurizer.center_crop do
      NxImage.center_crop(images, crop_dimensions(featurizer.crop_size))
    else
      images
    end
  end

  defp maybe_rescale(images, featurizer) do
    if featurizer.rescale do
      Nx.multiply(images, featurizer.rescale_factor)
    else
      images
    end
  end

  # Spatial size of the images returned by `process_input/2`. Center cropping
  # runs after resizing, so when it is enabled the crop size determines the
  # output shape; otherwise the shape is derived from `:size`.
  defp output_size(featurizer) do
    if featurizer.center_crop do
      crop_dimensions(featurizer.crop_size)
    else
      Image.normalize_size(featurizer.size)
    end
  end

  # Normalizes `:crop_size` to a `{height, width}` tuple. Both the tuple form
  # (the struct default) and the string-keyed map form (what the config loader
  # below produces) are accepted.
  defp crop_dimensions({height, width}), do: {height, width}
  defp crop_dimensions(%{"height" => height, "width" => width}), do: {height, width}

  defimpl Bumblebee.HuggingFace.Transformers.Config do
    # Maps the image processor configuration keys onto featurizer options.
    def load(featurizer, data) do
      import Shared.Converters

      opts =
        convert!(data,
          resize: {"do_resize", boolean()},
          size:
            {"size", one_of([number(), tuple([number(), number()]), map(string(), number())])},
          resize_method: {"resample", resize_method()},
          center_crop: {"do_center_crop", boolean()},
          crop_size: {"crop_size", map(string(), number())},
          rescale: {"do_rescale", boolean()},
          rescale_factor: {"rescale_factor", number()},
          normalize: {"do_normalize", boolean()},
          image_mean: {"image_mean", list(number())},
          image_std: {"image_std", list(number())}
        )

      @for.config(featurizer, opts)
    end
  end
end
Loading