Skip to content

Commit

Permalink
Update tokenizers
Browse files Browse the repository at this point in the history
  • Loading branch information
jonatanklosko committed Jul 31, 2023
1 parent 4a9b92e commit 0b4f295
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 6 deletions.
6 changes: 3 additions & 3 deletions lib/bumblebee/utils/tokenizers.ex
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ defmodule Bumblebee.Utils.Tokenizers do
input = List.wrap(input)

{:ok, encodings} =
- Tokenizer.encode(tokenizer, input, add_special_tokens: opts[:add_special_tokens])
+ Tokenizer.encode_batch(tokenizer, input, add_special_tokens: opts[:add_special_tokens])

length = opts[:length]

Expand Down Expand Up @@ -152,7 +152,7 @@ defmodule Bumblebee.Utils.Tokenizers do
end

def decode(tokenizer, ids) do
- case Tokenizer.decode(tokenizer, ids) do
+ case Tokenizer.decode_batch(tokenizer, ids) do
{:ok, decoded} -> decoded
{:error, term} -> raise "decoding failed with error: #{inspect(term)}"
end
Expand All @@ -167,7 +167,7 @@ defmodule Bumblebee.Utils.Tokenizers do
end

def load!(path) do
- case Tokenizers.Tokenizer.from_file(path) do
+ case Tokenizers.Tokenizer.from_file(path, padding: :none, truncation: :none) do
{:ok, tokenizer} -> tokenizer
{:error, error} -> raise "failed to read tokenizer from file, reason: #{error}"
end
Expand Down
4 changes: 3 additions & 1 deletion mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ defmodule Bumblebee.MixProject do
defp deps do
[
{:axon, "~> 0.5.0", axon_opts()},
- {:tokenizers, "~> 0.3"},
+ # {:tokenizers, "~> 0.3"},
+ {:tokenizers, github: "elixir-nx/tokenizers", override: true},
+ {:rustler, ">= 0.0.0", optional: true},
# {:nx, "~> 0.5.0"},
# {:exla, "~> 0.5.0", only: [:dev, :test]},
# {:torchx, "~> 0.5.0", only: [:dev, :test]},
Expand Down
6 changes: 4 additions & 2 deletions mix.lock
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@
"plug_crypto": {:hex, :plug_crypto, "1.2.5", "918772575e48e81e455818229bf719d4ab4181fcbf7f85b68a35620f78d89ced", [:mix], [], "hexpm", "26549a1d6345e2172eb1c233866756ae44a9609bd33ee6f99147ab3fd87fd842"},
"progress_bar": {:hex, :progress_bar, "3.0.0", "f54ff038c2ac540cfbb4c2bfe97c75e7116ead044f3c2b10c9f212452194b5cd", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}], "hexpm", "6981c2b25ab24aecc91a2dc46623658e1399c21a2ae24db986b90d678530f2b7"},
"ranch": {:hex, :ranch, "1.8.0", "8c7a100a139fd57f17327b6413e4167ac559fbc04ca7448e9be9057311597a1d", [:make, :rebar3], [], "hexpm", "49fbcfd3682fab1f5d109351b61257676da1a2fdbe295904176d5e521a2ddfe5"},
"rustler_precompiled": {:hex, :rustler_precompiled, "0.6.1", "160b545bce8bf9a3f1b436b2c10f53574036a0db628e40f393328cbbe593602f", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, "~> 0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "0dd269fa261c4e3df290b12031c575fff07a542749f7b0e8b744d72d66c43600"},
"rustler": {:hex, :rustler, "0.29.1", "880f20ae3027bd7945def6cea767f5257bc926f33ff50c0d5d5a5315883c084d", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:toml, "~> 0.6", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "109497d701861bfcd26eb8f5801fe327a8eef304f56a5b63ef61151ff44ac9b6"},
"rustler_precompiled": {:hex, :rustler_precompiled, "0.6.2", "d2218ba08a43fa331957f30481d00b666664d7e3861431b02bd3f4f30eec8e5b", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, "~> 0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "b9048eaed8d7d14a53f758c91865cc616608a438d2595f621f6a4b32a5511709"},
"stb_image": {:hex, :stb_image, "0.6.2", "d680a418416b1d778231d1d16151be3474d187e8505e1bd524aa0d08d2de094f", [:make, :mix], [{:cc_precompiler, "~> 0.1.0", [hex: :cc_precompiler, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.7.0", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:kino, "~> 0.7", [hex: :kino, repo: "hexpm", optional: true]}, {:nx, "~> 0.4", [hex: :nx, repo: "hexpm", optional: true]}], "hexpm", "231ad012f649dd2bd5ef99e9171e814f3235e8f7c45009355789ac4836044a39"},
"telemetry": {:hex, :telemetry, "1.2.1", "68fdfe8d8f05a8428483a97d7aab2f268aaff24b49e0f599faa091f1d4e7f61c", [:rebar3], [], "hexpm", "dad9ce9d8effc621708f99eac538ef1cbe05d6a874dd741de2e689c47feafed5"},
"tokenizers": {:hex, :tokenizers, "0.3.2", "78c6238690a0467c613c8ba3c59338235594a78f870e8f8151b9614516dee0fd", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, ">= 0.0.0", [hex: :rustler, repo: "hexpm", optional: true]}, {:rustler_precompiled, "~> 0.6", [hex: :rustler_precompiled, repo: "hexpm", optional: false]}], "hexpm", "f6dd9a798e81cf2f3359e1731836ed0a351cae4da5d5d570a7ef3d0543e9cf85"},
"tokenizers": {:git, "https://github.com/elixir-nx/tokenizers.git", "26d864bdedc11ddbc8bae52eaad0858f8a90987f", []},
"toml": {:hex, :toml, "0.7.0", "fbcd773caa937d0c7a02c301a1feea25612720ac3fa1ccb8bfd9d30d822911de", [:mix], [], "hexpm", "0690246a2478c1defd100b0c9b89b4ea280a22be9a7b313a8a058a2408a2fa70"},
"torchx": {:git, "https://github.com/elixir-nx/nx.git", "2c6a9d48890d70fb3937cd19b0cb3e2356008488", [sparse: "torchx"]},
"unpickler": {:hex, :unpickler, "0.1.0", "c2262c0819e6985b761e7107546cef96a485f401816be5304a65fdd200d5bd6a", [:mix], [], "hexpm", "e2b3f61e62406187ac52afead8a63bfb4e49394028993f3c4c42712743cab79e"},
"unzip": {:hex, :unzip, "0.8.0", "ee21d87c21b01567317387dab4228ac570ca15b41cfc221a067354cbf8e68c4d", [:mix], [], "hexpm", "ffa67a483efcedcb5876971a50947222e104d5f8fea2c4a0441e6f7967854827"},
Expand Down

0 comments on commit 0b4f295

Please sign in to comment.