From c3d7de79020f15dbcf169e1dd0ef2b244ed43961 Mon Sep 17 00:00:00 2001
From: zhzluke96
Date: Tue, 8 Oct 2024 20:28:01 +0800
Subject: [PATCH] fix: fix typing

---
 src/TokenizersHub.ts | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/TokenizersHub.ts b/src/TokenizersHub.ts
index 61ef6e0..e873378 100644
--- a/src/TokenizersHub.ts
+++ b/src/TokenizersHub.ts
@@ -131,7 +131,7 @@ export class TokenizersHub {
   }
 
   tokenize(input: string, tokenizer: PreTrainedTokenizer): Token[] {
-    const ids = tokenizer.encode(input, undefined, {
+    const ids = tokenizer.encode(input, {
       add_special_tokens: false,
     });
     // const encoded = tokenizer._encode_text(input) || [];
@@ -159,10 +159,13 @@ export class TokenizersHub {
       // additional_special_tokens_num: tokenizer.additional_special_tokens.length,
       // special_tokens_num: tokenizer.special_tokens.length,
       chat_template: tokenizer.chat_template,
-      pad_token: tokenizer.getToken("pad_token") ?? tokenizer.pad_token,
+      pad_token:
+        (tokenizer as any).getToken("pad_token") ?? tokenizer.pad_token,
       // unk_token: tokenizer.getToken("unk_token") ?? tokenizer.pad_token,
-      bos_token: tokenizer.getToken("bos_token") ?? tokenizer.pad_token,
-      eos_token: tokenizer.getToken("eos_token") ?? tokenizer.pad_token,
+      bos_token:
+        (tokenizer as any).getToken("bos_token") ?? tokenizer.pad_token,
+      eos_token:
+        (tokenizer as any).getToken("eos_token") ?? tokenizer.pad_token,
       // mask_token: tokenizer.getToken("mask_token") ?? tokenizer.pad_token,
     };
   }