diff --git a/setup.py b/setup.py index b5a28f0..368ae9c 100755 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="text2text", - version="1.3.5", + version="1.3.6", author="artitw", author_email="artitw@gmail.com", description="Text2Text: Crosslingual NLP/G toolkit", @@ -29,6 +29,7 @@ 'langchain', 'googledrivedownloader', 'numpy', + 'optimum', 'pandas', 'scikit-learn', 'scipy', diff --git a/text2text/assistant.py b/text2text/assistant.py index 28988aa..ff60ad4 100644 --- a/text2text/assistant.py +++ b/text2text/assistant.py @@ -1,8 +1,7 @@ import logging import pandas as pd import text2text as t2t -from transformers import AutoTokenizer, logging -from auto_gptq import AutoGPTQForCausalLM +from transformers import AutoModelForCausalLM, AutoTokenizer, logging logging.set_verbosity(logging.CRITICAL) @@ -14,15 +13,12 @@ class Assistant(t2t.Transformer): def __init__(self, **kwargs): model_name_or_path = kwargs.get("model_name_or_path", "TheBloke/vicuna-13B-v1.5-16K-GPTQ") - self.__class__.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True) + self.__class__.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True, padding_side='left') - self.__class__.model = AutoGPTQForCausalLM.from_quantized(model_name_or_path, - use_safetensors=True, - trust_remote_code=False, - device="cuda:0", - use_triton=False, - quantize_config=None - ) + self.__class__.model = AutoModelForCausalLM.from_pretrained(model_name_or_path, + device_map="auto", + trust_remote_code=False, + revision="main") def completion_preprocess(self, input_lines, retriever=None, **kwargs): df = pd.DataFrame({"input_line": input_lines})