You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Explicitly passing a revision is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a revision is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a revision is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Load kernel : D:\langchain-model\chatglm-6b-int4\quantization_kernels.so
No set_num_threads() found in kernel.
Setting CPU quantization kernel threads to 10
Using quantization cache
Applying quantization to glm layers
--- Logging error ---
Traceback (most recent call last):
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\quantization.py", line 19, in <module>
from cpm_kernels.kernels.base import LazyKernelCModule, KernelFunction, round_up
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\__init__.py", line 1, in <module>
from . import library
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\library\__init__.py", line 2, in <module>
from . import cuda
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\library\cuda.py", line 7, in <module>
cuda = Lib.from_lib("cuda", ctypes.WinDLL("nvcuda.dll"))
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\library\base.py", line 63, in from_lib
ret = Lib(name)
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\library\base.py", line 45, in __init__
lib_path = windows_find_lib(self.__name)
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\library\base.py", line 39, in windows_find_lib
return lookup_dll(lib_name)
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\library\base.py", line 16, in lookup_dll
for name in os.listdir(path):
NotADirectoryError: [WinError 267] 目录名称无效。: 'C:\Windows\SysWOW64\WindowsPowerShell\v1.0\powershell.exe'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:\Anaconda\envs\chatglm\lib\logging\__init__.py", line 1100, in emit
msg = self.format(record)
File "D:\Anaconda\envs\chatglm\lib\logging\__init__.py", line 943, in format
return fmt.format(record)
File "D:\Anaconda\envs\chatglm\lib\logging\__init__.py", line 678, in format
record.message = record.getMessage()
File "D:\Anaconda\envs\chatglm\lib\logging\__init__.py", line 368, in getMessage
msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
File "D:\ChatGLM-6B-int4\ChatGLM-6B-main\cli_demo.py", line 8, in <module>
model = AutoModel.from_pretrained("D:\ChatGLM-6B-int4\ChatGLM-6B-main\model", trust_remote_code=True).half().cuda()
File "D:\Anaconda\envs\chatglm\lib\site-packages\transformers\models\auto\auto_factory.py", line 466, in from_pretrained
return model_class.from_pretrained(
File "D:\Anaconda\envs\chatglm\lib\site-packages\transformers\modeling_utils.py", line 2498, in from_pretrained
model = cls(config, *model_args, **model_kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 1061, in __init__
self.quantize(self.config.quantization_bit, self.config.quantization_embeddings, use_quantization_cache=True, empty_init=True)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 1424, in quantize
from .quantization import quantize, QuantizedEmbedding, QuantizedLinear, load_cpu_kernel
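File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 883, in exec_module
File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\quantization.py", line 46, in <module>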
logger.warning("Failed to load cpm_kernels:", exception)
Message: 'Failed to load cpm_kernels:'
Arguments: (NotADirectoryError(20, '目录名称无效。'),)
欢迎使用 ChatGLM-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序
用户:你好
Traceback (most recent call last):
File "D:\ChatGLM-6B-int4\ChatGLM-6B-main\cli_demo.py", line 58, in <module>
main()
File "D:\ChatGLM-6B-int4\ChatGLM-6B-main\cli_demo.py", line 43, in main
for response, history in model.stream_chat(tokenizer, query, history=history):
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\utils\_contextlib.py", line 35, in generator_context
response = gen.send(None)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 1311, in stream_chat
for outputs in self.stream_generate(**inputs, **gen_kwargs):
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\utils\_contextlib.py", line 35, in generator_context
response = gen.send(None)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 1388, in stream_generate
outputs = self(
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 1190, in forward
transformer_outputs = self.transformer(
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 996, in forward
layer_ret = layer(
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 627, in forward
attention_outputs = self.attention(
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 445, in forward
mixed_raw_layer = self.query_key_value(hidden_states)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\quantization.py", line 393, in forward
output = W8A16Linear.apply(input, self.weight, self.weight_scale, self.weight_bit_width)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\autograd\function.py", line 598, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\quantization.py", line 56, in forward
weight = extract_weight_to_half(quant_w, scale_w, weight_bit_width)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\quantization.py", line 276, in extract_weight_to_half
func = kernels.int4WeightExtractionHalf
AttributeError: 'NoneType' object has no attribute 'int4WeightExtractionHalf'
Expected Behavior
No response
Steps To Reproduce
运行cli_demo.py后报错
Environment
- OS: Windows 11
- Python: 3.10
- Transformers: 4.27.1
- PyTorch: 2.3.1+cu118
- CUDA Support (`python -c "import torch; print(torch.cuda.is_available())"`): true
Anything else?
No response
The text was updated successfully, but these errors were encountered:
Is there an existing issue for this?
Current Behavior
运行cli_demo.py后输入“你好”报错
Explicitly passing a revision is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a revision is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a revision is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Load kernel : D:\langchain-model\chatglm-6b-int4\quantization_kernels.so
No set_num_threads() found in kernel.
Setting CPU quantization kernel threads to 10
Using quantization cache
Applying quantization to glm layers
--- Logging error ---
Traceback (most recent call last):
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\quantization.py", line 19, in <module>
from cpm_kernels.kernels.base import LazyKernelCModule, KernelFunction, round_up
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\__init__.py", line 1, in <module>
from . import library
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\library\__init__.py", line 2, in <module>
from . import cuda
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\library\cuda.py", line 7, in <module>
cuda = Lib.from_lib("cuda", ctypes.WinDLL("nvcuda.dll"))
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\library\base.py", line 63, in from_lib
ret = Lib(name)
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\library\base.py", line 45, in __init__
lib_path = windows_find_lib(self.__name)
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\library\base.py", line 39, in windows_find_lib
return lookup_dll(lib_name)
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\library\base.py", line 16, in lookup_dll
for name in os.listdir(path):
NotADirectoryError: [WinError 267] 目录名称无效。: 'C:\Windows\SysWOW64\WindowsPowerShell\v1.0\powershell.exe'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:\Anaconda\envs\chatglm\lib\logging\__init__.py", line 1100, in emit
msg = self.format(record)
File "D:\Anaconda\envs\chatglm\lib\logging\__init__.py", line 943, in format
return fmt.format(record)
File "D:\Anaconda\envs\chatglm\lib\logging\__init__.py", line 678, in format
record.message = record.getMessage()
File "D:\Anaconda\envs\chatglm\lib\logging\__init__.py", line 368, in getMessage
msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
File "D:\ChatGLM-6B-int4\ChatGLM-6B-main\cli_demo.py", line 8, in <module>
model = AutoModel.from_pretrained("D:\ChatGLM-6B-int4\ChatGLM-6B-main\model", trust_remote_code=True).half().cuda()
File "D:\Anaconda\envs\chatglm\lib\site-packages\transformers\models\auto\auto_factory.py", line 466, in from_pretrained
return model_class.from_pretrained(
File "D:\Anaconda\envs\chatglm\lib\site-packages\transformers\modeling_utils.py", line 2498, in from_pretrained
model = cls(config, *model_args, **model_kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 1061, in __init__
self.quantize(self.config.quantization_bit, self.config.quantization_embeddings, use_quantization_cache=True, empty_init=True)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 1424, in quantize
from .quantization import quantize, QuantizedEmbedding, QuantizedLinear, load_cpu_kernel
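File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 883, in exec_module
File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\quantization.py", line 46, in <module>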
logger.warning("Failed to load cpm_kernels:", exception)
Message: 'Failed to load cpm_kernels:'
Arguments: (NotADirectoryError(20, '目录名称无效。'),)
欢迎使用 ChatGLM-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序
用户:你好
Traceback (most recent call last):
File "D:\ChatGLM-6B-int4\ChatGLM-6B-main\cli_demo.py", line 58, in <module>
main()
File "D:\ChatGLM-6B-int4\ChatGLM-6B-main\cli_demo.py", line 43, in main
for response, history in model.stream_chat(tokenizer, query, history=history):
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\utils\_contextlib.py", line 35, in generator_context
response = gen.send(None)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 1311, in stream_chat
for outputs in self.stream_generate(**inputs, **gen_kwargs):
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\utils\_contextlib.py", line 35, in generator_context
response = gen.send(None)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 1388, in stream_generate
outputs = self(
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 1190, in forward
transformer_outputs = self.transformer(
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 996, in forward
layer_ret = layer(
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 627, in forward
attention_outputs = self.attention(
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 445, in forward
mixed_raw_layer = self.query_key_value(hidden_states)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\quantization.py", line 393, in forward
output = W8A16Linear.apply(input, self.weight, self.weight_scale, self.weight_bit_width)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\autograd\function.py", line 598, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\quantization.py", line 56, in forward
weight = extract_weight_to_half(quant_w, scale_w, weight_bit_width)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\quantization.py", line 276, in extract_weight_to_half
func = kernels.int4WeightExtractionHalf
AttributeError: 'NoneType' object has no attribute 'int4WeightExtractionHalf'
Expected Behavior
No response
Steps To Reproduce
运行cli_demo.py后报错
Environment
Anything else?
No response
The text was updated successfully, but these errors were encountered: