Lintfix
LeiWang1999 committed Jul 16, 2024
1 parent b9655fd commit fff385f
Showing 6 changed files with 206 additions and 172 deletions.
1 change: 1 addition & 0 deletions bitblas/cache/operator.py
@@ -18,6 +18,7 @@
 BITBLAS_WRAPPED_SOURCE_NAME = "wrapper_source.cu"
 BITBLAS_WRAPPED_COMPILED_NAME = "wrapper_compiled.so"
 
+
 class OperatorCache:
     """
     Manages a cache for operator instances (e.g., Matmul, Convolution) based on their configurations.
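
The OperatorCache docstring above describes a cache keyed by operator configuration. As a rough illustration of that pattern (a minimal sketch; SimpleOperatorCache, get_or_create, and factory are hypothetical names, not the actual BitBLAS API):

    # Minimal sketch of a config-keyed operator cache; illustrative only.
    # The real OperatorCache in bitblas/cache/operator.py has a richer API,
    # e.g. persisting compiled artifacts such as wrapper_compiled.so.
    class SimpleOperatorCache:

        def __init__(self):
            self._cache = {}  # operator config -> operator instance

        def get_or_create(self, config, factory):
            # Reuse the operator built for this config, or build and memoize it.
            # Assumes config objects are hashable.
            if config not in self._cache:
                self._cache[config] = factory(config)
            return self._cache[config]
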
18 changes: 5 additions & 13 deletions integration/BitNet/create_bitblas_ckpt.py
@@ -18,9 +18,8 @@
 bitblas.set_log_level("INFO")
 
 model_name_or_path = "BitBLASModel/open_llama_3b_1.58bits"
-saved_model_path = os.path.join(
-    dirpath, "models", f"{model_name_or_path}_bitblas"
-)
+saved_model_path = os.path.join(dirpath, "models", f"{model_name_or_path}_bitblas")
 
+
 def generate_text(model, tokenizer, prompt, max_length=100):
     input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.lm_head.weight.device)
@@ -59,13 +58,8 @@ def main():
         model_name_or_path,
         use_flash_attention_2=True,
         torch_dtype=torch.float16,
-    )
-    .cuda()
-    .half()
-    )
-    tokenizer = BitnetTokenizer.from_pretrained(
-        model_name_or_path, use_fast=False
-    )
+    ).cuda().half())
+    tokenizer = BitnetTokenizer.from_pretrained(model_name_or_path, use_fast=False)
 
     # print("original model generated text:")
     # print(generate_text(model, tokenizer, "Hi, ", max_length=100))
@@ -107,9 +101,7 @@ def main():
         file_path = cached_file(model_name_or_path, file)
         os.system(f"cp {file_path} {saved_model_path}")
     # load quantized model
-    qmodel = BitnetForCausalLM.from_quantized(
-        saved_model_path,
-    ).cuda().half()
+    qmodel = BitnetForCausalLM.from_quantized(saved_model_path,).cuda().half()
     print("quantized model generated text:")
     print(generate_text(qmodel, tokenizer, "Hi, ", max_length=100))

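
Taken together, the calls visible in this diff trace a quantize, save, reload round trip. A condensed, hypothetical outline (only identifiers that appear in the diff are real; the saved_model_path value and the elided quantize/save step are assumptions):

    # Hypothetical condensation of create_bitblas_ckpt.py's flow, using only
    # calls that appear in the diff above.
    from modeling_bitnet import BitnetForCausalLM
    from tokenization_bitnet import BitnetTokenizer

    model_name_or_path = "BitBLASModel/open_llama_3b_1.58bits"
    saved_model_path = "models/open_llama_3b_1.58bits_bitblas"  # assumed layout

    tokenizer = BitnetTokenizer.from_pretrained(model_name_or_path, use_fast=False)
    # ... quantize the fp16 model and write it to saved_model_path ...
    qmodel = BitnetForCausalLM.from_quantized(saved_model_path).cuda().half()
    # then e.g.: print(generate_text(qmodel, tokenizer, "Hi, ", max_length=100))
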
7 changes: 4 additions & 3 deletions integration/BitNet/eval_correctness.py
@@ -1,7 +1,6 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
-import argparse
 import torch
 import bitblas
 from modeling_bitnet import BitnetForCausalLM
@@ -12,6 +11,7 @@
 torch.set_grad_enabled(False)
 bitblas.set_log_level("INFO")
 
+
 def generate_text(model, tokenizer, prompt, max_length=100):
     input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.lm_head.weight.device)
     # Generate cos and sin values
@@ -69,7 +69,10 @@ def get_runtime(num_repeats=1):
     times = get_runtime(num_repeats)
     return np.mean(times)
 
+
 model_path = '1bitLLM/bitnet_b1_58-3B'
+
+
 def main():
     model = BitnetForCausalLM.from_pretrained(
         model_path,
@@ -79,15 +82,13 @@ def main():
     with torch.no_grad():
         model._post_process_weights()
 
-    # input_id = torch.ones(1, 1).long().cuda()
     tokenizer = BitnetTokenizer.from_pretrained(model_path, use_fast=False)
     input_id = tokenizer("Hello")['input_ids']
     input_id = torch.tensor(input_id).unsqueeze(0).cuda()
     output = model(input_id)
     print(output)
 
     print(generate_text(model, tokenizer, "Hello", max_length=100))
 
-
 
 if __name__ == '__main__':
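
Only the tail of the benchmarking helper is visible in the hunk above (times = get_runtime(num_repeats) followed by return np.mean(times)). A hypothetical reconstruction of the timing pattern that tail implies; the actual body of get_runtime is not shown in the diff:

    # Hypothetical reconstruction of the latency-measurement pattern implied
    # by the tail shown above; not the file's actual code.
    import time

    import numpy as np
    import torch


    def profile_forward(model, input_id, num_repeats=10):
        """Mean forward latency in milliseconds."""

        def get_runtime(num_repeats=1):
            times = []
            for _ in range(num_repeats):
                torch.cuda.synchronize()
                start = time.time()
                model(input_id)
                torch.cuda.synchronize()
                times.append((time.time() - start) * 1000.0)
            return times

        get_runtime()  # warm-up pass, excluded from the measurement
        times = get_runtime(num_repeats)
        return np.mean(times)
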
22 changes: 5 additions & 17 deletions integration/BitNet/load_from_quantized.py
@@ -5,11 +5,9 @@
 import bitblas
 from modeling_bitnet import BitnetForCausalLM
 from tokenization_bitnet import BitnetTokenizer
-from transformers.utils.hub import cached_file
 import os
 from transformers import GenerationConfig
 import time
-import json
 
 filepath = os.path.abspath(__file__)
 dirpath = os.path.dirname(filepath)
@@ -18,20 +16,14 @@
 bitblas.set_log_level("INFO")
 
 model_name_or_path = "BitBLASModel/open_llama_3b_1.58bits"
-saved_model_path = os.path.join(
-    dirpath, "models", f"{model_name_or_path}_bitblas"
-)
+saved_model_path = os.path.join(dirpath, "models", f"{model_name_or_path}_bitblas")
 
 
 def generate_text(model, tokenizer, prompt, max_length=100):
-    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(
-        model.lm_head.weight.device
-    )
+    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.lm_head.weight.device)
     # Generate cos and sin values
     seq_length = input_ids.size(1)
-    position_ids = torch.arange(
-        seq_length, dtype=torch.long, device=input_ids.device
-    )
+    position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
     position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
 
     generation_config = GenerationConfig(
@@ -60,12 +52,8 @@ def generate_text(model, tokenizer, prompt, max_length=100):

 def main():
     # load quantized model
-    qmodel = BitnetForCausalLM.from_quantized(
-        saved_model_path,
-    ).cuda().half()
-    tokenizer = BitnetTokenizer.from_pretrained(
-        model_name_or_path, use_fast=False
-    )
+    qmodel = BitnetForCausalLM.from_quantized(saved_model_path,).cuda().half()
+    tokenizer = BitnetTokenizer.from_pretrained(model_name_or_path, use_fast=False)
     # print("original model generated text:")
     # print(generate_text(model, tokenizer, "Hi, ", max_length=100))
     input_ids = torch.ones((1, 1), dtype=torch.long).cuda()
input_ids = torch.ones((1, 1), dtype=torch.long).cuda()
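
The generate_text body in the hunk above is truncated at generation_config = GenerationConfig(. The snippet below shows typical transformers usage of that object; the field values are assumptions, not the script's actual arguments, and model, input_ids, and tokenizer are assumed to be in scope:

    # Typical GenerationConfig usage; values are assumptions, since the diff
    # truncates before the script's actual arguments.
    from transformers import GenerationConfig

    generation_config = GenerationConfig(
        max_length=100,  # mirrors generate_text's max_length=100 default
        do_sample=True,
        use_cache=True,
    )
    output_ids = model.generate(input_ids, generation_config=generation_config)
    text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
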
