Lintfix
LeiWang1999 committed Jul 16, 2024
1 parent b9655fd commit fff385f
Showing 6 changed files with 206 additions and 172 deletions.
1 change: 1 addition & 0 deletions bitblas/cache/operator.py
@@ -18,6 +18,7 @@
 BITBLAS_WRAPPED_SOURCE_NAME = "wrapper_source.cu"
 BITBLAS_WRAPPED_COMPILED_NAME = "wrapper_compiled.so"
 
+
 class OperatorCache:
     """
     Manages a cache for operator instances (e.g., Matmul, Convolution) based on their configurations.
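
The OperatorCache docstring above describes a cache keyed by operator configuration. As a rough illustration of that pattern (a minimal sketch; SimpleOperatorCache, get_or_create, and factory are hypothetical names, not the actual BitBLAS API):

    # Minimal sketch of a config-keyed operator cache; illustrative only.
    # The real OperatorCache in bitblas/cache/operator.py has a richer API,
    # e.g. persisting compiled artifacts such as wrapper_compiled.so.
    class SimpleOperatorCache:

        def __init__(self):
            self._cache = {}  # operator config -> operator instance

        def get_or_create(self, config, factory):
            # Reuse the operator built for this config, or build and memoize it.
            # Assumes config objects are hashable.
            if config not in self._cache:
                self._cache[config] = factory(config)
            return self._cache[config]
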
18 changes: 5 additions & 13 deletions integration/BitNet/create_bitblas_ckpt.py
@@ -18,9 +18,8 @@
 bitblas.set_log_level("INFO")
 
 model_name_or_path = "BitBLASModel/open_llama_3b_1.58bits"
-saved_model_path = os.path.join(
-    dirpath, "models", f"{model_name_or_path}_bitblas"
-)
+saved_model_path = os.path.join(dirpath, "models", f"{model_name_or_path}_bitblas")
 
+
 def generate_text(model, tokenizer, prompt, max_length=100):
     input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.lm_head.weight.device)
@@ -59,13 +58,8 @@ def main():
         model_name_or_path,
         use_flash_attention_2=True,
         torch_dtype=torch.float16,
-    )
-    .cuda()
-    .half()
-    )
-    tokenizer = BitnetTokenizer.from_pretrained(
-        model_name_or_path, use_fast=False
-    )
+    ).cuda().half())
+    tokenizer = BitnetTokenizer.from_pretrained(model_name_or_path, use_fast=False)
 
     # print("original model generated text:")
     # print(generate_text(model, tokenizer, "Hi, ", max_length=100))
@@ -107,9 +101,7 @@ def main():
         file_path = cached_file(model_name_or_path, file)
         os.system(f"cp {file_path} {saved_model_path}")
     # load quantized model
-    qmodel = BitnetForCausalLM.from_quantized(
-        saved_model_path,
-    ).cuda().half()
+    qmodel = BitnetForCausalLM.from_quantized(saved_model_path,).cuda().half()
     print("quantized model generated text:")
     print(generate_text(qmodel, tokenizer, "Hi, ", max_length=100))

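
Taken together, the calls visible in this diff trace a quantize, save, reload round trip. A condensed, hypothetical outline (only identifiers that appear in the diff are real; the saved_model_path value and the elided quantize/save step are assumptions):

    # Hypothetical condensation of create_bitblas_ckpt.py's flow, using only
    # calls that appear in the diff above.
    from modeling_bitnet import BitnetForCausalLM
    from tokenization_bitnet import BitnetTokenizer

    model_name_or_path = "BitBLASModel/open_llama_3b_1.58bits"
    saved_model_path = "models/open_llama_3b_1.58bits_bitblas"  # assumed layout

    tokenizer = BitnetTokenizer.from_pretrained(model_name_or_path, use_fast=False)
    # ... quantize the fp16 model and write it to saved_model_path ...
    qmodel = BitnetForCausalLM.from_quantized(saved_model_path).cuda().half()
    # then e.g.: print(generate_text(qmodel, tokenizer, "Hi, ", max_length=100))
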
7 changes: 4 additions & 3 deletions integration/BitNet/eval_correctness.py
@@ -1,7 +1,6 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
-import argparse
 import torch
 import bitblas
 from modeling_bitnet import BitnetForCausalLM
@@ -12,6 +11,7 @@
 torch.set_grad_enabled(False)
 bitblas.set_log_level("INFO")
 
+
 def generate_text(model, tokenizer, prompt, max_length=100):
     input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.lm_head.weight.device)
     # Generate cos and sin values
@@ -69,7 +69,10 @@ def get_runtime(num_repeats=1):
     times = get_runtime(num_repeats)
     return np.mean(times)
 
+
 model_path = '1bitLLM/bitnet_b1_58-3B'
+
+
 def main():
     model = BitnetForCausalLM.from_pretrained(
         model_path,
@@ -79,15 +82,13 @@ def main():
     with torch.no_grad():
         model._post_process_weights()
 
-    # input_id = torch.ones(1, 1).long().cuda()
     tokenizer = BitnetTokenizer.from_pretrained(model_path, use_fast=False)
     input_id = tokenizer("Hello")['input_ids']
     input_id = torch.tensor(input_id).unsqueeze(0).cuda()
     output = model(input_id)
     print(output)
 
     print(generate_text(model, tokenizer, "Hello", max_length=100))
 
-
 
 if __name__ == '__main__':
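
Only the tail of the benchmarking helper is visible in the hunk above (times = get_runtime(num_repeats) followed by return np.mean(times)). A hypothetical reconstruction of the timing pattern that tail implies; the actual body of get_runtime is not shown in the diff:

    # Hypothetical reconstruction of the latency-measurement pattern implied
    # by the tail shown above; not the file's actual code.
    import time

    import numpy as np
    import torch


    def profile_forward(model, input_id, num_repeats=10):
        """Mean forward latency in milliseconds."""

        def get_runtime(num_repeats=1):
            times = []
            for _ in range(num_repeats):
                torch.cuda.synchronize()
                start = time.time()
                model(input_id)
                torch.cuda.synchronize()
                times.append((time.time() - start) * 1000.0)
            return times

        get_runtime()  # warm-up pass, excluded from the measurement
        times = get_runtime(num_repeats)
        return np.mean(times)
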
22 changes: 5 additions & 17 deletions integration/BitNet/load_from_quantized.py
@@ -5,11 +5,9 @@
 import bitblas
 from modeling_bitnet import BitnetForCausalLM
 from tokenization_bitnet import BitnetTokenizer
-from transformers.utils.hub import cached_file
 import os
 from transformers import GenerationConfig
 import time
-import json
 
 filepath = os.path.abspath(__file__)
 dirpath = os.path.dirname(filepath)
@@ -18,20 +16,14 @@
 bitblas.set_log_level("INFO")
 
 model_name_or_path = "BitBLASModel/open_llama_3b_1.58bits"
-saved_model_path = os.path.join(
-    dirpath, "models", f"{model_name_or_path}_bitblas"
-)
+saved_model_path = os.path.join(dirpath, "models", f"{model_name_or_path}_bitblas")
 
 
 def generate_text(model, tokenizer, prompt, max_length=100):
-    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(
-        model.lm_head.weight.device
-    )
+    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.lm_head.weight.device)
     # Generate cos and sin values
     seq_length = input_ids.size(1)
-    position_ids = torch.arange(
-        seq_length, dtype=torch.long, device=input_ids.device
-    )
+    position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
     position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
 
     generation_config = GenerationConfig(
@@ -60,12 +52,8 @@ def generate_text(model, tokenizer, prompt, max_length=100):

 def main():
     # load quantized model
-    qmodel = BitnetForCausalLM.from_quantized(
-        saved_model_path,
-    ).cuda().half()
-    tokenizer = BitnetTokenizer.from_pretrained(
-        model_name_or_path, use_fast=False
-    )
+    qmodel = BitnetForCausalLM.from_quantized(saved_model_path,).cuda().half()
+    tokenizer = BitnetTokenizer.from_pretrained(model_name_or_path, use_fast=False)
     # print("original model generated text:")
     # print(generate_text(model, tokenizer, "Hi, ", max_length=100))
     input_ids = torch.ones((1, 1), dtype=torch.long).cuda()
input_ids = torch.ones((1, 1), dtype=torch.long).cuda()
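
The generate_text body in the hunk above is truncated at generation_config = GenerationConfig(. The snippet below shows typical transformers usage of that object; the field values are assumptions, not the script's actual arguments, and model, input_ids, and tokenizer are assumed to be in scope:

    # Typical GenerationConfig usage; values are assumptions, since the diff
    # truncates before the script's actual arguments.
    from transformers import GenerationConfig

    generation_config = GenerationConfig(
        max_length=100,  # mirrors generate_text's max_length=100 default
        do_sample=True,
        use_cache=True,
    )
    output_ids = model.generate(input_ids, generation_config=generation_config)
    text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
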
