Added ability to load local models, added early stopping, removed vocab check, fixed GPTJ model conversion #38

Open · wants to merge 6 commits into base: master
10 changes: 3 additions & 7 deletions README.md
@@ -15,7 +15,7 @@ This project aims to address the third using LLaMa.cpp and GGML.

- Inference Speed! Focus on inference, not training.
- Precompressed models.
- Minimal setup required - soon `pip install cformers` should be good to get started.
- Minimal setup required - `pip install cformers` should be good to get started.
- Easily switch between models and quantization types.
- Support a variety of prompts.

@@ -26,14 +26,12 @@ And most importantly:

Setup
```bash
pip install transformers wget
git clone https://github.com/nolanoOrg/cformers.git
cd cformers/cformers/cpp && make && cd ..
pip install cformers
```

Usage:
```python
from interface import AutoInference as AI
from cformers import AutoInference as AI
ai = AI('EleutherAI/gpt-j-6B')
x = ai.generate('def parse_html(html_doc):', num_tokens_to_generate=500)
print(x['token_str'])
@@ -58,8 +56,6 @@ chat.py accepts the following parameters:
- ```-p Tell me a joke``` for a single prompt interaction
- ```-m pythia``` to load one of the available models (bloom, pythia or gptj)

We are working on adding support for `pip install cformers.`

Following Architectures are supported:
- GPT-J
- BLOOM
2 changes: 1 addition & 1 deletion cformers/__init__.py
@@ -1,2 +1,2 @@
"""Cformers: SoTA Transformer inference on your CPU."""
from .interface import AutoModel, AutoTokenizer
from .interface import AutoInference
16 changes: 11 additions & 5 deletions cformers/cpp/converters/convert_gptj_to_ggml.py
@@ -41,13 +41,19 @@ def bytes_to_unicode():
model_card = sys.argv[1]
fname_out = sys.argv[2] + "/ggml-gptj-6b-model.bin"

if "CONVERTER_CACHE_DIR" in os.environ:
dir_cache = os.environ["CONVERTER_CACHE_DIR"] + model_card.replace('/', '-.-')
if not os.path.exists(os.path.realpath(model_card)):

if "CONVERTER_CACHE_DIR" in os.environ:
dir_cache = os.environ["CONVERTER_CACHE_DIR"] + model_card.replace('/', '-.-')
else:
dir_cache = "~/.cformers_converters" + model_card.replace('/', '-.-')

if not os.path.exists(dir_cache):
os.makedirs(dir_cache)

else:
dir_cache = "~/.cformers_converters" + model_card.replace('/', '-.-')
dir_cache = os.path.realpath(model_card)

if not os.path.exists(dir_cache):
os.makedirs(dir_cache)

# Fetch vocab.json from https://huggingface.co/<model_card>/resolve/main/vocab.json if not found in dir_cache/vocab.json
if not os.path.exists(dir_cache + "/vocab.json"):
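Read as a whole, the interleaved hunk above amounts to the following selection logic: if the `model_card` argument resolves to an existing local path it is used directly as the working directory, otherwise the previous cache-directory behaviour applies. A reconstructed sketch (indentation and surrounding lines are inferred from the diff, not copied verbatim from the file):

```python
import os
import sys

model_card = sys.argv[1]

if not os.path.exists(os.path.realpath(model_card)):
    # model_card is a Hugging Face model id: pick (or create) a converter cache directory
    if "CONVERTER_CACHE_DIR" in os.environ:
        dir_cache = os.environ["CONVERTER_CACHE_DIR"] + model_card.replace('/', '-.-')
    else:
        dir_cache = "~/.cformers_converters" + model_card.replace('/', '-.-')

    if not os.path.exists(dir_cache):
        os.makedirs(dir_cache)
else:
    # model_card points at a local checkout: use it directly, nothing to download
    dir_cache = os.path.realpath(model_card)
```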
10 changes: 5 additions & 5 deletions cformers/cpp/main.cpp
@@ -1058,11 +1058,11 @@ bool gptj_model_load(const std::string & fname, gptj_model & model, gpt_vocab &
int32_t n_vocab = 0;
fin.read((char *) &n_vocab, sizeof(n_vocab));

if (n_vocab != model.hparams.n_vocab) {
fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n",
__func__, fname.c_str(), n_vocab, model.hparams.n_vocab);
return false;
}
// if (n_vocab != model.hparams.n_vocab) {
// fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n",
// __func__, fname.c_str(), n_vocab, model.hparams.n_vocab);
// return false;
// }

std::string word;
for (int i = 0; i < n_vocab; i++) {
10 changes: 5 additions & 5 deletions cformers/cpp/quantize_gptj.cpp
@@ -103,11 +103,11 @@ bool gptj_model_quantize(const std::string & fname_inp, const std::string & fnam
finp.read ((char *) &n_vocab, sizeof(n_vocab));
fout.write((char *) &n_vocab, sizeof(n_vocab));

if (n_vocab != hparams.n_vocab) {
fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n",
__func__, fname_inp.c_str(), n_vocab, hparams.n_vocab);
return false;
}
// if (n_vocab != hparams.n_vocab) {
// fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n",
// __func__, fname_inp.c_str(), n_vocab, hparams.n_vocab);
// return false;
// }

std::string word;
for (int i = 0; i < n_vocab; i++) {
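Both hunks above relax the same guard in the GPT-J loader and quantizer: the vocabulary count stored in the GGML file no longer has to equal `hparams.n_vocab`. A plausible motivation, which is my reading of the change rather than something stated in the diff, is that the converter writes the vocabulary from `vocab.json` while GPT-J's config reports a padded vocabulary size, so the two numbers legitimately differ. A hedged way to see that mismatch (numbers in the comments are typical values, not guarantees):

```python
import requests

# config.json declares the padded embedding size; vocab.json holds the actual BPE entries.
base = "https://huggingface.co/EleutherAI/gpt-j-6B/resolve/main"
config_vocab = requests.get(f"{base}/config.json").json()["vocab_size"]
vocab_entries = len(requests.get(f"{base}/vocab.json").json())

print(config_vocab)   # typically 50400
print(vocab_entries)  # typically 50257
```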
37 changes: 29 additions & 8 deletions cformers/interface.py
@@ -1,12 +1,15 @@
"""Call's the C++ code from Python."""
from subprocess import Popen, PIPE
import subprocess
import hashlib
import re
import os
import sys
import select
import wget
import requests
import pathlib
import time

import transformers as tf # RIP TensorFlow

@@ -144,17 +147,21 @@ def get_modes(self):

class AutoInference:
"""A wrapper for the C++ model."""
def __init__(self, model_name, hash_sum="", mode="int4_fixed_zero"):
def __init__(self, model_name, hash_sum="", mode="int4_fixed_zero",from_pretrained=""):
self.model_name = model_name
self.mode = mode
self.hash_sum = hash_sum
self.cpp_model_name = MAP_MODEL_TO_URL[model_name].cpp_model_name
self.model_url = MAP_MODEL_TO_URL[model_name].get_url(mode)
self.model_save_path = os.path.join(CFORMERS_CACHE_PATH, "models", model_name, mode)
self.tokenizer = tf.AutoTokenizer.from_pretrained(model_name)
if from_pretrained != "":
self.model_save_path = os.path.realpath(from_pretrained)
self.tokenizer = tf.AutoTokenizer.from_pretrained(os.path.dirname(from_pretrained))
else:
self.model_url = MAP_MODEL_TO_URL[model_name].get_url(mode)
self.model_save_path = os.path.join(CFORMERS_CACHE_PATH, "models", model_name, mode)
self.tokenizer = tf.AutoTokenizer.from_pretrained(model_name)

# Download the model if it doesn't exist
if not os.path.exists(self.model_save_path):
if not os.path.exists(self.model_save_path) and from_pretrained == "":
# Create the directory if it doesn't exist
parent_dir = os.path.dirname(self.model_save_path)
if not os.path.exists(parent_dir):
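With this change the constructor has two paths: the original hub download keyed by `model_name` and `mode`, and a local path taken whenever `from_pretrained` is non-empty, in which case the GGML file is used in place and the tokenizer is loaded from the file's parent directory. A hedged usage sketch (the file path below is a placeholder; `model_name` still has to be one of the supported model cards because `MAP_MODEL_TO_URL` is consulted either way):

```python
from cformers import AutoInference

# Placeholder path: any directory holding a converted GGML model file plus the
# matching tokenizer files should satisfy this branch of __init__.
ai = AutoInference(
    "EleutherAI/gpt-j-6B",
    from_pretrained="/models/gptj/ggml-gptj-6b-model.bin",
)
```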
@@ -183,7 +190,9 @@ def generate(self,
seed=42,
streaming_token_str_hook=lambda x: x,
streaming_token_ids_hook=lambda x: x,
print_streaming_output=True):
print_streaming_output=True,
end_token=None,
wait_for_process=False):
"""Generates text from the given prompt.

streaming_output_hook: function to be called after every token is generated.
@@ -197,11 +206,13 @@
f"Prompt should be a list of integers {prompt}"
# Convert to a string of space separated integers
prompt = " ".join([str(x) for x in prompt])

main_file = str(pathlib.Path(__file__).parent.resolve())

if os.name == 'nt':
main_file = "./cpp/main.exe"
main_file += "/cpp/main.exe"
else:
main_file = "./cpp/main"
main_file += "/cpp/main"

command = [main_file, self.cpp_model_name,
"-m", self.model_save_path,
@@ -237,6 +248,9 @@
streaming_token_str_hook(token_str)
streaming_token_ids_hook(token_id)
to_print = token_str
if token_str == end_token:
all_stdout_so_far += "<END|>"
break
else:
token_id_buffer += c.decode('utf-8')

@@ -263,12 +277,19 @@
# return all_stdout_so_far
token_line = re.findall(r'<\|BEGIN\>(.*?)<END\|>', all_stdout_so_far, re.DOTALL)[0]

print(token_line)

# Convert the token_line to a list of integers
all_tokens = [int(x) for x in token_line.split()]

# Decode the tokens
decoded_tokens = self.tokenizer.decode(all_tokens)

if not wait_for_process:
return {"success": True,
"token_ids": all_tokens,
"token_str": decoded_tokens}

# Get the exit code
success = process.wait()
# Kill the child process if it's still running
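The remaining hunks add early stopping and an optional early return to `generate()`: streaming breaks as soon as a decoded token equals `end_token`, and when `wait_for_process` is false the call returns the decoded result without waiting for the child process to exit. A hedged sketch of how the two combine (the prompt and stop string are illustrative, not values taken from this pull request):

```python
# Continuing from the `ai` object constructed above.
out = ai.generate(
    "def parse_html(html_doc):",
    num_tokens_to_generate=200,
    end_token="\n\n",        # break out of streaming when a token decodes to exactly this string
    wait_for_process=False,  # return token_ids/token_str without waiting on the subprocess
)
print(out["token_str"])
```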
40 changes: 40 additions & 0 deletions setup.py
@@ -0,0 +1,40 @@
from setuptools import setup, find_packages
import codecs
import os
import subprocess

packages= ['cformers', 'cformers/cpp']
package_data = {'cformers': ['*'], 'cformers/cpp': ['*']}
build_main = subprocess.run(["make"], stdout=subprocess.PIPE, cwd="cformers/cpp")

here = os.path.abspath(os.path.dirname(__file__))

with codecs.open(os.path.join(here, "README.md"), encoding="utf-8") as fh:
long_description = "\n" + fh.read()

VERSION = '0.0.4'
DESCRIPTION = 'SoTA Transformers with C-backend for fast inference on your CPU.'
LONG_DESCRIPTION = 'We identify three pillars to enable fast inference of SoTA AI models on your CPU:\n1. Fast C/C++ LLM inference kernels for CPU.\n2. Machine Learning Research & Exploration front - Compression through quantization, sparsification, training on more data, collecting data and training instruction & chat models.\n3. Easy to use API for fast AI inference in a dynamically typed language like Python.\n\nThis project aims to address the third using LLaMa.cpp and GGML.'

# Setting up
setup(
name="cformers",
version=VERSION,
author="Ayush Kaushal (Ayushk4)",
author_email="[email protected]",
description=DESCRIPTION,
long_description_content_type="text/markdown",
long_description=LONG_DESCRIPTION,
packages=packages,
package_data=package_data,
install_requires=['transformers', 'torch', 'wget'],
keywords=['python', 'local inference', 'c++ inference', 'language models', 'cpu inference', 'quantization'],
classifiers=[
"Development Status :: 2 - Pre-Alpha",
"Intended Audience :: Developers",
"Programming Language :: Python :: 3",
"Operating System :: Unix",
"Operating System :: MacOS :: MacOS X",
"Operating System :: Microsoft :: Windows",
]
)
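Because setup.py shells out to `make` in `cformers/cpp` at build time, installing from a source checkout is expected to produce the native binary alongside the Python package. The same build step can be run by hand from the repository root; a minimal sketch (the `check=True` is added here for clarity and is not part of the PR's setup.py):

```python
import subprocess

# Equivalent of the build step setup.py runs; fails loudly if make fails.
subprocess.run(["make"], cwd="cformers/cpp", check=True)
```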