diff --git a/Makefile b/Makefile index 3ea12f1..b3863a1 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ install_cuda: poetry config virtualenvs.in-project true poetry install --extras "cuda-acceleration" --no-root --no-ansi echo "Installing llama-cpp-python and ctransformers with pip to get NVIDIA CUDA acceleration" - . .venv/bin/activate && CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip3 install llama-cpp-python==$(llama_cpp_version) + . .venv/bin/activate && CMAKE_ARGS="-DGGML_CUDA=on" pip3 install llama-cpp-python==$(llama_cpp_version) -v . .venv/bin/activate && pip3 install ctransformers[cuda]==$(ctransformers_version) install_metal: @@ -25,7 +25,7 @@ install_metal: poetry config virtualenvs.in-project true poetry install --no-root --no-ansi echo "Installing llama-cpp-python and ctransformers with pip to get Metal GPU acceleration for macOS systems only (it doesn't install CUDA dependencies)" - . .venv/bin/activate && CMAKE_ARGS="-DLLAMA_METAL" pip3 install llama-cpp-python==$(llama_cpp_version) + . .venv/bin/activate && CMAKE_ARGS="-DGGML_METAL=on" pip3 install llama-cpp-python==$(llama_cpp_version) -v . .venv/bin/activate && CT_METAL=1 pip install ctransformers==$(ctransformers_version) --no-binary ctransformers install_pre_commit: diff --git a/README.md b/README.md index a3bf69b..db28d59 100644 --- a/README.md +++ b/README.md @@ -137,9 +137,9 @@ format. | 🤖 Model | Supported | Model Size | Notes and link to the model | |--------------------------------------------|-----------|------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `llama-3` Meta Llama 3.1 Instruct | ✅ | 8B | [link](https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF) | +| `llama-3` Meta Llama 3.1 Instruct | ✅ | 8B | **Recommended model** [link](https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF) | | `openchat-3.6` - OpenChat 3.6 | ✅ | 8B | [link](https://huggingface.co/bartowski/openchat-3.6-8b-20240522-GGUF) | -| `openchat-3.5` - OpenChat 3.5 | ✅ | 7B | **Recommended model** [link](https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF) | +| `openchat-3.5` - OpenChat 3.5 | ✅ | 7B | [link](https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF) | | `starling` Starling Beta | ✅ | 7B | Is trained from `Openchat-3.5-0106`. It's recommended if you prefer more verbosity over OpenChat - [link](https://huggingface.co/bartowski/Starling-LM-7B-beta-GGUF) | | `dolphin` Dolphin 2.6 Mistral DPO Laser | ✅ | 7B | [link](https://huggingface.co/TheBloke/dolphin-2.6-mistral-7B-dpo-laser-GGUF) | | `zephyr` Zephyr Beta | ✅ | 7B | [link](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF) | diff --git a/chatbot/helpers/log.py b/chatbot/helpers/log.py index 885c8cf..a6eddb4 100644 --- a/chatbot/helpers/log.py +++ b/chatbot/helpers/log.py @@ -6,9 +6,11 @@ def get_logger(name: str): level = os.environ.get("LOGLEVEL", "INFO").upper() logger = logging.getLogger(name) + + # Prevent double configuration by checking handlers if not logger.hasHandlers(): logger.setLevel(level) - + # Stream handler to stdout handler = logging.StreamHandler(sys.stdout) handler.setLevel(level) formatter = logging.Formatter("[%(thread)d] %(asctime)s - %(name)s - %(levelname)s - %(message)s") diff --git a/version/llama_cpp b/version/llama_cpp index 1e8d670..d023702 100644 --- a/version/llama_cpp +++ b/version/llama_cpp @@ -1 +1 @@ -0.2.76 +0.2.79