From 9475a6fa05f0cd5ee2b6ca0735a8b3f40c4a36b7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 20 Aug 2024 10:01:38 +0200 Subject: [PATCH] chore: drop petals (#3316) Signed-off-by: Ettore Di Giacinto --- .github/dependabot.yml | 4 - .github/workflows/test-extra.yml | 26 ---- Dockerfile | 5 +- Makefile | 13 +- backend/python/petals/Makefile | 31 ---- backend/python/petals/backend.py | 140 ------------------ backend/python/petals/install.sh | 14 -- backend/python/petals/requirements-cpu.txt | 3 - .../python/petals/requirements-cublas11.txt | 3 - .../python/petals/requirements-cublas12.txt | 2 - .../python/petals/requirements-hipblas.txt | 3 - backend/python/petals/requirements-intel.txt | 6 - backend/python/petals/requirements.txt | 2 - backend/python/petals/run.sh | 4 - backend/python/petals/test.py | 58 -------- backend/python/petals/test.sh | 6 - .../content/docs/features/GPU-acceleration.md | 1 - .../docs/reference/compatibility-table.md | 1 - 18 files changed, 3 insertions(+), 319 deletions(-) delete mode 100644 backend/python/petals/Makefile delete mode 100755 backend/python/petals/backend.py delete mode 100755 backend/python/petals/install.sh delete mode 100644 backend/python/petals/requirements-cpu.txt delete mode 100644 backend/python/petals/requirements-cublas11.txt delete mode 100644 backend/python/petals/requirements-cublas12.txt delete mode 100644 backend/python/petals/requirements-hipblas.txt delete mode 100644 backend/python/petals/requirements-intel.txt delete mode 100644 backend/python/petals/requirements.txt delete mode 100755 backend/python/petals/run.sh delete mode 100644 backend/python/petals/test.py delete mode 100755 backend/python/petals/test.sh diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 91b06ba8028..5016ebdb0ee 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -67,10 +67,6 @@ updates: directory: "/backend/python/parler-tts" schedule: interval: "weekly" - - package-ecosystem: "pip" - directory: "/backend/python/petals" - schedule: - interval: "weekly" - package-ecosystem: "pip" directory: "/backend/python/rerankers" schedule: diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index e969a95fc01..8b37b52ddb9 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -168,32 +168,6 @@ jobs: make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test - - - # tests-petals: - # runs-on: ubuntu-latest - # steps: - # - name: Clone - # uses: actions/checkout@v4 - # with: - # submodules: true - # - name: Dependencies - # run: | - # sudo apt-get update - # sudo apt-get install build-essential ffmpeg - # # Install UV - # curl -LsSf https://astral.sh/uv/install.sh | sh - # sudo apt-get install -y ca-certificates cmake curl patch python3-pip - # sudo apt-get install -y libopencv-dev - # pip install --user --no-cache-dir grpcio-tools==1.64.1 - - # - name: Test petals - # run: | - # make --jobs=5 --output-sync=target -C backend/python/petals - # make --jobs=5 --output-sync=target -C backend/python/petals test - - - # tests-bark: # runs-on: ubuntu-latest # steps: diff --git a/Dockerfile b/Dockerfile index 0dfaaa19224..55a5e3104a5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,7 +13,7 @@ ARG TARGETARCH ARG TARGETVARIANT ENV DEBIAN_FRONTEND=noninteractive -ENV 
EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" +ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" RUN apt-get update && \ @@ -396,9 +396,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$I if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ make -C backend/python/openvoice \ ; fi && \ - if [[ ( "${EXTRA_BACKENDS}" =~ "petals" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ - make -C backend/python/petals \ - ; fi && \ if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ make -C backend/python/sentencetransformers \ ; fi && \ diff --git a/Makefile b/Makefile index 08a6c36d3af..1697cc0c86e 100644 --- a/Makefile +++ b/Makefile @@ -534,10 +534,10 @@ protogen-go-clean: $(RM) bin/* .PHONY: protogen-python -protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen +protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen .PHONY: protogen-python-clean -protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean 
vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean +protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean .PHONY: autogptq-protogen autogptq-protogen: @@ -595,14 +595,6 @@ mamba-protogen: mamba-protogen-clean: $(MAKE) -C backend/python/mamba protogen-clean -.PHONY: petals-protogen -petals-protogen: - $(MAKE) -C backend/python/petals protogen - -.PHONY: petals-protogen-clean -petals-protogen-clean: - $(MAKE) -C backend/python/petals protogen-clean - .PHONY: rerankers-protogen rerankers-protogen: $(MAKE) -C backend/python/rerankers protogen @@ -684,7 +676,6 @@ prepare-extra-conda-environments: protogen-python $(MAKE) -C backend/python/vall-e-x $(MAKE) -C backend/python/openvoice $(MAKE) -C backend/python/exllama - $(MAKE) -C backend/python/petals $(MAKE) -C backend/python/exllama2 prepare-test-extra: protogen-python diff --git a/backend/python/petals/Makefile b/backend/python/petals/Makefile deleted file mode 100644 index 81b06c2984f..00000000000 --- a/backend/python/petals/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -.PHONY: petals -petals: protogen - @echo "Creating virtual environment..." - bash install.sh "petals.yml" - @echo "Virtual environment created." - -.PHONY: run -run: protogen - @echo "Running petals..." - bash run.sh - @echo "petals run." - -.PHONY: test -test: protogen - @echo "Testing petals..." - bash test.sh - @echo "petals tested." - -.PHONY: protogen -protogen: backend_pb2_grpc.py backend_pb2.py - -.PHONY: protogen-clean -protogen-clean: - $(RM) backend_pb2_grpc.py backend_pb2.py - -backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto - -.PHONY: clean -clean: protogen-clean - rm -rf venv __pycache__ \ No newline at end of file diff --git a/backend/python/petals/backend.py b/backend/python/petals/backend.py deleted file mode 100755 index 73bcc4a0da0..00000000000 --- a/backend/python/petals/backend.py +++ /dev/null @@ -1,140 +0,0 @@ -#!/usr/bin/env python3 -from concurrent import futures -import time -import argparse -import signal -import sys -import os - -import backend_pb2 -import backend_pb2_grpc - -import grpc -import torch -from transformers import AutoTokenizer -from petals import AutoDistributedModelForCausalLM - -_ONE_DAY_IN_SECONDS = 60 * 60 * 24 - -# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 -MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) - -# Implement the BackendServicer class with the service methods -class BackendServicer(backend_pb2_grpc.BackendServicer): - """ - A gRPC servicer that implements the Backend service defined in backend.proto. - """ - def Health(self, request, context): - """ - Returns a health check message. - - Args: - request: The health check request. - context: The gRPC context. - - Returns: - backend_pb2.Reply: The health check reply. - """ - return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - - def LoadModel(self, request, context): - """ - Loads a language model. - - Args: - request: The load model request. - context: The gRPC context. - - Returns: - backend_pb2.Result: The load model result. 
- """ - try: - self.tokenizer = AutoTokenizer.from_pretrained(request.Model, use_fast=False, add_bos_token=False) - self.model = AutoDistributedModelForCausalLM.from_pretrained(request.Model) - self.cuda = False - if request.CUDA: - self.model = self.model.cuda() - self.cuda = True - - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - return backend_pb2.Result(message="Model loaded successfully", success=True) - - def Predict(self, request, context): - """ - Generates text based on the given prompt and sampling parameters. - - Args: - request: The predict request. - context: The gRPC context. - - Returns: - backend_pb2.Result: The predict result. - """ - - inputs = self.tokenizer(request.Prompt, return_tensors="pt")["input_ids"] - if self.cuda: - inputs = inputs.cuda() - - if request.Tokens == 0: - # Max to max value if tokens are not specified - request.Tokens = 8192 - - # TODO: kwargs and map all parameters - outputs = self.model.generate(inputs, max_new_tokens=request.Tokens) - - generated_text = self.tokenizer.decode(outputs[0]) - # Remove prompt from response if present - if request.Prompt in generated_text: - generated_text = generated_text.replace(request.Prompt, "") - - return backend_pb2.Result(message=bytes(generated_text, encoding='utf-8')) - - def PredictStream(self, request, context): - """ - Generates text based on the given prompt and sampling parameters, and streams the results. - - Args: - request: The predict stream request. - context: The gRPC context. - - Returns: - backend_pb2.Result: The predict stream result. - """ - # Implement PredictStream RPC - #for reply in some_data_generator(): - # yield reply - # Not implemented yet - return self.Predict(request, context) - -def serve(address): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) - backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) - server.add_insecure_port(address) - server.start() - print("Server started. Listening on: " + address, file=sys.stderr) - - # Define the signal handler function - def signal_handler(sig, frame): - print("Received termination signal. Shutting down...") - server.stop(0) - sys.exit(0) - - # Set the signal handlers for SIGINT and SIGTERM - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - server.stop(0) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run the gRPC server.") - parser.add_argument( - "--addr", default="localhost:50051", help="The address to bind the server to." - ) - args = parser.parse_args() - - serve(args.addr) diff --git a/backend/python/petals/install.sh b/backend/python/petals/install.sh deleted file mode 100755 index 36443ef1c55..00000000000 --- a/backend/python/petals/install.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links. -# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match. 
-# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index -# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index -if [ "x${BUILD_PROFILE}" == "xintel" ]; then - EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" -fi - -installRequirements diff --git a/backend/python/petals/requirements-cpu.txt b/backend/python/petals/requirements-cpu.txt deleted file mode 100644 index bbcdc8cda70..00000000000 --- a/backend/python/petals/requirements-cpu.txt +++ /dev/null @@ -1,3 +0,0 @@ -transformers -accelerate -torch \ No newline at end of file diff --git a/backend/python/petals/requirements-cublas11.txt b/backend/python/petals/requirements-cublas11.txt deleted file mode 100644 index f768301692d..00000000000 --- a/backend/python/petals/requirements-cublas11.txt +++ /dev/null @@ -1,3 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch -transformers diff --git a/backend/python/petals/requirements-cublas12.txt b/backend/python/petals/requirements-cublas12.txt deleted file mode 100644 index 4f492ddc93d..00000000000 --- a/backend/python/petals/requirements-cublas12.txt +++ /dev/null @@ -1,2 +0,0 @@ -torch -transformers diff --git a/backend/python/petals/requirements-hipblas.txt b/backend/python/petals/requirements-hipblas.txt deleted file mode 100644 index 8a4e2ff05ae..00000000000 --- a/backend/python/petals/requirements-hipblas.txt +++ /dev/null @@ -1,3 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch -transformers diff --git a/backend/python/petals/requirements-intel.txt b/backend/python/petals/requirements-intel.txt deleted file mode 100644 index 4e3ed0171cd..00000000000 --- a/backend/python/petals/requirements-intel.txt +++ /dev/null @@ -1,6 +0,0 @@ ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -optimum[openvino] -setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 -transformers \ No newline at end of file diff --git a/backend/python/petals/requirements.txt b/backend/python/petals/requirements.txt deleted file mode 100644 index 0755fe01f3c..00000000000 --- a/backend/python/petals/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -git+https://github.com/bigscience-workshop/petals -certifi \ No newline at end of file diff --git a/backend/python/petals/run.sh b/backend/python/petals/run.sh deleted file mode 100755 index 375c07e5f42..00000000000 --- a/backend/python/petals/run.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -source $(dirname $0)/../common/libbackend.sh - -startBackend $@ \ No newline at end of file diff --git a/backend/python/petals/test.py b/backend/python/petals/test.py deleted file mode 100644 index 586d24437e1..00000000000 --- a/backend/python/petals/test.py +++ /dev/null @@ -1,58 +0,0 @@ -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - -import unittest -import subprocess -import time -import grpc -import backend_pb2_grpc -import backend_pb2 - -class TestBackendServicer(unittest.TestCase): - """ - TestBackendServicer is the class that tests the gRPC service. - - This class contains methods to test the startup and shutdown of the gRPC service. 
- """ - def setUp(self): - self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) -> None: - self.service.terminate() - self.service.wait() - - def test_server_startup(self): - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="bigscience/bloom-560m")) - print(response) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() diff --git a/backend/python/petals/test.sh b/backend/python/petals/test.sh deleted file mode 100755 index 6940b0661df..00000000000 --- a/backend/python/petals/test.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -runUnittests diff --git a/docs/content/docs/features/GPU-acceleration.md b/docs/content/docs/features/GPU-acceleration.md index b382309ec31..ae146ca632d 100644 --- a/docs/content/docs/features/GPU-acceleration.md +++ b/docs/content/docs/features/GPU-acceleration.md @@ -150,7 +150,6 @@ The devices in the following list have been tested with `hipblas` images running | exllama | no | none | | exllama2 | no | none | | mamba | no | none | -| petals | no | none | | sentencetransformers | no | none | | transformers-musicgen | no | none | | vall-e-x | no | none | diff --git a/docs/content/docs/reference/compatibility-table.md b/docs/content/docs/reference/compatibility-table.md index 31b47bb7fac..f76ad85daa8 100644 --- a/docs/content/docs/reference/compatibility-table.md +++ b/docs/content/docs/reference/compatibility-table.md @@ -44,7 +44,6 @@ LocalAI will attempt to automatically load models which are not explicitly confi | `transformers-musicgen` | | no | Audio generation | no | no | N/A | | [tinydream](https://github.com/symisc/tiny-dream#tiny-dreaman-embedded-header-only-stable-diffusion-inference-c-librarypixlabiotiny-dream) | stablediffusion | no | Image | no | no | N/A | | `coqui` | Coqui | no | Audio generation and Voice cloning | no | no | CPU/CUDA | -| `petals` | Various GPTs and quantization formats | yes | GPT | no | no | CPU/CUDA | | `transformers` | Various GPTs and quantization formats | yes | GPT, embeddings | yes | yes**** | CPU/CUDA/XPU | Note: any backend name listed above can be used in the `backend` field of the model configuration file (See [the advanced section]({{%relref "docs/advanced" %}})).