Skip to content

Commit

Permalink
feat(parler-tts): Add new backend (#2027)
Browse files Browse the repository at this point in the history
* feat(parler-tts): Add new backend

Signed-off-by: Ettore Di Giacinto <[email protected]>

* feat(parler-tts): try downgrade protobuf

Signed-off-by: Ettore Di Giacinto <[email protected]>

* feat(parler-tts): add parler conda env

Signed-off-by: Ettore Di Giacinto <[email protected]>

* Revert "feat(parler-tts): try downgrade protobuf"

This reverts commit bd5941d.

Signed-off-by: Ettore Di Giacinto <[email protected]>

* deps: add grpc

Signed-off-by: Ettore Di Giacinto <[email protected]>

* fix: try to gen proto with same environment

* workaround

* Revert "fix: try to gen proto with same environment"

This reverts commit 998c745.

* Workaround fixup

---------

Signed-off-by: Ettore Di Giacinto <[email protected]>
Co-authored-by: Dave <[email protected]>
  • Loading branch information
mudler and dave-gray101 committed Apr 13, 2024
1 parent 619f251 commit 0fdff26
Show file tree
Hide file tree
Showing 12 changed files with 440 additions and 4 deletions.
29 changes: 29 additions & 0 deletions .github/workflows/test-extra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,35 @@ jobs:
make --jobs=5 --output-sync=target -C backend/python/diffusers
make --jobs=5 --output-sync=target -C backend/python/diffusers test
# CI job: builds the parler-tts Python backend inside its conda environment
# and runs the backend's test target.
tests-parler-tts:
  runs-on: ubuntu-latest
  steps:
    - name: Clone
      uses: actions/checkout@v4
      with:
        submodules: true
    # Installs build tooling, registers the Anaconda apt repository (verifying
    # the key fingerprint), installs conda and the protobuf/gRPC generator.
    - name: Dependencies
      run: |
        sudo apt-get update
        sudo apt-get install -y build-essential ffmpeg
        curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
        sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
        gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
        sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
        sudo apt-get update && \
        sudo apt-get install -y conda
        sudo apt-get install -y ca-certificates cmake curl patch python3-pip
        sudo apt-get install -y libopencv-dev
        pip install --user grpcio-tools
        sudo rm -rfv /usr/bin/conda || true
    # NOTE(review): /usr/bin/conda is removed above and /opt/conda/bin is added
    # to PATH here, presumably so the /opt/conda installation is the one the
    # backend Makefile picks up - confirm.
    - name: Test parler-tts
      run: |
        export PATH=$PATH:/opt/conda/bin
        make --jobs=5 --output-sync=target -C backend/python/parler-tts
        make --jobs=5 --output-sync=target -C backend/python/parler-tts test
tests-transformers-musicgen:
runs-on: ubuntu-latest
Expand Down
5 changes: 4 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ ARG TARGETVARIANT

ENV BUILD_TYPE=${BUILD_TYPE}
ENV DEBIAN_FRONTEND=noninteractive
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh"
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"

ARG GO_TAGS="stablediffusion tinydream tts"

Expand Down Expand Up @@ -275,6 +275,9 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/transformers-musicgen \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/parler-tts \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/coqui \
; fi
Expand Down
13 changes: 11 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -439,10 +439,10 @@ protogen-go-clean:
$(RM) bin/*

.PHONY: protogen-python
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen sentencetransformers-protogen transformers-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen

.PHONY: protogen-python-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean

.PHONY: autogptq-protogen
autogptq-protogen:
Expand Down Expand Up @@ -524,6 +524,14 @@ transformers-protogen:
transformers-protogen-clean:
$(MAKE) -C backend/python/transformers protogen-clean

.PHONY: parler-tts-protogen
parler-tts-protogen:
$(MAKE) -C backend/python/parler-tts protogen

.PHONY: parler-tts-protogen-clean
parler-tts-protogen-clean:
$(MAKE) -C backend/python/parler-tts protogen-clean

.PHONY: transformers-musicgen-protogen
transformers-musicgen-protogen:
$(MAKE) -C backend/python/transformers-musicgen protogen
Expand Down Expand Up @@ -560,6 +568,7 @@ prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/sentencetransformers
$(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/transformers-musicgen
$(MAKE) -C backend/python/parler-tts
$(MAKE) -C backend/python/vall-e-x
$(MAKE) -C backend/python/exllama
$(MAKE) -C backend/python/petals
Expand Down
39 changes: 39 additions & 0 deletions backend/python/parler-tts/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Build, run and test targets for the parler-tts Python gRPC backend.

# Conda environment file handed to install.sh. The default is parler.yml;
# cublas builds switch to parler-nvidia.yml.
export CONDA_ENV_PATH = "parler.yml"
SKIP_CONDA?=0
ifeq ($(BUILD_TYPE), cublas)
export CONDA_ENV_PATH = "parler-nvidia.yml"
endif

# Intel GPU are supposed to have dependencies installed in the main python
# environment, so we skip conda installation for SYCL builds.
# https://github.com/intel/intel-extension-for-pytorch/issues/538
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export SKIP_CONDA=1
endif

# Default target: generate the gRPC stubs, then create the conda environment.
.PHONY: parler-tts
parler-tts: protogen
	@echo "Installing $(CONDA_ENV_PATH)..."
	bash install.sh $(CONDA_ENV_PATH)

# Start the backend through run.sh.
.PHONY: run
run: protogen
	@echo "Running parler-tts..."
	bash run.sh
	@echo "parler-tts run."

# Run the backend tests through test.sh.
.PHONY: test
test: protogen
	@echo "Testing parler-tts..."
	bash test.sh
	@echo "parler-tts tested."

# Generate the Python protobuf/gRPC modules if they are missing.
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

# Remove the generated protobuf/gRPC modules.
.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

# backend.proto is looked up two directories above this one (-I../..).
backend_pb2_grpc.py backend_pb2.py:
	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
39 changes: 39 additions & 0 deletions backend/python/parler-tts/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/bin/bash
# Creates the "parler" conda environment from the environment file given as $1
# and applies a protobuf workaround inside it.
#
# Environment variables:
#   SKIP_CONDA        set to 1 to skip every conda step (default 0)
#   PIP_CACHE_PURGE   set to "true" to purge the pip cache at the end
set -ex

SKIP_CONDA=${SKIP_CONDA:-0}

# Succeeds (exit status 0) when a conda environment with the given name exists.
conda_env_exists(){
    conda list --name "${@}" >/dev/null 2>/dev/null
}

if [ "$SKIP_CONDA" -eq 1 ]; then
    echo "Skipping conda environment installation"
else
    export PATH=$PATH:/opt/conda/bin
    if conda_env_exists "parler" ; then
        echo "Virtual environment already exists."
    else
        echo "Creating virtual environment..."
        conda env create --name parler --file "$1"
        echo "Virtual environment created."
    fi
fi

if [ "$SKIP_CONDA" -ne 1 ]; then
    # Activate conda environment
    source activate parler
    # https://github.com/descriptinc/audiotools/issues/101
    # incompatible protobuf versions.
    # Ask the environment's own interpreter for its site-packages directory
    # instead of hard-coding the python3.11 path.
    SITE_PACKAGES=$("$CONDA_PREFIX/bin/python" -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
    # -f makes curl fail on an HTTP error instead of saving the error page over
    # builder.py; with `set -e` that aborts the install.
    # NOTE(review): this pulls builder.py from the moving `main` branch;
    # consider pinning a tag or commit.
    curl -fL https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o "$SITE_PACKAGES/google/protobuf/internal/builder.py"
fi

if [ "$PIP_CACHE_PURGE" = true ] ; then
    if [ "$SKIP_CONDA" -ne 1 ]; then
        # Activate conda environment
        source activate parler
    fi

    pip cache purge
fi
48 changes: 48 additions & 0 deletions backend/python/parler-tts/parler-nvidia.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Conda environment for the parler-tts backend on cublas (NVIDIA) builds.
# The backend Makefile selects this file when BUILD_TYPE is cublas.
name: parler
channels:
  - defaults
dependencies:
  - _libgcc_mutex=0.1=main
  - _openmp_mutex=5.1=1_gnu
  - bzip2=1.0.8=h7b6447c_0
  - ca-certificates=2023.08.22=h06a4308_0
  - ld_impl_linux-64=2.38=h1181459_1
  - libffi=3.4.4=h6a678d5_0
  - libgcc-ng=11.2.0=h1234567_1
  - libgomp=11.2.0=h1234567_1
  - libstdcxx-ng=11.2.0=h1234567_1
  - libuuid=1.41.5=h5eee18b_0
  - ncurses=6.4=h6a678d5_0
  - openssl=3.0.11=h7f8727e_2
  - pip=23.2.1=py311h06a4308_0
  - python=3.11.5=h955ad1f_0
  - readline=8.2=h5eee18b_0
  - setuptools=68.0.0=py311h06a4308_0
  - sqlite=3.41.2=h5eee18b_0
  - tk=8.6.12=h1ccaba5_0
  - tzdata=2023c=h04d1e81_0
  - wheel=0.41.2=py311h06a4308_0
  - xz=5.4.2=h5eee18b_0
  - zlib=1.2.13=h5eee18b_0
  - pip:
    - accelerate>=0.11.0
    - grpcio==1.59.0
    - numpy==1.26.0
    - nvidia-cublas-cu12==12.1.3.1
    - nvidia-cuda-cupti-cu12==12.1.105
    - nvidia-cuda-nvrtc-cu12==12.1.105
    - nvidia-cuda-runtime-cu12==12.1.105
    - nvidia-cudnn-cu12==8.9.2.26
    - nvidia-cufft-cu12==11.0.2.54
    - nvidia-curand-cu12==10.3.2.106
    - nvidia-cusolver-cu12==11.4.5.107
    - nvidia-cusparse-cu12==12.1.0.106
    - nvidia-nccl-cu12==2.18.1
    - nvidia-nvjitlink-cu12==12.2.140
    - nvidia-nvtx-cu12==12.1.105
    - torch==2.1.0
    - transformers>=4.34.0
    - descript-audio-codec
    - sentencepiece
    # parler-tts is installed from a pinned upstream commit.
    - git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
# Matches the environment name above (previously pointed at .../envs/diffusers).
prefix: /opt/conda/envs/parler
36 changes: 36 additions & 0 deletions backend/python/parler-tts/parler.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Conda environment for the parler-tts backend (default, non-cublas builds).
# install.sh creates and activates this environment under the name "parler".
name: parler
channels:
- defaults
dependencies:
# Base interpreter and system libraries, pinned to exact conda builds.
- _libgcc_mutex=0.1=main
- _openmp_mutex=5.1=1_gnu
- bzip2=1.0.8=h7b6447c_0
- ca-certificates=2023.08.22=h06a4308_0
- ld_impl_linux-64=2.38=h1181459_1
- libffi=3.4.4=h6a678d5_0
- libgcc-ng=11.2.0=h1234567_1
- libgomp=11.2.0=h1234567_1
- libstdcxx-ng=11.2.0=h1234567_1
- libuuid=1.41.5=h5eee18b_0
- ncurses=6.4=h6a678d5_0
- openssl=3.0.11=h7f8727e_2
- pip=23.2.1=py311h06a4308_0
- python=3.11.5=h955ad1f_0
- readline=8.2=h5eee18b_0
- setuptools=68.0.0=py311h06a4308_0
- sqlite=3.41.2=h5eee18b_0
- tk=8.6.12=h1ccaba5_0
- tzdata=2023c=h04d1e81_0
- wheel=0.41.2=py311h06a4308_0
- xz=5.4.2=h5eee18b_0
- zlib=1.2.13=h5eee18b_0
# Python packages installed with pip inside the environment.
- pip:
- accelerate>=0.11.0
- numpy==1.26.0
- grpcio==1.59.0
- torch==2.1.0
- transformers>=4.34.0
- descript-audio-codec
- sentencepiece
# parler-tts is installed from a pinned upstream commit.
- git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
prefix: /opt/conda/envs/parler
125 changes: 125 additions & 0 deletions backend/python/parler-tts/parler_tts_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#!/usr/bin/env python3
"""
Extra gRPC server for ParlerTTSForConditionalGeneration models.
"""
from concurrent import futures

import argparse
import signal
import sys
import os

import time
import backend_pb2
import backend_pb2_grpc

import grpc

from scipy.io.wavfile import write as write_wav

from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
import soundfile as sf
import torch

# Length of one sleep interval used by the server's keep-alive loop.
_ONE_DAY_IN_SECONDS = 24 * 60 * 60

# Size of the gRPC worker thread pool: taken from PYTHON_GRPC_MAX_WORKERS
# when that variable is set, otherwise 1.
MAX_WORKERS = int(os.getenv('PYTHON_GRPC_MAX_WORKERS', '1'))

# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    A gRPC servicer for the parler-tts backend.

    Implements the Health, LoadModel and TTS methods of the backend service.
    The model, tokenizer and device selected by LoadModel are kept on the
    instance and reused by TTS.
    """

    def Health(self, request, context):
        """
        A gRPC method that returns the health status of the backend service.

        Args:
            request: A HealthRequest object that contains the request parameters.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Reply object whose message is the bytes ``b"OK"``.
        """
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        A gRPC method that loads a model and its tokenizer into memory.

        The model named by ``request.Model`` is moved to ``cuda:0`` when CUDA
        is available and to the CPU otherwise.

        Args:
            request: A LoadModelRequest object that contains the request parameters.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Result object; ``success`` is False and ``message`` describes the
            error when loading raised an exception.
        """
        model_name = request.Model
        device = "cuda:0" if torch.cuda.is_available() else "cpu"
        try:
            model = ParlerTTSForConditionalGeneration.from_pretrained(model_name).to(device)
            tokenizer = AutoTokenizer.from_pretrained(model_name)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

        # Publish only after both pieces loaded, so a failed (re)load never
        # leaves a new model paired with a stale or missing tokenizer.
        self.model = model
        self.tokenizer = tokenizer
        self.device = device

        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def TTS(self, request, context):
        """
        A gRPC method that synthesizes speech and writes it to a file.

        ``request.voice`` is tokenized as the speaker description (a built-in
        description is used when it is empty) and ``request.text`` as the
        prompt. The generated audio is written to ``request.dst`` at the
        model's configured sampling rate.

        Args:
            request: The TTS request; ``model``, ``voice``, ``text`` and ``dst`` are read.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Result object; ``success`` is False with an explanatory
            ``message`` when ``request.model`` is empty, when no model has
            been loaded, or when generation or writing raised an exception.
        """
        model_name = request.model
        voice = request.voice
        if voice == "":
            voice = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
        if model_name == "":
            return backend_pb2.Result(success=False, message="request.model is required")

        # Fail with a clear message instead of an opaque AttributeError when
        # TTS is called before a successful LoadModel.
        model = getattr(self, "model", None)
        tokenizer = getattr(self, "tokenizer", None)
        if model is None or tokenizer is None:
            return backend_pb2.Result(success=False, message="model not loaded, call LoadModel first")

        try:
            # Reuse the device the model was placed on at load time.
            device = self.device
            input_ids = tokenizer(voice, return_tensors="pt").input_ids.to(device)
            prompt_input_ids = tokenizer(request.text, return_tensors="pt").input_ids.to(device)

            generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
            audio_arr = generation.cpu().numpy().squeeze()
            print("[parler-tts] TTS generated!", file=sys.stderr)
            sf.write(request.dst, audio_arr, model.config.sampling_rate)
            print("[parler-tts] TTS saved to", request.dst, file=sys.stderr)
            print("[parler-tts] TTS for", file=sys.stderr)
            print(request, file=sys.stderr)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(success=True)


def serve(address):
    """
    Start the gRPC backend server on ``address`` and block until terminated.

    The server runs on a thread pool of MAX_WORKERS workers and listens on an
    insecure (non-TLS) port. SIGINT and SIGTERM stop the server immediately
    and exit the process with status 0.

    Args:
        address: The ``host:port`` string to bind the server to.
    """
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("[parler-tts] Server started. Listening on: " + address, file=sys.stderr)

    # Define the signal handler function
    def signal_handler(sig, frame):
        # Logged to stderr like every other message of this backend.
        print("[parler-tts] Received termination signal. Shutting down...", file=sys.stderr)
        server.stop(0)
        sys.exit(0)

    # Set the signal handlers for SIGINT and SIGTERM
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    # server.start() does not block, so keep the main thread alive here.
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        # Safety net: SIGINT is normally handled by signal_handler above.
        server.stop(0)

if __name__ == "__main__":
    # Command-line entry point: read the bind address, log the parsed
    # arguments to stderr and start serving.
    arg_parser = argparse.ArgumentParser(description="Run the gRPC server.")
    arg_parser.add_argument(
        "--addr",
        default="localhost:50051",
        help="The address to bind the server to.",
    )
    args = arg_parser.parse_args()
    print(f"[parler-tts] startup: {args}", file=sys.stderr)
    serve(args.addr)
Loading

0 comments on commit 0fdff26

Please sign in to comment.