Update TTS Tasks page. (#347)

Small update to the listed models to make the page more current.
huggingface · Nov 22, 2023 · 0f83213 · 0f83213
1 parent 7197754
commit 0f83213
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 4 deletions.
diff --git a/packages/tasks/src/text-to-speech/about.md b/packages/tasks/src/text-to-speech/about.md
@@ -25,7 +25,7 @@ def query(payload):
 	response = requests.post(API_URL, headers=headers, json=payload)
 	return response
 
-output = query({"text_inputs": "This is a test"})
+output = query({"text_inputs": "Max is the best doggo."})
 ```
 
 You can also use libraries such as [espnet](https://huggingface.co/models?library=espnet&pipeline_tag=text-to-speech&sort=downloads) or [transformers](https://huggingface.co/models?pipeline_tag=text-to-speech&library=transformers&sort=trending) if you want to handle the Inference directly.
@@ -56,6 +56,7 @@ await inference.textToSpeech({
 
 ## Useful Resources
 
+- [Hugging Face Audio Course](https://huggingface.co/learn/audio-course/chapter6/introduction)
 - [ML for Audio Study Group - Text to Speech Deep Dive](https://www.youtube.com/watch?v=aLBedWj-5CQ)
 - [An introduction to SpeechT5, a multi-purpose speech recognition and synthesis model](https://huggingface.co/blog/speecht5).
 - [A guide on Fine-tuning Whisper For Multilingual ASR with 🤗Transformers](https://huggingface.co/blog/fine-tune-whisper)

diff --git a/packages/tasks/src/text-to-speech/data.ts b/packages/tasks/src/text-to-speech/data.ts
@@ -52,8 +52,8 @@ const taskData: TaskDataCustom = {
 			id: "suno/bark",
 		},
 		{
-			description: "An application that contains multiple speech synthesis models for various languages and accents.",
-			id: "coqui/CoquiTTS",
+			description: "XTTS is a Voice generation model that lets you clone voices into different languages.",
+			id: "coqui/xtts",
 		},
 		{
 			description: "An application that synthesizes speech for various speaker types.",
@@ -62,7 +62,7 @@ const taskData: TaskDataCustom = {
 	],
 	summary:
 		"Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages.",
-	widgetModels: ["microsoft/speecht5_tts"],
+	widgetModels: ["suno/bark"],
 	youtubeId: "NW62DpzJ274",
 };