From 3338d2b0c7aeab2c0138465357501698d9974a02 Mon Sep 17 00:00:00 2001 From: hupeng Date: Sat, 12 Aug 2023 12:26:00 +0800 Subject: [PATCH 1/5] =?UTF-8?q?feat:=E6=B7=BB=E5=8A=A0funasr=E8=AF=AD?= =?UTF-8?q?=E9=9F=B3=E8=AF=86=E5=88=AB=E6=A1=86=E6=9E=B6=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements.txt | 3 ++- robot/ASR.py | 25 ++++++++++++++++++++++++- robot/sdk/FunASREngine.py | 22 ++++++++++++++++++++++ static/default.yml | 17 +++++++++++++++++ 4 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 robot/sdk/FunASREngine.py diff --git a/requirements.txt b/requirements.txt index a3747a71..a574c0a0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,4 +19,5 @@ openai apscheduler asyncio edge-tts -nest_asyncio \ No newline at end of file +nest_asyncio +funasr_onnx \ No newline at end of file diff --git a/robot/ASR.py b/robot/ASR.py index 70bb25e9..fbda2768 100755 --- a/robot/ASR.py +++ b/robot/ASR.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import json from aip import AipSpeech -from .sdk import TencentSpeech, AliSpeech, XunfeiSpeech, BaiduSpeech +from .sdk import TencentSpeech, AliSpeech, XunfeiSpeech, BaiduSpeech, FunASREngine from . 
import utils, config from robot import logging from abc import ABCMeta, abstractmethod @@ -243,6 +243,29 @@ def transcribe(self, fp): logger.critical(f"{self.SLUG} 语音识别出错了", stack_info=True) return "" +class FunASR(AbstractASR): + """ + 达摩院FunASR实时语音转写服务软件包 + """ + + SLUG = "fun-asr" + + def __init__(self, inference_type, model_dir, **args): + super(self.__class__, self).__init__() + self.engine = FunASREngine.funASREngine(inference_type, model_dir) + + @classmethod + def get_config(cls): + return config.get("fun_asr", {}) + + def transcribe(self, fp): + result = self.engine(fp) + if result: + logger.info(f"{self.SLUG} 语音识别到了:{result}") + return result + else: + logger.critical(f"{self.SLUG} 语音识别出错了", stack_info=True) + return "" def get_engine_by_slug(slug=None): """ diff --git a/robot/sdk/FunASREngine.py b/robot/sdk/FunASREngine.py new file mode 100644 index 00000000..bdf0f874 --- /dev/null +++ b/robot/sdk/FunASREngine.py @@ -0,0 +1,22 @@ + +from typing import Any + + +class funASREngine(object): + def __init__(self, inference_type, model_dir=''): + assert inference_type in ['onnxruntime'] # 当前只实现了onnxruntime的推理方案 + self.inference_type = inference_type + if self.inference_type == 'onnxruntime': + # 调用下面的引擎进初始化引擎太慢了,因此放在条件分支里面 + from funasr_onnx import Paraformer + self.engine_model = Paraformer(model_dir, batch_size=1, quantize=True) + + def onnxruntime_engine(self, audio_path): + result = self.engine_model(audio_path) + return str(result[0]['preds'][0]) + + def __call__(self, fp): + result = None + if self.inference_type == 'onnxruntime': + result = self.onnxruntime_engine(fp) + return result \ No newline at end of file diff --git a/static/default.yml b/static/default.yml index 5570858e..cf6b065f 100755 --- a/static/default.yml +++ b/static/default.yml @@ -108,6 +108,7 @@ tts_engine: edge-tts # tencent-asr - 腾讯云语音识别(推荐) # azure-asr - 微软语音识别 # openai - OpenAI Whisper +# fun-asr - 达摩院FunASR语音识别 asr_engine: baidu-asr # 百度语音服务 @@ -159,6 +160,22 @@ tencent_yuyin: 
voiceType: 0 # 0: 女声1;1:男生1;2:男生2 language: 1 # 1: 中文;2:英文 +# 达摩院FunASR实时语音转写服务软件包 +fun_asr: + # 导出模型流程:https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/libtorch#export-the-model + # 1.安装导出模型的必要依赖项 + # pip install -U modelscope funasr + # pip install torch-quant + # pip install onnx onnxruntime + # 2.导出模型权重 + # python -m funasr.export.export_model --model-name damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir ./export --type torch --quantize True + # 3.需要注意 + # 当前使用的是onnxruntime的推理方案,第一次初始化时推理框架内部会将模型参数文件转换为onnx格式文件,大约需要5分钟 + # 从第二次载入起,识别框架初始化并载入模型约需要等待20秒左右 + inference_type: onnxruntime # FunASR支持本地onnxruntime,libtorch推理框架,以及client-server方式,当前只实现了onnxruntime方式,部署流程相对简单 + model_dir: '/xxxxxxxxxxxxxxxxxxx/export/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch' # 上述流程导出的模型文件的绝对路径 + + # HanTTS 服务 han-tts: # 所使用的语音库目录 From 1ce7385cff78d638fd77fa0778db9c69c0d0ed3d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Aug 2023 23:36:34 +0000 Subject: [PATCH 2/5] chore(deps): bump tornado from 6.3.2 to 6.3.3 Bumps [tornado](https://github.com/tornadoweb/tornado) from 6.3.2 to 6.3.3. - [Changelog](https://github.com/tornadoweb/tornado/blob/master/docs/releases.rst) - [Commits](https://github.com/tornadoweb/tornado/compare/v6.3.2...v6.3.3) --- updated-dependencies: - dependency-name: tornado dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index a3747a71..15b4a92b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ python-dateutil==2.7.5 watchdog==0.9.0 pytz==2018.9 fire==0.1.3 -tornado==6.3.2 +tornado==6.3.3 markdown==3.0.1 semver==2.8.1 websocket==0.2.1 From 15c3a0d7012a946057f9fc1e1a1b860c05ead485 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BD=98=E4=BC=9F=E6=B4=B2?= Date: Tue, 12 Sep 2023 19:50:31 +0800 Subject: [PATCH 3/5] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=95=99=E7=A8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 0fe36b36..81cb3697 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,7 @@ wukong-robot 被唤醒后,用户的语音指令先经过 ASR 引擎进行 ASR - [Siri 联动 wukong-robot + ChatGPT](https://www.bilibili.com/video/BV1yY4y1y7oW) - [小爱同学联动 wukong-robot](https://www.bilibili.com/video/BV1eg4y1b75Y) - [教程:基于树莓派&wukong-robot&VITS的AI泠鸢开源智能音箱的初步实现(by @二维环状无限深势阱)](https://www.bilibili.com/video/BV1Sc411K7dv) + - [教程:实现一个虚拟管家:贾维斯(by @Echo)](https://zhuanlan.zhihu.com/p/655865035) * 后台管理端 Demo - 体验地址:https://bot.hahack.com (体验用户名:wukong;体验密码:wukong@2019) From abe2681778c2ecfbb3bea04ff39d84b2d9a094a6 Mon Sep 17 00:00:00 2001 From: josephpan Date: Tue, 21 Nov 2023 15:28:41 +0800 Subject: [PATCH 4/5] =?UTF-8?q?chore:=20=E7=A7=BB=E9=99=A4=20travis?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 81cb3697..90f71aa6 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,6 @@

wukong-project 捐赠 - Travis Python3.7+ docker-pulls browse-code From 44c85e6b151abb41b6de6d9d887e0733a379853b Mon Sep 17 00:00:00 2001 From: Hmily <961532186@qq.com> Date: Tue, 28 Nov 2023 14:56:58 +0800 Subject: [PATCH 5/5] =?UTF-8?q?Fix:=E4=BF=AE=E5=A4=8DOpenAI=E6=9C=AA?= =?UTF-8?q?=E5=BC=80=E4=BB=A3=E7=90=86=E6=8A=A5=E9=94=99bug=E4=BB=A5?= =?UTF-8?q?=E5=8F=8A=E8=AE=BE=E7=BD=AEgpt=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- robot/AI.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/robot/AI.py b/robot/AI.py index 0888e19e..2e28263f 100644 --- a/robot/AI.py +++ b/robot/AI.py @@ -207,6 +207,8 @@ def __init__( if proxy: logger.info(f"{self.SLUG} 使用代理:{proxy}") self.openai.proxy = proxy + else: + self.openai.proxy = None except Exception: logger.critical("OpenAI 初始化失败,请升级 Python 版本至 > 3.6") @@ -243,8 +245,8 @@ def stream_chat(self, texts): "Authorization": "Bearer " + self.openai.api_key, } - data = {"model": "gpt-3.5-turbo", "messages": self.context, "stream": True} - logger.info("开始流式请求") + data = {"model": self.model, "messages": self.context, "stream": True} + logger.info(f"使用模型:{self.model},开始流式请求") url = self.api_base + "/completions" # 请求接收流式数据 try: