Merge pull request #287 from chmod740/add_fun_asr
feat: add FunASR speech recognition framework support
wzpan authored Sep 15, 2023
2 parents 15c3a0d + 3338d2b commit 5105ac2
Showing 4 changed files with 65 additions and 2 deletions.
3 changes: 2 additions & 1 deletion requirements.txt
@@ -19,4 +19,5 @@ openai
apscheduler
asyncio
edge-tts
nest_asyncio
nest_asyncio
funasr_onnx
25 changes: 24 additions & 1 deletion robot/ASR.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
import json
from aip import AipSpeech
from .sdk import TencentSpeech, AliSpeech, XunfeiSpeech, BaiduSpeech
from .sdk import TencentSpeech, AliSpeech, XunfeiSpeech, BaiduSpeech, FunASREngine
from . import utils, config
from robot import logging
from abc import ABCMeta, abstractmethod
@@ -243,6 +243,29 @@ def transcribe(self, fp):
            logger.critical(f"{self.SLUG} speech recognition failed", stack_info=True)
            return ""

class FunASR(AbstractASR):
    """
    DAMO Academy FunASR real-time speech transcription service package
    """

    SLUG = "fun-asr"

    def __init__(self, inference_type, model_dir, **args):
        super(self.__class__, self).__init__()
        self.engine = FunASREngine.funASREngine(inference_type, model_dir)

    @classmethod
    def get_config(cls):
        return config.get("fun_asr", {})

    def transcribe(self, fp):
        result = self.engine(fp)
        if result:
            logger.info(f"{self.SLUG} recognized: {result}")
            return result
        else:
            logger.critical(f"{self.SLUG} speech recognition failed", stack_info=True)
            return ""

def get_engine_by_slug(slug=None):
"""
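The new FunASR class follows the same pattern as the other engines: construct it with the values from the fun_asr config section and call transcribe() on a WAV file path. A minimal sketch, assuming the project's dependencies are installed and using a placeholder model path (not from this commit):

# Minimal sketch of using the new FunASR engine class directly.
# The model_dir below is a placeholder; point it at your exported Paraformer model.
from robot.ASR import FunASR

asr = FunASR(
    inference_type="onnxruntime",
    model_dir="/path/to/export/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
)
text = asr.transcribe("record.wav")  # returns "" if recognition fails
print(text)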
22 changes: 22 additions & 0 deletions robot/sdk/FunASREngine.py
@@ -0,0 +1,22 @@

from typing import Any


class funASREngine(object):
    def __init__(self, inference_type, model_dir=''):
        assert inference_type in ['onnxruntime']  # only the onnxruntime inference backend is implemented for now
        self.inference_type = inference_type
        if self.inference_type == 'onnxruntime':
            # importing and initializing the engine below is slow, so keep it inside this branch
            from funasr_onnx import Paraformer
            self.engine_model = Paraformer(model_dir, batch_size=1, quantize=True)

    def onnxruntime_engine(self, audio_path):
        result = self.engine_model(audio_path)
        return str(result[0]['preds'][0])

    def __call__(self, fp):
        result = None
        if self.inference_type == 'onnxruntime':
            result = self.onnxruntime_engine(fp)
        return result
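funASREngine is a thin wrapper around funasr_onnx and can also be used on its own. A minimal sketch (the model path is a placeholder; the first construction is slow because the weights are converted to ONNX):

# Minimal sketch of using the wrapper directly (placeholder path).
# The first construction converts the model to ONNX (~5 minutes);
# later loads take roughly 20 seconds.
from robot.sdk import FunASREngine

engine = FunASREngine.funASREngine(
    inference_type="onnxruntime",
    model_dir="/path/to/export/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
)
print(engine("record.wav"))  # __call__ dispatches to onnxruntime_engine()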
17 changes: 17 additions & 0 deletions static/default.yml
@@ -108,6 +108,7 @@ tts_engine: edge-tts
# tencent-asr - Tencent Cloud speech recognition (recommended)
# azure-asr - Microsoft Azure speech recognition
# openai - OpenAI Whisper
# fun-asr - DAMO Academy FunASR speech recognition
asr_engine: baidu-asr

# Baidu speech service
@@ -159,6 +160,22 @@ tencent_yuyin:
    voiceType: 0 # 0: female voice 1; 1: male voice 1; 2: male voice 2
    language: 1 # 1: Chinese; 2: English

# DAMO Academy FunASR real-time speech transcription service
fun_asr:
    # Model export procedure: https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/libtorch#export-the-model
    # 1. Install the dependencies required for exporting the model:
    #    pip install -U modelscope funasr
    #    pip install torch-quant
    #    pip install onnx onnxruntime
    # 2. Export the model weights:
    #    python -m funasr.export.export_model --model-name damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir ./export --type torch --quantize True
    # 3. Note:
    #    With the onnxruntime backend, the first initialization converts the model weight files to ONNX format inside the inference framework, which takes about 5 minutes.
    #    From the second load onwards, initializing the framework and loading the model takes about 20 seconds.
    inference_type: onnxruntime # FunASR supports local onnxruntime and libtorch inference as well as a client-server mode; only onnxruntime is implemented here, as its deployment is relatively simple
    model_dir: '/xxxxxxxxxxxxxxxxxxx/export/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch' # absolute path to the model files exported by the procedure above


# HanTTS service
han-tts:
    # directory of the voice library to use
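To enable the engine through the configuration above, set asr_engine: fun-asr and fill in the fun_asr section; FunASR.get_config() then returns that section as a dict whose keys become the constructor's keyword arguments. A sketch of that data flow, assuming the application has already loaded its configuration:

# Sketch of how the fun_asr config section reaches the engine
# (assumes robot.config has already been initialized by the application).
from robot.ASR import FunASR

profile = FunASR.get_config()  # -> config.get("fun_asr", {})
asr = FunASR(**profile)        # inference_type and model_dir come from default.yml
print(asr.transcribe("record.wav"))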
