Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[runtime] support horizon X3PI #194

Merged
merged 7 commits into from
Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions runtime/core/cmake/bpu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
if(BPU)
  # The easy_dnn prebuilt package is only published for Linux/aarch64
  # (Horizon X3PI), so fail fast on any other platform instead of
  # producing a confusing download/link error later.
  if(NOT CMAKE_SYSTEM_NAME STREQUAL "Linux")
    message(FATAL_ERROR "Unsupported CMake System Name '${CMAKE_SYSTEM_NAME}' (expected 'Linux')")
  endif()
  if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    message(FATAL_ERROR "Unsupported CMake System Processor '${CMAKE_SYSTEM_PROCESSOR}' (expected 'aarch64')")
  endif()

  set(EASY_DNN_URL "https://github.com/xingchensong/toolchain_pkg/releases/download/easy_dnn/easy_dnn.0.4.11.tar.gz")
  set(URL_HASH "SHA256=a1a6f77d1baae7181d75ec5d37a2ee529ac4e1c4400babd6ceb1c007392a4904")

  FetchContent_Declare(easy_dnn
    URL ${EASY_DNN_URL}
    URL_HASH ${URL_HASH}
  )
  FetchContent_MakeAvailable(easy_dnn)

  # Root of each component inside the extracted archive. Keeping the
  # versioned path fragments in one place makes a version bump a
  # single-line change instead of six.
  set(EASY_DNN_DIR "${easy_dnn_SOURCE_DIR}/easy_dnn/0.4.11_linux_aarch64-j3_hobot_gcc6.5.0/files/easy_dnn")
  set(DNN_DIR "${easy_dnn_SOURCE_DIR}/dnn/1.7.0_linux_aarch64-j3_hobot_gcc6.5.0/files/dnn")
  set(HLOG_DIR "${easy_dnn_SOURCE_DIR}/hlog/0.4.7_linux_aarch64-j3_hobot_gcc6.5.0/files/hlog")
  include_directories(${EASY_DNN_DIR}/include)
  include_directories(${DNN_DIR}/include)
  include_directories(${HLOG_DIR}/include)
  link_directories(${EASY_DNN_DIR}/lib)
  link_directories(${DNN_DIR}/lib)
  link_directories(${HLOG_DIR}/lib)

  add_definitions(-DUSE_BPU)
  # NOTE(xcsong): Reasons for adding flag `-fuse-ld=gold`:
  # https://stackoverflow.com/questions/59915966/unknown-gcc-linker-error-but-builds-sucessfully/59916438#59916438
  # https://github.com/tensorflow/tensorflow/issues/47849
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fuse-ld=gold")
endif()
8 changes: 6 additions & 2 deletions runtime/core/speaker/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
set(speaker_srcs
speaker_engine.cc)

# At least one inference backend must be selected; ONNX wins when both are on.
if(NOT ONNX AND NOT BPU)
  message(FATAL_ERROR "Please build with ONNX or BPU!")
endif()
if(ONNX)
  list(APPEND speaker_srcs onnx_speaker_model.cc)
else()
  list(APPEND speaker_srcs bpu_speaker_model.cc)
endif()

add_library(speaker STATIC ${speaker_srcs})
target_link_libraries(speaker PUBLIC frontend)

# Link the backend runtime matching the compiled model source.
if(ONNX)
  target_link_libraries(speaker PUBLIC onnxruntime)
else()
  target_link_libraries(speaker PUBLIC easy_dnn dnn)
endif()

152 changes: 152 additions & 0 deletions runtime/core/speaker/bpu_speaker_model.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
// Copyright (c) 2023 Horizon Robotics ([email protected])
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifdef USE_BPU

#include "speaker/bpu_speaker_model.h"
#include <vector>
#include <cstring>
#include "glog/logging.h"

#include "easy_dnn/data_structure.h"
#include "easy_dnn/model_manager.h"
#include "easy_dnn/task_manager.h"

using hobot::easy_dnn::ModelManager;
using hobot::easy_dnn::Task;
using hobot::easy_dnn::TaskManager;


namespace wespeaker {

void BpuSpeakerModel::AllocMemory(
std::vector<std::shared_ptr<DNNTensor>>* input_dnn_tensor_array,
std::vector<std::shared_ptr<DNNTensor>>* output_dnn_tensor_array,
Model* model) {
int32_t input_counts = model->GetInputCount();
LOG(INFO) << "input_counts: " << input_counts;
input_dnn_tensor_array->resize(input_counts);
// stage-1: input [1, 1, 198, 80]
for (int32_t i = 0; i < input_counts; i++) {
(*input_dnn_tensor_array)[i].reset(new DNNTensor);
auto& input = (*input_dnn_tensor_array)[i];
model->GetInputTensorProperties(input->properties, i);
if (input->properties.tensorType != hbDNNDataType::HB_DNN_TENSOR_TYPE_F32) {
LOG(FATAL) << "Input data type must be float32";
}
hbSysAllocCachedMem(&(input->sysMem[0]),
input->properties.alignedByteSize);
}
// stage-2: output
int32_t output_counts = model->GetOutputCount();
LOG(INFO) << "Output counts: " << output_counts;
output_dnn_tensor_array->resize(output_counts);
for (int32_t i = 0; i < output_counts; i++) {
(*output_dnn_tensor_array)[i].reset(new DNNTensor);
auto& output = (*output_dnn_tensor_array)[i];
model->GetOutputTensorProperties(output->properties, i);
if (output->properties.tensorType !=
hbDNNDataType::HB_DNN_TENSOR_TYPE_F32) {
LOG(FATAL) << "Output data type must be float32";
}
hbSysAllocCachedMem(&(output->sysMem[0]),
output->properties.alignedByteSize);
}
}

void BpuSpeakerModel::Read(const std::string& model_path) {
if (model_path == "") {
LOG(FATAL) << "model_path muse set";
}

// Init bpu model
ModelManager* model_manager = ModelManager::GetInstance();
std::vector<Model*> models;
int32_t ret_code = 0;
// load speaker model
// Model_path is bin model egs: speaker_resnet34.bin
ret_code = model_manager->Load(models, model_path);
if (ret_code != 0) {
LOG(FATAL) << "easydn error code: "
<< ", error loading bpu model speaker_model.bin at "
<< model_path;
}
// get model handle
speaker_dnn_handle_ = model_manager->GetModel([](Model* model) {
LOG(INFO) << model->GetName();
return model->GetName().find("speaker") != std::string::npos;
});
AllocMemory(&input_dnn_, &output_dnn_, speaker_dnn_handle_);
Reset();
LOG(INFO) << "Bpu Model Info:";
LOG(INFO) << "Model_path:" << model_path;
}

// Convenience constructor: loads the model immediately via Read().
BpuSpeakerModel::BpuSpeakerModel(const std::string& model_path) {
  Read(model_path);
}

// Run one inference over `chunk_feats` (frames x feature-dim, e.g.
// [198, 80]) and append the resulting embedding vector to *embed.
void BpuSpeakerModel::ExtractEmbedding(
    const std::vector<std::vector<float>>& chunk_feats,
    std::vector<float>* embed) {
  // Zero input/output buffers left over from the previous inference.
  Reset();
  // chunk_feats: [198, 80]
  auto& input_tensor = input_dnn_[0];
  float* dst = reinterpret_cast<float*>(input_tensor->sysMem[0].virAddr);
  for (const auto& frame : chunk_feats) {
    memcpy(dst, frame.data(), frame.size() * sizeof(float));
    dst += frame.size();
  }

  // Flush CPU writes so the BPU sees the features, run the task
  // synchronously, then invalidate the cache before reading the output.
  hbSysFlushMem(&(input_tensor->sysMem[0]), HB_SYS_MEM_CACHE_CLEAN);
  TaskManager* task_manager = TaskManager::GetInstance();
  auto infer_task = task_manager->GetModelInferTask(100);
  infer_task->SetModel(speaker_dnn_handle_);
  infer_task->SetInputTensors(input_dnn_);
  infer_task->SetOutputTensors(output_dnn_);
  infer_task->RunInfer();
  infer_task->WaitInferDone(100);
  infer_task.reset();

  hbSysFlushMem(&(output_dnn_[0]->sysMem[0]), HB_SYS_MEM_CACHE_INVALIDATE);
  const int output_dim =
      output_dnn_[0]->properties.validShape.dimensionSize[1];  // 256
  const float* raw_data =
      reinterpret_cast<float*>(output_dnn_[0]->sysMem[0].virAddr);
  // NOTE(cdliang): default output_node = 1
  embed->reserve(output_dim);
  for (int i = 0; i < output_dim; ++i) {
    embed->emplace_back(raw_data[i]);
  }
}

// Zero-fill every cached input and output tensor buffer so stale data
// from a previous inference can never leak into the next one.
void BpuSpeakerModel::Reset() {
  for (auto& tensor : input_dnn_) {
    memset(tensor->sysMem[0].virAddr, 0, tensor->properties.alignedByteSize);
  }
  for (auto& tensor : output_dnn_) {
    memset(tensor->sysMem[0].virAddr, 0, tensor->properties.alignedByteSize);
  }
}
} // namespace wespeaker

#endif // USE_BPU
56 changes: 56 additions & 0 deletions runtime/core/speaker/bpu_speaker_model.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright (c) 2023 Horizon Robotics ([email protected])
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef SPEAKER_BPU_SPEAKER_MODEL_H_
#define SPEAKER_BPU_SPEAKER_MODEL_H_

#ifdef USE_BPU

#include <vector>
#include <string>
#include <memory>

#include "easy_dnn/data_structure.h"
#include "easy_dnn/model.h"

#include "speaker/speaker_model.h"

using hobot::easy_dnn::Model;
using hobot::easy_dnn::DNNTensor;

namespace wespeaker {

// Speaker-embedding model backend running on the Horizon BPU via easy_dnn.
class BpuSpeakerModel : public SpeakerModel {
 public:
  BpuSpeakerModel() = default;
  // Loads the .bin model at `model_path` and allocates all I/O tensors;
  // aborts on failure (see Read()).
  explicit BpuSpeakerModel(const std::string& model_path);
  ~BpuSpeakerModel() = default;
  // Runs one inference on `chunk_feats` (frames x feature-dim) and appends
  // the embedding to *embed.
  void ExtractEmbedding(const std::vector<std::vector<float>>& chunk_feats,
                        std::vector<float>* embed) override;
 private:
  // Allocates cached BPU memory for every input/output tensor of `model`.
  void AllocMemory(
      std::vector<std::shared_ptr<DNNTensor>>* input_dnn_tensor_array,
      std::vector<std::shared_ptr<DNNTensor>>* output_dnn_tensor_array,
      Model* model);
  // Loads the model file and resolves the "speaker" model handle.
  void Read(const std::string& model_path);
  // Zeroes all cached input/output buffers.
  void Reset();
  std::vector<std::shared_ptr<DNNTensor>> input_dnn_;
  std::vector<std::shared_ptr<DNNTensor>> output_dnn_;
  // Handle owned by easy_dnn's ModelManager. BUG FIX: default-initialize to
  // nullptr — the defaulted constructor previously left this indeterminate.
  Model* speaker_dnn_handle_ = nullptr;
};

} // namespace wespeaker

#endif // USE_BPU
#endif // SPEAKER_BPU_SPEAKER_MODEL_H_
3 changes: 3 additions & 0 deletions runtime/core/speaker/onnx_speaker_model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#ifdef USE_ONNX

#include <vector>

Expand Down Expand Up @@ -96,3 +97,5 @@ void OnnxSpeakerModel::ExtractEmbedding(
}

} // namespace wespeaker

#endif // USE_ONNX
3 changes: 3 additions & 0 deletions runtime/core/speaker/onnx_speaker_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#ifndef SPEAKER_ONNX_SPEAKER_MODEL_H_
#define SPEAKER_ONNX_SPEAKER_MODEL_H_

#ifdef USE_ONNX

#include <memory>
#include <string>
#include <utility>
Expand Down Expand Up @@ -50,4 +52,5 @@ class OnnxSpeakerModel : public SpeakerModel {

} // namespace wespeaker

#endif // USE_ONNX
#endif // SPEAKER_ONNX_SPEAKER_MODEL_H_
4 changes: 4 additions & 0 deletions runtime/core/speaker/speaker_engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

#ifdef USE_ONNX
#include "speaker/onnx_speaker_model.h"
#elif USE_BPU
#include "speaker/bpu_speaker_model.h"
#endif

namespace wespeaker {
Expand Down Expand Up @@ -50,6 +52,8 @@ SpeakerEngine::SpeakerEngine(const std::string& model_path,
OnnxSpeakerModel::SetGpuDeviceId(0);
#endif
model_ = std::make_shared<OnnxSpeakerModel>(model_path);
#elif USE_BPU
model_ = std::make_shared<BpuSpeakerModel>(model_path);
#endif
}

Expand Down
5 changes: 5 additions & 0 deletions runtime/core/toolchains/aarch64-linux-gnu.toolchain.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Cross-compilation toolchain for aarch64 Linux targets (e.g. Horizon X3PI).
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)

set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc)
set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++)
4 changes: 4 additions & 0 deletions runtime/horizonbpu/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
build/
fc_base/
wheels
output/
34 changes: 34 additions & 0 deletions runtime/horizonbpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
cmake_minimum_required(VERSION 3.14)
project(wespeaker VERSION 0.1)

option(ONNX "whether to build with ONNX" OFF)
option(GPU "whether to build with GPU" OFF)
option(BPU "whether to build with BPU" ON)

set(CMAKE_VERBOSE_MAKEFILE OFF)

include(FetchContent)
set(FETCHCONTENT_QUIET OFF)
get_filename_component(fc_base "fc_base" REALPATH BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
set(FETCHCONTENT_BASE_DIR ${fc_base})

list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)

# Use the dedicated standard knobs instead of hand-rolling -std=c++14 into
# CMAKE_CXX_FLAGS, so the standard is known to CMake's feature machinery;
# keep -pthread/-fPIC for the prebuilt vendor libraries.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -fPIC")

# Include all dependency
if(ONNX)
  include(onnx)
endif()
if(BPU)
  include(bpu)
endif()
include(glog)
include(gflags)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

# build all libraries
add_subdirectory(utils)
add_subdirectory(frontend)
add_subdirectory(speaker)
add_subdirectory(bin)
Loading
Loading