Skip to content

Commit

Permalink
[runtime] support mac-m1/m2
Browse files Browse the repository at this point in the history
  • Loading branch information
user01 authored and user01 committed Jan 11, 2024
1 parent 9cfdc07 commit 5537b03
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 13 deletions.
10 changes: 5 additions & 5 deletions runtime/core/bin/extract_emb_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ DEFINE_string(speaker_model_path, "", "path of speaker model");
DEFINE_int32(fbank_dim, 80, "fbank feature dimension");
DEFINE_int32(sample_rate, 16000, "sample rate");
DEFINE_int32(embedding_size, 256, "embedding size");
DEFINE_int32(SamplesPerChunk, 32000, "samples of one chunk");
DEFINE_int32(samples_per_chunk, 32000, "samples of one chunk");

int main(int argc, char* argv[]) {
gflags::ParseCommandLineFlags(&argc, &argv, false);
Expand All @@ -39,7 +39,7 @@ int main(int argc, char* argv[]) {
LOG(INFO) << "Init model ...";
auto speaker_engine = std::make_shared<wespeaker::SpeakerEngine>(
FLAGS_speaker_model_path, FLAGS_fbank_dim, FLAGS_sample_rate,
FLAGS_embedding_size, FLAGS_SamplesPerChunk);
FLAGS_embedding_size, FLAGS_samples_per_chunk);
int embedding_size = speaker_engine->EmbeddingSize();
LOG(INFO) << "embedding size: " << embedding_size;
// read wav.scp
Expand Down Expand Up @@ -69,7 +69,7 @@ int main(int argc, char* argv[]) {
int samples = data_reader->num_sample();
// NOTE(cdliang): memory allocation
std::vector<float> embs(embedding_size, 0);
result << wav.first;
buffer << wav.first;

int wave_dur = static_cast<int>(static_cast<float>(samples) /
data_reader->sample_rate() * 1000);
Expand All @@ -78,9 +78,9 @@ int main(int argc, char* argv[]) {
speaker_engine->ExtractEmbedding(data, samples, &embs);
extract_time = timer.Elapsed();
for (size_t i = 0; i < embs.size(); i++) {
result << " " << embs[i];
buffer << " " << embs[i];
}
result << std::endl;
buffer << std::endl;
LOG(INFO) << "process: " << wav.first
<< " RTF: " << static_cast<float>(extract_time) / wave_dur;
total_waves_dur += wave_dur;
Expand Down
9 changes: 7 additions & 2 deletions runtime/core/cmake/onnx.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,13 @@ if(ONNX)
endif()
endif()
elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-osx-x86_64-${ONNX_VERSION}.tgz")
set(URL_HASH "SHA256=09b17f712f8c6f19bb63da35d508815b443cbb473e16c6192abfaa297c02f600")
if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-osx-arm64-${ONNX_VERSION}.tgz")
set(URL_HASH "SHA256=23117b6f5d7324d4a7c51184e5f808dd952aec411a6b99a1b6fd1011de06e300")
else()
set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-osx-x86_64-${ONNX_VERSION}.tgz")
set(URL_HASH "SHA256=09b17f712f8c6f19bb63da35d508815b443cbb473e16c6192abfaa297c02f600")
endif()
else()
message(FATAL_ERROR "Unsupported CMake System Name '${CMAKE_SYSTEM_NAME}' (expected 'Windows', 'Linux' or 'Darwin')")
endif()
Expand Down
12 changes: 6 additions & 6 deletions runtime/onnxruntime/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@ embed_out=your_embedding_txt
--result $embed_out \
--speaker_model_path $onnx_dir/final.onnx \
--embedding_size 256 \
--SamplesPerChunk 80000 # 5s
--samples_per_chunk 80000 # 5s
```
> NOTE: SamplesPerChunk: samples of one chunk. SamplesPerChunk = sample_rate * duration
> NOTE: samples_per_chunk is the number of samples in one chunk: samples_per_chunk = sample_rate * duration (in seconds).
>
> If SamplesPerChunk = -1, compute the embedding of whole sentence;
> If samples_per_chunk = -1, compute the embedding of the whole sentence;
> else compute embeddings chunk by chunk, and then average the chunk embeddings.
2. Calculate the similarity of two speech utterances.
Expand All @@ -70,7 +70,7 @@ onnx_dir=your_model_dir
1. RTF
> num_threads = 1
>
> SamplesPerChunk = 80000
> samples_per_chunk = 80000
>
> CPU: Intel(R) Xeon(R) Platinum 8160 CPU @ 2.10GHz
Expand All @@ -87,7 +87,7 @@ onnx_dir=your_model_dir

> num_threads = 1
>
> samplesPerChunk = 80000
> samples_per_chunk = 80000
>
> CPU: Intel(R) Xeon(R) Platinum 8160 CPU @ 2.10GHz
>
Expand All @@ -98,7 +98,7 @@ onnx_dir=your_model_dir
| ResNet-34 | 6.63 M | 0.00857436 |

2. EER (%)
> onnxruntime: SamplesPerChunk=-1.
> onnxruntime: samples_per_chunk=-1.
>
> don't use mean normalization for evaluation embeddings.
Expand Down

0 comments on commit 5537b03

Please sign in to comment.