[runtime] support mac-m1/m2

wenet-e2e · Jan 11, 2024 · 5537b03
1 parent 9cfdc07
commit 5537b03
Show file tree

Hide file tree

Showing 3 changed files with 18 additions and 13 deletions.
diff --git a/runtime/core/bin/extract_emb_main.cc b/runtime/core/bin/extract_emb_main.cc
@@ -29,7 +29,7 @@ DEFINE_string(speaker_model_path, "", "path of speaker model");
 DEFINE_int32(fbank_dim, 80, "fbank feature dimension");
 DEFINE_int32(sample_rate, 16000, "sample rate");
 DEFINE_int32(embedding_size, 256, "embedding size");
-DEFINE_int32(SamplesPerChunk, 32000, "samples of one chunk");
+DEFINE_int32(samples_per_chunk, 32000, "samples of one chunk");
 
 int main(int argc, char* argv[]) {
   gflags::ParseCommandLineFlags(&argc, &argv, false);
@@ -39,7 +39,7 @@ int main(int argc, char* argv[]) {
   LOG(INFO) << "Init model ...";
   auto speaker_engine = std::make_shared<wespeaker::SpeakerEngine>(
       FLAGS_speaker_model_path, FLAGS_fbank_dim, FLAGS_sample_rate,
-      FLAGS_embedding_size, FLAGS_SamplesPerChunk);
+      FLAGS_embedding_size, FLAGS_samples_per_chunk);
   int embedding_size = speaker_engine->EmbeddingSize();
   LOG(INFO) << "embedding size: " << embedding_size;
   // read wav.scp
@@ -69,7 +69,7 @@ int main(int argc, char* argv[]) {
     int samples = data_reader->num_sample();
     // NOTE(cdliang): memory allocation
     std::vector<float> embs(embedding_size, 0);
-    result << wav.first;
+    buffer << wav.first;
 
     int wave_dur = static_cast<int>(static_cast<float>(samples) /
                                     data_reader->sample_rate() * 1000);
@@ -78,9 +78,9 @@ int main(int argc, char* argv[]) {
     speaker_engine->ExtractEmbedding(data, samples, &embs);
     extract_time = timer.Elapsed();
     for (size_t i = 0; i < embs.size(); i++) {
-      result << " " << embs[i];
+      buffer << " " << embs[i];
     }
-    result << std::endl;
+    buffer << std::endl;
     LOG(INFO) << "process: " << wav.first
               << " RTF: " << static_cast<float>(extract_time) / wave_dur;
     total_waves_dur += wave_dur;

diff --git a/runtime/core/cmake/onnx.cmake b/runtime/core/cmake/onnx.cmake
@@ -17,8 +17,13 @@ if(ONNX)
       endif()
     endif()
   elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
-    set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-osx-x86_64-${ONNX_VERSION}.tgz")
-    set(URL_HASH "SHA256=09b17f712f8c6f19bb63da35d508815b443cbb473e16c6192abfaa297c02f600")
+    if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
+      set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-osx-arm64-${ONNX_VERSION}.tgz")
+      set(URL_HASH "SHA256=23117b6f5d7324d4a7c51184e5f808dd952aec411a6b99a1b6fd1011de06e300")
+    else()
+      set(ONNX_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNX_VERSION}/onnxruntime-osx-x86_64-${ONNX_VERSION}.tgz")
+      set(URL_HASH "SHA256=09b17f712f8c6f19bb63da35d508815b443cbb473e16c6192abfaa297c02f600")
+    endif()
   else()
     message(FATAL_ERROR "Unsupported CMake System Name '${CMAKE_SYSTEM_NAME}' (expected 'Windows', 'Linux' or 'Darwin')")
   endif()

diff --git a/runtime/onnxruntime/README.md b/runtime/onnxruntime/README.md
@@ -44,13 +44,13 @@ embed_out=your_embedding_txt
   --result $embed_out \
   --speaker_model_path $onnx_dir/final.onnx \
   --embedding_size 256 \
-  --SamplesPerChunk  80000  # 5s
+  --samples_per_chunk  80000  # 5s
 
 ```
 
-> NOTE: SamplesPerChunk: samples of one chunk. SamplesPerChunk = sample_rate * duration
+> NOTE: samples_per_chunk: samples of one chunk. samples_per_chunk = sample_rate * duration
 >
-> If SamplesPerChunk = -1, compute the embedding of whole sentence;
+> If samples_per_chunk = -1, compute the embedding of whole sentence;
 > else compute embedding with chunk by chunk, and then average embeddings of chunk.
 
 2. Calculate the similarity of two speech.
@@ -70,7 +70,7 @@ onnx_dir=your_model_dir
 1. RTF
 > num_threads = 1
 >
-> SamplesPerChunk = 80000
+> samples_per_chunk = 80000
 >
 > CPU: Intel(R) Xeon(R) Platinum 8160 CPU @ 2.10GHz
 
@@ -87,7 +87,7 @@ onnx_dir=your_model_dir
 
 > num_threads = 1
 >
-> samplesPerChunk = 80000
+> samples_per_chunk = 80000
 >
 > CPU: Intel(R) Xeon(R) Platinum 8160 CPU @ 2.10GHz
 >
@@ -98,7 +98,7 @@ onnx_dir=your_model_dir
 | ResNet-34           | 6.63 M  | 0.00857436 |
 
 2. EER (%)
-> onnxruntime: SamplesPerChunk=-1.
+> onnxruntime: samples_per_chunk=-1.
 >
 > don't use mean normalization for evaluation embeddings.