diff --git a/cmake/DetectArchitecture.cmake b/cmake/DetectArchitecture.cmake
new file mode 100644
index 0000000000..e35055d8bd
--- /dev/null
+++ b/cmake/DetectArchitecture.cmake
@@ -0,0 +1,63 @@
+include(CheckCXXSourceRuns)
+
+# Compile and run a tiny program that uses INSTRUCTION_SET_INTRINSIC with
+# INSTRUCTION_SET_FLAG enabled. On success AVX_TYPE, AVX_FLAG and AVX_NAME are
+# published to the caller's scope; on failure they are left untouched.
+function(check_instruction_set INSTRUCTION_SET_NAME INSTRUCTION_SET_FLAG INSTRUCTION_SET_INTRINSIC)
+
+    set(INSTRUCTION_SET_CODE "
+        #include
+        #include
+        int main()
+        {
+            ${INSTRUCTION_SET_INTRINSIC};
+            return 0;
+        }
+    ")
+
+    # CMAKE_REQUIRED_FLAGS has to be a space-separated string; a ;-list does not
+    # reach the compiler intact. AVX_FLAG below keeps the list form.
+    string(REPLACE ";" " " INSTRUCTION_SET_REQUIRED_FLAGS "${INSTRUCTION_SET_FLAG}")
+    set(CMAKE_REQUIRED_FLAGS "${INSTRUCTION_SET_REQUIRED_FLAGS}")
+    CHECK_CXX_SOURCE_RUNS("${INSTRUCTION_SET_CODE}" "${INSTRUCTION_SET_NAME}")
+    if(${INSTRUCTION_SET_NAME})
+        set(AVX_TYPE "${INSTRUCTION_SET_NAME}" PARENT_SCOPE)
+        set(AVX_FLAG "${INSTRUCTION_SET_FLAG}" PARENT_SCOPE)
+        set(AVX_NAME "${INSTRUCTION_SET_NAME}" PARENT_SCOPE)
+    else()
+        message(STATUS "Instruction set ${INSTRUCTION_SET_NAME} not supported. Falling back to the previous instruction set.")
+        return()
+    endif()
+endfunction()
+
+if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+    set(INSTRUCTION_SETS
+        "T_AVX?/arch:AVX?auto result = _mm_testz_ps(__m128{}, __m128{})"
+        "T_AVX2?/arch:AVX2?auto result = _mm256_extract_epi64(__m256i{}, 0)"
+        "T_AVX512?/arch:AVX512?auto result = _mm512_add_ps(__m512{}, __m512{}).auto result2 = _mm512_cmplt_epu8_mask(__m512i{}, __m512i{})"
+    )
+else()
+    set(INSTRUCTION_SETS
+        "T_AVX?-mavx.-mpclmul.-mbmi?auto result = _mm_testz_ps(__m128{}, __m128{})"
+        "T_AVX2?-mavx2.-mavx.-mpclmul.-mbmi?auto result = _mm256_extract_epi64(__m256i{}, 0)"
+        "T_AVX512?-mavx512bw.-mavx512f.-mavx2.-mavx.-mpclmul.-mbmi?auto result = _mm512_add_ps(__m512{}, __m512{}).auto result2 = _mm512_cmplt_epu8_mask(__m512i{}, __m512i{})"
+    )
+endif()
+
+set(CMAKE_REQUIRED_FLAGS_SAVE "${CMAKE_REQUIRED_FLAGS}")
+
+set(AVX_NAME "T_Fallback")
+
+# Entries are "name?flags?statements"; '.' separates flags and statements. The last entry that passes wins.
+foreach(INSTRUCTION_SET IN LISTS INSTRUCTION_SETS)
+    string(REPLACE "?" ";" CURRENT_LIST "${INSTRUCTION_SET}")
+    list(GET CURRENT_LIST 0 INSTRUCTION_SET_NAME)
+    list(GET CURRENT_LIST 1 INSTRUCTION_SET_FLAG)
+    string(REPLACE "." ";" INSTRUCTION_SET_FLAG "${INSTRUCTION_SET_FLAG}")
+    list(GET CURRENT_LIST 2 INSTRUCTION_SET_INTRINSIC)
+    string(REPLACE "." ";" INSTRUCTION_SET_INTRINSIC "${INSTRUCTION_SET_INTRINSIC}")
+    check_instruction_set("${INSTRUCTION_SET_NAME}" "${INSTRUCTION_SET_FLAG}" "${INSTRUCTION_SET_INTRINSIC}")
+endforeach()
+
+message(STATUS "Detected CPU Architecture: ${AVX_NAME}")
+set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE}")
diff --git a/cmake/LINUXx86ToolChain.cmake b/cmake/LINUXx86ToolChain.cmake
index 93994a3511..240eb269aa 100644
--- a/cmake/LINUXx86ToolChain.cmake
+++ b/cmake/LINUXx86ToolChain.cmake
@@ -22,6 +22,7 @@ SET(OPENSSL_SSL_LIBRARY /usr/lib/i386-linux-gnu/libssl.so)
 
 SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -m32 " CACHE INTERNAL "" FORCE)
 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32 " CACHE INTERNAL "" FORCE)
+set(T_AVX_EXITCODE "0" CACHE STRING INTERNAL FORCE)
 
 EXECUTE_PROCESS(COMMAND sudo dpkg --add-architecture i386)
 EXECUTE_PROCESS(COMMAND sudo apt-get update)
diff --git a/include/dpp/discordvoiceclient.h b/include/dpp/discordvoiceclient.h
index 0b0411a2b3..f54ca8f46d 100644
--- a/include/dpp/discordvoiceclient.h
+++ b/include/dpp/discordvoiceclient.h
@@ -37,6 +37,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -58,6 +59,23 @@ namespace dpp {
 
 using json = nlohmann::json;
 
+/**
+ * @brief Holds a moving average of the number of current voice users, used to apply a smooth gain ramp.
+ */
+struct DPP_EXPORT moving_averager {
+    moving_averager() = default;
+
+    moving_averager(uint64_t collection_count_new);
+
+    moving_averager operator+=(int64_t value);
+
+    operator float();
+
+protected:
+    std::deque values{};
+    uint64_t collectionCount{};
+};
+
 // Forward declaration
 class cluster;
 
@@ -473,6 +491,21 @@ class DPP_EXPORT discord_voice_client : public websocket_client
      */
     bool terminating;
 
+    /**
+     * @brief The gain value for the end of the current voice iteration.
+ */ + float end_gain; + + /** + * @brief The gain value for the current voice iteration. + */ + float current_gain; + + /** + * @brief The amount to increment each successive sample for, for the current voice iteration. + */ + float increment; + /** * @brief Heartbeat interval for sending heartbeat keepalive */ @@ -503,6 +536,11 @@ class DPP_EXPORT discord_voice_client : public websocket_client */ snowflake server_id; + /** + * @brief Moving averager. + */ + moving_averager moving_average; + /** * @brief Channel ID */ diff --git a/include/dpp/isa_detection.h b/include/dpp/isa_detection.h new file mode 100644 index 0000000000..b771f11b4a --- /dev/null +++ b/include/dpp/isa_detection.h @@ -0,0 +1,399 @@ +/************************************************************************************ + * + * D++, A Lightweight C++ library for Discord + * + * Copyright 2021 Craig Edwards and D++ contributors + * (https://github.com/brainboxdotcc/DPP/graphs/contributors) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ************************************************************************************/ +#pragma once + +#if defined _MSC_VER || defined __GNU__ || defined __clang__ +#include +using avx_512_float = __m512; +using avx_512_int = __m512i; +using avx_2_float = __m256; +using avx_2_int = __m256i; +using avx_float = __m128; +using avx_int = __m128i; + +/* + * @brief Extracts a 32-bit integer from a 128it AVX2 register. 
+ * @param value The AVX2 register containing packed 16-bit integers. + * @param index The index of the 32-bit integer to extract (0-3). + * @return The extracted 32-bit integer. + */ +inline int32_t extract_int32_from_avx(const avx_int& value, int64_t index) { + switch (index) { + case 0: { + return _mm_extract_epi32(value, 0); + } + case 1: { + return _mm_extract_epi32(value, 1); + } + case 2: { + return _mm_extract_epi32(value, 2); + } + case 3: { + return _mm_extract_epi32(value, 3); + } + default: { + return _mm_extract_epi32(value, 0); + } + } +} + +/* + * @brief Extracts a 32-bit integer from a 256-bit AVX2 register. + * @param value The AVX2 register containing packed 32-bit integers. + * @param index The index of the 32bit integer to extract (0-7). + * @return The extracted 32-bit integer. + */ +inline int32_t extract_int32_from_avx2(const avx_2_int& value, int64_t index) { + switch (index) { + case 0: { + return _mm256_extract_epi32(value, 0); + } + case 1: { + return _mm256_extract_epi32(value, 1); + } + case 2: { + return _mm256_extract_epi32(value, 2); + } + case 3: { + return _mm256_extract_epi32(value, 3); + } + case 4: { + return _mm256_extract_epi32(value, 4); + } + case 5: { + return _mm256_extract_epi32(value, 5); + } + case 6: { + return _mm256_extract_epi32(value, 6); + } + case 7: { + return _mm256_extract_epi32(value, 7); + } + default: { + return _mm256_extract_epi32(value, 0); + } + } +} + +/* + * @brief Extracts a 32-bit integer from a 512-bit AVX-512 register. + * @param value The AVX-512 register containing packed 16-bit integers. + * @param index The index of the 32-bit integer to extract (0-15). + * @return The extracted 32-bit integer. 
+ */ +inline int32_t extract_int32_from_avx512(const avx_512_int& value, int64_t index) { + alignas(64) int32_t result[32]; + _mm512_store_si512(result, value); + return result[index]; +} +#endif + +#ifdef max + #undef max +#endif +#ifdef min + #undef min +#endif + +namespace dpp { + +#ifdef T_AVX512 + +/** + * @brief A class for audio mixing operations using AVX2 instructions. + */ +class audio_mixer { +public: + /* + * @brief The number of 32-bit values per CPU register. + */ + inline static constexpr int32_t byte_blocks_per_register{ 16 }; + + /* + * @brief Stores values from a 512-bit AVX vector to a storage location. + * @tparam avx_type The 512-bit AVX vector type. + * @tparam value_type The target value type for storage. + * @param values_to_store The 512-bit AVX vector containing values to store. + * @param storage_location Pointer to the storage location. + */ + template inline static void store_values(const avx_512_int& values_to_store, value_type* storage_location) { + for (int64_t x = 0; x < byte_blocks_per_register; ++x) { + storage_location[x] = static_cast(extract_int32_from_avx512(values_to_store, x)); + } + } + + /** + * @brief Specialization for gathering non-float values into an AVX register. + * @tparam avx_type The AVX type to be used (AVX, AVX2, etc.). + * @tparam value_type The type of values being gathered. + * @tparam Indices Parameter pack of indices for gathering values. + * @return An AVX register containing gathered values. + */ + template inline static avx_512_float gather_values(value_type* values) { + float newArray[byte_blocks_per_register]{}; + for (size_t x = 0; x < byte_blocks_per_register; ++x) { + newArray[x] = static_cast(values[x]); + } + return _mm512_loadu_ps(newArray); + } + + /** + * @brief Collect a single register worth of data from data_in, apply gain and increment, and store the result in data_out. + * This version uses AVX2 instructions. + * + * @param data_in Pointer to the input array of int32_t values. 
+ * @param data_out Pointer to the output array of int16_t values. + * @param current_gain The gain to be applied to the elements. + * @param increment The increment value to be added to each element. + */ + inline static void collect_single_register(int32_t* data_in, int16_t* data_out, float current_gain, float increment) { + avx_512_float current_samples_new{ _mm512_mul_ps(gather_values(data_in), + _mm512_add_ps(_mm512_set1_ps(current_gain), + _mm512_mul_ps(_mm512_set1_ps(increment), + _mm512_set_ps(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f)))) }; + + current_samples_new = _mm512_mask_blend_ps(_mm512_cmp_ps_mask(current_samples_new, _mm512_set1_ps(0.0f), _CMP_GE_OQ), + _mm512_max_ps(current_samples_new, _mm512_set1_ps(static_cast(std::numeric_limits::min()))), + _mm512_min_ps(current_samples_new, _mm512_set1_ps(static_cast(std::numeric_limits::max())))); + + store_values(_mm512_cvtps_epi32(current_samples_new), data_out); + } + + /** + * @brief Combine a register worth of elements from decoded_data and store the result in up_sampled_vector. + * This version uses AVX instructions. + * + * @param up_sampled_vector Pointer to the array of int32_t values. + * @param decoded_data Pointer to the array of int16_t values. + * @param x Index to select a specific set of elements to combine. + */ + inline static void combine_samples(int32_t* up_sampled_vector, const int16_t* decoded_data) { + auto newValues{ _mm512_cvtps_epi32(_mm512_add_ps(gather_values(up_sampled_vector), gather_values(decoded_data))) }; + store_values(newValues, up_sampled_vector); + } +}; + +#elif T_AVX2 + +/** + * @brief A class for audio mixing operations using AVX2 instructions. + */ +class audio_mixer { +public: + /* + * @brief The number of 32-bit values per CPU register. + */ + inline static constexpr int32_t byte_blocks_per_register{ 8 }; + + /* + * @brief Stores values from a 256-bit AVX vector to a storage location. 
+ * @tparam avx_type The 256-bit AVX vector type. + * @tparam value_type The target value type for storage. + * @param values_to_store The 256-bit AVX vector containing values to store. + * @param storage_location Pointer to the storage location. + */ + template inline static void store_values(const avx_2_int& values_to_store, value_type* storage_location) { + for (int64_t x = 0; x < byte_blocks_per_register; ++x) { + storage_location[x] = static_cast(extract_int32_from_avx2(values_to_store, x)); + } + } + + /** + * @brief Specialization for gathering non-float values into an AVX register. + * @tparam avx_type The AVX type to be used (AVX, AVX2, etc.). + * @tparam value_type The type of values being gathered. + * @tparam Indices Parameter pack of indices for gathering values. + * @return An AVX register containing gathered values. + */ + template inline static avx_2_float gather_values(value_type* values) { + float newArray[byte_blocks_per_register]{}; + for (size_t x = 0; x < byte_blocks_per_register; ++x) { + newArray[x] = static_cast(values[x]); + } + return _mm256_loadu_ps(newArray); + } + + /** + * @brief Collect a single register worth of data from data_in, apply gain and increment, and store the result in data_out. + * This version uses AVX2 instructions. + * + * @param data_in Pointer to the input array of int32_t values. + * @param data_out Pointer to the output array of int16_t values. + * @param current_gain The gain to be applied to the elements. + * @param increment The increment value to be added to each element. 
+ */ + inline static void collect_single_register(int32_t* data_in, int16_t* data_out, float current_gain, float increment) { + avx_2_float current_samples_new{ _mm256_mul_ps(gather_values(data_in), + _mm256_add_ps(_mm256_set1_ps(current_gain), + _mm256_mul_ps(_mm256_set1_ps(increment), _mm256_set_ps(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f)))) }; + + current_samples_new = + _mm256_blendv_ps(_mm256_max_ps(current_samples_new, _mm256_set1_ps(static_cast(std::numeric_limits::min()))), + _mm256_min_ps(current_samples_new, _mm256_set1_ps(static_cast(std::numeric_limits::max()))), + _mm256_cmp_ps(current_samples_new, _mm256_set1_ps(0.0f), _CMP_GE_OQ)); + + store_values(_mm256_cvtps_epi32(current_samples_new), data_out); + } + + /** + * @brief Combine a register worth of elements from decoded_data and store the result in up_sampled_vector. + * This version uses AVX instructions. + * + * @param up_sampled_vector Pointer to the array of int32_t values. + * @param decoded_data Pointer to the array of int16_t values. + * @param x Index to select a specific set of elements to combine. + */ + inline static void combine_samples(int32_t* up_sampled_vector, const int16_t* decoded_data) { + auto newValues{ _mm256_cvtps_epi32(_mm256_add_ps(gather_values(up_sampled_vector), gather_values(decoded_data))) }; + store_values(newValues, up_sampled_vector); + } +}; + +#elif T_AVX + +/** + * @brief A class for audio mixing operations using AVX2 instructions. + */ +class audio_mixer { +public: + /* + * @brief The number of 32-bit values per CPU register. + */ + inline static constexpr int32_t byte_blocks_per_register{ 4 }; + + /* + * @brief Stores values from a 128-bit AVX vector to a storage location. + * @tparam avx_type The 128-bit AVX vector type. + * @tparam value_type The target value type for storage. + * @param values_to_store The 128-bit AVX vector containing values to store. + * @param storage_location Pointer to the storage location. 
+     */
+    template<typename value_type> inline static void store_values(const avx_int& values_to_store, value_type* storage_location) {
+        /* Spill the register once rather than extracting lane by lane */
+        int32_t lanes[byte_blocks_per_register];
+        _mm_storeu_si128(reinterpret_cast<avx_int*>(lanes), values_to_store);
+        for (int32_t x = 0; x < byte_blocks_per_register; ++x) {
+            storage_location[x] = static_cast<value_type>(lanes[x]);
+        }
+    }
+
+    /**
+     * @brief Converts byte_blocks_per_register values to float and loads them into an AVX register.
+     * @tparam value_type The type of values being gathered.
+     * @param values Pointer to the values to gather.
+     * @return An AVX register containing gathered values.
+     */
+    template<typename value_type> inline static avx_float gather_values(value_type* values) {
+        float converted[byte_blocks_per_register]{};
+        for (int32_t x = 0; x < byte_blocks_per_register; ++x) {
+            converted[x] = static_cast<float>(values[x]);
+        }
+        return _mm_loadu_ps(converted);
+    }
+
+    /**
+     * @brief Collect a single register worth of data from data_in, apply gain and increment, and store the result in data_out.
+     * This version uses AVX instructions.
+     *
+     * @param data_in Pointer to the input array of int32_t values.
+     * @param data_out Pointer to the output array of int16_t values.
+     * @param current_gain The gain to be applied to the elements.
+     * @param increment The increment value to be added to each element.
+ */ + inline static void collect_single_register(int32_t* data_in, int16_t* data_out, float current_gain, float increment) { + avx_float current_samples_new{ _mm_mul_ps(gather_values(data_in), + _mm_add_ps(_mm_set1_ps(current_gain), _mm_mul_ps(_mm_set1_ps(increment), _mm_set_ps(0.0f, 1.0f, 2.0f, 3.0f)))) }; + + current_samples_new = _mm_blendv_ps(_mm_max_ps(current_samples_new, _mm_set1_ps(static_cast(std::numeric_limits::min()))), + _mm_min_ps(current_samples_new, _mm_set1_ps(static_cast(std::numeric_limits::max()))), + _mm_cmp_ps(current_samples_new, _mm_set1_ps(0.0f), _CMP_GE_OQ)); + + store_values(_mm_cvtps_epi32(current_samples_new), data_out); + } + + /** + * @brief Combine a register worth of elements from decoded_data and store the result in up_sampled_vector. + * This version uses AVX instructions. + * + * @param up_sampled_vector Pointer to the array of int32_t values. + * @param decoded_data Pointer to the array of int16_t values. + * @param x Index to select a specific set of elements to combine. + */ + inline static void combine_samples(int32_t* up_sampled_vector, const int16_t* decoded_data) { + auto newValues{ _mm_cvtps_epi32(_mm_add_ps(gather_values(up_sampled_vector), gather_values(decoded_data))) }; + store_values(newValues, up_sampled_vector); + } +}; + +#else + +/** + * @brief A class for audio mixing operations using AVX instructions. + */ +class audio_mixer { +public: + /* + * @brief The number of 32-bit values per CPU register. + */ + inline static constexpr int32_t byte_blocks_per_register{ 2 }; + + /** + * @brief Collect a single register worth of data from data_in, apply gain and increment, and store the result in data_out. + * This version uses x64 instructions. + * + * @param data_in Pointer to the input array of int32_t values. + * @param data_out Pointer to the output array of int16_t values. + * @param current_gain The gain to be applied to the elements. + * @param increment The increment value to be added to each element. 
+ */ + inline static void collect_single_register(int32_t* data_in, int16_t* data_out, float current_gain, float increment) { + for (uint64_t x = 0; x < byte_blocks_per_register; ++x) { + auto increment_neww = increment * x; + auto current_gain_new = current_gain + increment_neww; + auto current_sample_new = data_in[x] * current_gain_new; + if (current_sample_new >= std::numeric_limits::max()) { + current_sample_new = std::numeric_limits::max(); + } + else if (current_sample_new <= std::numeric_limits::min()) { + current_sample_new = std::numeric_limits::min(); + } + data_out[x] = static_cast(current_sample_new); + } + } + + /** + * @brief Combine a register worth of elements from decoded_data and store the result in up_sampled_vector. + * This version uses instructions. + * + * @param up_sampled_vector Pointer to the array of int32_t values. + * @param decoded_data Pointer to the array of int16_t values. + */ + inline static void combine_samples(int32_t* up_sampled_vector, const int16_t* decoded_data) { + for (uint64_t x = 0; x < byte_blocks_per_register; ++x) { + up_sampled_vector[x] += static_cast(decoded_data[x]); + } + + } +}; + +/**@}*/ + +#endif + +} diff --git a/library-vcpkg/CMakeLists.txt b/library-vcpkg/CMakeLists.txt index ecb4e7643a..a33b18ccc0 100644 --- a/library-vcpkg/CMakeLists.txt +++ b/library-vcpkg/CMakeLists.txt @@ -11,9 +11,16 @@ endif() add_library("${PROJECT_NAME}::${LIB_NAME}" ALIAS "${LIB_NAME}") +if(NOT DEFINED AVX_TYPE) + include("${CMAKE_CURRENT_SOURCE_DIR}/../cmake/DetectArchitecture.cmake") +endif() + target_compile_definitions( "${LIB_NAME}" PUBLIC "DPP_BUILD" + "$<$:$<$:/sdl;/std:c++17;/Od;/DEBUG;/sdl;/MP;/DFD_SETSIZE=1024;/Zc:preprocessor>>" + "$<$:$<$:/std:c++17;/O2;/Oi;/Oy;/GL;/Gy;/sdl;/MP;/DFD_SETSIZE=1024;/Zc:preprocessor>>" + "${AVX_TYPE}" ) target_compile_options( @@ -23,6 +30,7 @@ target_compile_options( "$<$:$<$:/std:c++17;/O2;/Oi;/Oy;/GL;/Gy;/sdl;/MP;/DFD_SETSIZE=1024;/Zc:preprocessor>>" 
"$<$:$<$:-std=c++17;-Wall;-Wempty-body;-Wno-psabi;-Wunknown-pragmas;-Wignored-qualifiers;-Wimplicit-fallthrough;-Wmissing-field-initializers;-Wsign-compare;-Wtype-limits;-Wuninitialized;-Wshift-negative-value;-pthread;-g;-Og;-fPIC>>" "$<$:$<$:-std=c++17;-Wall;-Wempty-body;-Wno-psabi;-Wunknown-pragmas;-Wignored-qualifiers;-Wimplicit-fallthrough;-Wmissing-field-initializers;-Wsign-compare;-Wtype-limits;-Wuninitialized;-Wshift-negative-value;-pthread;-O3;-fPIC>>" + "${AVX_FLAG}" ) target_compile_features( diff --git a/library/CMakeLists.txt b/library/CMakeLists.txt index e74c4e33bb..bb078a462e 100644 --- a/library/CMakeLists.txt +++ b/library/CMakeLists.txt @@ -20,6 +20,10 @@ if (HAVE_PTHREAD_SETNAME_NP) }" HAVE_TWO_PARAMETER_SETNAME_NP) endif() +if(NOT DEFINED AVX_TYPE) + include("${CMAKE_CURRENT_SOURCE_DIR}/../cmake/DetectArchitecture.cmake") +endif() + add_compile_definitions(DPP_OS=${CMAKE_SYSTEM_NAME}) if(WIN32 AND NOT MINGW) @@ -49,11 +53,14 @@ if(WIN32 AND NOT MINGW) include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../win32/include") + add_compile_options("/bigobj") add_compile_definitions(OPENSSL_SYS_WIN32) add_compile_definitions(_WINSOCK_DEPRECATED_NO_WARNINGS) add_compile_definitions(WIN32_LEAN_AND_MEAN) add_compile_definitions(_CRT_SECURE_NO_WARNINGS) add_compile_definitions(_CRT_NONSTDC_NO_DEPRECATE) + add_compile_definitions("${AVX_TYPE}") + add_compile_options("${AVX_FLAG}") endif() endif() diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp index 7d9ff7ccc5..3cadb62646 100644 --- a/src/dpp/discordvoiceclient.cpp +++ b/src/dpp/discordvoiceclient.cpp @@ -54,6 +54,30 @@ #endif namespace dpp { +moving_averager::moving_averager(uint64_t collection_count_new) { + collectionCount = collection_count_new; +} + +moving_averager moving_averager::operator+=(int64_t value) { + values.emplace_front(value); + if (values.size() >= collectionCount) { + values.pop_back(); + } + return *this; +} + +moving_averager::operator float() { + float 
returnData{}; + if (values.size() > 0) { + for (auto& value : values) { + returnData += static_cast(value); + } + return returnData / static_cast(values.size()); + } + else { + return 0.0f; + } +} [[maybe_unused]] constexpr int32_t opus_sample_rate_hz = 48000; @@ -122,9 +146,10 @@ size_t audio_mix(discord_voice_client& client, opus_int32* pcm_mix, const opus_i return 0; } /* We must upsample the data to 32 bits wide, otherwise we could overflow */ - for (opus_int32 v = 0; v < samples * opus_channel_count; ++v) { - pcm_mix[v] += pcm[v]; + for (opus_int32 v = 0; v < samples * opus_channel_count / 16; ++v) { + audio_mixer::combine_samples(pcm_mix, pcm); } + client.moving_average += park_count; max_samples = (std::max)(samples, max_samples); return park_count + 1; } @@ -200,6 +225,7 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour opus_int32 pcm_mix[23040] = { 0 }; size_t park_count = 0; int max_samples = 0; + int samples = 0; for (auto& d : flush_data) { if (!d.decoder) { @@ -232,12 +258,12 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour if (vr.audio_data.length() > 0x7FFFFFFF) { throw dpp::length_exception("audio_data > 2GB! 
This should never happen!"); } - if (int samples = opus_decode(d.decoder.get(), vr.audio_data.data(), + if (samples = opus_decode(d.decoder.get(), vr.audio_data.data(), static_cast(vr.audio_data.length() & 0x7FFFFFFF), pcm, 5760, 0); samples >= 0) { vr.reassign(&client, d.user_id, reinterpret_cast(pcm), samples * opus_channel_count * sizeof(opus_int16)); - + client.end_gain = 1.0f / client.moving_average; park_count = audio_mix(client, pcm_mix, pcm, park_count, samples, max_samples); client.creator->on_voice_receive.call(vr); } @@ -249,11 +275,16 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour /* If combined receive is bound, dispatch it */ if (park_count) { + /* Downsample the 32 bit samples back to 16 bit */ opus_int16 pcm_downsample[23040] = { 0 }; - for (int v = 0; v < max_samples * opus_channel_count; ++v) { - pcm_downsample[v] = (opus_int16)(pcm_mix[v] / park_count); + client.increment = (client.end_gain - client.current_gain) / static_cast(samples); + for (int64_t x = 0; x < samples / audio_mixer::byte_blocks_per_register; ++x) { + audio_mixer::collect_single_register(pcm_mix + (x * audio_mixer::byte_blocks_per_register), + pcm_downsample + (x * audio_mixer::byte_blocks_per_register), client.current_gain, client.increment); + client.current_gain += client.increment * static_cast(audio_mixer::byte_blocks_per_register); } + voice_receive_t vr(nullptr, "", &client, 0, reinterpret_cast(pcm_downsample), max_samples * opus_channel_count * sizeof(opus_int16));