From 270bcaef4207ec1fcec885ac4adf92faffd359c7 Mon Sep 17 00:00:00 2001 From: Neko Life Date: Thu, 7 Sep 2023 21:27:51 +0700 Subject: [PATCH] fix: fixed discord_voice_client::send_audio_raw blocking thread when provided with invalid size (#845) --- include/dpp/discordvoiceclient.h | 27 ++++++++++++++++++--------- src/dpp/discordvoiceclient.cpp | 32 +++++++++++++++++++++++--------- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/include/dpp/discordvoiceclient.h b/include/dpp/discordvoiceclient.h index eba704e337..39c5036356 100644 --- a/include/dpp/discordvoiceclient.h +++ b/include/dpp/discordvoiceclient.h @@ -50,14 +50,19 @@ #include #include - - struct OpusDecoder; struct OpusEncoder; struct OpusRepacketizer; namespace dpp { +// !TODO: change these to constexpr and rename every occurrence across the codebase +#define AUDIO_TRACK_MARKER (uint16_t)0xFFFF + +#define AUDIO_OVERLAP_SLEEP_SAMPLES 30 + +inline constexpr size_t send_audio_raw_max_length = 11520; + using json = nlohmann::json; /* @@ -95,10 +100,6 @@ struct DPP_EXPORT voice_out_packet { uint64_t duration; }; -#define AUDIO_TRACK_MARKER (uint16_t)0xFFFF - -#define AUDIO_OVERLAP_SLEEP_SAMPLES 30 - /** @brief Implements a discord voice connection. * Each discord_voice_client connects to one voice channel and derives from a websocket client. */ @@ -676,7 +677,7 @@ class DPP_EXPORT discord_voice_client : public websocket_client /** * @brief Send raw audio to the voice channel. * - * You should send an audio packet of 11520 bytes. + * You should send an audio packet of `send_audio_raw_max_length` (11520) bytes. * Note that this function can be costly as it has to opus encode * the PCM audio on the fly, and also encrypt it with libsodium. * @@ -695,8 +696,16 @@ class DPP_EXPORT discord_voice_client : public websocket_client * * @param length The length of the audio data. 
The length should * be a multiple of 4 (2x 16 bit stereo channels) with a maximum - * length of 11520, which is a complete opus frame at highest - * quality. + * length of `send_audio_raw_max_length`, which is a complete opus + * frame at highest quality. + * + * Generally, when you're streaming and you know there will be + * more packets to come, you should always provide packet data with a + * length of `send_audio_raw_max_length`. + * Silence will be appended if the length is less than + * `send_audio_raw_max_length`, as Discord expects to receive packets of + * this specific size. This can cause gaps in your stream, resulting + * in distorted audio, if you have more packets to send later on. * * @return discord_voice_client& Reference to self * diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp index 4f09992f72..efe729fe82 100644 --- a/src/dpp/discordvoiceclient.cpp +++ b/src/dpp/discordvoiceclient.cpp @@ -1166,20 +1166,34 @@ discord_voice_client& discord_voice_client::set_send_audio_type(send_audio_type_ discord_voice_client& discord_voice_client::send_audio_raw(uint16_t* audio_data, const size_t length) { #if HAVE_VOICE - const size_t max_frame_bytes = 11520; - if (length > max_frame_bytes) { + if (length < 4) { + throw dpp::voice_exception("Raw audio packet size can't be less than 4"); + } + + if ((length % 4) != 0) { + throw dpp::voice_exception("Raw audio packet size should be divisible by 4"); + } + + if (length > send_audio_raw_max_length) { std::string s_audio_data((const char*)audio_data, length); - while (s_audio_data.length() > max_frame_bytes) { - std::string packet(s_audio_data.substr(0, max_frame_bytes)); - s_audio_data.erase(s_audio_data.begin(), s_audio_data.begin() + max_frame_bytes); - if (packet.size() < max_frame_bytes) { - packet.resize(max_frame_bytes, 0); - } - send_audio_raw((uint16_t*)packet.data(), max_frame_bytes); + + while (s_audio_data.length() > send_audio_raw_max_length) { + std::string 
packet(s_audio_data.substr(0, send_audio_raw_max_length)); + const auto packet_size = static_cast<ptrdiff_t>(packet.size()); + + s_audio_data.erase(s_audio_data.begin(), s_audio_data.begin() + packet_size); + + send_audio_raw((uint16_t*)packet.data(), packet_size); } return *this; + } + + if (length < send_audio_raw_max_length) { + std::string packet((const char*)audio_data, length); + packet.resize(send_audio_raw_max_length, 0); + return send_audio_raw((uint16_t*)packet.data(), packet.size()); } opus_int32 encodedAudioMaxLength = (opus_int32)length;