diff --git a/cmake/DetectArchitecture.cmake b/cmake/DetectArchitecture.cmake
new file mode 100644
index 0000000000..e35055d8bd
--- /dev/null
+++ b/cmake/DetectArchitecture.cmake
@@ -0,0 +1,56 @@
+include(CheckCXXSourceRuns)
+
+# check_instruction_set(<name> <flags> <intrinsic>)
+#
+# Builds and runs a small C++ program that executes <intrinsic> while being
+# compiled with <flags>. check_cxx_source_runs() stores the outcome in the
+# result variable called <name>.
+#
+#   INSTRUCTION_SET_NAME      - name of the result variable, e.g. T_AVX2. On
+#                               success it is also published to the caller as
+#                               AVX_TYPE and AVX_NAME.
+#   INSTRUCTION_SET_FLAG      - ;-list of compiler flags enabling the instruction
+#                               set. On success it is published as AVX_FLAG.
+#   INSTRUCTION_SET_INTRINSIC - C++ statement(s) pasted into main(); several
+#                               statements are separated by ';'.
+#
+# On failure nothing is written to the caller's scope, so the values left by an
+# earlier successful probe remain in effect.
+function(check_instruction_set INSTRUCTION_SET_NAME INSTRUCTION_SET_FLAG INSTRUCTION_SET_INTRINSIC)
+
+    set(INSTRUCTION_SET_CODE "
+        #include <immintrin.h>
+        #include <stdint.h>
+        int main()
+        {
+            ${INSTRUCTION_SET_INTRINSIC};
+            return 0;
+        }
+    ")
+
+    # CMAKE_REQUIRED_FLAGS must be a single space-delimited string; a ;-list is
+    # not understood by the check modules, so flatten the flag list first.
+    string(REPLACE ";" " " INSTRUCTION_SET_REQUIRED_FLAGS "${INSTRUCTION_SET_FLAG}")
+    set(CMAKE_REQUIRED_FLAGS "${INSTRUCTION_SET_REQUIRED_FLAGS}")
+    check_cxx_source_runs("${INSTRUCTION_SET_CODE}" "${INSTRUCTION_SET_NAME}")
+
+    if(${INSTRUCTION_SET_NAME})
+        set(AVX_TYPE "${INSTRUCTION_SET_NAME}" PARENT_SCOPE)
+        # AVX_FLAG deliberately stays a ;-list: it is consumed as a list of
+        # separate compile options by the including CMakeLists.
+        set(AVX_FLAG "${INSTRUCTION_SET_FLAG}" PARENT_SCOPE)
+        set(AVX_NAME "${INSTRUCTION_SET_NAME}" PARENT_SCOPE)
+    else()
+        message(STATUS "Instruction set ${INSTRUCTION_SET_NAME} not supported. Falling back to the previous instruction set.")
+    endif()
+endfunction()
+
+# Candidate instruction sets, ordered from least to most capable. Each entry is
+# "<name>?<flags>?<statements>". Inside <flags> and <statements> a '.' separates
+# the individual items (it is rewritten to ';' in the loop below), so the probe
+# statements themselves must not contain a '.'.
+if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+    set(INSTRUCTION_SETS
+        "T_AVX?/arch:AVX?auto result = _mm_testz_ps(__m128{}, __m128{})"
+        "T_AVX2?/arch:AVX2?auto result = _mm256_extract_epi64(__m256i{}, 0)"
+        "T_AVX512?/arch:AVX512?auto result = _mm512_add_ps(__m512{}, __m512{}).auto result2 = _mm512_cmplt_epu8_mask(__m512i{}, __m512i{})"
+    )
+else()
+    set(INSTRUCTION_SETS
+        "T_AVX?-mavx.-mpclmul.-mbmi?auto result = _mm_testz_ps(__m128{}, __m128{})"
+        "T_AVX2?-mavx2.-mavx.-mpclmul.-mbmi?auto result = _mm256_extract_epi64(__m256i{}, 0)"
+        "T_AVX512?-mavx512bw.-mavx512f.-mavx2.-mavx.-mpclmul.-mbmi?auto result = _mm512_add_ps(__m512{}, __m512{}).auto result2 = _mm512_cmplt_epu8_mask(__m512i{}, __m512i{})"
+    )
+endif()
+
+# Remember the caller's CMAKE_REQUIRED_FLAGS so it can be put back after probing.
+set(CMAKE_REQUIRED_FLAGS_SAVE "${CMAKE_REQUIRED_FLAGS}")
+
+# Reported when none of the probes below succeeds.
+set(AVX_NAME "T_Fallback")
+
+foreach(INSTRUCTION_SET IN LISTS INSTRUCTION_SETS)
+    # Split "<name>?<flags>?<statements>" into its three fields.
+    string(REPLACE "?" ";" CURRENT_LIST "${INSTRUCTION_SET}")
+    list(GET CURRENT_LIST 0 INSTRUCTION_SET_NAME)
+    list(GET CURRENT_LIST 1 INSTRUCTION_SET_FLAG)
+    string(REPLACE "." ";" INSTRUCTION_SET_FLAG "${INSTRUCTION_SET_FLAG}")
+    list(GET CURRENT_LIST 2 INSTRUCTION_SET_INTRINSIC)
+    string(REPLACE "." ";" INSTRUCTION_SET_INTRINSIC "${INSTRUCTION_SET_INTRINSIC}")
+    # A successful probe overwrites AVX_TYPE / AVX_FLAG / AVX_NAME, so the last
+    # supported entry of the list wins.
+    check_instruction_set("${INSTRUCTION_SET_NAME}" "${INSTRUCTION_SET_FLAG}" "${INSTRUCTION_SET_INTRINSIC}")
+endforeach()
+
+message(STATUS "Detected CPU Architecture: ${AVX_NAME}")
+set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE}")
diff --git a/cmake/LINUXx86ToolChain.cmake b/cmake/LINUXx86ToolChain.cmake
index 93994a3511..240eb269aa 100644
--- a/cmake/LINUXx86ToolChain.cmake
+++ b/cmake/LINUXx86ToolChain.cmake
@@ -22,6 +22,7 @@ SET(OPENSSL_SSL_LIBRARY /usr/lib/i386-linux-gnu/libssl.so)
 
 SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -m32 " CACHE INTERNAL "" FORCE)
 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32 " CACHE INTERNAL "" FORCE)
+set(T_AVX_EXITCODE "0" CACHE STRING INTERNAL FORCE)
 
 EXECUTE_PROCESS(COMMAND sudo dpkg --add-architecture i386)
 EXECUTE_PROCESS(COMMAND sudo apt-get update)
diff --git a/include/dpp/discordvoiceclient.h b/include/dpp/discordvoiceclient.h
index 0b0411a2b3..f54ca8f46d 100644
--- a/include/dpp/discordvoiceclient.h
+++ b/include/dpp/discordvoiceclient.h
@@ -37,6 +37,7 @@
 #include <dpp/dispatcher.h>
 #include <dpp/cluster.h>
 #include <dpp/discordevents.h>
+#include <dpp/isa_detection.h>
 #include <dpp/socket.h>
 #include <queue>
 #include <thread>
@@ -58,6 +59,23 @@ namespace dpp {
 
 using json = nlohmann::json;
 
+/**
+ * @brief Holds a moving average of the number of current voice users, for applying a smooth gain ramp.
+ *
+ * Values are kept in a bounded window; reading the object as a float yields their mean.
+ */
+struct DPP_EXPORT moving_averager {
+	/**
+	 * @brief Construct an averager with an empty window and a window length of zero.
+	 */
+	moving_averager() = default;
+
+	/**
+	 * @brief Construct an averager with the given window length.
+	 * @param collection_count_new Window length used to bound the number of stored values.
+	 */
+	moving_averager(uint64_t collection_count_new);
+
+	/**
+	 * @brief Add a value to the window, discarding the oldest entry when the window is full.
+	 * @param value The value to add.
+	 * @return A copy of this averager after the update (returned by value).
+	 */
+	moving_averager operator+=(int64_t value);
+
+	/**
+	 * @brief Arithmetic mean of the stored values, or 0.0f when nothing is stored.
+	 */
+	operator float();
+
+protected:
+	/* Stored values, newest at the front. */
+	std::deque<int64_t> values{};
+	/* Window length that bounds the size of `values`. */
+	uint64_t collectionCount{};
+};
+
 // Forward declaration
 class cluster;
 
@@ -473,6 +491,21 @@ class DPP_EXPORT discord_voice_client : public websocket_client
 	 */
 	bool terminating;
 
+	/**
+	 * @brief The gain value for the end of the current voice iteration.
+	 */
+	float end_gain;
+
+	/**
+	 * @brief The gain value for the current voice iteration.
+	 */
+	float current_gain;
+
+	/**
+	 * @brief Gain step added per sample while ramping from current_gain towards end_gain during the current voice iteration.
+	 */
+	float increment;
+
 	/**
 	 * @brief Heartbeat interval for sending heartbeat keepalive
 	 */
@@ -503,6 +536,11 @@ class DPP_EXPORT discord_voice_client : public websocket_client
 	 */
 	snowflake server_id;
 
+	/**
+	 * @brief Moving average of the number of voice streams mixed per iteration; its reciprocal is used as the target gain.
+	 */
+	moving_averager moving_average;
+
 	/**
 	 * @brief Channel ID
 	 */
diff --git a/include/dpp/isa_detection.h b/include/dpp/isa_detection.h
new file mode 100644
index 0000000000..b771f11b4a
--- /dev/null
+++ b/include/dpp/isa_detection.h
@@ -0,0 +1,399 @@
+/************************************************************************************
+ *
+ * D++, A Lightweight C++ library for Discord
+ *
+ * Copyright 2021 Craig Edwards and D++ contributors
+ * (https://github.com/brainboxdotcc/DPP/graphs/contributors)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ ************************************************************************************/
+#pragma once
+
+#include <cstdint>
+#include <limits>
+
+/*
+ * The SIMD helpers below are only needed by the T_AVX / T_AVX2 / T_AVX512 mixers, and
+ * <immintrin.h> is an x86-only header, so it is pulled in solely when the build system
+ * selected one of those instruction sets. Note that GCC identifies itself with __GNUC__
+ * (__GNU__ is the GNU/Hurd operating system macro).
+ */
+#if (defined _MSC_VER || defined __GNUC__ || defined __clang__) && (defined T_AVX || defined T_AVX2 || defined T_AVX512)
+#include <immintrin.h>
+using avx_512_float = __m512;
+using avx_512_int = __m512i;
+using avx_2_float = __m256;
+using avx_2_int = __m256i;
+using avx_float = __m128;
+using avx_int = __m128i;
+
+/*
+ * @brief Reads one 32-bit lane out of a 128-bit integer register.
+ * @param value The 128-bit register holding four packed 32-bit integers.
+ * @param index The lane to read (0-3); any other value reads lane 0.
+ * @return The selected 32-bit integer.
+ */
+inline int32_t extract_int32_from_avx(const avx_int& value, int64_t index) {
+	/* The lane argument of _mm_extract_epi32 has to be a constant, hence one branch per lane. */
+	if (index == 1) {
+		return _mm_extract_epi32(value, 1);
+	}
+	if (index == 2) {
+		return _mm_extract_epi32(value, 2);
+	}
+	if (index == 3) {
+		return _mm_extract_epi32(value, 3);
+	}
+	/* Lane 0, and the fallback for every out-of-range index. */
+	return _mm_extract_epi32(value, 0);
+}
+
+/*
+ * @brief Reads one 32-bit lane out of a 256-bit integer register.
+ * @param value The 256-bit register holding eight packed 32-bit integers.
+ * @param index The lane to read (0-7); any other value reads lane 0.
+ * @return The selected 32-bit integer.
+ */
+inline int32_t extract_int32_from_avx2(const avx_2_int& value, int64_t index) {
+	/* The lane argument of _mm256_extract_epi32 has to be a constant, so dispatch on the runtime index. */
+	switch (index) {
+	case 1: return _mm256_extract_epi32(value, 1);
+	case 2: return _mm256_extract_epi32(value, 2);
+	case 3: return _mm256_extract_epi32(value, 3);
+	case 4: return _mm256_extract_epi32(value, 4);
+	case 5: return _mm256_extract_epi32(value, 5);
+	case 6: return _mm256_extract_epi32(value, 6);
+	case 7: return _mm256_extract_epi32(value, 7);
+	/* Lane 0, and the fallback for every out-of-range index. */
+	default: return _mm256_extract_epi32(value, 0);
+	}
+}
+
+/*
+ * @brief Reads one 32-bit lane out of a 512-bit integer register.
+ * @param value The 512-bit register holding sixteen packed 32-bit integers.
+ * @param index The lane to read (0-15); any other value reads lane 0, matching the
+ *              128-bit and 256-bit helpers.
+ * @return The selected 32-bit integer.
+ */
+inline int32_t extract_int32_from_avx512(const avx_512_int& value, int64_t index) {
+	/* A 512-bit register holds exactly sixteen 32-bit lanes. */
+	constexpr int64_t lane_count = 16;
+	/* _mm512_store_si512 requires a 64-byte aligned destination. */
+	alignas(64) int32_t lanes[lane_count];
+	_mm512_store_si512(lanes, value);
+	/* Guard the array access instead of reading out of bounds. */
+	return (index >= 0 && index < lane_count) ? lanes[index] : lanes[0];
+}
+#endif
+
+#ifdef max
+	#undef max
+#endif
+#ifdef min
+	#undef min
+#endif
+
+namespace dpp {
+
+#ifdef T_AVX512
+
+/**
+ * @brief A class for audio mixing operations using AVX-512 instructions.
+ */
+class audio_mixer {
+public:
+	/*
+	 * @brief The number of 32-bit values per CPU register.
+	 */
+	inline static constexpr int32_t byte_blocks_per_register{ 16 };
+
+	/*
+	 * @brief Stores the 32-bit lanes of a 512-bit integer register to a storage location.
+	 * @tparam value_type The target value type for storage; each lane is converted with static_cast.
+	 * @param values_to_store The 512-bit register containing the values to store.
+	 * @param storage_location Pointer to at least byte_blocks_per_register writable elements.
+	 */
+	template<typename value_type> inline static void store_values(const avx_512_int& values_to_store, value_type* storage_location) {
+		/* Spill the register once rather than once per extracted lane. */
+		alignas(64) int32_t lanes[byte_blocks_per_register];
+		_mm512_store_si512(lanes, values_to_store);
+		for (int64_t x = 0; x < byte_blocks_per_register; ++x) {
+			storage_location[x] = static_cast<value_type>(lanes[x]);
+		}
+	}
+
+	/**
+	 * @brief Converts byte_blocks_per_register consecutive values to float and loads them into a 512-bit register.
+	 * @tparam value_type The type of the values being gathered.
+	 * @param values Pointer to at least byte_blocks_per_register readable elements.
+	 * @return A register whose lane i holds values[i] converted to float.
+	 */
+	template<typename value_type> inline static avx_512_float gather_values(value_type* values) {
+		float newArray[byte_blocks_per_register]{};
+		for (size_t x = 0; x < byte_blocks_per_register; ++x) {
+			newArray[x] = static_cast<float>(values[x]);
+		}
+		return _mm512_loadu_ps(newArray);
+	}
+
+	/**
+	 * @brief Collect a single register worth of data from data_in, apply a ramped gain, and store the result in data_out.
+	 *        Element i is multiplied by (current_gain + increment * i) and clamped to the int16_t range.
+	 *        This version uses AVX-512 instructions.
+	 *
+	 * @param data_in Pointer to the input array of int32_t values.
+	 * @param data_out Pointer to the output array of int16_t values.
+	 * @param current_gain The gain applied to the first element.
+	 * @param increment The gain step between consecutive elements.
+	 */
+	inline static void collect_single_register(int32_t* data_in, int16_t* data_out, float current_gain, float increment) {
+		/* _mm512_set_ps takes its arguments from the highest lane down to lane 0, so the
+		 * ramp factors are listed in descending order to give lane i the factor i. */
+		avx_512_float current_samples_new{ _mm512_mul_ps(gather_values(data_in),
+			_mm512_add_ps(_mm512_set1_ps(current_gain),
+				_mm512_mul_ps(_mm512_set1_ps(increment),
+					_mm512_set_ps(15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f)))) };
+
+		/* Clamp to the int16_t range: non-negative lanes are capped at the maximum,
+		 * negative lanes at the minimum. */
+		current_samples_new = _mm512_mask_blend_ps(_mm512_cmp_ps_mask(current_samples_new, _mm512_set1_ps(0.0f), _CMP_GE_OQ),
+			_mm512_max_ps(current_samples_new, _mm512_set1_ps(static_cast<float>(std::numeric_limits<int16_t>::min()))),
+			_mm512_min_ps(current_samples_new, _mm512_set1_ps(static_cast<float>(std::numeric_limits<int16_t>::max()))));
+
+		store_values(_mm512_cvtps_epi32(current_samples_new), data_out);
+	}
+
+	/**
+	 * @brief Add a register worth of elements from decoded_data onto up_sampled_vector, in place.
+	 *        This version uses AVX-512 instructions.
+	 *
+	 * @param up_sampled_vector Pointer to the array of int32_t values that is read and updated.
+	 * @param decoded_data Pointer to the array of int16_t values to add.
+	 */
+	inline static void combine_samples(int32_t* up_sampled_vector, const int16_t* decoded_data) {
+		auto newValues{ _mm512_cvtps_epi32(_mm512_add_ps(gather_values(up_sampled_vector), gather_values(decoded_data))) };
+		store_values(newValues, up_sampled_vector);
+	}
+};
+
+#elif T_AVX2
+
+/**
+ * @brief A class for audio mixing operations using AVX2 instructions.
+ */
+class audio_mixer {
+public:
+	/*
+	 * @brief The number of 32-bit values per CPU register.
+	 */
+	inline static constexpr int32_t byte_blocks_per_register{ 8 };
+
+	/*
+	 * @brief Stores the 32-bit lanes of a 256-bit integer register to a storage location.
+	 * @tparam value_type The target value type for storage; each lane is converted with static_cast.
+	 * @param values_to_store The 256-bit register containing the values to store.
+	 * @param storage_location Pointer to at least byte_blocks_per_register writable elements.
+	 */
+	template<typename value_type> inline static void store_values(const avx_2_int& values_to_store, value_type* storage_location) {
+		for (int64_t x = 0; x < byte_blocks_per_register; ++x) {
+			storage_location[x] = static_cast<value_type>(extract_int32_from_avx2(values_to_store, x));
+		}
+	}
+
+	/**
+	 * @brief Converts byte_blocks_per_register consecutive values to float and loads them into a 256-bit register.
+	 * @tparam value_type The type of the values being gathered.
+	 * @param values Pointer to at least byte_blocks_per_register readable elements.
+	 * @return A register whose lane i holds values[i] converted to float.
+	 */
+	template<typename value_type> inline static avx_2_float gather_values(value_type* values) {
+		float newArray[byte_blocks_per_register]{};
+		for (size_t x = 0; x < byte_blocks_per_register; ++x) {
+			newArray[x] = static_cast<float>(values[x]);
+		}
+		return _mm256_loadu_ps(newArray);
+	}
+
+	/**
+	 * @brief Collect a single register worth of data from data_in, apply a ramped gain, and store the result in data_out.
+	 *        Element i is multiplied by (current_gain + increment * i) and clamped to the int16_t range.
+	 *        This version uses AVX2 instructions.
+	 *
+	 * @param data_in Pointer to the input array of int32_t values.
+	 * @param data_out Pointer to the output array of int16_t values.
+	 * @param current_gain The gain applied to the first element.
+	 * @param increment The gain step between consecutive elements.
+	 */
+	inline static void collect_single_register(int32_t* data_in, int16_t* data_out, float current_gain, float increment) {
+		/* _mm256_set_ps takes its arguments from the highest lane down to lane 0, so the
+		 * ramp factors are listed in descending order to give lane i the factor i. */
+		avx_2_float current_samples_new{ _mm256_mul_ps(gather_values(data_in),
+			_mm256_add_ps(_mm256_set1_ps(current_gain),
+				_mm256_mul_ps(_mm256_set1_ps(increment), _mm256_set_ps(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f)))) };
+
+		/* Clamp to the int16_t range: lanes whose comparison mask is set (value >= 0) take
+		 * the min-with-maximum result, all other lanes the max-with-minimum result. */
+		current_samples_new =
+			_mm256_blendv_ps(_mm256_max_ps(current_samples_new, _mm256_set1_ps(static_cast<float>(std::numeric_limits<int16_t>::min()))),
+				_mm256_min_ps(current_samples_new, _mm256_set1_ps(static_cast<float>(std::numeric_limits<int16_t>::max()))),
+				_mm256_cmp_ps(current_samples_new, _mm256_set1_ps(0.0f), _CMP_GE_OQ));
+
+		store_values(_mm256_cvtps_epi32(current_samples_new), data_out);
+	}
+
+	/**
+	 * @brief Add a register worth of elements from decoded_data onto up_sampled_vector, in place.
+	 *        This version uses AVX2 instructions.
+	 *
+	 * @param up_sampled_vector Pointer to the array of int32_t values that is read and updated.
+	 * @param decoded_data Pointer to the array of int16_t values to add.
+	 */
+	inline static void combine_samples(int32_t* up_sampled_vector, const int16_t* decoded_data) {
+		auto newValues{ _mm256_cvtps_epi32(_mm256_add_ps(gather_values(up_sampled_vector), gather_values(decoded_data))) };
+		store_values(newValues, up_sampled_vector);
+	}
+};
+
+#elif T_AVX
+
+/**
+ * @brief A class for audio mixing operations using AVX instructions.
+ */
+class audio_mixer {
+public:
+	/*
+	 * @brief The number of 32-bit values per CPU register.
+	 */
+	inline static constexpr int32_t byte_blocks_per_register{ 4 };
+
+	/*
+	 * @brief Stores the 32-bit lanes of a 128-bit integer register to a storage location.
+	 * @tparam value_type The target value type for storage; each lane is converted with static_cast.
+	 * @param values_to_store The 128-bit register containing the values to store.
+	 * @param storage_location Pointer to at least byte_blocks_per_register writable elements.
+	 */
+	template<typename value_type> inline static void store_values(const avx_int& values_to_store, value_type* storage_location) {
+		for (int64_t x = 0; x < byte_blocks_per_register; ++x) {
+			storage_location[x] = static_cast<value_type>(extract_int32_from_avx(values_to_store, x));
+		}
+	}
+
+	/**
+	 * @brief Converts byte_blocks_per_register consecutive values to float and loads them into a 128-bit register.
+	 * @tparam value_type The type of the values being gathered.
+	 * @param values Pointer to at least byte_blocks_per_register readable elements.
+	 * @return A register whose lane i holds values[i] converted to float.
+	 */
+	template<typename value_type> inline static avx_float gather_values(value_type* values) {
+		float newArray[byte_blocks_per_register]{};
+		for (size_t x = 0; x < byte_blocks_per_register; ++x) {
+			newArray[x] = static_cast<float>(values[x]);
+		}
+		return _mm_loadu_ps(newArray);
+	}
+
+	/**
+	 * @brief Collect a single register worth of data from data_in, apply a ramped gain, and store the result in data_out.
+	 *        Element i is multiplied by (current_gain + increment * i) and clamped to the int16_t range.
+	 *        This version uses AVX instructions.
+	 *
+	 * @param data_in Pointer to the input array of int32_t values.
+	 * @param data_out Pointer to the output array of int16_t values.
+	 * @param current_gain The gain applied to the first element.
+	 * @param increment The gain step between consecutive elements.
+	 */
+	inline static void collect_single_register(int32_t* data_in, int16_t* data_out, float current_gain, float increment) {
+		/* _mm_set_ps takes its arguments from the highest lane down to lane 0, so the
+		 * ramp factors are listed in descending order to give lane i the factor i. */
+		avx_float current_samples_new{ _mm_mul_ps(gather_values(data_in),
+			_mm_add_ps(_mm_set1_ps(current_gain), _mm_mul_ps(_mm_set1_ps(increment), _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f)))) };
+
+		/* Clamp to the int16_t range: lanes whose comparison mask is set (value >= 0) take
+		 * the min-with-maximum result, all other lanes the max-with-minimum result. */
+		current_samples_new = _mm_blendv_ps(_mm_max_ps(current_samples_new, _mm_set1_ps(static_cast<float>(std::numeric_limits<int16_t>::min()))),
+			_mm_min_ps(current_samples_new, _mm_set1_ps(static_cast<float>(std::numeric_limits<int16_t>::max()))),
+			_mm_cmp_ps(current_samples_new, _mm_set1_ps(0.0f), _CMP_GE_OQ));
+
+		store_values(_mm_cvtps_epi32(current_samples_new), data_out);
+	}
+
+	/**
+	 * @brief Add a register worth of elements from decoded_data onto up_sampled_vector, in place.
+	 *        This version uses AVX instructions.
+	 *
+	 * @param up_sampled_vector Pointer to the array of int32_t values that is read and updated.
+	 * @param decoded_data Pointer to the array of int16_t values to add.
+	 */
+	inline static void combine_samples(int32_t* up_sampled_vector, const int16_t* decoded_data) {
+		auto newValues{ _mm_cvtps_epi32(_mm_add_ps(gather_values(up_sampled_vector), gather_values(decoded_data))) };
+		store_values(newValues, up_sampled_vector);
+	}
+};
+
+#else 
+
+/**
+ * @brief A class for audio mixing operations using plain scalar code (no SIMD instructions).
+ */
+class audio_mixer {
+public:
+	/*
+	 * @brief The number of 32-bit values handled per call.
+	 */
+	inline static constexpr int32_t byte_blocks_per_register{ 2 };
+
+	/**
+	 * @brief Scale byte_blocks_per_register samples from data_in by a ramped gain and write them to data_out.
+	 *        Sample i is multiplied by (current_gain + increment * i) and clamped to the int16_t range.
+	 *
+	 * @param data_in Pointer to the input array of int32_t values.
+	 * @param data_out Pointer to the output array of int16_t values.
+	 * @param current_gain The gain applied to the first sample.
+	 * @param increment The gain step between consecutive samples.
+	 */
+	inline static void collect_single_register(int32_t* data_in, int16_t* data_out, float current_gain, float increment) {
+		for (uint64_t lane = 0; lane < byte_blocks_per_register; ++lane) {
+			auto ramp_offset = increment * lane;
+			auto lane_gain = current_gain + ramp_offset;
+			auto scaled = data_in[lane] * lane_gain;
+			/* Saturate instead of wrapping when the scaled sample leaves the 16-bit range. */
+			scaled = (scaled >= std::numeric_limits<int16_t>::max())
+				? std::numeric_limits<int16_t>::max()
+				: ((scaled <= std::numeric_limits<int16_t>::min()) ? std::numeric_limits<int16_t>::min() : scaled);
+			data_out[lane] = static_cast<int16_t>(scaled);
+		}
+	}
+
+	/**
+	 * @brief Add byte_blocks_per_register samples from decoded_data onto up_sampled_vector, in place.
+	 *
+	 * @param up_sampled_vector Pointer to the array of int32_t values that is read and updated.
+	 * @param decoded_data Pointer to the array of int16_t values to add.
+	 */
+	inline static void combine_samples(int32_t* up_sampled_vector, const int16_t* decoded_data) {
+		for (uint64_t lane = 0; lane < byte_blocks_per_register; ++lane) {
+			const int32_t widened = static_cast<int32_t>(decoded_data[lane]);
+			up_sampled_vector[lane] = up_sampled_vector[lane] + widened;
+		}
+	}
+};
+
+/**@}*/
+
+#endif
+
+}
diff --git a/library-vcpkg/CMakeLists.txt b/library-vcpkg/CMakeLists.txt
index ecb4e7643a..a33b18ccc0 100644
--- a/library-vcpkg/CMakeLists.txt
+++ b/library-vcpkg/CMakeLists.txt
@@ -11,9 +11,16 @@ endif()
 
 add_library("${PROJECT_NAME}::${LIB_NAME}" ALIAS "${LIB_NAME}")
 
+if(NOT DEFINED AVX_TYPE)
+	include("${CMAKE_CURRENT_SOURCE_DIR}/../cmake/DetectArchitecture.cmake")
+endif()
+
 target_compile_definitions(
 	"${LIB_NAME}" PUBLIC
 	"DPP_BUILD"
+	# Only preprocessor symbols belong here. Compiler switches such as /sdl or /O2
+	# are options, not definitions, and are handled by target_compile_options().
+	# AVX_TYPE is only set when an instruction-set probe succeeded; an empty item
+	# is ignored by target_compile_definitions().
+	"${AVX_TYPE}"
 )
 
 target_compile_options(
@@ -23,6 +30,7 @@ target_compile_options(
 	"$<$<PLATFORM_ID:Windows>:$<$<CONFIG:Release>:/std:c++17;/O2;/Oi;/Oy;/GL;/Gy;/sdl;/MP;/DFD_SETSIZE=1024;/Zc:preprocessor>>"
 	"$<$<PLATFORM_ID:Linux>:$<$<CONFIG:Debug>:-std=c++17;-Wall;-Wempty-body;-Wno-psabi;-Wunknown-pragmas;-Wignored-qualifiers;-Wimplicit-fallthrough;-Wmissing-field-initializers;-Wsign-compare;-Wtype-limits;-Wuninitialized;-Wshift-negative-value;-pthread;-g;-Og;-fPIC>>"
 	"$<$<PLATFORM_ID:Linux>:$<$<CONFIG:Release>:-std=c++17;-Wall;-Wempty-body;-Wno-psabi;-Wunknown-pragmas;-Wignored-qualifiers;-Wimplicit-fallthrough;-Wmissing-field-initializers;-Wsign-compare;-Wtype-limits;-Wuninitialized;-Wshift-negative-value;-pthread;-O3;-fPIC>>"
+	"${AVX_FLAG}"
 )
 
 target_compile_features(
diff --git a/library/CMakeLists.txt b/library/CMakeLists.txt
index e74c4e33bb..bb078a462e 100644
--- a/library/CMakeLists.txt
+++ b/library/CMakeLists.txt
@@ -20,6 +20,10 @@ if (HAVE_PTHREAD_SETNAME_NP)
 	}" HAVE_TWO_PARAMETER_SETNAME_NP)
 endif()
 
+if(NOT DEFINED AVX_TYPE)
+	include("${CMAKE_CURRENT_SOURCE_DIR}/../cmake/DetectArchitecture.cmake")
+endif()
+
 add_compile_definitions(DPP_OS=${CMAKE_SYSTEM_NAME})
 
 if(WIN32 AND NOT MINGW)
@@ -49,11 +53,14 @@ if(WIN32 AND NOT MINGW)
 
 		include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../win32/include")
 
+		add_compile_options("/bigobj")
 		add_compile_definitions(OPENSSL_SYS_WIN32)
 		add_compile_definitions(_WINSOCK_DEPRECATED_NO_WARNINGS)
 		add_compile_definitions(WIN32_LEAN_AND_MEAN)
 		add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
 		add_compile_definitions(_CRT_NONSTDC_NO_DEPRECATE)
+		add_compile_definitions("${AVX_TYPE}")
+		add_compile_options("${AVX_FLAG}")
 
 	endif()
 endif()
diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp
index 7d9ff7ccc5..3cadb62646 100644
--- a/src/dpp/discordvoiceclient.cpp
+++ b/src/dpp/discordvoiceclient.cpp
@@ -54,6 +54,30 @@
 #endif
 
 namespace dpp {
+/* Creates an averager with an empty window whose length is collection_count_new. */
+moving_averager::moving_averager(uint64_t collection_count_new) : collectionCount{ collection_count_new } {
+}
+
+/*
+ * Pushes `value` as the newest sample and trims the window so that it holds at
+ * most collectionCount samples. Returns a copy of the updated averager.
+ */
+moving_averager moving_averager::operator+=(int64_t value) {
+	/* A window length of zero would leave the averager permanently empty (and its
+	 * average permanently 0), so always retain at least the newest sample. */
+	const uint64_t capacity = collectionCount > 0 ? collectionCount : 1;
+	values.emplace_front(value);
+	/* Evict from the back (oldest) only once the window actually exceeds its capacity. */
+	while (values.size() > capacity) {
+		values.pop_back();
+	}
+	return *this;
+}
+
+/* Yields the arithmetic mean of the stored samples, or 0.0f when there are none. */
+moving_averager::operator float() {
+	if (values.empty()) {
+		return 0.0f;
+	}
+	float total{};
+	for (const auto& sample : values) {
+		total += static_cast<float>(sample);
+	}
+	return total / static_cast<float>(values.size());
+}
 
 [[maybe_unused]]
 constexpr int32_t opus_sample_rate_hz = 48000;
@@ -122,9 +146,10 @@ size_t audio_mix(discord_voice_client& client, opus_int32* pcm_mix, const opus_i
 		return 0;
 	}
 	/* We must upsample the data to 32 bits wide, otherwise we could overflow */
-	for (opus_int32 v = 0; v < samples * opus_channel_count; ++v) {
-		pcm_mix[v] += pcm[v];
+	for (opus_int32 v = 0; v < samples * opus_channel_count / 16; ++v) {
+		audio_mixer::combine_samples(pcm_mix, pcm);
 	}
+	client.moving_average += park_count;
 	max_samples = (std::max)(samples, max_samples);
 	return park_count + 1;
 }
@@ -200,6 +225,7 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 		opus_int32 pcm_mix[23040] = { 0 };
 		size_t park_count = 0;
 		int max_samples = 0;
+		int samples = 0;
 
 		for (auto& d : flush_data) {
 			if (!d.decoder) {
@@ -232,12 +258,12 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 					if (vr.audio_data.length() > 0x7FFFFFFF) {
 						throw dpp::length_exception("audio_data > 2GB! This should never happen!");
 					}
-					if (int samples = opus_decode(d.decoder.get(), vr.audio_data.data(),
+					if (samples = opus_decode(d.decoder.get(), vr.audio_data.data(),
 						static_cast<opus_int32>(vr.audio_data.length() & 0x7FFFFFFF), pcm, 5760, 0);
 					    samples >= 0) {
 						vr.reassign(&client, d.user_id, reinterpret_cast<uint8_t*>(pcm),
 							samples * opus_channel_count * sizeof(opus_int16));
-
+						client.end_gain = 1.0f / client.moving_average;
 						park_count = audio_mix(client, pcm_mix, pcm, park_count, samples, max_samples);
 						client.creator->on_voice_receive.call(vr);
 					}
@@ -249,11 +275,16 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 
 		/* If combined receive is bound, dispatch it */
 		if (park_count) {
+			
 			/* Downsample the 32 bit samples back to 16 bit */
 			opus_int16 pcm_downsample[23040] = { 0 };
-			for (int v = 0; v < max_samples * opus_channel_count; ++v) {
-				pcm_downsample[v] = (opus_int16)(pcm_mix[v] / park_count);
+			client.increment = (client.end_gain - client.current_gain) / static_cast<float>(samples);
+			for (int64_t x = 0; x < samples / audio_mixer::byte_blocks_per_register; ++x) {
+				audio_mixer::collect_single_register(pcm_mix + (x * audio_mixer::byte_blocks_per_register),
+					pcm_downsample + (x * audio_mixer::byte_blocks_per_register), client.current_gain, client.increment);
+				client.current_gain += client.increment * static_cast<float>(audio_mixer::byte_blocks_per_register);
 			}
+
 			voice_receive_t vr(nullptr, "", &client, 0, reinterpret_cast<uint8_t*>(pcm_downsample),
 				max_samples * opus_channel_count * sizeof(opus_int16));