Authored new BitstreamGenerator

Soon to be absorbed by new Application class
tornupnegatives · Mar 15, 2024 · 485ea48 · 485ea48
1 parent d14b4e3
commit 485ea48
Show file tree

Hide file tree

Showing 4 changed files with 218 additions and 175 deletions.
diff --git a/src/bitstream/BitstreamGenerator.cpp b/src/bitstream/BitstreamGenerator.cpp
@@ -1,209 +1,191 @@
-// Copyright (C) 2023 Joseph Bellahcen <[email protected]>
+// Copyright (C) 2022-2024 Joseph Bellahcen <[email protected]>
 
 #include "bitstream/BitstreamGenerator.hpp"
 
 #include <filesystem>
 #include <fstream>
 #include <iostream>
 #include <string>
+#include <tuple>
+#include <utility>
 #include <vector>
 
+#include "analysis/Autocorrelation.hpp"
+#include "analysis/LinearPredictor.hpp"
+#include "analysis/PitchEstimator.hpp"
 #include "audio/AudioBuffer.hpp"
 #include "audio/AudioFilter.hpp"
+#include "bitstream/BitstreamGeneratorParameters.hpp"
 #include "encoding/Frame.hpp"
 #include "encoding/FrameEncoder.hpp"
 #include "encoding/FramePostprocessor.hpp"
-#include "analysis/Autocorrelation.hpp"
-#include "analysis/LinearPredictor.hpp"
-#include "analysis/PitchEstimator.hpp"
 
 namespace tms_express {
 
-BitstreamGenerator::BitstreamGenerator(float window_width_ms,
-    int highpass_cutoff_hz, int lowpass_cutoff_hz, float pre_emphasis_alpha,
-    EncoderStyle style, bool include_stop_frame, int gain_shift,
-    float max_voiced_gain_db, float max_unvoiced_gain_db,
-    bool detect_repeat_frames, int max_pitch_hz, int min_pitch_hz) {
-    //
-    window_width_ms_ = window_width_ms;
-    highpass_cutoff_hz_ = highpass_cutoff_hz;
-    lowpass_cutoff_hz_ = lowpass_cutoff_hz;
-    pre_emphasis_alpha_ = pre_emphasis_alpha;
-    style_ = style;
-    include_stop_frame_ = include_stop_frame;
-    gain_shift_ = gain_shift;
-    main_voiced_gain_db_ = max_voiced_gain_db;
-    max_unvoiced_gain_db_ = max_unvoiced_gain_db;
-    detect_repeat_frames_ = detect_repeat_frames;
-    max_pitch_hz_ = max_pitch_hz;
-    min_pitch_hz_ = min_pitch_hz;
-}
+///////////////////////////////////////////////////////////////////////////////
+// Initializers ///////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
 
-void BitstreamGenerator::encode(const std::string &audio_input_path,
-    const std::string &bitstream_name, const std::string &output_path) const {
-    // Perform LPC analysis and convert audio data to a bitstream
-    auto frames = generateFrames(audio_input_path);
-    auto bitstream = serializeFrames(frames, bitstream_name);
-
-    // Write bitstream to disk
-    std::ofstream lpcOut;
-    lpcOut.open(output_path);
-    lpcOut << bitstream;
-    lpcOut.close();
-}
+// Stores the shared parameters. The input path is not set here; it starts
+// out empty and is assigned later through setInputPath().
+BitstreamGenerator::BitstreamGenerator(SharedParameters params)
+    : shared_params_(params) {}
 
-void BitstreamGenerator::encodeBatch(
-    const std::vector<std::string> &audio_input_paths,
-    const std::vector<std::string> &bitstream_names,
-    const std::string &output_path) const {
-    std::string in_path, filename;
+///////////////////////////////////////////////////////////////////////////////
+// Analysis ///////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
 
-    if (style_ == ENCODERSTYLE_ASCII) {
-        // Create directory to populate with encoded files
-        std::filesystem::create_directory(output_path);
+// Estimates one pitch period per segment of the audio at input_path_.
+//
+// params supplies the filter settings and the pitch search range. Returns a
+// table holding one pitch period estimate per segment of the buffer.
+std::vector<int> BitstreamGenerator::analyzeLowerTract(
+    LowerVocalTractParameters params) {
+    // NOTE(review): the result of Create() is dereferenced unchecked; the
+    // implementation this replaces carried a TODO to handle nullptr
+    auto buffer =
+        *AudioBuffer::Create(input_path_, shared_params_.sample_rate_hz,
+                             shared_params_.window_width_ms);
 
-        for (int i = 0; i < static_cast<int>(audio_input_paths.size()); i++) {
-            in_path = audio_input_paths[i];
-            filename = bitstream_names[i];
-
-            std::filesystem::path out_path = output_path;
-            out_path /= (filename + ".lpc");
-
-            encode(in_path, filename, out_path.string());
-        }
-    } else {
-        std::ofstream lpcOut;
-        lpcOut.open(output_path);
+    // Apply preprocessing
+    //
+    // All three filters run with the caller's parameters, though pitch is a
+    // low-frequency component and likely benefits only from the lowpass filter
+    auto preprocessor = AudioFilter();
+    preprocessor.applyPreEmphasis(buffer, params.pre_emphasis_alpha);
+    preprocessor.applyHighpass(buffer, params.highpass_cutoff_hz);
+    preprocessor.applyLowpass(buffer, params.lowpass_cutoff_hz);
 
-        for (int i = 0; i < static_cast<int>(audio_input_paths.size()); i++) {
-            in_path = audio_input_paths[i];
-            filename = bitstream_names[i];
+    // Extract buffer metadata
+    const auto n_segments = buffer.getNSegments();
+    const auto sample_rate = buffer.getSampleRateHz();
 
-            auto frames = generateFrames(in_path);
-            auto bitstream = serializeFrames(frames, filename);
+    // Initialize analysis objects and data structures
+    //
+    // The estimator takes the minimum pitch bound before the maximum
+    auto pitch_estimator =
+        PitchEstimator(sample_rate, params.min_pitch_hz, params.max_pitch_hz);
+    auto pitch_table = std::vector<int>(n_segments);
 
-            lpcOut << bitstream << std::endl;
-        }
+    for (int i = 0; i < n_segments; i++) {
+        auto segment = buffer.getSegment(i);
+        auto acf = tms_express::Autocorrelation(segment);
+        auto pitch_period = pitch_estimator.estimatePeriod(acf);
 
-        lpcOut.close();
+        pitch_table.at(i) = (pitch_period);
     }
-}
 
-std::vector<Frame> BitstreamGenerator::generateFrames(
-    const std::string &path) const {
-    // Mix audio to 8kHz mono and store in a segmented buffer
-    // TODO(Joseph Bellahcen): Handle nullptr
-    auto lpc_buffer = *AudioBuffer::Create(path, 8000, window_width_ms_);
+    return pitch_table;
+}
 
-    // Copy the buffer so that upper and lower vocal tract analysis may occur
-    // separately
-    auto pitch_buffer = lpc_buffer.copy();
+// Computes LPC reflector coefficients and the predictor gain for each segment
+// of the audio at input_path_.
+//
+// params supplies the pre-emphasis, highpass and lowpass filter settings.
+// Returns {coefficient table, gain table}, each with one entry per segment.
+std::tuple<std::vector<std::vector<float>>, std::vector<float>>
+BitstreamGenerator::analyzeUpperTract(UpperVocalTractParameters params) {
+    // NOTE(review): the result of Create() is dereferenced unchecked; the
+    // implementation this replaces carried a TODO to handle nullptr
+    auto buffer =
+        *AudioBuffer::Create(input_path_, shared_params_.sample_rate_hz,
+                             shared_params_.window_width_ms);
 
     // Apply preprocessing
-    //
-    // The pitch buffer will ONLY be lowpass-filtered, as pitch is a
-    // low-frequency component of the signal. Neither highpass filtering nor
-    // pre-emphasis, which exaggerate high-frequency components, will improve
-    // pitch estimation
     auto preprocessor = AudioFilter();
-    preprocessor.applyPreEmphasis(lpc_buffer, pre_emphasis_alpha_);
-    preprocessor.applyHighpass(lpc_buffer, highpass_cutoff_hz_);
-    preprocessor.applyLowpass(pitch_buffer, lowpass_cutoff_hz_);
+    preprocessor.applyPreEmphasis(buffer, params.pre_emphasis_alpha);
+    preprocessor.applyHighpass(buffer, params.highpass_cutoff_hz);
+    preprocessor.applyLowpass(buffer, params.lowpass_cutoff_hz);
 
     // Extract buffer metadata
-    //
-    // Only the LPC buffer is queried for metadata, since it will have the same
-    // number of samples as the pitch buffer. The sample rate of the buffer is
-    // extracted despite being known, as future iterations of TMS Express may
-    // support encoding 10kHz/variable sample rate audio for the TMS5200C
-    auto n_segments = lpc_buffer.getNSegments();
-    auto sample_rate = lpc_buffer.getSampleRateHz();
+    const auto n_segments = buffer.getNSegments();
 
     // Initialize analysis objects and data structures
     auto linear_predictor = LinearPredictor();
-    auto pitch_estimator = PitchEstimator(sample_rate, min_pitch_hz_,
-        max_pitch_hz_);
-    auto frames = std::vector<Frame>();
+    auto coeff_table = std::vector<std::vector<float>>(n_segments);
+    auto gain_table = std::vector<float>(n_segments);
 
     for (int i = 0; i < n_segments; i++) {
-        // Get segment for frame
-        auto pitch_segment = pitch_buffer.getSegment(i);
-        auto lpc_segment = lpc_buffer.getSegment(i);
+        auto segment = buffer.getSegment(i);
 
         // Apply a window function to the segment to smoothen its boundaries
         //
         // Because information about the transition between adjacent frames is
         // lost during segmentation, a window will help produce smoother results
-        preprocessor.applyHammingWindow(lpc_segment);
+        preprocessor.applyHammingWindow(segment);
 
         // Compute the autocorrelation of each segment, which serves as the
         // basis of all analysis
-        auto lpc_acf = tms_express::Autocorrelation(lpc_segment);
-        auto pitch_acf = tms_express::Autocorrelation(pitch_segment);
+        auto acf = tms_express::Autocorrelation(segment);
 
         // Extract LPC reflector coefficients and compute the predictor gain
-        auto coeffs = linear_predictor.computeCoeffs(lpc_acf);
+        auto coeffs = linear_predictor.computeCoeffs(acf);
+        // NOTE(review): gain() presumably reports on the computeCoeffs() call
+        // above, so the two calls must stay in this order - confirm
         auto gain = linear_predictor.gain();
 
-        // Estimate pitch
-        auto pitch_period = pitch_estimator.estimatePeriod(pitch_acf);
+        coeff_table.at(i) = coeffs;
+        gain_table.at(i) = gain;
+    }
 
-        // Decide whether the segment is voiced or unvoiced
-        auto segment_is_voiced = coeffs[0] < 0;
+    return {coeff_table, gain_table};
+}
+
+// Builds the voicing table: entry i is true when the first coefficient of
+// coeff_table[i] is negative. The bounds-checked at() throws
+// std::out_of_range if any coefficient vector is empty.
+std::vector<bool> BitstreamGenerator::estimateVoicing(
+    const std::vector<std::vector<float>>& coeff_table) {
+    auto voicing_table = std::vector<bool>(coeff_table.size());
 
-        // Store parameters in a Frame object
-        auto frame = Frame(pitch_period, segment_is_voiced, gain, coeffs);
-        frames.push_back(frame);
+    for (int i = 0; i < static_cast<int>(coeff_table.size()); i++) {
+        voicing_table.at(i) = coeff_table.at(i).at(0) < 0;
     }
 
-    // Apply post-processing
-    auto post_processor = FramePostprocessor(&frames, main_voiced_gain_db_,
-        max_unvoiced_gain_db_);
+    return voicing_table;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Encoding ///////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+
+// Runs the frame post-processor over frame_table, which is passed by pointer
+// (nothing is returned): gain normalization, then a gain shift of
+// params.gain_shift, then repeat-frame detection when
+// params.detect_repeat_frames is set.
+void BitstreamGenerator::applyPostProcessing(std::vector<Frame>* frame_table,
+                                             PostProcessorParameters params) {
+    auto post_processor = FramePostprocessor(
+        frame_table, params.max_voiced_gain_db, params.max_unvoiced_gain_db);
     post_processor.normalizeGain();
-    post_processor.shiftGain(gain_shift_);
+    post_processor.shiftGain(params.gain_shift);
 
-    if (detect_repeat_frames_) {
+    if (params.detect_repeat_frames) {
         post_processor.detectRepeatFrames();
     }
-
-    return frames;
 }
 
+// Serializes frame_table in the format selected by params.encoder_style.
+//
+// name becomes the array identifier in the C and Arduino formats and is unused
+// for ASCII and JSON. params.include_stop_frame is forwarded to toHex() and is
+// not consulted for JSON. An encoder_style matching none of the cases below
+// yields an empty string.
 std::string BitstreamGenerator::serializeFrames(
-    const std::vector<Frame>& frames, const std::string &filename) const {
-    //
-    // Encode frames to hex bitstreams
-    auto encoder = FrameEncoder(frames, style_ != ENCODERSTYLE_ASCII);
+    const std::string& name, const std::vector<Frame>& frame_table,
+    BitstreamParameters params) {
+    // NOTE(review): the second argument is true for every style except ASCII;
+    // its meaning is defined by FrameEncoder - confirm against that class
+    auto encoder =
+        FrameEncoder(frame_table, params.encoder_style != ENCODER_STYLE_ASCII);
     std::string bitstream;
 
-    switch (style_) {
-        case ENCODERSTYLE_ASCII:
-            bitstream = encoder.toHex(include_stop_frame_);
+    switch (params.encoder_style) {
+        case ENCODER_STYLE_ASCII:
+            bitstream = encoder.toHex(params.include_stop_frame);
             break;
 
-        case ENCODERSTYLE_C:
+        case ENCODER_STYLE_C:
             // C-style bitstreams are headers which contain a byte array
             // Format: const int bitstream_name [] = {<values>};
-            bitstream = encoder.toHex(include_stop_frame_);
-            bitstream = "const int " + filename + "[] = {" + bitstream + "};\n";
+            bitstream = encoder.toHex(params.include_stop_frame);
+            bitstream = "const int " + name + "[] = {" + bitstream + "};\n";
             break;
 
-        case ENCODERSTYLE_ARDUINO:
+        case ENCODER_STYLE_C_ARDUINO:
             // Arduino-style bitstreams are C-style bitstreams which include the
             // Arduino header and PROGMEM keyword. This is for compatibility
             // with the Arduino Talkie library
             // Format: extern const uint8_t name [] PROGMEM = {<values>};
-            bitstream = encoder.toHex(include_stop_frame_);
-            bitstream = "extern const uint8_t " + filename + "[] PROGMEM = {" +
-                bitstream + "};\n";
+            bitstream = encoder.toHex(params.include_stop_frame);
+            bitstream = "extern const uint8_t " + name + "[] PROGMEM = {" +
+                        bitstream + "};\n";
             break;
 
-        case ENCODERSTYLE_JSON:
+        case ENCODER_STYLE_JSON:
             bitstream = encoder.toJSON();
             break;
     }
 
     return bitstream;
 }
 
+///////////////////////////////////////////////////////////////////////////////
+// Accessors //////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+
+// Returns a copy of the input path that analyzeLowerTract() and
+// analyzeUpperTract() hand to AudioBuffer::Create().
+std::string BitstreamGenerator::getInputPath() const {
+    return input_path_;
+}
+
+// Replaces the input path used by subsequent analyzeLowerTract() and
+// analyzeUpperTract() calls. The path is stored as given, without validation.
+void BitstreamGenerator::setInputPath(const std::string& path) {
+    input_path_ = path;
+}
+
 };  // namespace tms_express
diff --git a/src/bitstream/BitstreamGenerator.hpp b/src/bitstream/BitstreamGenerator.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023 Joseph Bellahcen <[email protected]>
+// Copyright (C) 2022-2024 Joseph Bellahcen <[email protected]>
 
 #ifndef TMS_EXPRESS_SRC_BITSTREAM_BITSTREAMGENERATOR_HPP_
 #define TMS_EXPRESS_SRC_BITSTREAM_BITSTREAMGENERATOR_HPP_
@@ -19,7 +19,7 @@ class BitstreamGenerator {
     // Initializers ///////////////////////////////////////////////////////////
     ///////////////////////////////////////////////////////////////////////////
 
-    explicit BitstreamGenerator(std::string input, SharedParameters params);
+    explicit BitstreamGenerator(SharedParameters params);
 
     ///////////////////////////////////////////////////////////////////////////
     // Analysis ///////////////////////////////////////////////////////////////
@@ -43,28 +43,47 @@ class BitstreamGenerator {
     analyzeUpperTract(UpperVocalTractParameters params);
 
     /// @brief Categorizes each segment as voiced or unvoiced
-    /// @param coeffs LPC reflector coefficients
+    /// @param coeff_table LPC reflector coefficients
     /// @return Voicing table, with one voicing estimate per segment. A voicing
     ///         estimate of `true` corresponds to a voiced segment (vowel
     ///         sound), while an estimate of `false` corresponds to an unvoiced
     ///         segment (consonant sound)
     std::vector<bool> estimateVoicing(
-        const std::vector<std::vector<float>>& coeffs);
+        const std::vector<std::vector<float>>& coeff_table);
 
     ///////////////////////////////////////////////////////////////////////////
     // Encoding ///////////////////////////////////////////////////////////////
     ///////////////////////////////////////////////////////////////////////////
 
     /// @brief Post-processes frame table to apply analysis-independent edits
     /// @param frame_table Vector of Frame objects representing input audio
-    void applyPostProcessing(const std::vector<Frame>& frame_table);
+    /// @param params Post-processor parameters (gain limits, gain shift and
+    ///               whether to detect repeat frames)
+    void applyPostProcessing(std::vector<Frame>* frame_table,
+                             PostProcessorParameters params);
 
     /// @brief Converts frame table to bitstream
+    /// @param name Name representing bitstream
     /// @param frame_table Vector of Frame objects representing input audio
     /// @param params Bitstream parameters
     /// @return Serialized frame table, as a bitstream string
-    std::string serializeFrames(const std::vector<Frame>& frame_table,
+    std::string serializeFrames(const std::string& name,
+                                const std::vector<Frame>& frame_table,
                                 BitstreamParameters params);
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Accessors //////////////////////////////////////////////////////////////
+    ///////////////////////////////////////////////////////////////////////////
+
+    /// @brief Accesses the input path used by the analysis methods
+    /// @return Copy of the currently stored input path
+    std::string getInputPath() const;
+
+    /// @brief Sets the input path used by subsequent analysis calls
+    /// @param path New input path, stored as given
+    void setInputPath(const std::string& path);
+
+ private:
+    ///////////////////////////////////////////////////////////////////////////
+    // Members ////////////////////////////////////////////////////////////////
+    ///////////////////////////////////////////////////////////////////////////
+
+    std::string input_path_;
+    SharedParameters shared_params_;
 };
 
 };  // namespace tms_express