Skip to content

Commit

Permalink
Authored new BitstreamGenerator
Browse files Browse the repository at this point in the history
Soon to be absorbed by new Application class
  • Loading branch information
tornupnegatives committed Mar 15, 2024
1 parent d14b4e3 commit 485ea48
Show file tree
Hide file tree
Showing 4 changed files with 218 additions and 175 deletions.
234 changes: 108 additions & 126 deletions src/bitstream/BitstreamGenerator.cpp
Original file line number Diff line number Diff line change
@@ -1,209 +1,191 @@
// Copyright (C) 2023 Joseph Bellahcen <[email protected]>
// Copyright (C) 2022-2024 Joseph Bellahcen <[email protected]>

#include "bitstream/BitstreamGenerator.hpp"

#include <filesystem>
#include <fstream>
#include <iostream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "analysis/Autocorrelation.hpp"
#include "analysis/LinearPredictor.hpp"
#include "analysis/PitchEstimator.hpp"
#include "audio/AudioBuffer.hpp"
#include "audio/AudioFilter.hpp"
#include "bitstream/BitstreamGeneratorParameters.hpp"
#include "encoding/Frame.hpp"
#include "encoding/FrameEncoder.hpp"
#include "encoding/FramePostprocessor.hpp"
#include "analysis/Autocorrelation.hpp"
#include "analysis/LinearPredictor.hpp"
#include "analysis/PitchEstimator.hpp"

namespace tms_express {

/// @brief Creates a generator, storing every argument in its corresponding
///         member
/// @note max_voiced_gain_db is stored in the member named
///         main_voiced_gain_db_
BitstreamGenerator::BitstreamGenerator(float window_width_ms,
    int highpass_cutoff_hz, int lowpass_cutoff_hz, float pre_emphasis_alpha,
    EncoderStyle style, bool include_stop_frame, int gain_shift,
    float max_voiced_gain_db, float max_unvoiced_gain_db,
    bool detect_repeat_frames, int max_pitch_hz, int min_pitch_hz)
    : window_width_ms_(window_width_ms),
      highpass_cutoff_hz_(highpass_cutoff_hz),
      lowpass_cutoff_hz_(lowpass_cutoff_hz),
      pre_emphasis_alpha_(pre_emphasis_alpha),
      style_(style),
      include_stop_frame_(include_stop_frame),
      gain_shift_(gain_shift),
      main_voiced_gain_db_(max_voiced_gain_db),
      max_unvoiced_gain_db_(max_unvoiced_gain_db),
      detect_repeat_frames_(detect_repeat_frames),
      max_pitch_hz_(max_pitch_hz),
      min_pitch_hz_(min_pitch_hz) {}
///////////////////////////////////////////////////////////////////////////////
// Initializers ///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

/// @brief Encodes a single audio file and writes its bitstream to disk
/// @param audio_input_path Path forwarded to generateFrames()
/// @param bitstream_name Name forwarded to serializeFrames()
/// @param output_path Path of the file which receives the bitstream
void BitstreamGenerator::encode(const std::string &audio_input_path,
    const std::string &bitstream_name, const std::string &output_path) const {
  // Run the analysis first, then serialize the resulting frames. Both steps
  // complete before the output file is touched
  const auto bitstream =
      serializeFrames(generateFrames(audio_input_path), bitstream_name);

  // The stream is opened on construction and closed when it leaves scope
  std::ofstream output_file(output_path);
  output_file << bitstream;
}
BitstreamGenerator::BitstreamGenerator(SharedParameters params)
: shared_params_(params) {}

void BitstreamGenerator::encodeBatch(
const std::vector<std::string> &audio_input_paths,
const std::vector<std::string> &bitstream_names,
const std::string &output_path) const {
std::string in_path, filename;
///////////////////////////////////////////////////////////////////////////////
// Analysis ///////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

if (style_ == ENCODERSTYLE_ASCII) {
// Create directory to populate with encoded files
std::filesystem::create_directory(output_path);
std::vector<int> BitstreamGenerator::analyzeLowerTract(
LowerVocalTractParameters params) {
auto buffer =
*AudioBuffer::Create(input_path_, shared_params_.sample_rate_hz,
shared_params_.window_width_ms);

for (int i = 0; i < static_cast<int>(audio_input_paths.size()); i++) {
in_path = audio_input_paths[i];
filename = bitstream_names[i];

std::filesystem::path out_path = output_path;
out_path /= (filename + ".lpc");

encode(in_path, filename, out_path.string());
}
} else {
std::ofstream lpcOut;
lpcOut.open(output_path);
// Apply preprocessing
//
// Pitch estimation will likely only benefit from lowpass filtering, as
// pitch is a low-frequency component of the signal
auto preprocessor = AudioFilter();
preprocessor.applyPreEmphasis(buffer, params.pre_emphasis_alpha);
preprocessor.applyHighpass(buffer, params.highpass_cutoff_hz);
preprocessor.applyLowpass(buffer, params.lowpass_cutoff_hz);

for (int i = 0; i < static_cast<int>(audio_input_paths.size()); i++) {
in_path = audio_input_paths[i];
filename = bitstream_names[i];
// Extract buffer metadata
const auto n_segments = buffer.getNSegments();
const auto sample_rate = buffer.getSampleRateHz();

auto frames = generateFrames(in_path);
auto bitstream = serializeFrames(frames, filename);
// Initialize analysis objects and data structures
// The estimator takes (sample rate, minimum pitch, maximum pitch). Passing
// the maximum for both bounds would collapse the search range to a single
// frequency
// NOTE(review): assumes LowerVocalTractParameters declares min_pitch_hz
// alongside max_pitch_hz -- confirm against the parameters header
auto pitch_estimator =
PitchEstimator(sample_rate, params.min_pitch_hz, params.max_pitch_hz);
auto pitch_table = std::vector<int>(n_segments);

lpcOut << bitstream << std::endl;
}
for (int i = 0; i < n_segments; i++) {
auto segment = buffer.getSegment(i);
auto acf = tms_express::Autocorrelation(segment);
auto pitch_period = pitch_estimator.estimatePeriod(acf);

lpcOut.close();
pitch_table.at(i) = (pitch_period);
}
}

std::vector<Frame> BitstreamGenerator::generateFrames(
const std::string &path) const {
// Mix audio to 8kHz mono and store in a segmented buffer
// TODO(Joseph Bellahcen): Handle nullptr
auto lpc_buffer = *AudioBuffer::Create(path, 8000, window_width_ms_);
return pitch_table;
}

// Copy the buffer so that upper and lower vocal tract analysis may occur
// separately
auto pitch_buffer = lpc_buffer.copy();
std::tuple<std::vector<std::vector<float>>, std::vector<float>>
BitstreamGenerator::analyzeUpperTract(UpperVocalTractParameters params) {
auto buffer =
*AudioBuffer::Create(input_path_, shared_params_.sample_rate_hz,
shared_params_.window_width_ms);

// Apply preprocessing
//
// The pitch buffer will ONLY be lowpass-filtered, as pitch is a
// low-frequency component of the signal. Neither highpass filtering nor
// pre-emphasis, which exaggerate high-frequency components, will improve
// pitch estimation
auto preprocessor = AudioFilter();
preprocessor.applyPreEmphasis(lpc_buffer, pre_emphasis_alpha_);
preprocessor.applyHighpass(lpc_buffer, highpass_cutoff_hz_);
preprocessor.applyLowpass(pitch_buffer, lowpass_cutoff_hz_);
preprocessor.applyPreEmphasis(buffer, params.pre_emphasis_alpha);
preprocessor.applyHighpass(buffer, params.highpass_cutoff_hz);
preprocessor.applyLowpass(buffer, params.lowpass_cutoff_hz);

// Extract buffer metadata
//
// Only the LPC buffer is queried for metadata, since it will have the same
// number of samples as the pitch buffer. The sample rate of the buffer is
// extracted despite being known, as future iterations of TMS Express may
// support encoding 10kHz/variable sample rate audio for the TMS5200C
auto n_segments = lpc_buffer.getNSegments();
auto sample_rate = lpc_buffer.getSampleRateHz();
const auto n_segments = buffer.getNSegments();

// Initialize analysis objects and data structures
auto linear_predictor = LinearPredictor();
auto pitch_estimator = PitchEstimator(sample_rate, min_pitch_hz_,
max_pitch_hz_);
auto frames = std::vector<Frame>();
auto coeff_table = std::vector<std::vector<float>>(n_segments);
auto gain_table = std::vector<float>(n_segments);

for (int i = 0; i < n_segments; i++) {
// Get segment for frame
auto pitch_segment = pitch_buffer.getSegment(i);
auto lpc_segment = lpc_buffer.getSegment(i);
auto segment = buffer.getSegment(i);

// Apply a window function to the segment to smoothen its boundaries
//
// Because information about the transition between adjacent frames is
// lost during segmentation, a window will help produce smoother results
preprocessor.applyHammingWindow(lpc_segment);
preprocessor.applyHammingWindow(segment);

// Compute the autocorrelation of each segment, which serves as the
// basis of all analysis
auto lpc_acf = tms_express::Autocorrelation(lpc_segment);
auto pitch_acf = tms_express::Autocorrelation(pitch_segment);
auto acf = tms_express::Autocorrelation(segment);

// Extract LPC reflector coefficients and compute the predictor gain
auto coeffs = linear_predictor.computeCoeffs(lpc_acf);
auto coeffs = linear_predictor.computeCoeffs(acf);
auto gain = linear_predictor.gain();

// Estimate pitch
auto pitch_period = pitch_estimator.estimatePeriod(pitch_acf);
coeff_table.at(i) = coeffs;
gain_table.at(i) = gain;
}

// Decide whether the segment is voiced or unvoiced
auto segment_is_voiced = coeffs[0] < 0;
return {coeff_table, gain_table};
}

std::vector<bool> BitstreamGenerator::estimateVoicing(
const std::vector<std::vector<float>>& coeff_table) {
auto voicing_table = std::vector<bool>(coeff_table.size());

// Store parameters in a Frame object
auto frame = Frame(pitch_period, segment_is_voiced, gain, coeffs);
frames.push_back(frame);
for (int i = 0; i < static_cast<int>(coeff_table.size()); i++) {
voicing_table.at(i) = coeff_table.at(i).at(0) < 0;
}

// Apply post-processing
auto post_processor = FramePostprocessor(&frames, main_voiced_gain_db_,
max_unvoiced_gain_db_);
return voicing_table;
}

///////////////////////////////////////////////////////////////////////////////
// Encoding ///////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

void BitstreamGenerator::applyPostProcessing(std::vector<Frame>* frame_table,
PostProcessorParameters params) {
auto post_processor = FramePostprocessor(
frame_table, params.max_voiced_gain_db, params.max_unvoiced_gain_db);
post_processor.normalizeGain();
post_processor.shiftGain(gain_shift_);
post_processor.shiftGain(params.gain_shift);

if (detect_repeat_frames_) {
if (params.detect_repeat_frames) {
post_processor.detectRepeatFrames();
}

return frames;
}

std::string BitstreamGenerator::serializeFrames(
const std::vector<Frame>& frames, const std::string &filename) const {
//
// Encode frames to hex bitstreams
auto encoder = FrameEncoder(frames, style_ != ENCODERSTYLE_ASCII);
const std::string& name, const std::vector<Frame>& frame_table,
BitstreamParameters params) {
auto encoder =
FrameEncoder(frame_table, params.encoder_style != ENCODER_STYLE_ASCII);
std::string bitstream;

switch (style_) {
case ENCODERSTYLE_ASCII:
bitstream = encoder.toHex(include_stop_frame_);
switch (params.encoder_style) {
case ENCODER_STYLE_ASCII:
bitstream = encoder.toHex(params.include_stop_frame);
break;

case ENCODERSTYLE_C:
case ENCODER_STYLE_C:
// C-style bitstreams are headers which contain a byte array
// Format: const int bitstream_name [] = {<values>};
bitstream = encoder.toHex(include_stop_frame_);
bitstream = "const int " + filename + "[] = {" + bitstream + "};\n";
bitstream = encoder.toHex(params.include_stop_frame);
bitstream = "const int " + name + "[] = {" + bitstream + "};\n";
break;

case ENCODERSTYLE_ARDUINO:
case ENCODER_STYLE_C_ARDUINO:
// Arduino-style bitstreams are C-style bitstreams which include the
// Arduino header and PROGMEM keyword. This is for compatibility
// with the Arduino Talkie library
// Format: extern const uint8_t name [] PROGMEM = {<values>};
bitstream = encoder.toHex(include_stop_frame_);
bitstream = "extern const uint8_t " + filename + "[] PROGMEM = {" +
bitstream + "};\n";
bitstream = encoder.toHex(params.include_stop_frame);
bitstream = "extern const uint8_t " + name + "[] PROGMEM = {" +
bitstream + "};\n";
break;

case ENCODERSTYLE_JSON:
case ENCODER_STYLE_JSON:
bitstream = encoder.toJSON();
break;
}

return bitstream;
}

///////////////////////////////////////////////////////////////////////////////
// Accessors //////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

std::string BitstreamGenerator::getInputPath() const {
return input_path_;
}

/// @brief Replaces the stored input path
/// @param path New input path, copied into input_path_
void BitstreamGenerator::setInputPath(const std::string& path) {
  input_path_.assign(path);
}

}; // namespace tms_express
31 changes: 25 additions & 6 deletions src/bitstream/BitstreamGenerator.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (C) 2023 Joseph Bellahcen <[email protected]>
// Copyright (C) 2022-2024 Joseph Bellahcen <[email protected]>

#ifndef TMS_EXPRESS_SRC_BITSTREAM_BITSTREAMGENERATOR_HPP_
#define TMS_EXPRESS_SRC_BITSTREAM_BITSTREAMGENERATOR_HPP_
Expand All @@ -19,7 +19,7 @@ class BitstreamGenerator {
// Initializers ///////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////

explicit BitstreamGenerator(std::string input, SharedParameters params);
explicit BitstreamGenerator(SharedParameters params);

///////////////////////////////////////////////////////////////////////////
// Analysis ///////////////////////////////////////////////////////////////
Expand All @@ -43,28 +43,47 @@ class BitstreamGenerator {
analyzeUpperTract(UpperVocalTractParameters params);

/// @brief Categorizes each segment as voiced or unvoiced
/// @param coeffs LPC reflector coefficients
/// @param coeff_table LPC reflector coefficients
/// @return Voicing table, with one voicing estimate per segment. A voicing
/// estimate of `true` corresponds to a voiced segment (vowel sound),
/// while an estimate of `false` corresponds to an unvoiced segment
/// (consonant sound)
std::vector<bool> estimateVoicing(
const std::vector<std::vector<float>>& coeffs);
const std::vector<std::vector<float>>& coeff_table);

///////////////////////////////////////////////////////////////////////////
// Encoding ///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////

/// @brief Post-processes frame table to apply analysis-independent edits
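/// @param frame_table Pointer to the vector of Frame objects representing
/// input audio
/// @param params Post-processor parameters (gain limits, gain shift, and
/// repeat-frame detection)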
void applyPostProcessing(const std::vector<Frame>& frame_table);
void applyPostProcessing(std::vector<Frame>* frame_table,
PostProcessorParameters params);

/// @brief Converts frame table to bitstream
/// @param name Name representing bitstream
/// @param frame_table Vector of Frame objects representing input audio
/// @param params Bitstream parameters
/// @return Serialized frame table, as a bitstream string
std::string serializeFrames(const std::vector<Frame>& frame_table,
std::string serializeFrames(const std::string& name,
const std::vector<Frame>& frame_table,
BitstreamParameters params);

///////////////////////////////////////////////////////////////////////////
// Accessors //////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////

std::string getInputPath() const;

void setInputPath(const std::string& path);

private:
///////////////////////////////////////////////////////////////////////////
// Members ////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////

std::string input_path_;
SharedParameters shared_params_;
};

}; // namespace tms_express
Expand Down
Loading

0 comments on commit 485ea48

Please sign in to comment.