-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Soon to be absorbed by new Application class
- Loading branch information
1 parent
3ca8d0d
commit 58f26fc
Showing
4 changed files
with
218 additions
and
175 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,209 +1,191 @@ | ||
// Copyright (C) 2023 Joseph Bellahcen <[email protected]> | ||
// Copyright (C) 2022-2024 Joseph Bellahcen <[email protected]> | ||
|
||
#include "bitstream/BitstreamGenerator.hpp" | ||
|
||
#include <filesystem> | ||
#include <fstream> | ||
#include <iostream> | ||
#include <string> | ||
#include <tuple> | ||
#include <utility> | ||
#include <vector> | ||
|
||
#include "analysis/Autocorrelation.hpp" | ||
#include "analysis/LinearPredictor.hpp" | ||
#include "analysis/PitchEstimator.hpp" | ||
#include "audio/AudioBuffer.hpp" | ||
#include "audio/AudioFilter.hpp" | ||
#include "bitstream/BitstreamGeneratorParameters.hpp" | ||
#include "encoding/Frame.hpp" | ||
#include "encoding/FrameEncoder.hpp" | ||
#include "encoding/FramePostprocessor.hpp" | ||
#include "analysis/Autocorrelation.hpp" | ||
#include "analysis/LinearPredictor.hpp" | ||
#include "analysis/PitchEstimator.hpp" | ||
|
||
namespace tms_express { | ||
|
||
/// @brief Creates a bitstream generator from individual encoder settings
/// @param window_width_ms Width of each analysis window, in milliseconds
/// @param highpass_cutoff_hz Cutoff handed to the highpass filter
/// @param lowpass_cutoff_hz Cutoff handed to the lowpass filter
/// @param pre_emphasis_alpha Coefficient handed to the pre-emphasis filter
/// @param style Output format of the generated bitstream
/// @param include_stop_frame Forwarded to the frame encoder's hex conversion
/// @param gain_shift Offset handed to the post-processor's gain shift
/// @param max_voiced_gain_db Voiced gain limit handed to the post-processor
/// @param max_unvoiced_gain_db Unvoiced gain limit handed to the
///                             post-processor
/// @param detect_repeat_frames Whether repeat-frame detection runs during
///                             post-processing
/// @param max_pitch_hz Upper pitch bound handed to the pitch estimator
/// @param min_pitch_hz Lower pitch bound handed to the pitch estimator
BitstreamGenerator::BitstreamGenerator(float window_width_ms,
    int highpass_cutoff_hz, int lowpass_cutoff_hz, float pre_emphasis_alpha,
    EncoderStyle style, bool include_stop_frame, int gain_shift,
    float max_voiced_gain_db, float max_unvoiced_gain_db,
    bool detect_repeat_frames, int max_pitch_hz, int min_pitch_hz) {
  // Segmentation and filtering
  this->window_width_ms_ = window_width_ms;
  this->pre_emphasis_alpha_ = pre_emphasis_alpha;
  this->highpass_cutoff_hz_ = highpass_cutoff_hz;
  this->lowpass_cutoff_hz_ = lowpass_cutoff_hz;

  // Pitch search range
  this->min_pitch_hz_ = min_pitch_hz;
  this->max_pitch_hz_ = max_pitch_hz;

  // Frame post-processing
  //
  // The voiced limit lives in a member spelled `main_voiced_gain_db_`; the
  // name is kept as-is because the member is declared outside this block
  this->main_voiced_gain_db_ = max_voiced_gain_db;
  this->max_unvoiced_gain_db_ = max_unvoiced_gain_db;
  this->gain_shift_ = gain_shift;
  this->detect_repeat_frames_ = detect_repeat_frames;

  // Bitstream serialization
  this->style_ = style;
  this->include_stop_frame_ = include_stop_frame;
}
/////////////////////////////////////////////////////////////////////////////// | ||
// Initializers /////////////////////////////////////////////////////////////// | ||
/////////////////////////////////////////////////////////////////////////////// | ||
|
||
void BitstreamGenerator::encode(const std::string &audio_input_path, | ||
const std::string &bitstream_name, const std::string &output_path) const { | ||
// Perform LPC analysis and convert audio data to a bitstream | ||
auto frames = generateFrames(audio_input_path); | ||
auto bitstream = serializeFrames(frames, bitstream_name); | ||
|
||
// Write bitstream to disk | ||
std::ofstream lpcOut; | ||
lpcOut.open(output_path); | ||
lpcOut << bitstream; | ||
lpcOut.close(); | ||
} | ||
BitstreamGenerator::BitstreamGenerator(SharedParameters params) | ||
: shared_params_(params) {} | ||
|
||
void BitstreamGenerator::encodeBatch( | ||
const std::vector<std::string> &audio_input_paths, | ||
const std::vector<std::string> &bitstream_names, | ||
const std::string &output_path) const { | ||
std::string in_path, filename; | ||
/////////////////////////////////////////////////////////////////////////////// | ||
// Analysis /////////////////////////////////////////////////////////////////// | ||
/////////////////////////////////////////////////////////////////////////////// | ||
|
||
if (style_ == ENCODERSTYLE_ASCII) { | ||
// Create directory to populate with encoded files | ||
std::filesystem::create_directory(output_path); | ||
std::vector<int> BitstreamGenerator::analyzeLowerTract( | ||
LowerVocalTractParameters params) { | ||
auto buffer = | ||
*AudioBuffer::Create(input_path_, shared_params_.sample_rate_hz, | ||
shared_params_.window_width_ms); | ||
|
||
for (int i = 0; i < static_cast<int>(audio_input_paths.size()); i++) { | ||
in_path = audio_input_paths[i]; | ||
filename = bitstream_names[i]; | ||
|
||
std::filesystem::path out_path = output_path; | ||
out_path /= (filename + ".lpc"); | ||
|
||
encode(in_path, filename, out_path.string()); | ||
} | ||
} else { | ||
std::ofstream lpcOut; | ||
lpcOut.open(output_path); | ||
// Apply preprocessing | ||
// | ||
// Pitch estimation will likely only benefit from lowpass filtering, as | ||
// pitch is a low-frequency component of the signal | ||
auto preprocessor = AudioFilter(); | ||
preprocessor.applyPreEmphasis(buffer, params.pre_emphasis_alpha); | ||
preprocessor.applyHighpass(buffer, params.highpass_cutoff_hz); | ||
preprocessor.applyLowpass(buffer, params.lowpass_cutoff_hz); | ||
|
||
for (int i = 0; i < static_cast<int>(audio_input_paths.size()); i++) { | ||
in_path = audio_input_paths[i]; | ||
filename = bitstream_names[i]; | ||
// Extract buffer metadata | ||
const auto n_segments = buffer.getNSegments(); | ||
const auto sample_rate = buffer.getSampleRateHz(); | ||
|
||
auto frames = generateFrames(in_path); | ||
auto bitstream = serializeFrames(frames, filename); | ||
// Initialize analysis objects and data structures | ||
auto pitch_estimator = | ||
PitchEstimator(sample_rate, params.max_pitch_hz, params.max_pitch_hz); | ||
auto pitch_table = std::vector<int>(n_segments); | ||
|
||
lpcOut << bitstream << std::endl; | ||
} | ||
for (int i = 0; i < n_segments; i++) { | ||
auto segment = buffer.getSegment(i); | ||
auto acf = tms_express::Autocorrelation(segment); | ||
auto pitch_period = pitch_estimator.estimatePeriod(acf); | ||
|
||
lpcOut.close(); | ||
pitch_table.at(i) = (pitch_period); | ||
} | ||
} | ||
|
||
std::vector<Frame> BitstreamGenerator::generateFrames( | ||
const std::string &path) const { | ||
// Mix audio to 8kHz mono and store in a segmented buffer | ||
// TODO(Joseph Bellahcen): Handle nullptr | ||
auto lpc_buffer = *AudioBuffer::Create(path, 8000, window_width_ms_); | ||
return pitch_table; | ||
} | ||
|
||
// Copy the buffer so that upper and lower vocal tract analysis may occur | ||
// separately | ||
auto pitch_buffer = lpc_buffer.copy(); | ||
std::tuple<std::vector<std::vector<float>>, std::vector<float>> | ||
BitstreamGenerator::analyzeUpperTract(UpperVocalTractParameters params) { | ||
auto buffer = | ||
*AudioBuffer::Create(input_path_, shared_params_.sample_rate_hz, | ||
shared_params_.window_width_ms); | ||
|
||
// Apply preprocessing | ||
// | ||
// The pitch buffer will ONLY be lowpass-filtered, as pitch is a | ||
// low-frequency component of the signal. Neither highpass filtering nor | ||
// pre-emphasis, which exaggerate high-frequency components, will improve | ||
// pitch estimation | ||
auto preprocessor = AudioFilter(); | ||
preprocessor.applyPreEmphasis(lpc_buffer, pre_emphasis_alpha_); | ||
preprocessor.applyHighpass(lpc_buffer, highpass_cutoff_hz_); | ||
preprocessor.applyLowpass(pitch_buffer, lowpass_cutoff_hz_); | ||
preprocessor.applyPreEmphasis(buffer, params.pre_emphasis_alpha); | ||
preprocessor.applyHighpass(buffer, params.highpass_cutoff_hz); | ||
preprocessor.applyLowpass(buffer, params.lowpass_cutoff_hz); | ||
|
||
// Extract buffer metadata | ||
// | ||
// Only the LPC buffer is queried for metadata, since it will have the same | ||
// number of samples as the pitch buffer. The sample rate of the buffer is | ||
// extracted despite being known, as future iterations of TMS Express may | ||
// support encoding 10kHz/variable sample rate audio for the TMS5200C | ||
auto n_segments = lpc_buffer.getNSegments(); | ||
auto sample_rate = lpc_buffer.getSampleRateHz(); | ||
const auto n_segments = buffer.getNSegments(); | ||
|
||
// Initialize analysis objects and data structures | ||
auto linear_predictor = LinearPredictor(); | ||
auto pitch_estimator = PitchEstimator(sample_rate, min_pitch_hz_, | ||
max_pitch_hz_); | ||
auto frames = std::vector<Frame>(); | ||
auto coeff_table = std::vector<std::vector<float>>(n_segments); | ||
auto gain_table = std::vector<float>(n_segments); | ||
|
||
for (int i = 0; i < n_segments; i++) { | ||
// Get segment for frame | ||
auto pitch_segment = pitch_buffer.getSegment(i); | ||
auto lpc_segment = lpc_buffer.getSegment(i); | ||
auto segment = buffer.getSegment(i); | ||
|
||
// Apply a window function to the segment to smoothen its boundaries | ||
// | ||
// Because information about the transition between adjacent frames is | ||
// lost during segmentation, a window will help produce smoother results | ||
preprocessor.applyHammingWindow(lpc_segment); | ||
preprocessor.applyHammingWindow(segment); | ||
|
||
// Compute the autocorrelation of each segment, which serves as the | ||
// basis of all analysis | ||
auto lpc_acf = tms_express::Autocorrelation(lpc_segment); | ||
auto pitch_acf = tms_express::Autocorrelation(pitch_segment); | ||
auto acf = tms_express::Autocorrelation(segment); | ||
|
||
// Extract LPC reflector coefficients and compute the predictor gain | ||
auto coeffs = linear_predictor.computeCoeffs(lpc_acf); | ||
auto coeffs = linear_predictor.computeCoeffs(acf); | ||
auto gain = linear_predictor.gain(); | ||
|
||
// Estimate pitch | ||
auto pitch_period = pitch_estimator.estimatePeriod(pitch_acf); | ||
coeff_table.at(i) = coeffs; | ||
gain_table.at(i) = gain; | ||
} | ||
|
||
// Decide whether the segment is voiced or unvoiced | ||
auto segment_is_voiced = coeffs[0] < 0; | ||
return {coeff_table, gain_table}; | ||
} | ||
|
||
std::vector<bool> BitstreamGenerator::estimateVoicing( | ||
const std::vector<std::vector<float>>& coeff_table) { | ||
auto voicing_table = std::vector<bool>(coeff_table.size()); | ||
|
||
// Store parameters in a Frame object | ||
auto frame = Frame(pitch_period, segment_is_voiced, gain, coeffs); | ||
frames.push_back(frame); | ||
for (int i = 0; i < static_cast<int>(coeff_table.size()); i++) { | ||
voicing_table.at(i) = coeff_table.at(i).at(0) < 0; | ||
} | ||
|
||
// Apply post-processing | ||
auto post_processor = FramePostprocessor(&frames, main_voiced_gain_db_, | ||
max_unvoiced_gain_db_); | ||
return voicing_table; | ||
} | ||
|
||
/////////////////////////////////////////////////////////////////////////////// | ||
// Encoding /////////////////////////////////////////////////////////////////// | ||
/////////////////////////////////////////////////////////////////////////////// | ||
|
||
void BitstreamGenerator::applyPostProcessing(std::vector<Frame>* frame_table, | ||
PostProcessorParameters params) { | ||
auto post_processor = FramePostprocessor( | ||
frame_table, params.max_voiced_gain_db, params.max_unvoiced_gain_db); | ||
post_processor.normalizeGain(); | ||
post_processor.shiftGain(gain_shift_); | ||
post_processor.shiftGain(params.gain_shift); | ||
|
||
if (detect_repeat_frames_) { | ||
if (params.detect_repeat_frames) { | ||
post_processor.detectRepeatFrames(); | ||
} | ||
|
||
return frames; | ||
} | ||
|
||
std::string BitstreamGenerator::serializeFrames( | ||
const std::vector<Frame>& frames, const std::string &filename) const { | ||
// | ||
// Encode frames to hex bitstreams | ||
auto encoder = FrameEncoder(frames, style_ != ENCODERSTYLE_ASCII); | ||
const std::string& name, const std::vector<Frame>& frame_table, | ||
BitstreamParameters params) { | ||
auto encoder = | ||
FrameEncoder(frame_table, params.encoder_style != ENCODER_STYLE_ASCII); | ||
std::string bitstream; | ||
|
||
switch (style_) { | ||
case ENCODERSTYLE_ASCII: | ||
bitstream = encoder.toHex(include_stop_frame_); | ||
switch (params.encoder_style) { | ||
case ENCODER_STYLE_ASCII: | ||
bitstream = encoder.toHex(params.include_stop_frame); | ||
break; | ||
|
||
case ENCODERSTYLE_C: | ||
case ENCODER_STYLE_C: | ||
// C-style bitstreams are headers which contain a byte array | ||
// Format: const int bitstream_name [] = {<values>}; | ||
bitstream = encoder.toHex(include_stop_frame_); | ||
bitstream = "const int " + filename + "[] = {" + bitstream + "};\n"; | ||
bitstream = encoder.toHex(params.include_stop_frame); | ||
bitstream = "const int " + name + "[] = {" + bitstream + "};\n"; | ||
break; | ||
|
||
case ENCODERSTYLE_ARDUINO: | ||
case ENCODER_STYLE_C_ARDUINO: | ||
// Arduino-style bitstreams are C-style bitstreams which include the | ||
// Arduino header and PROGMEM keyword. This is for compatibility | ||
// with the Arduino Talkie library | ||
// Format: extern const uint8_t name [] PROGMEM = {<values>}; | ||
bitstream = encoder.toHex(include_stop_frame_); | ||
bitstream = "extern const uint8_t " + filename + "[] PROGMEM = {" + | ||
bitstream + "};\n"; | ||
bitstream = encoder.toHex(params.include_stop_frame); | ||
bitstream = "extern const uint8_t " + name + "[] PROGMEM = {" + | ||
bitstream + "};\n"; | ||
break; | ||
|
||
case ENCODERSTYLE_JSON: | ||
case ENCODER_STYLE_JSON: | ||
bitstream = encoder.toJSON(); | ||
break; | ||
} | ||
|
||
return bitstream; | ||
} | ||
|
||
/////////////////////////////////////////////////////////////////////////////// | ||
// Accessors ////////////////////////////////////////////////////////////////// | ||
/////////////////////////////////////////////////////////////////////////////// | ||
|
||
std::string BitstreamGenerator::getInputPath() const { | ||
return input_path_; | ||
} | ||
|
||
void BitstreamGenerator::setInputPath(const std::string& path) { | ||
input_path_ = path; | ||
} | ||
|
||
}; // namespace tms_express |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
// Copyright (C) 2023 Joseph Bellahcen <[email protected]> | ||
// Copyright (C) 2022-2024 Joseph Bellahcen <[email protected]> | ||
|
||
#ifndef TMS_EXPRESS_SRC_BITSTREAM_BITSTREAMGENERATOR_HPP_ | ||
#define TMS_EXPRESS_SRC_BITSTREAM_BITSTREAMGENERATOR_HPP_ | ||
|
@@ -19,7 +19,7 @@ class BitstreamGenerator { | |
// Initializers /////////////////////////////////////////////////////////// | ||
/////////////////////////////////////////////////////////////////////////// | ||
|
||
explicit BitstreamGenerator(std::string input, SharedParameters params); | ||
explicit BitstreamGenerator(SharedParameters params); | ||
|
||
/////////////////////////////////////////////////////////////////////////// | ||
// Analysis /////////////////////////////////////////////////////////////// | ||
|
@@ -43,28 +43,47 @@ class BitstreamGenerator { | |
analyzeUpperTract(UpperVocalTractParameters params); | ||
|
||
/// @brief Categorizes each segment as voiced or unvoiced | ||
/// @param coeffs LPC reflector coefficients | ||
/// @param coeff_table LPC reflector coefficients | ||
/// @return Voicing table, with one voicing estimate per segment. A voicing | ||
/// estimate of `true` corresponds to a voiced segment (vowel sound), | ||
/// while an estimate of `false` corresponds to an unvoiced segment | ||
/// (consonant sound) | ||
std::vector<bool> estimateVoicing( | ||
const std::vector<std::vector<float>>& coeffs); | ||
const std::vector<std::vector<float>>& coeff_table); | ||
|
||
/////////////////////////////////////////////////////////////////////////// | ||
// Encoding /////////////////////////////////////////////////////////////// | ||
/////////////////////////////////////////////////////////////////////////// | ||
|
||
/// @brief Post-processes frame table to apply analysis-independent edits | ||
/// @param frame_table Vector of Frame objects representing input audio | ||
void applyPostProcessing(const std::vector<Frame>& frame_table); | ||
void applyPostProcessing(std::vector<Frame>* frame_table, | ||
PostProcessorParameters params); | ||
|
||
/// @brief Converts frame table to bitstream | ||
/// @param name Name representing bitstream | ||
/// @param frame_table Vector of Frame objects representing input audio | ||
/// @param params Bitstream parameters | ||
/// @return Serialized frame table, as a bitstream string | ||
std::string serializeFrames(const std::vector<Frame>& frame_table, | ||
std::string serializeFrames(const std::string& name, | ||
const std::vector<Frame>& frame_table, | ||
BitstreamParameters params); | ||
|
||
/////////////////////////////////////////////////////////////////////////// | ||
// Accessors ////////////////////////////////////////////////////////////// | ||
/////////////////////////////////////////////////////////////////////////// | ||
|
||
std::string getInputPath() const; | ||
|
||
void setInputPath(const std::string& path); | ||
|
||
private: | ||
/////////////////////////////////////////////////////////////////////////// | ||
// Members //////////////////////////////////////////////////////////////// | ||
/////////////////////////////////////////////////////////////////////////// | ||
|
||
std::string input_path_; | ||
SharedParameters shared_params_; | ||
}; | ||
|
||
}; // namespace tms_express | ||
|
Oops, something went wrong.