Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 108 additions & 24 deletions decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ type Decoder struct {
silkRedundancyFades []silkRedundancyFade
silkCeltAdditions []silkCeltAddition
floatBuffer []float32
singleFrame [1][]byte
sampleRate int
channels int
}
Expand Down Expand Up @@ -163,6 +164,13 @@ func (d *Decoder) resampleSilkChannel(
in, out []float32,
channelIndex, channelCount, samplesPerChannel, resampledSamplesPerChannel int,
) error {
if channelCount == 1 {
return d.silkResampler[channelIndex].Resample(
in[:samplesPerChannel],
out[:resampledSamplesPerChannel],
)
}

if cap(d.resampleChannelIn[channelIndex]) < samplesPerChannel {
d.resampleChannelIn[channelIndex] = make([]float32, samplesPerChannel)
}
Expand Down Expand Up @@ -548,9 +556,26 @@ func (d *Decoder) decode(
tocHeader := tableOfContentsHeader(in[0])
cfg := tocHeader.configuration()

encodedFrames, err := parsePacketFrames(in, tocHeader)
if err != nil {
return 0, 0, false, 0, 0, err
var encodedFrames [][]byte
if tocHeader.frameCode() == frameCodeOneFrame {
// [R2] Code 0 uses an implicit frame length for the whole payload, so it
// must not exceed the 1275-byte maximum.
if len(in[1:]) > maxOpusFrameSize {
return 0, 0, false, 0, 0, fmt.Errorf(
"%w: frame size %d exceeds %d",
errMalformedPacket,
len(in[1:]),
maxOpusFrameSize,
)
}
d.singleFrame[0] = in[1:]
encodedFrames = d.singleFrame[:]
} else {
var err error
encodedFrames, err = parsePacketFrames(in, tocHeader)
if err != nil {
return 0, 0, false, 0, 0, err
}
}

switch cfg.mode() {
Expand Down Expand Up @@ -1147,37 +1172,95 @@ func (d *Decoder) decodeToFloat32(
return 0, 0, false, err
}

samplesPerChannel, err = d.finishDecodeToFloat32(
out,
bandwidth,
decodedSampleRate,
sampleCount,
decodedChannelCount,
)
if err != nil {
return 0, 0, false, err
}

return samplesPerChannel, bandwidth, isStereo, nil
}

// finishDecodeToFloat32 delivers the samples of an already decoded frame to
// the caller's float32 buffer.
//
// It converts the decoded per-channel sample count to the decoder's output
// sample rate, verifies that out can hold that many samples for d.channels
// channels, and then has writeDecodedOutput produce the samples. The samples
// are written either straight into out or into the staging buffer chosen by
// prepareResampleOutput. When staging is used, the SILK redundancy fades are
// applied and the staged samples are copied into out afterwards.
//
// It returns the number of samples per channel written, or
// errOutBufferTooSmall when out is too short, or the error reported by
// writeDecodedOutput.
func (d *Decoder) finishDecodeToFloat32(
	out []float32,
	bandwidth Bandwidth,
	decodedSampleRate int,
	sampleCount int,
	decodedChannelCount int,
) (samplesPerChannel int, err error) {
	decodedSamplesPerChannel := sampleCount / decodedChannelCount
	samplesPerChannel = decodedSamplesPerChannel * d.sampleRate / decodedSampleRate
	if samplesPerChannel*d.channels > len(out) {
		return 0, errOutBufferTooSmall
	}

	target, staged := d.prepareResampleOutput(
		out,
		samplesPerChannel*decodedChannelCount,
		decodedChannelCount,
	)

	err = d.writeDecodedOutput(target, bandwidth, decodedSampleRate, sampleCount, decodedChannelCount)
	if err != nil {
		return 0, err
	}

	if !staged {
		// The samples were written directly into out; nothing left to do.
		return samplesPerChannel, nil
	}

	// Post-process the staged samples, then move them to the caller's buffer.
	d.applySilkRedundancyFades(decodedChannelCount)
	d.copyResampledSamples(out, decodedChannelCount)

	return samplesPerChannel, nil
}

// prepareResampleOutput selects the slice that decoded output should be
// written to.
//
// When needsResampleBuffer reports that staging is required, d.resampleBuffer
// is resized to requiredSamples (reallocating only if its capacity is too
// small) and returned with useResampleBuffer set to true. Otherwise the first
// requiredSamples elements of out are returned and useResampleBuffer is false.
func (d *Decoder) prepareResampleOutput(
	out []float32,
	requiredSamples int,
	decodedChannelCount int,
) (resampleOut []float32, useResampleBuffer bool) {
	if d.needsResampleBuffer(decodedChannelCount) {
		staging := d.resampleBuffer
		if cap(staging) >= requiredSamples {
			// Reuse the existing allocation; previous contents are not cleared.
			staging = staging[:requiredSamples]
		} else {
			staging = make([]float32, requiredSamples)
		}
		d.resampleBuffer = staging

		return staging, true
	}

	return out[:requiredSamples], false
}

// needsResampleBuffer reports whether decoded output has to be staged in
// d.resampleBuffer instead of being written directly to the caller's slice.
// That is the case when the decoded channel count differs from d.channels, or
// when d.silkRedundancyFades or d.silkCeltAdditions is non-empty.
func (d *Decoder) needsResampleBuffer(decodedChannelCount int) bool {
	switch {
	case decodedChannelCount != d.channels:
		return true
	case len(d.silkRedundancyFades) > 0:
		return true
	default:
		return len(d.silkCeltAdditions) > 0
	}
}

func (d *Decoder) writeDecodedOutput(
out []float32,
bandwidth Bandwidth,
decodedSampleRate int,
sampleCount int,
decodedChannelCount int,
) error {
decodedMode := d.previousMode
switch {
case decodedMode == configurationModeSilkOnly &&
decodedSampleRate == bandwidth.SampleRate() &&
bandwidth != BandwidthFullband:
// The RFC SILK decoder resampler has delay even for same-rate copy paths.
if err = d.resampleSilk(d.silkBuffer[:sampleCount], d.resampleBuffer, decodedChannelCount, bandwidth); err != nil {
return 0, 0, false, err
}
return d.resampleSilk(d.silkBuffer[:sampleCount], out, decodedChannelCount, bandwidth)
case d.sampleRate == decodedSampleRate:
copy(d.resampleBuffer, d.silkBuffer[:sampleCount])
default:
if err = d.resampleSilk(d.silkBuffer[:sampleCount], d.resampleBuffer, decodedChannelCount, bandwidth); err != nil {
return 0, 0, false, err
}
}
d.applySilkRedundancyFades(decodedChannelCount)
copy(out, d.silkBuffer[:sampleCount])

if len(out) < samplesPerChannel*d.channels {
return 0, 0, false, errOutBufferTooSmall
return nil
default:
return d.resampleSilk(d.silkBuffer[:sampleCount], out, decodedChannelCount, bandwidth)
}

d.copyResampledSamples(out, decodedChannelCount)

return samplesPerChannel, bandwidth, isStereo, nil
}

// applySilkRedundancyFades applies the leading/trailing 2.5 ms cross-laps from
Expand Down Expand Up @@ -1243,14 +1326,15 @@ func (d *Decoder) applySilkRedundancyFades(channelCount int) {
}

func (d *Decoder) copyResampledSamples(out []float32, channelCount int) {
if channelCount == d.channels {
copy(out, d.resampleBuffer)

return
}

outIndex := 0
for i := 0; i < len(d.resampleBuffer); i += channelCount {
switch {
case channelCount == d.channels:
for c := 0; c < d.channels; c++ {
out[outIndex] = d.resampleBuffer[i+c]
outIndex++
}
case channelCount == 1 && d.channels == 2:
out[outIndex] = d.resampleBuffer[i]
out[outIndex+1] = d.resampleBuffer[i]
Expand Down
22 changes: 20 additions & 2 deletions internal/bitdepth/bitdepth.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@ var (
// Float32ToSigned16 quantizes a float32 PCM sample to signed 16-bit PCM.
//
// The sample is scaled by 32768, rounded to the nearest integer (halves are
// rounded away from zero, as math.Round does), and saturated to the int16
// range [-32768, 32767]. Infinities saturate like any other out-of-range
// value. NaN is mapped to 0: converting a NaN to an integer type gives an
// implementation-dependent result in Go, so it is handled explicitly.
func Float32ToSigned16(sample float32) int16 {
	// The multiplication is done in float32; if it overflows, the result is
	// an infinity, which the saturation below handles.
	scaled := math.Round(float64(sample * 32768))

	switch {
	case math.IsNaN(scaled):
		return 0
	case scaled < math.MinInt16:
		return math.MinInt16
	case scaled > math.MaxInt16:
		return math.MaxInt16
	default:
		return int16(scaled)
	}
}
Expand All @@ -45,6 +48,21 @@ func ConvertFloat32LittleEndianToSigned16LittleEndian(
return errOutBufferTooSmall
}

if resampleCount == 1 {
currIndex := 0
for _, sample := range in {
res := Float32ToSigned16(sample)

out[currIndex] = byte(res & 0b11111111)
currIndex++

out[currIndex] = byte(uint16(res) >> 8) // #nosec G115,G602 -- output length was checked above
currIndex++
}

return nil
}

currIndex := 0
for i := 0; i < len(in); i += channelCount {
for j := resampleCount; j > 0; j-- {
Expand Down
138 changes: 90 additions & 48 deletions internal/silk/decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -1998,10 +1998,6 @@ func (d *Decoder) lpcSynthesis(
n, s, dLPC int, //nolint:varnamelen
aQ12, res, gainQ16, lpc []float32,
) {
// j be the index of the first sample in the residual corresponding to
// the current subframe.
j := 0

// Then, for i such that j <= i < (j + n), the result of LPC synthesis
// for the current subframe is
//
Expand All @@ -2011,58 +2007,104 @@ func (d *Decoder) lpcSynthesis(
// 65536.0 /_ 4096.0
// k=0
//
var currentLPCVal float32
for i := j; i < (j + n); i++ {
sampleIndex := i + (n * s)

lpcVal := gainQ16[s] / 65536.0
lpcVal *= res[sampleIndex]

for k, aQ12 := range aQ12[:dLPC] {
lpcIndex := sampleIndex - k - 1
switch {
case lpcIndex >= 0:
currentLPCVal = lpc[lpcIndex]
case s == 0:
previousIndex := len(d.previousFrameLPCValues) - 1 + (i - k)
if previousIndex >= 0 {
currentLPCVal = d.previousFrameLPCValues[previousIndex]
} else {
currentLPCVal = 0
}
default:
currentLPCVal = 0
}
normalizedAQ12, reversedAQ12 := normalizedLPCWeights(aQ12, dLPC)
gain := gainQ16[s] / 65536.0
subframeOffset := n * s
subframeOut := out[:n]
if s > 0 {
lpcSynthesisSteadyState(
subframeOut,
dLPC,
reversedAQ12,
res[subframeOffset:subframeOffset+n],
lpc[subframeOffset:subframeOffset+n],
lpc[subframeOffset-dLPC:subframeOffset+n],
gain,
)
} else {
d.lpcSynthesisFirstSubframe(subframeOut, dLPC, normalizedAQ12, res[:n], lpc[:n], gain)
}

lpcVal += currentLPCVal * (aQ12 / 4096.0)
d.savePreviousFrameLPCValues(lpc, out, n, dLPC)
}

// normalizedLPCWeights scales the first dLPC coefficients of aQ12 by 1/4096
// and returns them twice: normalizedAQ12 in their original order and
// reversedAQ12 with those same dLPC values in reverse order. Entries at index
// dLPC and above are left at zero in both arrays.
func normalizedLPCWeights(aQ12 []float32, dLPC int) (normalizedAQ12, reversedAQ12 [16]float32) {
	// Walk the coefficients once, filling the forward array from the front
	// and the reversed array from the back.
	for forward, backward := 0, dLPC-1; forward < dLPC; forward, backward = forward+1, backward-1 {
		weight := aQ12[forward] / 4096.0
		normalizedAQ12[forward] = weight
		reversedAQ12[backward] = weight
	}

	return normalizedAQ12, reversedAQ12
}

func lpcSynthesisSteadyState(
out []float32,
dLPC int,
reversedAQ12 [16]float32,
subframeRes, subframeLPC, historyAndOutput []float32,
gain float32,
) {
for sampleIndex := range out {
lpcVal := gain * subframeRes[sampleIndex]
history := historyAndOutput[sampleIndex : sampleIndex+dLPC]
for coefficientIndex := range dLPC {
lpcVal += history[coefficientIndex] * reversedAQ12[coefficientIndex]
}

lpc[sampleIndex] = lpcVal
subframeLPC[sampleIndex] = lpcVal
out[sampleIndex] = clampNegativeOneToOne(lpcVal)
}
}

// Then, the signal is clamped into the final nominal range:
//
// out[i] = clamp(-1.0, lpc[i], 1.0)
//
out[i] = clampNegativeOneToOne(lpc[sampleIndex])

// The decoder saves the final d_LPC values, i.e., lpc[i] such that
// (j + n - d_LPC) <= i < (j + n), to feed into the LPC synthesis of the
// next subframe. This requires storage for up to 16 values of lpc[i]
// (for WB frames).
// The final d_LPC synthesized samples become the history for the next
// subframe. RFC 6716 section 4.2.7.9 describes that continuity
// requirement, and decode_frame.c preserves this state even for the
// first decoded frame. The old haveDecoded guard skipped that initial
// handoff and left the next frame with an all-zero LPC history.
if len(out)-1 == i {
if cap(d.previousFrameLPCValues) < dLPC {
d.previousFrameLPCValues = make([]float32, dLPC)
func (d *Decoder) lpcSynthesisFirstSubframe(
out []float32,
dLPC int,
normalizedAQ12 [16]float32,
subframeRes, subframeLPC []float32,
gain float32,
) {
var currentLPCVal float32
for sampleIndex := range out {
lpcVal := gain * subframeRes[sampleIndex]

for coefficientIndex := range dLPC {
if lpcIndex := sampleIndex - coefficientIndex - 1; lpcIndex >= 0 {
currentLPCVal = subframeLPC[lpcIndex]
} else if previousIndex := len(d.previousFrameLPCValues) - 1 + (sampleIndex - coefficientIndex); previousIndex >= 0 {
currentLPCVal = d.previousFrameLPCValues[previousIndex]
} else {
d.previousFrameLPCValues = d.previousFrameLPCValues[:dLPC]
currentLPCVal = 0
}
copy(d.previousFrameLPCValues, lpc[len(lpc)-dLPC:])

lpcVal += currentLPCVal * normalizedAQ12[coefficientIndex]
}

subframeLPC[sampleIndex] = lpcVal
out[sampleIndex] = clampNegativeOneToOne(lpcVal)
}
}

// savePreviousFrameLPCValues stores the last dLPC values of lpc in
// d.previousFrameLPCValues so that they can serve as LPC history later on.
//
// Nothing is stored unless out holds exactly n samples. The destination slice
// is reused when its capacity allows and reallocated otherwise.
//
// RFC 6716 section 4.2.7.9 requires the final d_LPC values of lpc[i] (up to
// 16 for WB frames) to be carried over into the next LPC synthesis, and
// decode_frame.c keeps this state even for the first decoded frame. An
// earlier haveDecoded guard skipped that first handoff and left the following
// frame with an all-zero LPC history.
func (d *Decoder) savePreviousFrameLPCValues(lpc, out []float32, n, dLPC int) { //nolint:varnamelen
	if len(out) != n {
		return
	}

	if cap(d.previousFrameLPCValues) >= dLPC {
		d.previousFrameLPCValues = d.previousFrameLPCValues[:dLPC]
	} else {
		d.previousFrameLPCValues = make([]float32, dLPC)
	}

	tail := lpc[len(lpc)-dLPC:]
	copy(d.previousFrameLPCValues, tail)
}

// The remainder of the reconstruction process for the frame does not
Expand Down
Loading