diff --git a/decoder.go b/decoder.go index 2dd4f2f..5937cae 100644 --- a/decoder.go +++ b/decoder.go @@ -45,6 +45,7 @@ type Decoder struct { silkRedundancyFades []silkRedundancyFade silkCeltAdditions []silkCeltAddition floatBuffer []float32 + singleFrame [1][]byte sampleRate int channels int } @@ -163,6 +164,13 @@ func (d *Decoder) resampleSilkChannel( in, out []float32, channelIndex, channelCount, samplesPerChannel, resampledSamplesPerChannel int, ) error { + if channelCount == 1 { + return d.silkResampler[channelIndex].Resample( + in[:samplesPerChannel], + out[:resampledSamplesPerChannel], + ) + } + if cap(d.resampleChannelIn[channelIndex]) < samplesPerChannel { d.resampleChannelIn[channelIndex] = make([]float32, samplesPerChannel) } @@ -548,9 +556,26 @@ func (d *Decoder) decode( tocHeader := tableOfContentsHeader(in[0]) cfg := tocHeader.configuration() - encodedFrames, err := parsePacketFrames(in, tocHeader) - if err != nil { - return 0, 0, false, 0, 0, err + var encodedFrames [][]byte + if tocHeader.frameCode() == frameCodeOneFrame { + // [R2] Code 0 uses an implicit frame length for the whole payload, so it + // must not exceed the 1275-byte maximum. + if len(in[1:]) > maxOpusFrameSize { + return 0, 0, false, 0, 0, fmt.Errorf( + "%w: frame size %d exceeds %d", + errMalformedPacket, + len(in[1:]), + maxOpusFrameSize, + ) + } + d.singleFrame[0] = in[1:] + encodedFrames = d.singleFrame[:] + } else { + var err error + encodedFrames, err = parsePacketFrames(in, tocHeader) + if err != nil { + return 0, 0, false, 0, 0, err + } } switch cfg.mode() { @@ -1147,37 +1172,95 @@ func (d *Decoder) decodeToFloat32( return 0, 0, false, err } + samplesPerChannel, err = d.finishDecodeToFloat32( + out, + bandwidth, + decodedSampleRate, + sampleCount, + decodedChannelCount, + ) + if err != nil { + return 0, 0, false, err + } + + return samplesPerChannel, bandwidth, isStereo, nil +} + +func (d *Decoder) finishDecodeToFloat32( + out []float32, + bandwidth Bandwidth, + decodedSampleRate int, + sampleCount int, + decodedChannelCount int, +) (samplesPerChannel int, err error) { samplesPerChannel = (sampleCount / decodedChannelCount) * d.sampleRate / decodedSampleRate + if len(out) < samplesPerChannel*d.channels { + return 0, errOutBufferTooSmall + } + requiredSamples := samplesPerChannel * decodedChannelCount + resampleOut, useResampleBuffer := d.prepareResampleOutput(out, requiredSamples, decodedChannelCount) + if err = d.writeDecodedOutput( + resampleOut, + bandwidth, + decodedSampleRate, + sampleCount, + decodedChannelCount, + ); err != nil { + return 0, err + } + if useResampleBuffer { + d.applySilkRedundancyFades(decodedChannelCount) + d.copyResampledSamples(out, decodedChannelCount) + } + + return samplesPerChannel, nil +} + +func (d *Decoder) prepareResampleOutput( + out []float32, + requiredSamples int, + decodedChannelCount int, +) (resampleOut []float32, useResampleBuffer bool) { + if !d.needsResampleBuffer(decodedChannelCount) { + return out[:requiredSamples], false + } + if cap(d.resampleBuffer) < requiredSamples { d.resampleBuffer = make([]float32, requiredSamples) } d.resampleBuffer = d.resampleBuffer[:requiredSamples] + + return d.resampleBuffer, true +} + +func (d *Decoder) needsResampleBuffer(decodedChannelCount int) bool { + return decodedChannelCount != d.channels || + len(d.silkRedundancyFades) > 0 || + len(d.silkCeltAdditions) > 0 +} + +func (d *Decoder) writeDecodedOutput( + out []float32, + bandwidth Bandwidth, + decodedSampleRate int, + sampleCount int, + decodedChannelCount int, +) error { decodedMode := d.previousMode switch { case decodedMode == configurationModeSilkOnly && decodedSampleRate == bandwidth.SampleRate() && bandwidth != BandwidthFullband: // The RFC SILK decoder resampler has delay even for same-rate copy paths. - if err = d.resampleSilk(d.silkBuffer[:sampleCount], d.resampleBuffer, decodedChannelCount, bandwidth); err != nil { - return 0, 0, false, err - } + return d.resampleSilk(d.silkBuffer[:sampleCount], out, decodedChannelCount, bandwidth) case d.sampleRate == decodedSampleRate: - copy(d.resampleBuffer, d.silkBuffer[:sampleCount]) - default: - if err = d.resampleSilk(d.silkBuffer[:sampleCount], d.resampleBuffer, decodedChannelCount, bandwidth); err != nil { - return 0, 0, false, err - } - } - d.applySilkRedundancyFades(decodedChannelCount) + copy(out, d.silkBuffer[:sampleCount]) - if len(out) < samplesPerChannel*d.channels { - return 0, 0, false, errOutBufferTooSmall + return nil + default: + return d.resampleSilk(d.silkBuffer[:sampleCount], out, decodedChannelCount, bandwidth) } - - d.copyResampledSamples(out, decodedChannelCount) - - return samplesPerChannel, bandwidth, isStereo, nil } // applySilkRedundancyFades applies the leading/trailing 2.5 ms cross-laps from @@ -1243,14 +1326,15 @@ func (d *Decoder) applySilkRedundancyFades(channelCount int) { } func (d *Decoder) copyResampledSamples(out []float32, channelCount int) { + if channelCount == d.channels { + copy(out, d.resampleBuffer) + + return + } + outIndex := 0 for i := 0; i < len(d.resampleBuffer); i += channelCount { switch { - case channelCount == d.channels: - for c := 0; c < d.channels; c++ { - out[outIndex] = d.resampleBuffer[i+c] - outIndex++ - } case channelCount == 1 && d.channels == 2: out[outIndex] = d.resampleBuffer[i] out[outIndex+1] = d.resampleBuffer[i] diff --git a/internal/bitdepth/bitdepth.go b/internal/bitdepth/bitdepth.go index f382b9d..1eec374 100644 --- a/internal/bitdepth/bitdepth.go +++ b/internal/bitdepth/bitdepth.go @@ -19,8 +19,11 @@ var ( // Float32ToSigned16 quantizes a float32 PCM sample to signed 16-bit PCM. func Float32ToSigned16(sample float32) int16 { sample64 := math.Round(float64(sample * 32768)) - sample64 = math.Max(sample64, -32768) - sample64 = math.Min(sample64, 32767) + if sample64 < -32768 { + sample64 = -32768 + } else if sample64 > 32767 { + sample64 = 32767 + } return int16(sample64) } @@ -45,6 +48,21 @@ func ConvertFloat32LittleEndianToSigned16LittleEndian( return errOutBufferTooSmall } + if resampleCount == 1 { + currIndex := 0 + for _, sample := range in { + res := Float32ToSigned16(sample) + + out[currIndex] = byte(res & 0b11111111) + currIndex++ + + out[currIndex] = byte(uint16(res) >> 8) // #nosec G115,G602 -- output length was checked above + currIndex++ + } + + return nil + } + currIndex := 0 for i := 0; i < len(in); i += channelCount { for j := resampleCount; j > 0; j-- { diff --git a/internal/silk/decoder.go b/internal/silk/decoder.go index 7604cf6..276c2b5 100644 --- a/internal/silk/decoder.go +++ b/internal/silk/decoder.go @@ -1998,10 +1998,6 @@ func (d *Decoder) lpcSynthesis( n, s, dLPC int, //nolint:varnamelen aQ12, res, gainQ16, lpc []float32, ) { - // j be the index of the first sample in the residual corresponding to - // the current subframe. - j := 0 - // Then, for i such that j <= i < (j + n), the result of LPC synthesis // for the current subframe is // @@ -2011,58 +2007,104 @@ func (d *Decoder) lpcSynthesis( // 65536.0 /_ 4096.0 // k=0 // - var currentLPCVal float32 - for i := j; i < (j + n); i++ { - sampleIndex := i + (n * s) - - lpcVal := gainQ16[s] / 65536.0 - lpcVal *= res[sampleIndex] - - for k, aQ12 := range aQ12[:dLPC] { - lpcIndex := sampleIndex - k - 1 - switch { - case lpcIndex >= 0: - currentLPCVal = lpc[lpcIndex] - case s == 0: - previousIndex := len(d.previousFrameLPCValues) - 1 + (i - k) - if previousIndex >= 0 { - currentLPCVal = d.previousFrameLPCValues[previousIndex] - } else { - currentLPCVal = 0 - } - default: - currentLPCVal = 0 - } + normalizedAQ12, reversedAQ12 := normalizedLPCWeights(aQ12, dLPC) + gain := gainQ16[s] / 65536.0 + subframeOffset := n * s + subframeOut := out[:n] + if s > 0 { + lpcSynthesisSteadyState( + subframeOut, + dLPC, + reversedAQ12, + res[subframeOffset:subframeOffset+n], + lpc[subframeOffset:subframeOffset+n], + lpc[subframeOffset-dLPC:subframeOffset+n], + gain, + ) + } else { + d.lpcSynthesisFirstSubframe(subframeOut, dLPC, normalizedAQ12, res[:n], lpc[:n], gain) + } - lpcVal += currentLPCVal * (aQ12 / 4096.0) + d.savePreviousFrameLPCValues(lpc, out, n, dLPC) +} + +func normalizedLPCWeights(aQ12 []float32, dLPC int) (normalizedAQ12, reversedAQ12 [16]float32) { + for coefficientIndex := range dLPC { + normalizedAQ12[coefficientIndex] = aQ12[coefficientIndex] / 4096.0 + } + for coefficientIndex := range dLPC { + reversedAQ12[coefficientIndex] = normalizedAQ12[dLPC-coefficientIndex-1] + } + + return normalizedAQ12, reversedAQ12 +} + +func lpcSynthesisSteadyState( + out []float32, + dLPC int, + reversedAQ12 [16]float32, + subframeRes, subframeLPC, historyAndOutput []float32, + gain float32, +) { + for sampleIndex := range out { + lpcVal := gain * subframeRes[sampleIndex] + history := historyAndOutput[sampleIndex : sampleIndex+dLPC] + for coefficientIndex := range dLPC { + lpcVal += history[coefficientIndex] * reversedAQ12[coefficientIndex] } - lpc[sampleIndex] = lpcVal + subframeLPC[sampleIndex] = lpcVal + out[sampleIndex] = clampNegativeOneToOne(lpcVal) + } +} - // Then, the signal is clamped into the final nominal range: - // - // out[i] = clamp(-1.0, lpc[i], 1.0) - // - out[i] = clampNegativeOneToOne(lpc[sampleIndex]) - - // The decoder saves the final d_LPC values, i.e., lpc[i] such that - // (j + n - d_LPC) <= i < (j + n), to feed into the LPC synthesis of the - // next subframe. This requires storage for up to 16 values of lpc[i] - // (for WB frames). - // The final d_LPC synthesized samples become the history for the next - // subframe. RFC 6716 section 4.2.7.9 describes that continuity - // requirement, and decode_frame.c preserves this state even for the - // first decoded frame. The old haveDecoded guard skipped that initial - // handoff and left the next frame with an all-zero LPC history. - if len(out)-1 == i { - if cap(d.previousFrameLPCValues) < dLPC { - d.previousFrameLPCValues = make([]float32, dLPC) +func (d *Decoder) lpcSynthesisFirstSubframe( + out []float32, + dLPC int, + normalizedAQ12 [16]float32, + subframeRes, subframeLPC []float32, + gain float32, +) { + var currentLPCVal float32 + for sampleIndex := range out { + lpcVal := gain * subframeRes[sampleIndex] + + for coefficientIndex := range dLPC { + if lpcIndex := sampleIndex - coefficientIndex - 1; lpcIndex >= 0 { + currentLPCVal = subframeLPC[lpcIndex] + } else if previousIndex := len(d.previousFrameLPCValues) - 1 + (sampleIndex - coefficientIndex); previousIndex >= 0 { + currentLPCVal = d.previousFrameLPCValues[previousIndex] } else { - d.previousFrameLPCValues = d.previousFrameLPCValues[:dLPC] + currentLPCVal = 0 } - copy(d.previousFrameLPCValues, lpc[len(lpc)-dLPC:]) + + lpcVal += currentLPCVal * normalizedAQ12[coefficientIndex] } + + subframeLPC[sampleIndex] = lpcVal + out[sampleIndex] = clampNegativeOneToOne(lpcVal) + } +} + +func (d *Decoder) savePreviousFrameLPCValues(lpc, out []float32, n, dLPC int) { //nolint:varnamelen + // The decoder saves the final d_LPC values, i.e., lpc[i] such that + // (j + n - d_LPC) <= i < (j + n), to feed into the LPC synthesis of the + // next subframe. This requires storage for up to 16 values of lpc[i] + // (for WB frames). + // The final d_LPC synthesized samples become the history for the next + // subframe. RFC 6716 section 4.2.7.9 describes that continuity + // requirement, and decode_frame.c preserves this state even for the + // first decoded frame. The old haveDecoded guard skipped that initial + // handoff and left the next frame with an all-zero LPC history. + if len(out) != n { + return + } + if cap(d.previousFrameLPCValues) < dLPC { + d.previousFrameLPCValues = make([]float32, dLPC) + } else { + d.previousFrameLPCValues = d.previousFrameLPCValues[:dLPC] } + copy(d.previousFrameLPCValues, lpc[len(lpc)-dLPC:]) } // The remainder of the reconstruction process for the frame does not