diff --git a/decoder.go b/decoder.go
index 2dd4f2f..5937cae 100644
--- a/decoder.go
+++ b/decoder.go
@@ -45,6 +45,7 @@ type Decoder struct {
 	silkRedundancyFades    []silkRedundancyFade
 	silkCeltAdditions      []silkCeltAddition
 	floatBuffer            []float32
+	singleFrame            [1][]byte
 	sampleRate             int
 	channels               int
 }
@@ -163,6 +164,13 @@ func (d *Decoder) resampleSilkChannel(
 	in, out []float32,
 	channelIndex, channelCount, samplesPerChannel, resampledSamplesPerChannel int,
 ) error {
+	if channelCount == 1 {
+		return d.silkResampler[channelIndex].Resample(
+			in[:samplesPerChannel],
+			out[:resampledSamplesPerChannel],
+		)
+	}
+
 	if cap(d.resampleChannelIn[channelIndex]) < samplesPerChannel {
 		d.resampleChannelIn[channelIndex] = make([]float32, samplesPerChannel)
 	}
@@ -548,9 +556,26 @@ func (d *Decoder) decode(
 	tocHeader := tableOfContentsHeader(in[0])
 	cfg := tocHeader.configuration()
 
-	encodedFrames, err := parsePacketFrames(in, tocHeader)
-	if err != nil {
-		return 0, 0, false, 0, 0, err
+	var encodedFrames [][]byte
+	if tocHeader.frameCode() == frameCodeOneFrame {
+		// [R2] Code 0 uses an implicit frame length for the whole payload, so it
+		// must not exceed the 1275-byte maximum.
+		if len(in[1:]) > maxOpusFrameSize {
+			return 0, 0, false, 0, 0, fmt.Errorf(
+				"%w: frame size %d exceeds %d",
+				errMalformedPacket,
+				len(in[1:]),
+				maxOpusFrameSize,
+			)
+		}
+		d.singleFrame[0] = in[1:]
+		encodedFrames = d.singleFrame[:]
+	} else {
+		var err error
+		encodedFrames, err = parsePacketFrames(in, tocHeader)
+		if err != nil {
+			return 0, 0, false, 0, 0, err
+		}
 	}
 
 	switch cfg.mode() {
@@ -1147,37 +1172,95 @@ func (d *Decoder) decodeToFloat32(
 		return 0, 0, false, err
 	}
 
+	samplesPerChannel, err = d.finishDecodeToFloat32(
+		out,
+		bandwidth,
+		decodedSampleRate,
+		sampleCount,
+		decodedChannelCount,
+	)
+	if err != nil {
+		return 0, 0, false, err
+	}
+
+	return samplesPerChannel, bandwidth, isStereo, nil
+}
+
+// finishDecodeToFloat32 converts the decoded samples to the decoder's output
+// rate and channel layout and stores them in out. It returns the number of
+// samples written per output channel, or errOutBufferTooSmall when out cannot
+// hold samplesPerChannel*d.channels values.
+func (d *Decoder) finishDecodeToFloat32(
+	out []float32,
+	bandwidth Bandwidth,
+	decodedSampleRate, sampleCount, decodedChannelCount int,
+) (samplesPerChannel int, err error) {
 	samplesPerChannel = (sampleCount / decodedChannelCount) * d.sampleRate / decodedSampleRate
+	if samplesPerChannel*d.channels > len(out) {
+		return 0, errOutBufferTooSmall
+	}
+
 	requiredSamples := samplesPerChannel * decodedChannelCount
+	target, staged := d.prepareResampleOutput(out, requiredSamples, decodedChannelCount)
+	err = d.writeDecodedOutput(target, bandwidth, decodedSampleRate, sampleCount, decodedChannelCount)
+	if err != nil {
+		return 0, err
+	}
+
+	// Redundancy fades and the copy into out only apply when decoding used the staging buffer.
+	if !staged {
+		return samplesPerChannel, nil
+	}
+	d.applySilkRedundancyFades(decodedChannelCount)
+	d.copyResampledSamples(out, decodedChannelCount)
+
+	return samplesPerChannel, nil
+}
+
+// prepareResampleOutput picks where decoded samples are written: straight into
+// out when no staging is needed, otherwise into d.resampleBuffer resized to requiredSamples.
+func (d *Decoder) prepareResampleOutput(
+	out []float32, requiredSamples, decodedChannelCount int,
+) (resampleOut []float32, useResampleBuffer bool) {
+	if useResampleBuffer = d.needsResampleBuffer(decodedChannelCount); !useResampleBuffer {
+		return out[:requiredSamples], false
+	}
+
 	if cap(d.resampleBuffer) < requiredSamples {
 		d.resampleBuffer = make([]float32, requiredSamples)
 	}
 	d.resampleBuffer = d.resampleBuffer[:requiredSamples]
+
+	return d.resampleBuffer, useResampleBuffer
+}
+
+// needsResampleBuffer reports whether decoding must be staged in d.resampleBuffer: either
+// d.silkRedundancyFades or d.silkCeltAdditions is non-empty, or the channel counts differ.
+func (d *Decoder) needsResampleBuffer(decodedChannelCount int) bool {
+	return len(d.silkRedundancyFades)+len(d.silkCeltAdditions) > 0 || decodedChannelCount != d.channels
+}
+
+// writeDecodedOutput fills out from d.silkBuffer[:sampleCount]. SILK-only frames below
+// fullband decoded at their bandwidth's rate, and any frame whose rate differs from
+// d.sampleRate, go through resampleSilk; everything else is copied verbatim.
+func (d *Decoder) writeDecodedOutput(
+	out []float32, bandwidth Bandwidth,
+	decodedSampleRate, sampleCount, decodedChannelCount int,
+) error {
 	decodedMode := d.previousMode
 	switch {
 	case decodedMode == configurationModeSilkOnly &&
 		decodedSampleRate == bandwidth.SampleRate() &&
 		bandwidth != BandwidthFullband:
 		// The RFC SILK decoder resampler has delay even for same-rate copy paths.
-		if err = d.resampleSilk(d.silkBuffer[:sampleCount], d.resampleBuffer, decodedChannelCount, bandwidth); err != nil {
-			return 0, 0, false, err
-		}
+		return d.resampleSilk(d.silkBuffer[:sampleCount], out, decodedChannelCount, bandwidth)
 	case d.sampleRate == decodedSampleRate:
-		copy(d.resampleBuffer, d.silkBuffer[:sampleCount])
-	default:
-		if err = d.resampleSilk(d.silkBuffer[:sampleCount], d.resampleBuffer, decodedChannelCount, bandwidth); err != nil {
-			return 0, 0, false, err
-		}
-	}
-	d.applySilkRedundancyFades(decodedChannelCount)
+		copy(out, d.silkBuffer[:sampleCount])
 
-	if len(out) < samplesPerChannel*d.channels {
-		return 0, 0, false, errOutBufferTooSmall
+		return nil
+	default:
+		return d.resampleSilk(d.silkBuffer[:sampleCount], out, decodedChannelCount, bandwidth)
 	}
-
-	d.copyResampledSamples(out, decodedChannelCount)
-
-	return samplesPerChannel, bandwidth, isStereo, nil
 }
 
 // applySilkRedundancyFades applies the leading/trailing 2.5 ms cross-laps from
@@ -1243,14 +1326,15 @@ func (d *Decoder) applySilkRedundancyFades(channelCount int) {
 }
 
 func (d *Decoder) copyResampledSamples(out []float32, channelCount int) {
+	if channelCount == d.channels {
+		copy(out, d.resampleBuffer)
+
+		return
+	}
+
 	outIndex := 0
 	for i := 0; i < len(d.resampleBuffer); i += channelCount {
 		switch {
-		case channelCount == d.channels:
-			for c := 0; c < d.channels; c++ {
-				out[outIndex] = d.resampleBuffer[i+c]
-				outIndex++
-			}
 		case channelCount == 1 && d.channels == 2:
 			out[outIndex] = d.resampleBuffer[i]
 			out[outIndex+1] = d.resampleBuffer[i]
diff --git a/internal/bitdepth/bitdepth.go b/internal/bitdepth/bitdepth.go
index f382b9d..1eec374 100644
--- a/internal/bitdepth/bitdepth.go
+++ b/internal/bitdepth/bitdepth.go
@@ -19,8 +19,11 @@ var (
 // Float32ToSigned16 quantizes a float32 PCM sample to signed 16-bit PCM.
 func Float32ToSigned16(sample float32) int16 {
 	sample64 := math.Round(float64(sample * 32768))
-	sample64 = math.Max(sample64, -32768)
-	sample64 = math.Min(sample64, 32767)
+	// A NaN sample would make the int16 conversion implementation-dependent; emit silence.
+	if math.IsNaN(sample64) {
+		return 0
+	}
+	sample64 = min(max(sample64, -32768), 32767)
 
 	return int16(sample64)
 }
@@ -45,6 +48,21 @@ func ConvertFloat32LittleEndianToSigned16LittleEndian(
 		return errOutBufferTooSmall
 	}
 
+	if resampleCount == 1 {
+		currIndex := 0
+		for _, sample := range in {
+			res := Float32ToSigned16(sample)
+
+			out[currIndex] = byte(res & 0b11111111)
+			currIndex++
+
+			out[currIndex] = byte(uint16(res) >> 8) // #nosec G115,G602 -- output length was checked above
+			currIndex++
+		}
+
+		return nil
+	}
+
 	currIndex := 0
 	for i := 0; i < len(in); i += channelCount {
 		for j := resampleCount; j > 0; j-- {
diff --git a/internal/silk/decoder.go b/internal/silk/decoder.go
index 7604cf6..276c2b5 100644
--- a/internal/silk/decoder.go
+++ b/internal/silk/decoder.go
@@ -1998,10 +1998,6 @@ func (d *Decoder) lpcSynthesis(
 	n, s, dLPC int, //nolint:varnamelen
 	aQ12, res, gainQ16, lpc []float32,
 ) {
-	// j be the index of the first sample in the residual corresponding to
-	// the current subframe.
-	j := 0
-
 	// Then, for i such that j <= i < (j + n), the result of LPC synthesis
 	// for the current subframe is
 	//
@@ -2011,58 +2007,104 @@ func (d *Decoder) lpcSynthesis(
 	//                  65536.0              /_               4096.0
 	//                                       k=0
 	//
-	var currentLPCVal float32
-	for i := j; i < (j + n); i++ {
-		sampleIndex := i + (n * s)
-
-		lpcVal := gainQ16[s] / 65536.0
-		lpcVal *= res[sampleIndex]
-
-		for k, aQ12 := range aQ12[:dLPC] {
-			lpcIndex := sampleIndex - k - 1
-			switch {
-			case lpcIndex >= 0:
-				currentLPCVal = lpc[lpcIndex]
-			case s == 0:
-				previousIndex := len(d.previousFrameLPCValues) - 1 + (i - k)
-				if previousIndex >= 0 {
-					currentLPCVal = d.previousFrameLPCValues[previousIndex]
-				} else {
-					currentLPCVal = 0
-				}
-			default:
-				currentLPCVal = 0
-			}
+	normalizedAQ12, reversedAQ12 := normalizedLPCWeights(aQ12, dLPC)
+	gain := gainQ16[s] / 65536.0
+	subframeOffset := n * s
+	subframeOut := out[:n]
+	if s > 0 {
+		lpcSynthesisSteadyState(
+			subframeOut,
+			dLPC,
+			reversedAQ12,
+			res[subframeOffset:subframeOffset+n],
+			lpc[subframeOffset:subframeOffset+n],
+			lpc[subframeOffset-dLPC:subframeOffset+n],
+			gain,
+		)
+	} else {
+		d.lpcSynthesisFirstSubframe(subframeOut, dLPC, normalizedAQ12, res[:n], lpc[:n], gain)
+	}
 
-			lpcVal += currentLPCVal * (aQ12 / 4096.0)
+	d.savePreviousFrameLPCValues(lpc, out, n, dLPC)
+}
+
+// normalizedLPCWeights scales the first dLPC Q12 coefficients by 1/4096 and returns them
+// both in their original order and reversed; entries at index dLPC and above stay zero.
+func normalizedLPCWeights(aQ12 []float32, dLPC int) (normalizedAQ12, reversedAQ12 [16]float32) {
+	for tap := range dLPC {
+		normalizedAQ12[tap] = aQ12[tap] / 4096.0
+		reversedAQ12[dLPC-tap-1] = normalizedAQ12[tap]
+	}
+
+	return normalizedAQ12, reversedAQ12
+}
+
+// lpcSynthesisSteadyState runs LPC synthesis for a subframe whose dLPC-sample history lies
+// in historyAndOutput (the caller passes a view of lpc that overlaps subframeLPC). Taps are
+// summed newest-first, as in lpcSynthesisFirstSubframe, so float32 rounding stays the same.
+func lpcSynthesisSteadyState(
+	out []float32, dLPC int, reversedAQ12 [16]float32,
+	subframeRes, subframeLPC, historyAndOutput []float32, gain float32,
+) {
+	for sampleIndex := range out {
+		lpcVal := gain * subframeRes[sampleIndex]
+		history := historyAndOutput[sampleIndex : sampleIndex+dLPC]
+		for coefficientIndex := dLPC - 1; coefficientIndex >= 0; coefficientIndex-- {
+			lpcVal += history[coefficientIndex] * reversedAQ12[coefficientIndex]
 		}
 
-		lpc[sampleIndex] = lpcVal
+		subframeLPC[sampleIndex] = lpcVal
+		out[sampleIndex] = clampNegativeOneToOne(lpcVal)
+	}
+}
 
-		// Then, the signal is clamped into the final nominal range:
-		//
-		//     out[i] = clamp(-1.0, lpc[i], 1.0)
-		//
-		out[i] = clampNegativeOneToOne(lpc[sampleIndex])
-
-		//  The decoder saves the final d_LPC values, i.e., lpc[i] such that
-		// (j + n - d_LPC) <= i < (j + n), to feed into the LPC synthesis of the
-		// next subframe.  This requires storage for up to 16 values of lpc[i]
-		// (for WB frames).
-		// The final d_LPC synthesized samples become the history for the next
-		// subframe. RFC 6716 section 4.2.7.9 describes that continuity
-		// requirement, and decode_frame.c preserves this state even for the
-		// first decoded frame. The old haveDecoded guard skipped that initial
-		// handoff and left the next frame with an all-zero LPC history.
-		if len(out)-1 == i {
-			if cap(d.previousFrameLPCValues) < dLPC {
-				d.previousFrameLPCValues = make([]float32, dLPC)
+// lpcSynthesisFirstSubframe runs LPC synthesis for the first subframe of a frame. Taps
+// reaching before the frame start read d.previousFrameLPCValues (newest sample last);
+// taps older than that saved history contribute zero.
+func (d *Decoder) lpcSynthesisFirstSubframe(
+	out []float32, dLPC int, normalizedAQ12 [16]float32,
+	subframeRes, subframeLPC []float32, gain float32,
+) {
+	var pastSample float32
+	for position := range out {
+		synthesized := gain * subframeRes[position]
+
+		for tap := range dLPC {
+			if lpcIndex := position - tap - 1; lpcIndex >= 0 {
+				pastSample = subframeLPC[lpcIndex]
+			} else if savedIndex := len(d.previousFrameLPCValues) + lpcIndex; savedIndex >= 0 {
+				pastSample = d.previousFrameLPCValues[savedIndex]
 			} else {
-				d.previousFrameLPCValues = d.previousFrameLPCValues[:dLPC]
+				pastSample = 0
 			}
-			copy(d.previousFrameLPCValues, lpc[len(lpc)-dLPC:])
+
+			synthesized += pastSample * normalizedAQ12[tap]
 		}
+
+		subframeLPC[position] = synthesized
+		out[position] = clampNegativeOneToOne(synthesized)
+	}
+}
+
+// savePreviousFrameLPCValues stores the last dLPC entries of lpc in
+// d.previousFrameLPCValues so the next frame's first subframe can continue
+// the LPC history (RFC 6716 section 4.2.7.9; up to 16 values for WB frames).
+// Nothing is saved unless out holds exactly n samples.
+// NOTE(review): this appears to select the frame's final subframe — confirm
+// against how lpcSynthesis callers slice out.
+// The state is kept even for the first decoded frame, as decode_frame.c
+// does; an earlier haveDecoded guard skipped that handoff and left the next
+// frame with an all-zero LPC history.
+func (d *Decoder) savePreviousFrameLPCValues(lpc, out []float32, n, dLPC int) { //nolint:varnamelen
+	if len(out) != n {
+		return
+	}
+
+	if cap(d.previousFrameLPCValues) < dLPC {
+		d.previousFrameLPCValues = make([]float32, dLPC)
 	}
+	d.previousFrameLPCValues = d.previousFrameLPCValues[:dLPC]
+	copy(d.previousFrameLPCValues, lpc[len(lpc)-dLPC:])
 }
 
 // The remainder of the reconstruction process for the frame does not