Skip to content

Commit 8cf872a

Browse files
Zheng Lv and davidgao
authored and committed
Make CELT FFT twiddle complex type aligned
This makes kiss_twiddle_cpx 4-byte aligned (instead of 2-byte) for fixed-point builds. In tests on an armv6j+nofp development board, CELT encoding becomes 1.4x as fast and decoding over 2x as fast. The performance gain is mostly attributed to the proper alignment of the static const array mdct_twiddles960.

Co-authored-by: David Gao <[email protected]>
Signed-off-by: Felicia Lim <[email protected]>
1 parent 757c53f commit 8cf872a

File tree

1 file changed

+11
-1
lines changed

1 file changed

+11
-1
lines changed

celt/kiss_fft.h

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,10 @@ extern "C" {
5252
# define kiss_fft_scalar opus_int32
5353
# define kiss_twiddle_scalar opus_int16
5454

55+
/* Some 32-bit CPUs would load/store a kiss_twiddle_cpx with a single memory
56+
* access, and could benefit from additional alignment.
57+
*/
58+
# define KISS_TWIDDLE_CPX_ALIGNMENT (sizeof(opus_int32))
5559

5660
#else
5761
# ifndef kiss_fft_scalar
@@ -62,6 +66,12 @@ extern "C" {
6266
# endif
6367
#endif
6468

69+
#if defined(__GNUC__) && defined(KISS_TWIDDLE_CPX_ALIGNMENT)
70+
#define KISS_TWIDDLE_CPX_ALIGNED __attribute__((aligned(KISS_TWIDDLE_CPX_ALIGNMENT)))
71+
#else
72+
#define KISS_TWIDDLE_CPX_ALIGNED
73+
#endif
74+
6575
typedef struct {
6676
kiss_fft_scalar r;
6777
kiss_fft_scalar i;
@@ -70,7 +80,7 @@ typedef struct {
7080
typedef struct {
7181
kiss_twiddle_scalar r;
7282
kiss_twiddle_scalar i;
73-
}kiss_twiddle_cpx;
83+
} KISS_TWIDDLE_CPX_ALIGNED kiss_twiddle_cpx;
7484

7585
#define MAXFACTORS 8
7686
/* e.g. an fft of length 128 has 4 factors

0 commit comments

Comments
 (0)