[FFmpeg-devel] [PATCH 3/5] avcodec/fft_fixed: Hardcode cosine tables to save space
Andreas Rheinhardt
andreas.rheinhardt at gmail.com
Thu Jan 7 01:13:06 EET 2021
The tables that are used take 256B; the code to initialize them uses
281B here (GCC 9.3, x64, -O3, but in av_cold functions). On top of that,
removing this code also allows removing the array of AVOnce used to
guard the cosine tables against multiple initializations; this also
removes relocations.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at gmail.com>
---
libavcodec/Makefile | 2 +-
libavcodec/fft.h | 19 +++++++++---------
libavcodec/fft_fixed.c | 42 +++++++++++++++++++++++++++++++++++++++
libavcodec/fft_template.c | 27 +++++++++----------------
4 files changed, 62 insertions(+), 28 deletions(-)
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index fea37ef3c9..8e03feb7d1 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -83,7 +83,7 @@ OBJS-$(CONFIG_EXIF) += exif.o tiff_common.o
OBJS-$(CONFIG_FAANDCT) += faandct.o
OBJS-$(CONFIG_FAANIDCT) += faanidct.o
OBJS-$(CONFIG_FDCTDSP) += fdctdsp.o jfdctfst.o jfdctint.o
-FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o cos_fixed_tables.o
+FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o
OBJS-$(CONFIG_FFT) += avfft.o fft_fixed.o fft_float.o \
fft_fixed_32.o fft_init_table.o \
$(FFT-OBJS-yes)
diff --git a/libavcodec/fft.h b/libavcodec/fft.h
index fedc0c5ef0..4b54265b7f 100644
--- a/libavcodec/fft.h
+++ b/libavcodec/fft.h
@@ -114,10 +114,18 @@ struct FFTContext {
uint32_t *revtab32;
};
-#if CONFIG_HARDCODED_TABLES
+#if CONFIG_HARDCODED_TABLES || !FFT_FLOAT
#define COSTABLE_CONST const
+#define ff_init_ff_cos_tabs(...)
#else
#define COSTABLE_CONST
+#define ff_init_ff_cos_tabs FFT_NAME(ff_init_ff_cos_tabs)
+
+/**
+ * Initialize the cosine table in ff_cos_tabs[index]
+ * @param index index in ff_cos_tabs array of the table to initialize
+ */
+void ff_init_ff_cos_tabs(int index);
#endif
#define COSTABLE(size) \
@@ -138,16 +146,9 @@ extern COSTABLE(16384);
extern COSTABLE(32768);
extern COSTABLE(65536);
extern COSTABLE(131072);
-#endif /* FFT_FLOAT */
extern COSTABLE_CONST FFTSample* const FFT_NAME(ff_cos_tabs)[];
-#define ff_init_ff_cos_tabs FFT_NAME(ff_init_ff_cos_tabs)
-
-/**
- * Initialize the cosine table in ff_cos_tabs[index]
- * @param index index in ff_cos_tabs array of the table to initialize
- */
-void ff_init_ff_cos_tabs(int index);
+#endif /* FFT_FLOAT */
#define ff_fft_init FFT_NAME(ff_fft_init)
#define ff_fft_end FFT_NAME(ff_fft_end)
diff --git a/libavcodec/fft_fixed.c b/libavcodec/fft_fixed.c
index 52d225ee09..ce52dec7fd 100644
--- a/libavcodec/fft_fixed.c
+++ b/libavcodec/fft_fixed.c
@@ -19,4 +19,46 @@
#define FFT_FLOAT 0
#define FFT_FIXED_32 0
#define MAX_BITS 7
+
+#include "fft.h"
+
+COSTABLE(16) = {
+ 32767, 30274, 23170, 12540,
+ 0, 12540, 23170, 30274,
+};
+COSTABLE(32) = {
+ 32767, 32138, 30274, 27246,
+ 23170, 18205, 12540, 6393,
+ 0, 6393, 12540, 18205,
+ 23170, 27246, 30274, 32138,
+};
+COSTABLE(64) = {
+ 32767, 32610, 32138, 31357,
+ 30274, 28899, 27246, 25330,
+ 23170, 20788, 18205, 15447,
+ 12540, 9512, 6393, 3212,
+ 0, 3212, 6393, 9512,
+ 12540, 15447, 18205, 20788,
+ 23170, 25330, 27246, 28899,
+ 30274, 31357, 32138, 32610,
+};
+COSTABLE(128) = {
+ 32767, 32729, 32610, 32413,
+ 32138, 31786, 31357, 30853,
+ 30274, 29622, 28899, 28106,
+ 27246, 26320, 25330, 24279,
+ 23170, 22006, 20788, 19520,
+ 18205, 16846, 15447, 14010,
+ 12540, 11039, 9512, 7962,
+ 6393, 4808, 3212, 1608,
+ 0, 1608, 3212, 4808,
+ 6393, 7962, 9512, 11039,
+ 12540, 14010, 15447, 16846,
+ 18205, 19520, 20788, 22006,
+ 23170, 24279, 25330, 26320,
+ 27246, 28106, 28899, 29622,
+ 30274, 30853, 31357, 31786,
+ 32138, 32413, 32610, 32729,
+};
+
#include "fft_template.c"
diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c
index 7a7d51a6b4..9d125de073 100644
--- a/libavcodec/fft_template.c
+++ b/libavcodec/fft_template.c
@@ -42,12 +42,12 @@
#else /* FFT_FIXED_32 */
/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
+#if FFT_FLOAT
#if !CONFIG_HARDCODED_TABLES
COSTABLE(16);
COSTABLE(32);
COSTABLE(64);
COSTABLE(128);
-#if FFT_FLOAT
COSTABLE(256);
COSTABLE(512);
COSTABLE(1024);
@@ -58,7 +58,6 @@ COSTABLE(16384);
COSTABLE(32768);
COSTABLE(65536);
COSTABLE(131072);
-#endif /* FFT_FLOAT */
static av_cold void init_ff_cos_tabs(int index)
{
@@ -87,7 +86,6 @@ INIT_FF_COS_TABS_FUNC(4, 16)
INIT_FF_COS_TABS_FUNC(5, 32)
INIT_FF_COS_TABS_FUNC(6, 64)
INIT_FF_COS_TABS_FUNC(7, 128)
-#if FFT_FLOAT
INIT_FF_COS_TABS_FUNC(8, 256)
INIT_FF_COS_TABS_FUNC(9, 512)
INIT_FF_COS_TABS_FUNC(10, 1024)
@@ -98,7 +96,6 @@ INIT_FF_COS_TABS_FUNC(14, 16384)
INIT_FF_COS_TABS_FUNC(15, 32768)
INIT_FF_COS_TABS_FUNC(16, 65536)
INIT_FF_COS_TABS_FUNC(17, 131072)
-#endif /* FFT_FLOAT */
static CosTabsInitOnce cos_tabs_init_once[] = {
{ NULL },
@@ -109,7 +106,6 @@ static CosTabsInitOnce cos_tabs_init_once[] = {
{ init_ff_cos_tabs_32, AV_ONCE_INIT },
{ init_ff_cos_tabs_64, AV_ONCE_INIT },
{ init_ff_cos_tabs_128, AV_ONCE_INIT },
-#if FFT_FLOAT
{ init_ff_cos_tabs_256, AV_ONCE_INIT },
{ init_ff_cos_tabs_512, AV_ONCE_INIT },
{ init_ff_cos_tabs_1024, AV_ONCE_INIT },
@@ -120,17 +116,20 @@ static CosTabsInitOnce cos_tabs_init_once[] = {
{ init_ff_cos_tabs_32768, AV_ONCE_INIT },
{ init_ff_cos_tabs_65536, AV_ONCE_INIT },
{ init_ff_cos_tabs_131072, AV_ONCE_INIT },
-#endif /* FFT_FLOAT */
};
+av_cold void ff_init_ff_cos_tabs(int index)
+{
+ ff_thread_once(&cos_tabs_init_once[index].control, cos_tabs_init_once[index].func);
+}
#endif
+
COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
NULL, NULL, NULL, NULL,
FFT_NAME(ff_cos_16),
FFT_NAME(ff_cos_32),
FFT_NAME(ff_cos_64),
FFT_NAME(ff_cos_128),
-#if FFT_FLOAT
FFT_NAME(ff_cos_256),
FFT_NAME(ff_cos_512),
FFT_NAME(ff_cos_1024),
@@ -141,8 +140,8 @@ COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
FFT_NAME(ff_cos_32768),
FFT_NAME(ff_cos_65536),
FFT_NAME(ff_cos_131072),
-#endif /* FFT_FLOAT */
};
+#endif /* FFT_FLOAT */
#endif /* FFT_FIXED_32 */
@@ -160,13 +159,6 @@ static int split_radix_permutation(int i, int n, int inverse)
else return split_radix_permutation(i, m, inverse)*4 - 1;
}
-av_cold void ff_init_ff_cos_tabs(int index)
-{
-#if (!CONFIG_HARDCODED_TABLES) && (!FFT_FIXED_32)
- ff_thread_once(&cos_tabs_init_once[index].control, cos_tabs_init_once[index].func);
-#endif
-}
-
static const int avx_tab[] = {
0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15
};
@@ -250,13 +242,12 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
if (ARCH_X86) ff_fft_init_x86(s);
if (CONFIG_MDCT) s->mdct_calcw = s->mdct_calc;
if (HAVE_MIPSFPU) ff_fft_init_mips(s);
+ for (j = 4; !CONFIG_HARDCODED_TABLES && j <= nbits; j++)
+ ff_init_ff_cos_tabs(j);
#else
if (CONFIG_MDCT) s->mdct_calcw = ff_mdct_calcw_c;
if (ARCH_ARM) ff_fft_fixed_init_arm(s);
#endif
- for(j=4; j<=nbits; j++) {
- ff_init_ff_cos_tabs(j);
- }
#endif /* FFT_FIXED_32 */
--
2.25.1
More information about the ffmpeg-devel mailing list