[FFmpeg-devel] [PATCH 4/4] lavc/fft-test: update benchmark code

Ganesh Ajjanagadde gajjanag at gmail.com
Fri Mar 25 01:50:52 CET 2016


1. The most important change is the addition of an fft_permute call to
the timed avfft loop. This makes the comparison between fftw and avfft
fair, since fftw does not require a separate permute call. This is evident
from the test code additions in the previous commit, and also from
http://www.fftw.org/benchfft/.
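2. Add a memcpy to the FFTW speed test so that the input buffer is restored
from a saved copy before every timed transform. This does not really change
the benchmark results, but might as well be done.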
3. A trivial change: print the per-transform timing with one extra decimal
place. I found the extra digit quite stable and useful for more detailed
benchmarks.

FFT 4 test
Checking...
max:0.000000 e:0
Speed test...
time: 0.12 us/transform [total time=1.98 s its=16777216]
FFTW 4 test
Checking...
max:0.000000 e:0
Speed test...
time: 0.11 us/transform [total time=1.91 s its=16777216]
FFT 8 test
Checking...
max:0.000000 e:5.21541e-08
Speed test...
time: 0.13 us/transform [total time=1.12 s its=8388608]
FFTW 8 test
Checking...
max:0.000000 e:6.70552e-08
Speed test...
time: 0.13 us/transform [total time=1.09 s its=8388608]
FFT 16 test
Checking...
max:0.000000 e:8.73796e-08
Speed test...
time: 0.16 us/transform [total time=1.33 s its=8388608]
FFTW 16 test
Checking...
max:0.000000 e:1.0382e-07
Speed test...
time: 0.17 us/transform [total time=1.40 s its=8388608]
FFT 32 test
Checking...
max:0.000000 e:1.63922e-07
Speed test...
time: 0.10 us/transform [total time=1.75 s its=16777216]
FFTW 32 test
Checking...
max:0.000001 e:2.17227e-07
Speed test...
time: 0.16 us/transform [total time=1.31 s its=8388608]
FFT 64 test
Checking...
max:0.000001 e:3.09466e-07
Speed test...
time: 0.19 us/transform [total time=1.60 s its=8388608]
FFTW 64 test
Checking...
max:0.000001 e:3.12211e-07
Speed test...
time: 0.26 us/transform [total time=1.08 s its=4194304]
FFT 128 test
Checking...
max:0.000002 e:4.97895e-07
Speed test...
time: 0.39 us/transform [total time=1.61 s its=4194304]
FFTW 128 test
Checking...
max:0.000002 e:5.88435e-07
Speed test...
time: 0.50 us/transform [total time=1.05 s its=2097152]
FFT 256 test
Checking...
max:0.000003 e:8.80516e-07
Speed test...
time: 0.87 us/transform [total time=1.83 s its=2097152]
FFTW 256 test
Checking...
max:0.000004 e:8.74694e-07
Speed test...
time: 0.81 us/transform [total time=1.69 s its=2097152]
FFT 512 test
Checking...
max:0.000008 e:1.25487e-06
Speed test...
time: 1.79 us/transform [total time=1.87 s its=1048576]
FFTW 512 test
Checking...
max:0.000008 e:1.39262e-06
Speed test...
time: 1.38 us/transform [total time=1.44 s its=1048576]
FFT 1024 test
Checking...
max:0.000008 e:1.90538e-06
Speed test...
time: 4.00 us/transform [total time=1.05 s its=262144]
FFTW 1024 test
Checking...
max:0.000008 e:2.00327e-06
Speed test...
time: 2.83 us/transform [total time=1.49 s its=524288]
FFT 2048 test
Checking...
max:0.000015 e:2.83741e-06
Speed test...
time: 9.36 us/transform [total time=1.23 s its=131072]
FFTW 2048 test
Checking...
max:0.000015 e:3.02253e-06
Speed test...
time: 6.91 us/transform [total time=1.81 s its=262144]
FFT 4096 test
Checking...
max:0.000023 e:4.2942e-06
Speed test...
time: 28.71 us/transform [total time=1.88 s its=65536]
FFTW 4096 test
Checking...
max:0.000023 e:4.43059e-06
Speed test...
time: 17.19 us/transform [total time=1.13 s its=65536]
FFT 8192 test
Checking...
max:0.000031 e:6.37783e-06
Speed test...
time: 73.50 us/transform [total time=1.20 s its=16384]
FFTW 8192 test
Checking...
max:0.000037 e:6.65281e-06
Speed test...
time: 36.71 us/transform [total time=1.20 s its=32768]
FFT 16384 test
Checking...
max:0.000046 e:9.52666e-06
Speed test...
time: 164.88 us/transform [total time=1.35 s its=8192]
FFTW 16384 test
Checking...
max:0.000046 e:1.00169e-05
Speed test...
time: 86.26 us/transform [total time=1.41 s its=16384]
FFT 32768 test
Checking...
max:0.000076 e:1.39189e-05
Speed test...
time: 382.27 us/transform [total time=1.57 s its=4096]
FFTW 32768 test
Checking...
max:0.000061 e:1.46244e-05
Speed test...
time: 200.96 us/transform [total time=1.65 s its=8192]

Signed-off-by: Ganesh Ajjanagadde <gajjanag at gmail.com>
---
 libavcodec/fft-test.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/libavcodec/fft-test.c b/libavcodec/fft-test.c
index 7fe42be..7b67b2e 100644
--- a/libavcodec/fft-test.c
+++ b/libavcodec/fft-test.c
@@ -253,7 +253,7 @@ int main(int argc, char **argv)
     DCTContext d;
 #if CONFIG_LIBFFTW3
     FFTWContext fftw;
-    FFTWComplex *tab_fftw;
+    FFTWComplex *tab_fftw, *tab_fftw_copy;
 #endif /* CONFIG_LIBFFTW3 */
 #endif /* FFT_FLOAT */
     int it, i, err = 1;
@@ -317,8 +317,9 @@ int main(int argc, char **argv)
     tab_ref  = av_malloc_array(fft_size, sizeof(FFTComplex));
     tab2     = av_malloc_array(fft_size, sizeof(FFTSample));
 #if CONFIG_LIBFFTW3 && FFT_FLOAT
-    tab_fftw = av_malloc_array(fft_size, sizeof(*tab_fftw));
-    if (!tab_fftw)
+    tab_fftw      = av_malloc_array(fft_size, sizeof(*tab_fftw));
+    tab_fftw_copy = av_malloc_array(fft_size, sizeof(*tab_fftw_copy));
+    if (!(tab_fftw && tab_fftw_copy))
         goto cleanup_fftw;
 #endif /* CONFIG_LIBFFTW3 */
 
@@ -394,6 +395,9 @@ int main(int argc, char **argv)
         tab_fftw[i][1] = tab1[i].im;
 #endif /* CONFIG_LIBFFTW3 */
     }
+#if CONFIG_LIBFFTW3 && FFT_FLOAT
+    memcpy(tab_fftw_copy, tab_fftw, fft_size * sizeof(*tab_fftw));
+#endif
 
     /* checking result */
     av_log(NULL, AV_LOG_INFO, "Checking...\n");
@@ -502,10 +506,12 @@ int main(int argc, char **argv)
                     break;
                 case TRANSFORM_FFT:
                     memcpy(tab, tab1, fft_size * sizeof(FFTComplex));
+                    s.fft_permute(&s, tab);
                     s.fft_calc(&s, tab);
                     break;
 #if CONFIG_LIBFFTW3 && FFT_FLOAT
                 case TRANSFORM_FFTW:
+                    memcpy(tab_fftw, tab_fftw_copy, fft_size * sizeof(*tab_fftw));
                     fftw.fft_calc(&fftw, tab_fftw);
                     break;
 #endif /* CONFIG_LIBFFTW3 */
@@ -527,7 +533,7 @@ int main(int argc, char **argv)
             nb_its *= 2;
         }
         av_log(NULL, AV_LOG_INFO,
-               "time: %0.1f us/transform [total time=%0.2f s its=%d]\n",
+               "time: %0.2f us/transform [total time=%0.2f s its=%d]\n",
                (double) duration / nb_its,
                (double) duration / 1000000.0,
                nb_its);
-- 
2.7.4



More information about the ffmpeg-devel mailing list