[FFmpeg-cvslog] lavc/aacsbr: sbr_dequant optimization

Ganesh Ajjanagadde git at videolan.org
Sat Dec 19 18:42:11 CET 2015


ffmpeg | branch: master | Ganesh Ajjanagadde <gajjanagadde at gmail.com> | Tue Dec 15 23:27:23 2015 -0500| [def3c83e1b85cc17cade257cbf3b1fb84e61f788] | committer: Ganesh Ajjanagadde

lavc/aacsbr: sbr_dequant optimization

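This replaces the exp2f calls in sbr_dequant with ff_exp2fi to get a
speedup (~6x). Where the exponent is a multiple of 1/2 (bs_amp_res == 0),
the integer part goes through ff_exp2fi and the result is multiplied by
1 or sqrt(2) from a two-entry table.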

sample benchmark (Haswell, GNU/Linux):
old:
  19102 decicycles in sbr_dequant,    1023 runs,      1 skips
  19002 decicycles in sbr_dequant,    2045 runs,      3 skips
  17638 decicycles in sbr_dequant,    4093 runs,      3 skips
  15825 decicycles in sbr_dequant,    8189 runs,      3 skips
  16404 decicycles in sbr_dequant,   16379 runs,      5 skips

new:
   3063 decicycles in sbr_dequant,    1024 runs,      0 skips
   3049 decicycles in sbr_dequant,    2048 runs,      0 skips
   2968 decicycles in sbr_dequant,    4096 runs,      0 skips
   2818 decicycles in sbr_dequant,    8191 runs,      1 skips
   2853 decicycles in sbr_dequant,   16383 runs,      1 skips

Reviewed-by: Andreas Cadhalpun <Andreas.Cadhalpun at googlemail.com>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=def3c83e1b85cc17cade257cbf3b1fb84e61f788
---

 libavcodec/aacsbr.c |   34 ++++++++++++++++++++++------------
 libavcodec/aacsbr.h |    2 +-
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/libavcodec/aacsbr.c b/libavcodec/aacsbr.c
index d1e3a91..15956e3 100644
--- a/libavcodec/aacsbr.c
+++ b/libavcodec/aacsbr.c
@@ -33,6 +33,7 @@
 #include "aacsbrdata.h"
 #include "aacsbr_tablegen.h"
 #include "fft.h"
+#include "internal.h"
 #include "aacps.h"
 #include "sbrdsp.h"
 #include "libavutil/internal.h"
@@ -73,15 +74,22 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
 {
     int k, e;
     int ch;
-
+    static const double exp2_tab[2] = {1, M_SQRT2};
     if (id_aac == TYPE_CPE && sbr->bs_coupling) {
-        float alpha      = sbr->data[0].bs_amp_res ?  1.0f :  0.5f;
-        float pan_offset = sbr->data[0].bs_amp_res ? 12.0f : 24.0f;
+        int pan_offset = sbr->data[0].bs_amp_res ? 12 : 24;
         for (e = 1; e <= sbr->data[0].bs_num_env; e++) {
             for (k = 0; k < sbr->n[sbr->data[0].bs_freq_res[e]]; k++) {
-                float temp1 = exp2f(sbr->data[0].env_facs_q[e][k] * alpha + 7.0f);
-                float temp2 = exp2f((pan_offset - sbr->data[1].env_facs_q[e][k]) * alpha);
-                float fac;
+                float temp1, temp2, fac;
+                if (sbr->data[0].bs_amp_res) {
+                    temp1 = ff_exp2fi(sbr->data[0].env_facs_q[e][k] + 7);
+                    temp2 = ff_exp2fi(pan_offset - sbr->data[1].env_facs_q[e][k]);
+                }
+                else {
+                    temp1 = ff_exp2fi((sbr->data[0].env_facs_q[e][k]>>1) + 7) *
+                            exp2_tab[sbr->data[0].env_facs_q[e][k] & 1];
+                    temp2 = ff_exp2fi((pan_offset - sbr->data[1].env_facs_q[e][k])>>1) *
+                            exp2_tab[(pan_offset - sbr->data[1].env_facs_q[e][k]) & 1];
+                }
                 if (temp1 > 1E20) {
                     av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
                     temp1 = 1;
@@ -93,8 +101,8 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
         }
         for (e = 1; e <= sbr->data[0].bs_num_noise; e++) {
             for (k = 0; k < sbr->n_q; k++) {
-                float temp1 = exp2f(NOISE_FLOOR_OFFSET - sbr->data[0].noise_facs_q[e][k] + 1);
-                float temp2 = exp2f(12 - sbr->data[1].noise_facs_q[e][k]);
+                float temp1 = ff_exp2fi(NOISE_FLOOR_OFFSET - sbr->data[0].noise_facs_q[e][k] + 1);
+                float temp2 = ff_exp2fi(12 - sbr->data[1].noise_facs_q[e][k]);
                 float fac;
                 av_assert0(temp1 <= 1E20);
                 fac = temp1 / (1.0f + temp2);
@@ -104,11 +112,13 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
         }
     } else { // SCE or one non-coupled CPE
         for (ch = 0; ch < (id_aac == TYPE_CPE) + 1; ch++) {
-            float alpha = sbr->data[ch].bs_amp_res ? 1.0f : 0.5f;
             for (e = 1; e <= sbr->data[ch].bs_num_env; e++)
                 for (k = 0; k < sbr->n[sbr->data[ch].bs_freq_res[e]]; k++){
-                    sbr->data[ch].env_facs[e][k] =
-                        exp2f(alpha * sbr->data[ch].env_facs_q[e][k] + 6.0f);
+                    if (sbr->data[ch].bs_amp_res)
+                        sbr->data[ch].env_facs[e][k] = ff_exp2fi(sbr->data[ch].env_facs_q[e][k] + 6);
+                    else
+                        sbr->data[ch].env_facs[e][k] = ff_exp2fi((sbr->data[ch].env_facs_q[e][k]>>1) + 6)
+                                                       * exp2_tab[sbr->data[ch].env_facs_q[e][k] & 1];
                     if (sbr->data[ch].env_facs[e][k] > 1E20) {
                         av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
                         sbr->data[ch].env_facs[e][k] = 1;
@@ -118,7 +128,7 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
             for (e = 1; e <= sbr->data[ch].bs_num_noise; e++)
                 for (k = 0; k < sbr->n_q; k++)
                     sbr->data[ch].noise_facs[e][k] =
-                        exp2f(NOISE_FLOOR_OFFSET - sbr->data[ch].noise_facs_q[e][k]);
+                        ff_exp2fi(NOISE_FLOOR_OFFSET - sbr->data[ch].noise_facs_q[e][k]);
         }
     }
 }
diff --git a/libavcodec/aacsbr.h b/libavcodec/aacsbr.h
index ed1a7f9..88c4d8a 100644
--- a/libavcodec/aacsbr.h
+++ b/libavcodec/aacsbr.h
@@ -34,7 +34,7 @@
 #include "sbr.h"
 
 #define ENVELOPE_ADJUSTMENT_OFFSET 2
-#define NOISE_FLOOR_OFFSET FIXR(6.0f)
+#define NOISE_FLOOR_OFFSET 6
 
 /**
  * SBR VLC tables



More information about the ffmpeg-cvslog mailing list