[FFmpeg-cvslog] Merge commit '4efab89332ea39a77145e8b15562b981d9dbde68'

James Almer git at videolan.org
Tue Jan 31 20:12:20 EET 2017


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Tue Jan 31 15:08:19 2017 -0300| [ac774cfa571734c49c26e2d3387adccff8957ff8] | committer: James Almer

Merge commit '4efab89332ea39a77145e8b15562b981d9dbde68'

* commit '4efab89332ea39a77145e8b15562b981d9dbde68':
  x86: Use *_FAST/*_SLOW CPU feature detection macros where appropriate

Merged-by: James Almer <jamrial at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ac774cfa571734c49c26e2d3387adccff8957ff8
---

 libavcodec/x86/ac3dsp_init.c | 13 ++++++++-----
 libavcodec/x86/h264_qpel.c   | 11 ++++++-----
 libavcodec/x86/lpc.c         |  2 +-
 libavcodec/x86/vp8dsp_init.c |  4 ++--
 4 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/libavcodec/x86/ac3dsp_init.c b/libavcodec/x86/ac3dsp_init.c
index 07f0d25..9fd0aef 100644
--- a/libavcodec/x86/ac3dsp_init.c
+++ b/libavcodec/x86/ac3dsp_init.c
@@ -228,16 +228,19 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
         c->float_to_fixed24 = ff_float_to_fixed24_sse2;
         c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
         c->extract_exponents = ff_ac3_extract_exponents_sse2;
-        if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) {
-            c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
-            c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
-        }
         if (bit_exact) {
             c->apply_window_int16 = ff_apply_window_int16_sse2;
-        } else if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) {
+        }
+    }
+
+    if (EXTERNAL_SSE2_FAST(cpu_flags)) {
+        c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
+        c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
+        if (!bit_exact) {
             c->apply_window_int16 = ff_apply_window_int16_round_sse2;
         }
     }
+
     if (EXTERNAL_SSSE3(cpu_flags)) {
         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
         if (cpu_flags & AV_CPU_FLAG_ATOM) {
diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c
index 8c4b1e1..96fa4a7 100644
--- a/libavcodec/x86/h264_qpel.c
+++ b/libavcodec/x86/h264_qpel.c
@@ -562,11 +562,6 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
     }
 
     if (EXTERNAL_SSE2(cpu_flags)) {
-        if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) {
-            // these functions are slower than mmx on AMD, but faster on Intel
-            H264_QPEL_FUNCS(0, 0, sse2);
-        }
-
         if (!high_bit_depth) {
             H264_QPEL_FUNCS(0, 1, sse2);
             H264_QPEL_FUNCS(0, 2, sse2);
@@ -593,6 +588,12 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
         }
     }
 
+    if (EXTERNAL_SSE2_FAST(cpu_flags)) {
+        if (!high_bit_depth) {
+            H264_QPEL_FUNCS(0, 0, sse2);
+        }
+    }
+
     if (EXTERNAL_SSSE3(cpu_flags)) {
         if (!high_bit_depth) {
             H264_QPEL_FUNCS(1, 0, ssse3);
diff --git a/libavcodec/x86/lpc.c b/libavcodec/x86/lpc.c
index 3a9493f..6c72e21 100644
--- a/libavcodec/x86/lpc.c
+++ b/libavcodec/x86/lpc.c
@@ -154,7 +154,7 @@ av_cold void ff_lpc_init_x86(LPCContext *c)
 #if HAVE_SSE2_INLINE
     int cpu_flags = av_get_cpu_flags();
 
-    if (HAVE_SSE2_INLINE && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
+    if (INLINE_SSE2(cpu_flags) || INLINE_SSE2_SLOW(cpu_flags)) {
         c->lpc_apply_welch_window = lpc_apply_welch_window_sse2;
         c->lpc_compute_autocorr   = lpc_compute_autocorr_sse2;
     }
diff --git a/libavcodec/x86/vp8dsp_init.c b/libavcodec/x86/vp8dsp_init.c
index 897d5a0..20c5fac 100644
--- a/libavcodec/x86/vp8dsp_init.c
+++ b/libavcodec/x86/vp8dsp_init.c
@@ -346,7 +346,7 @@ av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c)
         c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
     }
 
-    if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
+    if (EXTERNAL_SSE2(cpu_flags) || EXTERNAL_SSE2_SLOW(cpu_flags)) {
         VP8_LUMA_MC_FUNC(0, 16, sse2);
         VP8_MC_FUNC(1, 8, sse2);
         VP8_BILINEAR_MC_FUNC(0, 16, sse2);
@@ -416,7 +416,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c)
         c->vp8_luma_dc_wht                      = ff_vp8_luma_dc_wht_sse;
     }
 
-    if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
+    if (EXTERNAL_SSE2(cpu_flags) || EXTERNAL_SSE2_SLOW(cpu_flags)) {
         c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
 
         c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;


======================================================================

diff --cc libavcodec/x86/lpc.c
index 3a9493f,e8cce42..6c72e21
--- a/libavcodec/x86/lpc.c
+++ b/libavcodec/x86/lpc.c
@@@ -154,7 -152,7 +154,7 @@@ av_cold void ff_lpc_init_x86(LPCContex
  #if HAVE_SSE2_INLINE
      int cpu_flags = av_get_cpu_flags();
  
-     if (HAVE_SSE2_INLINE && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
 -    if (INLINE_SSE2_SLOW(cpu_flags)) {
++    if (INLINE_SSE2(cpu_flags) || INLINE_SSE2_SLOW(cpu_flags)) {
          c->lpc_apply_welch_window = lpc_apply_welch_window_sse2;
          c->lpc_compute_autocorr   = lpc_compute_autocorr_sse2;
      }
diff --cc libavcodec/x86/vp8dsp_init.c
index 897d5a0,3e84bed..20c5fac
--- a/libavcodec/x86/vp8dsp_init.c
+++ b/libavcodec/x86/vp8dsp_init.c
@@@ -346,7 -346,7 +346,7 @@@ av_cold void ff_vp78dsp_init_x86(VP8DSP
          c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
      }
  
-     if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
 -    if (EXTERNAL_SSE2_SLOW(cpu_flags)) {
++    if (EXTERNAL_SSE2(cpu_flags) || EXTERNAL_SSE2_SLOW(cpu_flags)) {
          VP8_LUMA_MC_FUNC(0, 16, sse2);
          VP8_MC_FUNC(1, 8, sse2);
          VP8_BILINEAR_MC_FUNC(0, 16, sse2);
@@@ -416,7 -416,7 +416,7 @@@ av_cold void ff_vp8dsp_init_x86(VP8DSPC
          c->vp8_luma_dc_wht                      = ff_vp8_luma_dc_wht_sse;
      }
  
-     if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
 -    if (EXTERNAL_SSE2_SLOW(cpu_flags)) {
++    if (EXTERNAL_SSE2(cpu_flags) || EXTERNAL_SSE2_SLOW(cpu_flags)) {
          c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
  
          c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;



More information about the ffmpeg-cvslog mailing list