[FFmpeg-cvslog] avcodec/x86/lossless_audiodsp: Move order&8 fallback into C code

Michael Niedermayer git at videolan.org
Fri Feb 6 02:44:01 CET 2015


ffmpeg | branch: master | Michael Niedermayer <michaelni at gmx.at> | Fri Feb  6 01:57:23 2015 +0100| [f1214763af1abf5d7f49b98f88c06e13b98932a6] | committer: Michael Niedermayer

avcodec/x86/lossless_audiodsp: Move order&8 fallback into C code

This is simpler and more robust, and fixes mismatched XMM register
save/restore sequences

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f1214763af1abf5d7f49b98f88c06e13b98932a6
---

 libavcodec/x86/lossless_audiodsp.asm    |    8 --------
 libavcodec/x86/lossless_audiodsp_init.c |   28 ++++++++++++++++++++++++++--
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/libavcodec/x86/lossless_audiodsp.asm b/libavcodec/x86/lossless_audiodsp.asm
index 5dff835..084ed9a 100644
--- a/libavcodec/x86/lossless_audiodsp.asm
+++ b/libavcodec/x86/lossless_audiodsp.asm
@@ -26,12 +26,6 @@ SECTION_TEXT
 ; int ff_scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3,
 ;                                     int order, int mul)
 cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, order, mul
-%if mmsize == 16
-    test orderq, 8
-        jnz scalarproduct_and_madd_int16_fallback
-%else
-    scalarproduct_and_madd_int16_fallback
-%endif
     shl orderq, 1
     movd    m7, mulm
 %if mmsize == 16
@@ -123,8 +117,6 @@ align 16
 ;                                     int order, int mul)
 INIT_XMM ssse3
 cglobal scalarproduct_and_madd_int16, 4,4,10, v1, v2, v3, order, mul
-    test orderq, 8
-        jnz scalarproduct_and_madd_int16_fallback
     shl orderq, 1
     movd    m7, mulm
     pshuflw m7, m7, 0
diff --git a/libavcodec/x86/lossless_audiodsp_init.c b/libavcodec/x86/lossless_audiodsp_init.c
index 4879dff..2c13e1e 100644
--- a/libavcodec/x86/lossless_audiodsp_init.c
+++ b/libavcodec/x86/lossless_audiodsp_init.c
@@ -31,6 +31,30 @@ int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2,
                                               const int16_t *v3,
                                               int order, int mul);
 
+static int32_t scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2,
+                                                 const int16_t *v3,
+                                                 int order, int mul)
+{
+#if HAVE_SSE2_EXTERNAL
+    if (order & 8)
+        return ff_scalarproduct_and_madd_int16_mmxext(v1, v2, v3, order, mul);
+    else
+        return ff_scalarproduct_and_madd_int16_sse2(v1, v2, v3, order, mul);
+#endif
+}
+
+static int32_t scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2,
+                                                  const int16_t *v3,
+                                                  int order, int mul)
+{
+#if HAVE_SSSE3_EXTERNAL
+    if (order & 8)
+        return ff_scalarproduct_and_madd_int16_mmxext(v1, v2, v3, order, mul);
+    else
+        return ff_scalarproduct_and_madd_int16_ssse3(v1, v2, v3, order, mul);
+#endif
+}
+
 av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -39,9 +63,9 @@ av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c)
         c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmxext;
 
     if (EXTERNAL_SSE2(cpu_flags))
-        c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
+        c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_sse2;
 
     if (EXTERNAL_SSSE3(cpu_flags) &&
         !(cpu_flags & (AV_CPU_FLAG_SSE42 | AV_CPU_FLAG_3DNOW))) // cachesplit
-        c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3;
+        c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_ssse3;
 }



More information about the ffmpeg-cvslog mailing list