[FFmpeg-cvslog] flac/x86: add ff_flac_lpc_32_xop()

James Almer git at videolan.org
Thu Feb 13 22:19:45 CET 2014


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Sat Feb  8 02:54:51 2014 -0300| [e87974bc00e997c5844300687a97a11e0dbf6f12] | committer: Michael Niedermayer

flac/x86: add ff_flac_lpc_32_xop()

Tested on an AMD FX 6300

679081 decicycles in ff_flac_lpc_32_xop, 32768 runs
774425 decicycles in ff_flac_lpc_32_sse4, 32768 runs

Signed-off-by: James Almer <jamrial at gmail.com>
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e87974bc00e997c5844300687a97a11e0dbf6f12
---

 libavcodec/x86/flacdsp.asm    |   21 ++++++++++++---------
 libavcodec/x86/flacdsp_init.c |    6 ++++++
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index e28f905..1a83cd8 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -24,7 +24,8 @@
 
 SECTION .text
 
-INIT_XMM sse4
+%macro LPC_32 1
+INIT_XMM %1
 cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
     sub    lend, pred_orderd
     jle .ret
@@ -43,25 +44,21 @@ ALIGN 16
     test   jq, jq
     jz .end_order
 .loop_order:
-    pmuldq m0, m1
-    paddq  m2, m0
+    pmacsdql m2, m0, m1, m2
     movd   m0, [decodedq+jq*4]
-    pmuldq m1, m0
-    paddq  m3, m1
+    pmacsdql m3, m1, m0, m3
     movd   m1, [coeffsq+jq*4]
     inc    jq
     jl .loop_order
 .end_order:
-    pmuldq m0, m1
-    paddq  m2, m0
+    pmacsdql m2, m0, m1, m2
     psrlq  m2, m4
     movd   m0, [decodedq]
     paddd  m0, m2
     movd   [decodedq], m0
     sub  lend, 2
     jl .ret
-    pmuldq m1, m0
-    paddq  m3, m1
+    pmacsdql m3, m1, m0, m3
     psrlq  m3, m4
     movd   m1, [decodedq+4]
     paddd  m1, m3
@@ -69,3 +66,9 @@ ALIGN 16
     jg .loop_sample
 .ret:
     REP_RET
+%endmacro
+
+%if HAVE_XOP_EXTERNAL
+LPC_32 xop
+%endif
+LPC_32 sse4
diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
index 1a02522..151ce34 100644
--- a/libavcodec/x86/flacdsp_init.c
+++ b/libavcodec/x86/flacdsp_init.c
@@ -24,6 +24,8 @@
 
 void ff_flac_lpc_32_sse4(int32_t *samples, const int coeffs[32], int order,
                          int qlevel, int len);
+void ff_flac_lpc_32_xop(int32_t *samples, const int coeffs[32], int order,
+                        int qlevel, int len);
 
 av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt,
                                  int bps)
@@ -35,5 +37,9 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt,
         if (bps > 16 && CONFIG_FLAC_DECODER)
             c->lpc = ff_flac_lpc_32_sse4;
     }
+    if (EXTERNAL_XOP(cpu_flags)) {
+        if (bps > 16)
+            c->lpc = ff_flac_lpc_32_xop;
+    }
 #endif
 }



More information about the ffmpeg-cvslog mailing list