[FFmpeg-devel] [PATCH 1/7] x86: sbrdsp: implement SSE/SSE2 qmf_pre_shuffle

Michael Niedermayer michaelni at gmx.at
Sat Apr 6 14:57:30 CEST 2013


On Sat, Apr 06, 2013 at 10:52:08AM +0000, Christophe Gisquet wrote:
> From 253 to 70(sse)/52(sse2) cycles on Arrandale and Win64.
> 61/55 cycles on SandyBridge.

SSE2 is 41 cycles now on SandyBridge :)


> ---
>  libavcodec/x86/sbrdsp.asm    | 54 ++++++++++++++++++++++++++++++++++++++++++++
>  libavcodec/x86/sbrdsp_init.c |  7 ++++++
>  2 files changed, 61 insertions(+)
> 
> diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
> index 999e5af..f3c30d0 100644
> --- a/libavcodec/x86/sbrdsp.asm
> +++ b/libavcodec/x86/sbrdsp.asm
> @@ -242,3 +242,57 @@ cglobal sbr_neg_odd_64, 1,2,4,z
>      cmp        zq, r1q
>      jne     .loop
>      REP_RET
> +
> +%macro SBR_QMF_PRE_SHUFFLE 0
> +cglobal sbr_qmf_pre_shuffle, 1,4,7,z
> +%define OFFSET  (32*4-2*mmsize)
> +    mov       r3q, OFFSET
> +    lea       r1q, [zq + (32+1)*4]
> +    lea       r2q, [zq + 64*4]
> +    mova       m6, [ps_neg]
> +.loop:
> +    movu       m0, [r1q]
> +    movu       m2, [r1q + mmsize]
> +    movu       m1, [zq + r3q + 4 + mmsize]
> +    movu       m3, [zq + r3q + 4]
> +%if cpuflag(sse2)
> +%define XOR      pxor
> +%define SHUFFLE  pshufd
> +%define UNPACKL  punpckldq
> +%define UNPACKH  punpckhdq
> +%define MOVH     movq
> +%else
> +%define XOR      xorps
> +%define SHUFFLE  shufps
> +%define UNPACKL  unpcklps
> +%define UNPACKH  unpckhps
> +%define MOVH     movlps
> +%endif
> +

> +    XOR        m2, m6
> +    XOR        m0, m6
> +    SHUFFLE    m2, m2, q0123
> +    SHUFFLE    m0, m0, q0123

Doing the shuffles before the XORs is 1 cycle faster on my SandyBridge.
If it's not faster for you, then ignore this suggestion.

@@ -269,10 +269,10 @@ cglobal sbr_qmf_pre_shuffle, 1,4,7,z
 %define MOVH     movlps
 %endif

-    XOR        m2, m6
-    XOR        m0, m6
     SHUFFLE    m2, m2, q0123
     SHUFFLE    m0, m0, q0123
+    XOR        m2, m6
+    XOR        m0, m6
     mova       m5, m2
     mova       m4, m0
     UNPACKL    m2, m3

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Many things microsoft did are stupid, but not doing something just because
microsoft did it is even more stupid. If everything ms did were stupid they
would be bankrupt already.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20130406/b9caeebc/attachment.asc>


More information about the ffmpeg-devel mailing list