[FFmpeg-cvslog] x86: sbrdsp: implement SSE qmf_deint_neg
Christophe Gisquet
git at videolan.org
Thu May 15 23:26:02 CEST 2014
ffmpeg | branch: master | Christophe Gisquet <christophe.gisquet at gmail.com> | Fri Dec 7 18:26:30 2012 +0100| [d1310c591eb24202ecc28af4adb28cce690109e5] | committer: Michael Niedermayer
x86: sbrdsp: implement SSE qmf_deint_neg
>From 133 (unrolled av_intfloat32 C) to 59 cycles on Arrandale/Win64.
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d1310c591eb24202ecc28af4adb28cce690109e5
---
libavcodec/x86/sbrdsp.asm | 22 ++++++++++++++++++++++
libavcodec/x86/sbrdsp_init.c | 3 +++
2 files changed, 25 insertions(+)
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index af774e1..d556f27 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -423,3 +423,25 @@ apply_noise_main:
add count, mmsize
jl .loop
RET
+
+INIT_XMM sse
+cglobal sbr_qmf_deint_neg, 2,4,4,v,src,vrev,c
+%define COUNT 32*4
+%define OFFSET 32*4
+ mov cq, -COUNT
+ lea vrevq, [vq + OFFSET + COUNT]
+ add vq, OFFSET-mmsize
+ add srcq, 2*COUNT
+ mova m3, [ps_neg]
+.loop:
+ mova m0, [srcq + 2*cq + 0*mmsize]
+ mova m1, [srcq + 2*cq + 1*mmsize]
+ shufps m2, m0, m1, q2020
+ shufps m1, m0, q1313
+ xorps m2, m3
+ mova [vq], m1
+ mova [vrevq + cq], m2
+ sub vq, mmsize
+ add cq, mmsize
+ jl .loop
+ REP_RET
diff --git a/libavcodec/x86/sbrdsp_init.c b/libavcodec/x86/sbrdsp_init.c
index 2b912d0..a2aca74 100644
--- a/libavcodec/x86/sbrdsp_init.c
+++ b/libavcodec/x86/sbrdsp_init.c
@@ -51,6 +51,8 @@ void ff_sbr_hf_apply_noise_3_sse2(float (*Y)[2], const float *s_m,
const float *q_filt, int noise,
int kx, int m_max);
+void ff_sbr_qmf_deint_neg_sse(float *v, const float *src);
+
av_cold void ff_sbrdsp_init_x86(SBRDSPContext *s)
{
int cpu_flags = av_get_cpu_flags();
@@ -63,6 +65,7 @@ av_cold void ff_sbrdsp_init_x86(SBRDSPContext *s)
s->hf_gen = ff_sbr_hf_gen_sse;
s->qmf_post_shuffle = ff_sbr_qmf_post_shuffle_sse;
s->qmf_deint_bfly = ff_sbr_qmf_deint_bfly_sse;
+ s->qmf_deint_neg = ff_sbr_qmf_deint_neg_sse;
}
if (EXTERNAL_SSE2(cpu_flags)) {
More information about the ffmpeg-cvslog
mailing list