[FFmpeg-devel] [PATCH 3/4] x86: lossless audio: SSE4 madd 32bits

Michael Niedermayer michael at niedermayer.cc
Sat May 7 23:28:37 CEST 2016


On Sat, May 07, 2016 at 05:58:21PM +0200, Paul B Mahol wrote:
> On 5/1/16, Christophe Gisquet <christophe.gisquet at gmail.com> wrote:
> > The only user so far is wmalossless 24-bit. The few samples tested show an
> > order of 8, so more unrolling or an AVX2 version does not make sense.
> >
> > Timings: 68 -> 49 cycles
> > ---
> >  libavcodec/x86/lossless_audiodsp.asm    | 33
> > +++++++++++++++++++++++++++++++++
> >  libavcodec/x86/lossless_audiodsp_init.c |  7 +++++++
> >  2 files changed, 40 insertions(+)
> >
> > diff --git a/libavcodec/x86/lossless_audiodsp.asm
> > b/libavcodec/x86/lossless_audiodsp.asm
> > index 5597dad..063d7b4 100644
> > --- a/libavcodec/x86/lossless_audiodsp.asm
> > +++ b/libavcodec/x86/lossless_audiodsp.asm
> > @@ -68,6 +68,39 @@ SCALARPRODUCT
> >  INIT_XMM sse2
> >  SCALARPRODUCT
> >
> > +INIT_XMM sse4
> > +; int ff_scalarproduct_and_madd_int32(int16_t *v1, int32_t *v2, int16_t
> > *v3,
> > +;                                     int order, int mul)
> > +cglobal scalarproduct_and_madd_int32, 4,4,8, v1, v2, v3, order, mul
> > +    shl orderq, 1
> > +    movd    m7, mulm
> > +    SPLATW  m7, m7
> > +    pxor    m6, m6
> > +    add v1q, orderq
> > +    lea v2q, [v2q + 2*orderq]
> > +    add v3q, orderq
> > +    neg orderq
> > +.loop:
> > +    mova    m3, [v1q + orderq]
> > +    movu    m0, [v2q + 2*orderq]
> > +    pmovsxwd m4, m3
> > +    movu    m1, [v2q + 2*orderq + mmsize]
> > +    movhlps m5, m3
> > +    movu    m2, [v3q + orderq]
> > +    pmovsxwd m5, m5
> > +    pmullw  m2, m7
> > +    pmulld  m0, m4
> > +    pmulld  m1, m5
> > +    paddw   m2, m3
> > +    paddd   m6, m0
> > +    paddd   m6, m1
> > +    mova    [v1q + orderq], m2
> > +    add     orderq, 16
> > +    jl .loop
> > +    HADDD   m6, m0
> > +    movd   eax, m6
> > +    RET
> > +
> >  %macro SCALARPRODUCT_LOOP 1
> >  align 16
> >  .loop%1:
> > diff --git a/libavcodec/x86/lossless_audiodsp_init.c
> > b/libavcodec/x86/lossless_audiodsp_init.c
> > index 197173c..10b6a65 100644
> > --- a/libavcodec/x86/lossless_audiodsp_init.c
> > +++ b/libavcodec/x86/lossless_audiodsp_init.c
> > @@ -31,6 +31,10 @@ int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t
> > *v1, const int16_t *v2,
> >                                                const int16_t *v3,
> >                                                int order, int mul);
> >
> > +int32_t ff_scalarproduct_and_madd_int32_sse4(int16_t *v1, const int32_t
> > *v2,
> > +                                             const int16_t *v3,
> > +                                             int order, int mul);
> > +
> >  av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c)
> >  {
> >  #if HAVE_YASM
> > @@ -45,5 +49,8 @@ av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c)
> >      if (EXTERNAL_SSSE3(cpu_flags) &&
> >          !(cpu_flags & (AV_CPU_FLAG_SSE42 | AV_CPU_FLAG_3DNOW))) //
> > cachesplit
> >          c->scalarproduct_and_madd_int16 =
> > ff_scalarproduct_and_madd_int16_ssse3;
> > +
> > +    if (EXTERNAL_SSE4(cpu_flags))
> > +        c->scalarproduct_and_madd_int32 =
> > ff_scalarproduct_and_madd_int32_sse4;
> >  #endif
> >  }
> > --
> > 2.8.1
> >
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel at ffmpeg.org
> > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >
> 
> lgtm

applied

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

The bravest are surely those who have the clearest vision
of what is before them, glory and danger alike, and yet
notwithstanding go out to meet it. -- Thucydides
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20160507/53774a02/attachment.sig>


More information about the ffmpeg-devel mailing list