[FFmpeg-cvslog] r25597 - trunk/libavcodec/x86/h264_qpel_mmx.c

Michael Niedermayer michaelni
Thu Oct 28 23:27:49 CEST 2010


On Thu, Oct 28, 2010 at 11:38:39PM +0400, Yuriy Kaminskiy wrote:
> ramiro wrote:
> > Author: ramiro
> > Date: Thu Oct 28 20:22:21 2010
> > New Revision: 25597
> > 
> > Log:
> > h264dsp: merge some more asm blocks
> > 
> > Modified:
> >    trunk/libavcodec/x86/h264_qpel_mmx.c
> > 
> > Modified: trunk/libavcodec/x86/h264_qpel_mmx.c
> > ==============================================================================
> > --- trunk/libavcodec/x86/h264_qpel_mmx.c	Thu Oct 28 15:20:26 2010	(r25596)
> > +++ trunk/libavcodec/x86/h264_qpel_mmx.c	Thu Oct 28 20:22:21 2010	(r25597)
> > @@ -31,8 +31,8 @@
> >          "psubw "#B", "#T"           \n\t"\
> >          "psubw "#E", "#T"           \n\t"\
> >          "punpcklbw "#Z", "#F"       \n\t"\
> > -        "pmullw %4, "#T"            \n\t"\
> > -        "paddw %5, "#A"             \n\t"\
> > +        "pmullw "MANGLE(ff_pw_5)", "#T"\n\t"\
> > +        "paddw "MANGLE(ff_pw_16)", "#A"\n\t"\
> >          "add %2, %0                 \n\t"\
> >          "paddw "#F", "#A"           \n\t"\
> >          "paddw "#A", "#T"           \n\t"\
> > @@ -46,11 +46,11 @@
> >          "mov"#d" (%0), "#F"         \n\t"\
> >          "paddw "#D", "#T"           \n\t"\
> >          "psllw $2, "#T"             \n\t"\
> > -        "paddw %4, "#A"             \n\t"\
> > +        "paddw "MANGLE(ff_pw_16)", "#A"\n\t"\
> >          "psubw "#B", "#T"           \n\t"\
> >          "psubw "#E", "#T"           \n\t"\
> >          "punpcklbw "#Z", "#F"       \n\t"\
> > -        "pmullw %3, "#T"            \n\t"\
> > +        "pmullw "MANGLE(ff_pw_5)", "#T"\n\t"\
> >          "paddw "#F", "#A"           \n\t"\
> >          "add %2, %0                 \n\t"\
> >          "paddw "#A", "#T"           \n\t"\
> > @@ -204,7 +204,7 @@ static av_noinline void OPNAME ## h264_q
> >              QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\
> >               \
> >              : "+a"(src)\
> > -            : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
> > +            : "c"(tmp), "S"((x86_reg)srcStride)\
> >              : "memory"\
> >          );\
> >          tmp += 4;\
> > @@ -385,13 +385,8 @@ static av_noinline void OPNAME ## h264_q
> >          QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
> >          QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
> >          QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
> > -         \
> > -        : "+a"(src), "+c"(dst)\
> > -        : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
> > -        : "memory"\
> > -     );\
> > -     if(h==16){\
> > -        __asm__ volatile(\
> > +        "cmpl $16, %4               \n\t"\
> > +        "jne 2f                     \n\t"\
> >              QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
> >              QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
> >              QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
> > @@ -400,12 +395,12 @@ static av_noinline void OPNAME ## h264_q
> >              QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
> >              QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
> >              QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
> > +        "2:                         \n\t"\
> >              \
> >             : "+a"(src), "+c"(dst)\
> > -           : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
> > +           : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "g"(h)\
> >             : "memory"\
> >          );\
> > -     }\
> >       src += 4-(h+5)*srcStride;\
> >       dst += 4-h*dstStride;\
> >     }\
> > @@ -439,12 +434,8 @@ static av_always_inline void OPNAME ## h
> >              QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 5*48)\
> >              QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*48)\
> >              QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\
> > -            : "+a"(src)\
> > -            : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
> > -            : "memory"\
> > -        );\
> > -        if(size==16){\
> 
> size is a compile-time constant, so this check used to be always true or
> always false; now it is always evaluated at runtime.
> 
> > @@ -811,13 +802,8 @@ static av_noinline void OPNAME ## h264_q
> >          QPEL_H264V_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, OP)\
> >          QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\
> >          QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
> > -         \
> > -        : "+a"(src), "+c"(dst)\
> > -        : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
> > -        : "memory"\
> > -    );\
> > -    if(h==16){\
> Same here: h is a compile-time constant.

Why do you think so?
The functions are marked as av_noinline.

Either way, a benchmark for the change can of course not hurt.

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Many that live deserve death. And some that die deserve life. Can you give
it to them? Then do not be too eager to deal out death in judgement. For
even the very wise cannot see all ends. -- Gandalf
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-cvslog/attachments/20101028/62bfe315/attachment.pgp>



More information about the ffmpeg-cvslog mailing list