[Ffmpeg-devel] gcc4 support & MMX fixups (from Debian)

Aurelien Jacobs aurel
Wed Feb 1 01:39:01 CET 2006


On Wed, 1 Feb 2006 01:26:49 +0100
Paweł Sikora <pluto at pld-linux.org> wrote:

> Dnia Wednesday, 1 of February 2006 01:08, Aurelien Jacobs napisał:
> > On Wed, 1 Feb 2006 00:21:56 +0100
> >
> > Paweł Sikora <pluto at pld-linux.org> wrote:
> > > Dnia Wednesday, 1 of February 2006 00:01, Aurelien Jacobs napisał:
> > > > > orig:  iters = 1000000000, dt = 7.92 [avg]
> > > > > fixed: iters = 1000000000, dt = 7.35 [avg]
> > > > >
> > > > > we gain: ~7.2%
> 
> > Oh ! My bad... stupid me. I just forgot the -O3 when compiling !
> > Now here are some better results :
> >
> >   orig:  iters = 1000000000, dt = 5.04
> >   fixed: iters = 1000000000, dt = 5.47
> >
> > So that's still worse for the fixed version, but that's much more
> > reasonable.
> >
> > Here is the asm code resulting from fixed_transpose4x4:
> > (...)
> 
> hmmm, the 4.1/4.0 fixed_transpose4x4 are equal but the benchmarks differ.
> maybe orig_transpose4x4 has a different prologue?

seems so.

> [ 4.1 / -O2 ]
> orig_transpose4x4:
>         leal    (%rdx,%rdx), %r9d
>         leal    (%rcx,%rcx), %eax
>         movslq  %edx,%r11
>         movslq  %ecx,%r8
>         movslq  %r9d,%r10
>         addl    %edx, %r9d
>         movslq  %eax,%rdx
>         addl    %ecx, %eax
>         movslq  %r9d,%r9
>         cltq
> #APP
>         movd  (%rsi), %mm0
>         movd  (%rsi,%r8), %mm1
>         movd  (%rsi,%rdx), %mm2
>         movd  (%rsi,%rax), %mm3
>         punpcklbw %mm1, %mm0
>         punpcklbw %mm3, %mm2
>         movq %mm0, %mm1
>         punpcklwd %mm2, %mm0
>         punpckhwd %mm2, %mm1
>         movd  %mm0, (%rdi)
>         punpckhdq %mm0, %mm0
>         movd  %mm0, (%rdi,%r11)
>         movd  %mm1, (%rdi,%r10)
>         punpckhdq %mm1, %mm1
>         movd  %mm1, (%rdi,%r9)
> #NO_APP
>         ret

[ 4.0 / -O2 ]
orig_transpose4x4:
        leal    (%rdx,%rdx), %r8d
        movslq  %edx,%r10
        leaq    (%rcx,%rcx,2), %rax
        movslq  %r8d,%r9
        addl    %edx, %r8d
        movslq  %r8d,%r8
#APP
        movd  (%rsi), %mm0                
        movd  (%rsi,%rcx), %mm1                
        movd  (%rsi,%rcx,2), %mm2                
        movd  (%rax,%rsi), %mm3                
        punpcklbw %mm1, %mm0         
        punpcklbw %mm3, %mm2         
        movq %mm0, %mm1              
        punpcklwd %mm2, %mm0         
        punpckhwd %mm2, %mm1         
        movd  %mm0, (%rdi)                
        punpckhdq %mm0, %mm0         
        movd  %mm0, (%rdi,%r10)                
        movd  %mm1, (%rdi,%r9)                
        punpckhdq %mm1, %mm1         
        movd  %mm1, (%rdi,%r8)                
        
#NO_APP
        ret

Aurel





More information about the ffmpeg-devel mailing list