[FFmpeg-devel] [PATCH] ac3enc: Add x86-optimized function to speed up log2_tab().

Loren Merritt lorenm
Sun Feb 13 23:49:11 CET 2011


>+cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
>+    pxor        m2, m2
>+    pxor        m3, m3
>+.loop:
>+%ifidn %2, min_max
>+    mova        m0, [srcq]
>+    mova        m1, [srcq+mmsize]
>+    pminsw      m2, m0
>+    pminsw      m2, m1
>+    pmaxsw      m3, m0
>+    pmaxsw      m3, m1
>+%else ; or_abs
>+%ifidn %1, mmx
>+    mova        m0, [srcq]
>+    mova        m1, [srcq+mmsize]
>+    ABS2        m0, m1, m3, m4
>+%else ; ssse3
>+    ; using memory args is faster for ssse3
>+    pabsw       m0, [srcq]
>+    pabsw       m1, [srcq+mmsize]
>+%endif
>+    por         m2, m0
>+    por         m2, m1
>+%endif
>+    add       srcq, mmsize*2
>+    sub       lend, mmsize
>+    ja .loop
>+%ifidn %2, min_max
>+    ABS2        m2, m3, m0, m1
>+    por         m2, m3
>+%endif
>+%ifidn mmsize, 16
>+    mova        m0, m2
>+    punpckhqdq  m0, m0

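Use movhlps here: a single "movhlps m0, m2" moves the high 64 bits of m2 into the low half of m0, replacing the mova + punpckhqdq pair.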

--Loren Merritt



More information about the ffmpeg-devel mailing list