[FFmpeg-devel] [PATCH] vp9/x86: iwht4x4 (lossless) mmx.
Clément Bœsch
u at pkh.me
Wed Jan 22 07:48:57 CET 2014
On Mon, Jan 20, 2014 at 08:05:13PM -0500, Ronald S. Bultje wrote:
> ---
> libavcodec/x86/vp9dsp_init.c | 5 +++++
> libavcodec/x86/vp9itxfm.asm | 43 +++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 48 insertions(+)
>
> diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
> index 9c322c1..9e4bc93 100644
> --- a/libavcodec/x86/vp9dsp_init.c
> +++ b/libavcodec/x86/vp9dsp_init.c
> @@ -173,6 +173,7 @@ itxfm_funcs(16, ssse3);
> itxfm_funcs(16, avx);
> itxfm_func(idct, idct, 32, ssse3);
> itxfm_func(idct, idct, 32, avx);
> +itxfm_func(iwht, iwht, 4, mmx);
>
> #undef itxfm_func
> #undef itxfm_funcs
> @@ -223,6 +224,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
> if (EXTERNAL_MMX(cpu_flags)) {
> init_fpel(4, 0, 4, put, mmx);
> init_fpel(3, 0, 8, put, mmx);
> + dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
> + dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
> + dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
> + dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
> }
>
> if (EXTERNAL_SSE(cpu_flags)) {
> diff --git a/libavcodec/x86/vp9itxfm.asm b/libavcodec/x86/vp9itxfm.asm
> index fe9f99a..3279b53 100644
> --- a/libavcodec/x86/vp9itxfm.asm
> +++ b/libavcodec/x86/vp9itxfm.asm
> @@ -152,6 +152,49 @@ SECTION .text
> %endmacro
>
> ;-------------------------------------------------------------------------------------------
> +; void vp9_iwht_iwht_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
> +;-------------------------------------------------------------------------------------------
> +
> +%macro VP9_IWHT4_1D 0
> + SWAP 1, 2
> + SWAP 2, 3
Again, here and at the end of the macro, each pair of SWAPs could probably be merged into a single chained one: SWAP 1, 2, 3 here and SWAP 3, 2, 1 below.
> + paddw m0, m2
> + psubw m3, m1
> + psubw m4, m0, m3
> + psraw m4, 1
> + psubw m5, m4, m1
> + SWAP 5, 1
> + psubw m4, m2
> + SWAP 4, 2
> + psubw m0, m1
> + paddw m3, m2
> + SWAP 2, 3
> + SWAP 1, 2
> +%endmacro
> +
> +INIT_MMX mmx
> +cglobal vp9_iwht_iwht_4x4_add, 3, 3, 0, dst, stride, block, eob
> + mova m0, [blockq+0*8]
> + mova m1, [blockq+1*8]
> + mova m2, [blockq+2*8]
> + mova m3, [blockq+3*8]
> + psraw m0, 2
> + psraw m1, 2
> + psraw m2, 2
> + psraw m3, 2
> +
> + VP9_IWHT4_1D
> + TRANSPOSE4x4W 0, 1, 2, 3, 4
> + VP9_IWHT4_1D
> +
> + pxor m4, m4
> + VP9_STORE_2X 0, 1, 5, 6, 4
> + lea dstq, [dstq+strideq*2]
> + VP9_STORE_2X 2, 3, 5, 6, 4
> + ZERO_BLOCK blockq, 8, 4, m4
> + RET
> +
Rest probably OK :)
--
Clément B.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 490 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20140122/d581a389/attachment.asc>
More information about the ffmpeg-devel mailing list