[FFmpeg-devel] [PATCH 02/10] diracdsp: add dequantization SIMD
James Almer
jamrial at gmail.com
Fri Jun 24 17:38:35 CEST 2016
On 6/24/2016 8:43 AM, Rostislav Pehlivanov wrote:
> From 154e4312b09f568108dd97089e394c10bb3c28a9 Mon Sep 17 00:00:00 2001
> From: Rostislav Pehlivanov <rpehlivanov at ob-encoder.com>
> Date: Thu, 23 Jun 2016 18:06:56 +0100
> Subject: [PATCH 2/2] diracdsp: add dequantization SIMD
>
> Currently unused, to be used in the following commits.
>
> Signed-off-by: Rostislav Pehlivanov <rpehlivanov at obe.tv>
> ---
> libavcodec/diracdsp.c | 24 ++++++++++++++++++++++++
> libavcodec/diracdsp.h | 4 ++++
> libavcodec/x86/diracdsp.asm | 36 ++++++++++++++++++++++++++++++++++++
> libavcodec/x86/diracdsp_init.c | 2 ++
> 4 files changed, 66 insertions(+)
>
> diff --git a/libavcodec/diracdsp.c b/libavcodec/diracdsp.c
> index ab8d149..cd1209e 100644
> --- a/libavcodec/diracdsp.c
> +++ b/libavcodec/diracdsp.c
> @@ -189,6 +189,27 @@ static void add_rect_clamped_c(uint8_t *dst, const uint16_t *src, int stride,
> }
> }
>
> +#define DEQUANT_SUBBAND(PX) \
> +static void dequant_subband_ ## PX ## _c(uint8_t *src, uint8_t *dst, ptrdiff_t stride, \
> + const int qf, const int qs, int tot_v, int tot_h) \
> +{ \
> + int i, y; \
> + for (y = 0; y < tot_v; y++) { \
> + PX c, sign, *src_r = (PX *)src, *dst_r = (PX *)dst; \
> + for (i = 0; i < tot_h; i++) { \
> + c = *src_r++; \
> + sign = FFSIGN(c)*(!!c); \
> + c = (FFABS(c)*qf + qs) >> 2; \
> + *dst_r++ = c*sign; \
> + } \
> + src += tot_h << (sizeof(PX) >> 1); \
> + dst += stride; \
> + } \
> +}
> +
> +DEQUANT_SUBBAND(int16_t)
> +DEQUANT_SUBBAND(int32_t)
> +
> #define PIXFUNC(PFX, WIDTH) \
> c->PFX ## _dirac_pixels_tab[WIDTH>>4][0] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _c; \
> c->PFX ## _dirac_pixels_tab[WIDTH>>4][1] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _l2_c; \
> @@ -214,6 +235,9 @@ av_cold void ff_diracdsp_init(DiracDSPContext *c)
> c->biweight_dirac_pixels_tab[1] = biweight_dirac_pixels16_c;
> c->biweight_dirac_pixels_tab[2] = biweight_dirac_pixels32_c;
>
> + c->dequant_subband[0] = c->dequant_subband[2] = dequant_subband_int16_t_c;
> + c->dequant_subband[1] = c->dequant_subband[3] = dequant_subband_int32_t_c;
> +
> PIXFUNC(put, 8);
> PIXFUNC(put, 16);
> PIXFUNC(put, 32);
> diff --git a/libavcodec/diracdsp.h b/libavcodec/diracdsp.h
> index 25a872d..224828d 100644
> --- a/libavcodec/diracdsp.h
> +++ b/libavcodec/diracdsp.h
> @@ -22,6 +22,7 @@
> #define AVCODEC_DIRACDSP_H
>
> #include <stdint.h>
> +#include <stddef.h>
>
> typedef void (*dirac_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int h);
> typedef void (*dirac_biweight_func)(uint8_t *dst, const uint8_t *src, int stride, int log2_denom, int weightd, int weights, int h);
> @@ -46,6 +47,9 @@ typedef struct {
> void (*add_rect_clamped)(uint8_t *dst/*align 16*/, const uint16_t *src/*align 16*/, int stride, const int16_t *idwt/*align 16*/, int idwt_stride, int width, int height/*mod 2*/);
> void (*add_dirac_obmc[3])(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
>
> + /* 0-1: int16_t and int32_t asm/c, 2-3: int16 and int32_t, C only */
> + void (*dequant_subband[4])(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
> +
> dirac_weight_func weight_dirac_pixels_tab[3];
> dirac_biweight_func biweight_dirac_pixels_tab[3];
> } DiracDSPContext;
> diff --git a/libavcodec/x86/diracdsp.asm b/libavcodec/x86/diracdsp.asm
> index a0d6788..a764706 100644
> --- a/libavcodec/x86/diracdsp.asm
> +++ b/libavcodec/x86/diracdsp.asm
> @@ -307,4 +307,40 @@ cglobal put_signed_rect_clamped_10, 6, 9, 6, dst, dst_stride, src, src_stride, w
>
> RET
>
> +; void dequant_subband_32(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h)
> +cglobal dequant_subband_32, 7, 9, 4, src, dst, stride, qf, qs, tot_v, tot_h
> +
> + movd m2, qfd
> + movd m3, qsd
> + SPLATD m2
> + SPLATD m3
> + mov r7, dstq
> + mov r8, tot_hq
Replace every r7 and r8 with r3 and r4 (qf and qs are no longer needed once
they have been broadcast into m2 and m3), change the cglobal line to 7, 7, 4,
and the function will work on x86_32 as well.
> +
> + .loop_v:
> + mov dstq, r7
> + mov tot_hq, r8
> +
> + .loop_h:
> + movu m0, [srcq]
> +
> + pabsd m1, m0
> + pmulld m1, m2
> + paddd m1, m3
> + psrld m1, 2
> + psignd m1, m0
> +
> + movu [dstq], m1
> +
> + add srcq, mmsize
> + add dstq, mmsize
> + sub tot_hq, 4
> + jl .loop_h
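This should be jg (jump if greater), not jl: the loop has to continue while the remaining count is still positive. Also use tot_hd, since tot_h is declared as int and its upper 32 bits are undefined on x86_64, or change the prototypes to use a pointer-sized type.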
> +
> + add r7, strideq
> + sub tot_vq, 1
> + jl .loop_v
Same here: use jg instead of jl, and tot_vd instead of tot_vq (or change the prototype).
> +
> + RET
> +
> %endif
> diff --git a/libavcodec/x86/diracdsp_init.c b/libavcodec/x86/diracdsp_init.c
> index 7fa554e..a1bab9c 100644
> --- a/libavcodec/x86/diracdsp_init.c
> +++ b/libavcodec/x86/diracdsp_init.c
> @@ -48,6 +48,7 @@ void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t
>
> #if ARCH_X86_64
> void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height);
> +void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
> #endif
>
> #if HAVE_YASM
> @@ -191,6 +192,7 @@ void ff_diracdsp_init_x86(DiracDSPContext* c)
>
> #if ARCH_X86_64
> if (EXTERNAL_SSE4(mm_flags)) {
> + c->dequant_subband[1] = ff_dequant_subband_32_sse4;
> c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4;
> }
> #endif
> -- 2.8.1.369.geae769a
More information about the ffmpeg-devel
mailing list