[FFmpeg-devel] [PATCH] avfilter: add hflip x86 SIMD
James Almer
jamrial at gmail.com
Sat Dec 2 04:25:03 EET 2017
On 12/1/2017 7:02 PM, Paul B Mahol wrote:
> Signed-off-by: Paul B Mahol <onemda at gmail.com>
> ---
> libavfilter/hflip.h | 38 +++++++++++++++++++++++++
> libavfilter/vf_hflip.c | 30 ++++++++++++++------
> libavfilter/x86/Makefile | 2 ++
> libavfilter/x86/vf_hflip.asm | 61 +++++++++++++++++++++++++++++++++++++++++
> libavfilter/x86/vf_hflip_init.c | 38 +++++++++++++++++++++++++
> 5 files changed, 160 insertions(+), 9 deletions(-)
> create mode 100644 libavfilter/hflip.h
> create mode 100644 libavfilter/x86/vf_hflip.asm
> create mode 100644 libavfilter/x86/vf_hflip_init.c
>
> diff --git a/libavfilter/hflip.h b/libavfilter/hflip.h
> new file mode 100644
> index 0000000000..138380427c
> --- /dev/null
> +++ b/libavfilter/hflip.h
> @@ -0,0 +1,38 @@
> +/*
> + * Copyright (c) 2007 Benoit Fouet
> + * Copyright (c) 2010 Stefano Sabatini
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFILTER_HFLIP_H
> +#define AVFILTER_HFLIP_H
> +
> +#include "avfilter.h"
> +
> +typedef struct FlipContext {
> + const AVClass *class;
> + int max_step[4]; ///< max pixel step for each plane, expressed as a number of bytes
> + int planewidth[4]; ///< width of each plane
> + int planeheight[4]; ///< height of each plane
> +
> + void (*flip_line[4])(const uint8_t *src, uint8_t *dst, int w);
> +} FlipContext;
> +
> +void ff_hflip_init_x86(FlipContext *s, int step[4]);
> +
> +#endif /* AVFILTER_HFLIP_H */
> diff --git a/libavfilter/vf_hflip.c b/libavfilter/vf_hflip.c
> index cf20c193f7..65cf7c5cd1 100644
> --- a/libavfilter/vf_hflip.c
> +++ b/libavfilter/vf_hflip.c
> @@ -29,6 +29,7 @@
> #include "libavutil/opt.h"
> #include "avfilter.h"
> #include "formats.h"
> +#include "hflip.h"
> #include "internal.h"
> #include "video.h"
> #include "libavutil/pixdesc.h"
> @@ -36,13 +37,6 @@
> #include "libavutil/intreadwrite.h"
> #include "libavutil/imgutils.h"
>
> -typedef struct FlipContext {
> - const AVClass *class;
> - int max_step[4]; ///< max pixel step for each plane, expressed as a number of bytes
> - int planewidth[4]; ///< width of each plane
> - int planeheight[4]; ///< height of each plane
> -} FlipContext;
> -
> static const AVOption hflip_options[] = {
> { NULL }
> };
> @@ -67,12 +61,21 @@ static int query_formats(AVFilterContext *ctx)
> return ff_set_common_formats(ctx, pix_fmts);
> }
>
> +static void hflip_byte_c(const uint8_t *src, uint8_t *dst, int w)
> +{
> + int j;
> +
> + for (j = 0; j < w; j++)
> + dst[j] = src[-j];
> +}
> +
> static int config_props(AVFilterLink *inlink)
> {
> FlipContext *s = inlink->dst->priv;
> const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
> const int hsub = pix_desc->log2_chroma_w;
> const int vsub = pix_desc->log2_chroma_h;
> + int i;
>
> av_image_fill_max_pixsteps(s->max_step, NULL, pix_desc);
> s->planewidth[0] = s->planewidth[3] = inlink->w;
> @@ -80,6 +83,16 @@ static int config_props(AVFilterLink *inlink)
> s->planeheight[0] = s->planeheight[3] = inlink->h;
> s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, vsub);
>
> + for (i = 0; i < 4; i++) {
> + switch (s->max_step[i]) {
> + case 1:
> + s->flip_line[i] = hflip_byte_c;
> + }
> + }
> +
> + if (ARCH_X86)
> + ff_hflip_init_x86(s, s->max_step);
> +
> return 0;
> }
>
> @@ -109,8 +122,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int job, int nb_jobs)
> for (i = start; i < end; i++) {
> switch (step) {
> case 1:
> - for (j = 0; j < width; j++)
> - outrow[j] = inrow[-j];
> + s->flip_line[plane](inrow, outrow, width);
> break;
>
> case 2:
> diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
> index 3431625883..1420954f62 100644
> --- a/libavfilter/x86/Makefile
> +++ b/libavfilter/x86/Makefile
> @@ -5,6 +5,7 @@ OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
> OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o
> OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o
> OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o
> +OBJS-$(CONFIG_HFLIP_FILTER) += x86/vf_hflip_init.o
> OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o
> OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o
> OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace_init.o
> @@ -31,6 +32,7 @@ X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
> X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o
> X86ASM-OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o
> X86ASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o
> +X86ASM-OBJS-$(CONFIG_HFLIP_FILTER) += x86/vf_hflip.o
> X86ASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o
> X86ASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o
> X86ASM-OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace.o
> diff --git a/libavfilter/x86/vf_hflip.asm b/libavfilter/x86/vf_hflip.asm
> new file mode 100644
> index 0000000000..bc52a16ad8
> --- /dev/null
> +++ b/libavfilter/x86/vf_hflip.asm
> @@ -0,0 +1,61 @@
> +;*****************************************************************************
> +;* x86-optimized functions for hflip filter
> +;*
> +;* Copyright (C) 2017 Paul B Mahol
> +;*
> +;* This file is part of FFmpeg.
> +;*
> +;* FFmpeg is free software; you can redistribute it and/or
> +;* modify it under the terms of the GNU Lesser General Public
> +;* License as published by the Free Software Foundation; either
> +;* version 2.1 of the License, or (at your option) any later version.
> +;*
> +;* FFmpeg is distributed in the hope that it will be useful,
> +;* but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> +;* Lesser General Public License for more details.
> +;*
> +;* You should have received a copy of the GNU Lesser General Public
> +;* License along with FFmpeg; if not, write to the Free Software
> +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> +;*****************************************************************************
> +
> +%include "libavutil/x86/x86util.asm"
> +
> +SECTION_RODATA
> +
> +pb_flip: times 16 db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
> +
> +SECTION .text
> +
> +INIT_XMM ssse3
> +%if ARCH_X86_64
This %if ARCH_X86_64 guard (and its matching %endif) is unneeded.
> +cglobal hflip_byte, 5, 5, 8, src, dst, w, x, v
The function takes three arguments, not five. Also, only two xmm regs are being used, so this should be "cglobal hflip_byte, 3, 5, 2, src, dst, w, x, v".
> + mova m0, [pb_flip]
> + mov xq, 0
> + sub wq, mmsize
> + cmp wq, mmsize
> + jl .skip
> +
> + .loop0:
> + neg xq
> + movu m1, [srcq + xq - mmsize + 1]
> + pshufb m1, m0
> + neg xq
> + movu [dstq + xq], m1
> + add xq, mmsize
> + cmp xq, wq
> + jl .loop0
> +
> +.skip:
> + add wq, mmsize
> + .loop1:
> + neg xq
> + mov vb, [srcq + xq]
> + neg xq
> + mov [dstq + xq], vb
> + add xq, 1
> + cmp xq, wq
> + jl .loop1
> +RET
> +%endif
I have no comments on the assembly itself. Rostislav mentioned on IRC that it can be
done in a more efficient way, so ask him about it.
More information about the ffmpeg-devel mailing list