[FFmpeg-devel] [PATCH v3 1/4] libswscale/x86/rgb2rgb: add shuffle_bytes avx2

Wu, Jianhua jianhua.wu at intel.com
Thu Oct 14 10:19:37 EEST 2021


Ping.

> -----Original Message-----
> From: Wu, Jianhua <jianhua.wu at intel.com>
> Sent: Thursday, September 30, 2021 4:44 PM
> To: ffmpeg-devel at ffmpeg.org
> Cc: Wu, Jianhua <jianhua.wu at intel.com>
> Subject: [PATCH v3 1/4] libswscale/x86/rgb2rgb: add shuffle_bytes avx2
> 
> Performance data(Less is better):
>     shuffle_bytes_ssse3   3.64654
>     shuffle_bytes_avx2    0.94288
> 
> Signed-off-by: Wu Jianhua <jianhua.wu at intel.com>
> ---
>  libswscale/x86/rgb2rgb.c     | 17 +++++++++++++++--
>  libswscale/x86/rgb_2_rgb.asm | 11 +++++++++++
>  2 files changed, 26 insertions(+), 2 deletions(-)
> 
> diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
> index c38a953277..0ab139aca4 100644
> --- a/libswscale/x86/rgb2rgb.c
> +++ b/libswscale/x86/rgb2rgb.c
> @@ -146,6 +146,12 @@ void ff_shuffle_bytes_3012_ssse3(const uint8_t *src, uint8_t *dst, int src_size)
>  void ff_shuffle_bytes_3210_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
> 
>  #if ARCH_X86_64
> +void ff_shuffle_bytes_2103_avx2(const uint8_t *src, uint8_t *dst, int src_size);
> +void ff_shuffle_bytes_0321_avx2(const uint8_t *src, uint8_t *dst, int src_size);
> +void ff_shuffle_bytes_1230_avx2(const uint8_t *src, uint8_t *dst, int src_size);
> +void ff_shuffle_bytes_3012_avx2(const uint8_t *src, uint8_t *dst, int src_size);
> +void ff_shuffle_bytes_3210_avx2(const uint8_t *src, uint8_t *dst, int src_size);
> +
>  void ff_uyvytoyuv422_sse2(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
>                            const uint8_t *src, int width, int height,
>                            int lumStride, int chromStride, int srcStride);
> @@ -186,9 +192,16 @@ av_cold void rgb2rgb_init_x86(void)
>          shuffle_bytes_3012 = ff_shuffle_bytes_3012_ssse3;
>          shuffle_bytes_3210 = ff_shuffle_bytes_3210_ssse3;
>      }
> -    if (EXTERNAL_AVX(cpu_flags)) {
>  #if ARCH_X86_64
> +    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
> +        shuffle_bytes_0321 = ff_shuffle_bytes_0321_avx2;
> +        shuffle_bytes_2103 = ff_shuffle_bytes_2103_avx2;
> +        shuffle_bytes_1230 = ff_shuffle_bytes_1230_avx2;
> +        shuffle_bytes_3012 = ff_shuffle_bytes_3012_avx2;
> +        shuffle_bytes_3210 = ff_shuffle_bytes_3210_avx2;
> +    }
> +    if (EXTERNAL_AVX(cpu_flags)) {
>          uyvytoyuv422 = ff_uyvytoyuv422_avx;
> -#endif
>      }
> +#endif
>  }
> diff --git a/libswscale/x86/rgb_2_rgb.asm b/libswscale/x86/rgb_2_rgb.asm
> index 29b856e281..c695c61d5c 100644
> --- a/libswscale/x86/rgb_2_rgb.asm
> +++ b/libswscale/x86/rgb_2_rgb.asm
> @@ -159,6 +159,17 @@ SHUFFLE_BYTES 1, 2, 3, 0
>  SHUFFLE_BYTES 3, 0, 1, 2
>  SHUFFLE_BYTES 3, 2, 1, 0
> 
> +%if ARCH_X86_64
> +%if HAVE_AVX2_EXTERNAL
> +INIT_YMM avx2
> +SHUFFLE_BYTES 2, 1, 0, 3
> +SHUFFLE_BYTES 0, 3, 2, 1
> +SHUFFLE_BYTES 1, 2, 3, 0
> +SHUFFLE_BYTES 3, 0, 1, 2
> +SHUFFLE_BYTES 3, 2, 1, 0
> +%endif
> +%endif
> +
>  ;-----------------------------------------------------------------------------------------------
>  ; uyvytoyuv422(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
>  ;              const uint8_t *src, int width, int height,
> --
> 2.17.1

Hi there,

Any update?

Thanks,
Jianhua




More information about the ffmpeg-devel mailing list