[FFmpeg-devel] [PATCH] avfilter: add OpenCL scale filter
Rostislav Pehlivanov
atomnuker at gmail.com
Tue Mar 27 16:18:44 EEST 2018
On 27 March 2018 at 05:48, Gabriel Machado <gabriel_machado at live.com> wrote:
> From: Gabriel Machado <gabriel_machado at live.com>
>
> Some scaling filters implemented as OpenCL kernels. Can be used as:
>
> scale_opencl=<width>:<height>:flags=<filter>
> where <filter> can be `neighbor', `bilinear', `bicubic' or `fast_bicubic'
>
> This is an initial draft, there's still a long way to go in terms of
> completeness, configurability and performance.
>
> ---
> configure | 1 +
> libavfilter/Makefile | 1 +
> libavfilter/allfilters.c | 1 +
> libavfilter/opencl/scale.cl | 165 ++++++++++++++++++++++++
> libavfilter/opencl_source.h | 1 +
> libavfilter/vf_scale_opencl.c | 289 ++++++++++++++++++++++++++++++++++++++++++
> 6 files changed, 458 insertions(+)
> create mode 100644 libavfilter/opencl/scale.cl
> create mode 100644 libavfilter/vf_scale_opencl.c
>
> diff --git a/configure b/configure
> index 5ccf3ce..4007ee8 100755
> --- a/configure
> +++ b/configure
> @@ -2821,6 +2821,7 @@ v4l2_m2m_deps_any="linux_videodev2_h"
>
> hwupload_cuda_filter_deps="ffnvcodec"
> scale_npp_filter_deps="ffnvcodec libnpp"
> +scale_opencl_filter_deps="opencl"
> scale_cuda_filter_deps="cuda_sdk"
> thumbnail_cuda_filter_deps="cuda_sdk"
>
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index a90ca30..6303cbd 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -302,6 +302,7 @@ OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o
> OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale.o
> OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o
> OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale.o
> +OBJS-$(CONFIG_SCALE_OPENCL_FILTER) += vf_scale_opencl.o opencl.o opencl/scale.o
> OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o
> OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale.o vaapi_vpp.o
> OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index 1cf1340..3185b17 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -309,6 +309,7 @@ static void register_all(void)
> REGISTER_FILTER(SCALE, scale, vf);
> REGISTER_FILTER(SCALE_CUDA, scale_cuda, vf);
> REGISTER_FILTER(SCALE_NPP, scale_npp, vf);
> + REGISTER_FILTER(SCALE_OPENCL, scale_opencl, vf);
> REGISTER_FILTER(SCALE_QSV, scale_qsv, vf);
> REGISTER_FILTER(SCALE_VAAPI, scale_vaapi, vf);
> REGISTER_FILTER(SCALE2REF, scale2ref, vf);
> diff --git a/libavfilter/opencl/scale.cl b/libavfilter/opencl/scale.cl
> new file mode 100644
> index 0000000..b0e6cb2
> --- /dev/null
> +++ b/libavfilter/opencl/scale.cl
> @@ -0,0 +1,165 @@
> +/*
> + * Copyright (c) 2018 Gabriel Machado
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +__kernel void neighbor(__write_only image2d_t dst,
> + __read_only image2d_t src)
> +{
> + const sampler_t sampler = (CLK_NORMALIZED_COORDS_TRUE |
> + CLK_ADDRESS_CLAMP_TO_EDGE |
> + CLK_FILTER_NEAREST);
> +
> + int2 coord = {get_global_id(0), get_global_id(1)};
> + int2 size = {get_global_size(0), get_global_size(1)};
> +
> + float2 pos = (convert_float2(coord) + 0.5) / convert_float2(size);
> +
> + float4 c = read_imagef(src, sampler, pos);
> + write_imagef(dst, coord, c);
> +}
> +
> +__kernel void bilinear(__write_only image2d_t dst,
> + __read_only image2d_t src)
> +{
> + const sampler_t sampler = (CLK_NORMALIZED_COORDS_TRUE |
> + CLK_ADDRESS_CLAMP_TO_EDGE |
> + CLK_FILTER_LINEAR);
> +
> + int2 coord = {get_global_id(0), get_global_id(1)};
> + int2 size = {get_global_size(0), get_global_size(1)};
> +
> + float2 pos = (convert_float2(coord) + 0.5) / convert_float2(size);
> +
> + float4 c = read_imagef(src, sampler, pos);
> + write_imagef(dst, coord, c);
> +}
> +
> +// https://developer.nvidia.com/gpugems/GPUGems/gpugems_ch24.html
> +float MitchellNetravali(float x, float B, float C)
> +{
> + float t = fabs(x);
> + float tt = t*t;
> + float ttt = tt*t;
> +
> + if (t < 1) {
> + return ((12 - 9 * B - 6 * C) * ttt +
> + (-18 + 12 * B + 6 * C) * tt + (6 - 2 * B)) / 6;
> + } else if ((t >= 1) && (t < 2)) {
> + return ((-B - 6 * C) * ttt +
> + (6 * B + 30 * C) * tt + (-12 * B - 48 * C) *
> + t + (8 * B + 24 * C)) / 6;
> + } else {
> + return 0;
> + }
> +}
>
The license of this code is unclear, so I don't think you can use it.
Moreover, it comes from a book.
> +
> +float4 cubic(float4 c0, float4 c1, float4 c2, float4 c3, float t)
> +{
> + float B = 0, C = 0.6; // libswscale default
> + float a = MitchellNetravali(t + 1, B, C);
> + float b = MitchellNetravali(t, B, C);
> + float c = MitchellNetravali(1 - t, B, C);
> + float d = MitchellNetravali(2 - t, B, C);
> + return a*c0 + b*c1 + c*c2 + d*c3;
> +}
> +
> +__kernel void bicubic(__write_only image2d_t dst,
> + __read_only image2d_t src)
> +{
> + const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
> + CLK_ADDRESS_CLAMP_TO_EDGE |
> + CLK_FILTER_NEAREST);
> +
> + int2 dst_coord = {get_global_id(0), get_global_id(1)};
> +
> + float2 dst_size = {get_global_size(0), get_global_size(1)};
> + float2 src_size = convert_float2(get_image_dim(src));
> +
> + float2 uv = convert_float2(dst_coord) / dst_size;
> +
> + float2 src_pos = uv * convert_float2(src_size) - 0.5;
> +
> + float2 src_coordf;
> + float2 t = fract(src_pos, &src_coordf);
> + int2 src_coord = convert_int2(src_coordf);
> +
> +#define TEX(x,y) read_imagef(src, sampler, src_coord + (int2){x,y})
> + float4 col = cubic(cubic(TEX(-1,-1), TEX(0,-1), TEX(1,-1), TEX(2,-1),
> t.x),
> + cubic(TEX(-1, 0), TEX(0, 0), TEX(1, 0), TEX(2, 0),
> t.x),
> + cubic(TEX(-1, 1), TEX(0, 1), TEX(1, 1), TEX(2, 1),
> t.x),
> + cubic(TEX(-1, 2), TEX(0, 2), TEX(1, 2), TEX(2, 2),
> t.x),
> + t.y);
> +#undef TEX
> +
> + write_imagef(dst, dst_coord, col);
> +}
> +
> +// https://www.shadertoy.com/view/4df3Dn
> +// 4x4 bicubic filter using 4 bilinear texture lookups
> +// cubic B-spline basis functions
> +float w0(float a) { return (1.0/6.0)*(a*(a*(-a + 3.0) - 3.0) + 1.0); }
> +float w1(float a) { return (1.0/6.0)*(a*a*(3.0*a - 6.0) + 4.0); }
> +float w2(float a) { return (1.0/6.0)*(a*(a*(-3.0*a + 3.0) + 3.0) + 1.0); }
> +float w3(float a) { return (1.0/6.0)*(a*a*a); }
>
This code has no license, so you can't use it: Shadertoy has no explicit license.
Moreover, the whole filter is incorrectly designed. Take a look at what mpv
does and how it has no explicit per-algorithm scaling functions.
More information about the ffmpeg-devel
mailing list