[FFmpeg-devel] [PATCH 3/3][RFC] avfilter/vf_chromakey: Add OpenCL acceleration
wm4
nfxjfg at googlemail.com
Wed Sep 30 12:45:53 CEST 2015
On Thu, 24 Sep 2015 17:12:25 +0200
Timo Rothenpieler <timo at rothenpieler.org> wrote:
> Signed-off-by: Timo Rothenpieler <timo at rothenpieler.org>
> ---
> doc/filters.texi | 5 +
> libavfilter/chromakey_opencl_kernel.h | 98 +++++++++++++++++++
> libavfilter/opencl_allkernels.c | 2 +
> libavfilter/vf_chromakey.c | 179 +++++++++++++++++++++++++++++++++-
> 4 files changed, 283 insertions(+), 1 deletion(-)
> create mode 100644 libavfilter/chromakey_opencl_kernel.h
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 044876c..4faf4b9 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -3734,6 +3734,11 @@ Signals that the color passed is already in YUV instead of RGB.
>
> Litteral colors like "green" or "red" don't make sense with this enabled anymore.
> This can be used to pass exact YUV values as hexadecimal numbers.
> +
> + at item opencl
> +If set to 1, specify using OpenCL capabilities, only available if
> +FFmpeg was configured with @code{--enable-opencl}. Default value is 0.
> +
> @end table
>
> @subsection Examples
> diff --git a/libavfilter/chromakey_opencl_kernel.h b/libavfilter/chromakey_opencl_kernel.h
> new file mode 100644
> index 0000000..56bbc79
> --- /dev/null
> +++ b/libavfilter/chromakey_opencl_kernel.h
> @@ -0,0 +1,98 @@
> +/*
> + * Copyright (c) 2015 Timo Rothenpieler <timo at rothenpieler.org>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFILTER_CHROMAKEY_OPENCL_KERNEL_H
> +#define AVFILTER_CHROMAKEY_OPENCL_KERNEL_H
> +
> +#include "libavutil/opencl.h"
> +
> +const char *ff_kernel_chromakey_opencl = AV_OPENCL_KERNEL(
> +
> +inline unsigned char get_pixel(global unsigned char *src,
> + int x,
> + int y,
> + int w,
> + int h,
> + int linesize,
> + int hsub_log2,
> + int vsub_log2,
> + unsigned char def)
> +{
> + if (x < 0 || x >= w || y < 0 || x >= w)
> + return def;
> +
> + x >>= hsub_log2;
> + y >>= vsub_log2;
> +
> + return src[linesize * y + x];
> +}
> +
> +kernel void chromakey(global unsigned char *src_u,
> + global unsigned char *src_v,
> + global unsigned char *dst,
> + int linesize_u,
> + int linesize_v,
> + int linesize_a,
> + int height,
> + int width,
> + int hsub_log2,
> + int vsub_log2,
> + unsigned char chromakey_u,
> + unsigned char chromakey_v,
> + float similarity,
> + float blend
> + )
> +{
> + int x = get_global_id(0);
> + int y = get_global_id(1);
> + unsigned char res;
> +
> + int xo, yo, du, dv;
> + double diff = 0.0;
> +
> + if (x >= width || y >= height)
> + return;
> +
> + for (yo = 0; yo < 3; yo++) {
> + for (xo = 0; xo < 3; xo++) {
> + du = get_pixel(src_u, x + xo - 1, y + yo - 1, width, height, linesize_u, hsub_log2, vsub_log2, chromakey_u);
> + dv = get_pixel(src_v, x + xo - 1, y + yo - 1, width, height, linesize_v, hsub_log2, vsub_log2, chromakey_v);
> +
> + du -= chromakey_u;
> + dv -= chromakey_v;
> +
> + diff += sqrt((du * du + dv * dv) / (double)(255.0 * 255.0));
> + }
> + }
> +
> + diff /= 9.0;
> +
> + if (blend > 0.0001) {
> + res = clamp((diff - similarity) / blend, 0.0, 1.0) * 255.0;
> + } else {
> + res = (diff > similarity) ? 255 : 0;
> + }
> +
> + dst[linesize_a * y + x] = res;
> +}
> +
> +);
> +
> +#endif /* AVFILTER_CHROMAKEY_OPENCL_KERNEL_H */
> diff --git a/libavfilter/opencl_allkernels.c b/libavfilter/opencl_allkernels.c
> index 6d80fa8..fc05e66 100644
> --- a/libavfilter/opencl_allkernels.c
> +++ b/libavfilter/opencl_allkernels.c
> @@ -23,6 +23,7 @@
> #include "libavutil/opencl.h"
> #include "deshake_opencl_kernel.h"
> #include "unsharp_opencl_kernel.h"
> +#include "chromakey_opencl_kernel.h"
> #endif
>
> #define OPENCL_REGISTER_KERNEL_CODE(X, x) \
> @@ -37,5 +38,6 @@ void ff_opencl_register_filter_kernel_code_all(void)
> #if CONFIG_OPENCL
> OPENCL_REGISTER_KERNEL_CODE(DESHAKE, deshake);
> OPENCL_REGISTER_KERNEL_CODE(UNSHARP, unsharp);
> + OPENCL_REGISTER_KERNEL_CODE(CHROMAKEY, chromakey);
> #endif
> }
> diff --git a/libavfilter/vf_chromakey.c b/libavfilter/vf_chromakey.c
> index 47fdea631..8f15f3e 100644
> --- a/libavfilter/vf_chromakey.c
> +++ b/libavfilter/vf_chromakey.c
> @@ -25,6 +25,10 @@
> #include "internal.h"
> #include "video.h"
>
> +#if CONFIG_OPENCL
> +#include "libavutil/opencl_internal.h"
> +#endif
> +
> typedef struct ChromakeyContext {
> const AVClass *class;
>
> @@ -35,8 +39,152 @@ typedef struct ChromakeyContext {
> float blend;
>
> int is_yuv;
> +
> + int opencl;
> +
> +#if CONFIG_OPENCL
> + cl_command_queue command_queue;
> + cl_program program;
> + cl_kernel kernel;
> +
> + cl_mem cl_inbuf_u;
> + size_t cl_inbuf_u_size;
> + cl_mem cl_inbuf_v;
> + size_t cl_inbuf_v_size;
> + cl_mem cl_outbuf;
> + size_t cl_outbuf_size;
> +#endif
> } ChromakeyContext;
>
> +#if CONFIG_OPENCL
> +#define ROUND_TO_16(a) (((((a) - 1)/16)+1)*16)
> +
> +static av_cold int opencl_chromakey_init(AVFilterContext *avctx)
> +{
> + int res = 0;
> + ChromakeyContext *ctx = avctx->priv;
> +
> + if (res = av_opencl_init(NULL))
> + return res;
> +
> + ctx->command_queue = av_opencl_get_command_queue();
> + if (!ctx->command_queue) {
> + av_log(ctx, AV_LOG_ERROR, "Unable to get OpenCL command queue in filter 'chromakey'\n");
> + return AVERROR(EINVAL);
> + }
> +
> + ctx->program = av_opencl_compile("chromakey", NULL);
> + if (!ctx->program) {
> + av_log(ctx, AV_LOG_ERROR, "OpenCL failed to compile program 'chromakey'\n");
> + return AVERROR(EINVAL);
> + }
> +
> + ctx->kernel = clCreateKernel(ctx->program, "chromakey", &res);
> + if (res != CL_SUCCESS) {
> + av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'chromakey'\n");
> + return AVERROR(EINVAL);
> + }
> +
> + return res;
> +}
> +
> +static av_cold void opencl_chromakey_uninit(AVFilterContext *avctx)
> +{
> + ChromakeyContext *ctx = avctx->priv;
> +
> + if (ctx->cl_inbuf_u)
> + av_opencl_buffer_release(&ctx->cl_inbuf_u);
> + if (ctx->cl_inbuf_v)
> + av_opencl_buffer_release(&ctx->cl_inbuf_v);
> + if (ctx->cl_outbuf)
> + av_opencl_buffer_release(&ctx->cl_outbuf);
> + if (ctx->kernel)
> + clReleaseKernel(ctx->kernel);
> + if (ctx->program)
> + clReleaseProgram(ctx->program);
> +
> + ctx->command_queue = NULL;
> +
> + av_opencl_uninit();
> +}
> +
> +static int opencl_chromakey_frame(AVFilterContext *avctx, AVFrame *frame)
> +{
> + ChromakeyContext *ctx = avctx->priv;
> + int res = 0;
> + int hsub_log2 = 0, vsub_log2 = 0;
> +
> + size_t global_work_size[2] = { (size_t)ROUND_TO_16(frame->width), (size_t)ROUND_TO_16(frame->height) };
> +
> + FFOpenclParam param = { 0 };
> + param.ctx = avctx;
> + param.kernel = ctx->kernel;
> +
> + if (frame->format == AV_PIX_FMT_YUVA420P || frame->format == AV_PIX_FMT_YUVA422P)
> + hsub_log2 = 1;
> +
> + if (frame->format == AV_PIX_FMT_YUVA420P)
> + vsub_log2 = 1;
> +
> + if (!ctx->cl_inbuf_u || !ctx->cl_inbuf_v || !ctx->cl_outbuf) {
> + ctx->cl_inbuf_u_size = frame->linesize[1] * (frame->height >> vsub_log2);
> + ctx->cl_inbuf_v_size = frame->linesize[2] * (frame->height >> vsub_log2);
> + ctx->cl_outbuf_size = frame->linesize[3] * frame->height;
> +
> + res = av_opencl_buffer_create(&ctx->cl_inbuf_u, ctx->cl_inbuf_u_size, CL_MEM_READ_ONLY, NULL);
> + if (res)
> + return res;
> +
> + res = av_opencl_buffer_create(&ctx->cl_inbuf_v, ctx->cl_inbuf_v_size, CL_MEM_READ_ONLY, NULL);
> + if (res)
> + return res;
> +
> + res = av_opencl_buffer_create(&ctx->cl_outbuf, ctx->cl_outbuf_size, CL_MEM_READ_WRITE, NULL);
> + if (res)
> + return res;
> + }
> +
> + res = av_opencl_buffer_write(ctx->cl_inbuf_u, frame->data[1], ctx->cl_inbuf_u_size);
> + if (res)
> + return res;
> +
> + res = av_opencl_buffer_write(ctx->cl_inbuf_v, frame->data[2], ctx->cl_inbuf_v_size);
> + if (res)
> + return res;
> +
> + res = avpriv_opencl_set_parameter(¶m,
> + FF_OPENCL_PARAM_INFO(ctx->cl_inbuf_u),
> + FF_OPENCL_PARAM_INFO(ctx->cl_inbuf_v),
> + FF_OPENCL_PARAM_INFO(ctx->cl_outbuf),
> + FF_OPENCL_PARAM_INFO(frame->linesize[1]),
> + FF_OPENCL_PARAM_INFO(frame->linesize[2]),
> + FF_OPENCL_PARAM_INFO(frame->linesize[3]),
> + FF_OPENCL_PARAM_INFO(frame->height),
> + FF_OPENCL_PARAM_INFO(frame->width),
> + FF_OPENCL_PARAM_INFO(hsub_log2),
> + FF_OPENCL_PARAM_INFO(vsub_log2),
> + FF_OPENCL_PARAM_INFO(ctx->chromakey_uv[0]),
> + FF_OPENCL_PARAM_INFO(ctx->chromakey_uv[1]),
> + FF_OPENCL_PARAM_INFO(ctx->similarity),
> + FF_OPENCL_PARAM_INFO(ctx->blend),
> + NULL);
> + if (res)
> + return res;
> +
> + res = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
> + if (res != CL_SUCCESS) {
> + av_log(ctx, AV_LOG_ERROR, "OpenCL run kernel error occurred: %s\n", av_opencl_errstr(res));
> + return AVERROR_EXTERNAL;
> + }
> +
> + res = av_opencl_buffer_read(frame->data[3], ctx->cl_outbuf, ctx->cl_outbuf_size);
> + if (res)
> + return res;
> +
> + return res;
> +}
> +#endif
> +
> static uint8_t do_chromakey_pixel(ChromakeyContext *ctx, uint8_t u[9], uint8_t v[9])
> {
> double diff = 0.0;
> @@ -110,10 +258,18 @@ static int do_chromakey_slice(AVFilterContext *avctx, void *arg, int jobnr, int
> static int filter_frame(AVFilterLink *link, AVFrame *frame)
> {
> AVFilterContext *avctx = link->dst;
> + ChromakeyContext *ctx = avctx->priv;
> int res;
>
> - if (res = avctx->internal->execute(avctx, do_chromakey_slice, frame, NULL, FFMIN(frame->height, avctx->graph->nb_threads)))
> + if (CONFIG_OPENCL && ctx->opencl) {
> +#if CONFIG_OPENCL
> + if (res = opencl_chromakey_frame(avctx, frame)) {
> + return res;
> + }
> +#endif
> + } else if (res = avctx->internal->execute(avctx, do_chromakey_slice, frame, NULL, FFMIN(frame->height, avctx->graph->nb_threads))) {
> return res;
> + }
>
> return ff_filter_frame(avctx->outputs[0], frame);
> }
> @@ -134,9 +290,28 @@ static av_cold int initialize_chromakey(AVFilterContext *avctx)
> ctx->chromakey_uv[1] = RGB_TO_V(ctx->chromakey_rgba);
> }
>
> + if (ctx->opencl) {
> +#if CONFIG_OPENCL
> + return opencl_chromakey_init(avctx);
> +#else
> + av_log(ctx, AV_LOG_ERROR, "OpenCL support was not enabled in this build, cannot be selected\n");
> + return AVERROR(EINVAL);
> +#endif
> + }
> +
> return 0;
> }
>
> +static av_cold void uninitialize_chromakey(AVFilterContext *avctx)
> +{
> +#if CONFIG_OPENCL
> + ChromakeyContext *ctx = avctx->priv;
> +
> + if (ctx->opencl)
> + opencl_chromakey_uninit(avctx);
> +#endif
> +}
> +
> static av_cold int query_formats(AVFilterContext *avctx)
> {
> static const enum AVPixelFormat pixel_fmts[] = {
> @@ -181,6 +356,7 @@ static const AVOption chromakey_options[] = {
> { "similarity", "set the chromakey similarity value", OFFSET(similarity), AV_OPT_TYPE_FLOAT, { .dbl = 0.01 }, 0.01, 1.0, FLAGS },
> { "blend", "set the chromakey key blend value", OFFSET(blend), AV_OPT_TYPE_FLOAT, { .dbl = 0.0 }, 0.0, 1.0, FLAGS },
> { "yuv", "color parameter is in yuv instead of rgb", OFFSET(is_yuv), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
> + { "opencl", "use OpenCL filtering capabilities", OFFSET(opencl), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
> { NULL }
> };
>
> @@ -192,6 +368,7 @@ AVFilter ff_vf_chromakey = {
> .priv_size = sizeof(ChromakeyContext),
> .priv_class = &chromakey_class,
> .init = initialize_chromakey,
> + .uninit = uninitialize_chromakey,
> .query_formats = query_formats,
> .inputs = chromakey_inputs,
> .outputs = chromakey_outputs,
I'm not quite sure of what I think of this software/opencl hybrid
approach. On one hand, it's good that they share the "user interface"
(options etc.). On the other hand, the OpenCL part duplicates the
entire actual filter code. And unlike with asm, there's no good way to
test that they do the same thing. Also, the amount of OpenCL
boilerplate looks a bit high, considering that it already uses shared
OpenCL utility code.
But since there are already 2 other filters which do this, there's not
much of a reason to reject this, assuming it works and it's reasonably
equivalent to software filtering.
More information about the ffmpeg-devel
mailing list