[FFmpeg-cvslog] lavf: add transpose_opencl filter
Ruiling Song
git at videolan.org
Mon Dec 3 02:07:55 EET 2018
ffmpeg | branch: master | Ruiling Song <ruiling.song at intel.com> | Wed Nov 28 10:27:38 2018 +0800| [416dc9a5e81729e3313bcc13aebc5faa082c63a3] | committer: Mark Thompson
lavf: add transpose_opencl filter
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
Signed-off-by: Mark Thompson <sw at jkqxz.net>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=416dc9a5e81729e3313bcc13aebc5faa082c63a3
---
configure | 1 +
libavfilter/Makefile | 1 +
libavfilter/allfilters.c | 1 +
libavfilter/opencl/transpose.cl | 35 +++++
libavfilter/opencl_source.h | 1 +
libavfilter/transpose.h | 34 +++++
libavfilter/vf_transpose.c | 14 +-
libavfilter/vf_transpose_opencl.c | 288 ++++++++++++++++++++++++++++++++++++++
8 files changed, 362 insertions(+), 13 deletions(-)
diff --git a/configure b/configure
index ed77b4ee81..1870b4d189 100755
--- a/configure
+++ b/configure
@@ -3480,6 +3480,7 @@ tinterlace_merge_test_deps="tinterlace_filter"
tinterlace_pad_test_deps="tinterlace_filter"
tonemap_filter_deps="const_nan"
tonemap_opencl_filter_deps="opencl const_nan"
+transpose_opencl_filter_deps="opencl"
unsharp_opencl_filter_deps="opencl"
uspp_filter_deps="gpl avcodec"
vaguedenoiser_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 1895fa2b0d..6e2658186d 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -393,6 +393,7 @@ OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o colorspace.o
OBJS-$(CONFIG_TPAD_FILTER) += vf_tpad.o
OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o
OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER) += vf_transpose_npp.o cuda_check.o
+OBJS-$(CONFIG_TRANSPOSE_OPENCL_FILTER) += vf_transpose_opencl.o opencl.o opencl/transpose.o
OBJS-$(CONFIG_TRIM_FILTER) += trim.o
OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o
OBJS-$(CONFIG_UNSHARP_FILTER) += vf_unsharp.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 837c99eb75..a600069500 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -372,6 +372,7 @@ extern AVFilter ff_vf_tonemap_opencl;
extern AVFilter ff_vf_tpad;
extern AVFilter ff_vf_transpose;
extern AVFilter ff_vf_transpose_npp;
+extern AVFilter ff_vf_transpose_opencl;
extern AVFilter ff_vf_trim;
extern AVFilter ff_vf_unpremultiply;
extern AVFilter ff_vf_unsharp;
diff --git a/libavfilter/opencl/transpose.cl b/libavfilter/opencl/transpose.cl
new file mode 100644
index 0000000000..e6388aba8f
--- /dev/null
+++ b/libavfilter/opencl/transpose.cl
@@ -0,0 +1,35 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+kernel void transpose(__write_only image2d_t dst,
+ __read_only image2d_t src,
+ int dir) {
+ const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+ CLK_ADDRESS_CLAMP_TO_EDGE |
+ CLK_FILTER_NEAREST);
+
+ int2 size = get_image_dim(dst);
+ int x = get_global_id(0);
+ int y = get_global_id(1);
+
+ int xin = (dir & 2) ? (size.y - 1 - y) : y;
+ int yin = (dir & 1) ? (size.x - 1 - x) : x;
+ float4 data = read_imagef(src, sampler, (int2)(xin, yin));
+
+ if (x < size.x && y < size.y)
+ write_imagef(dst, (int2)(x, y), data);
+}
diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
index 2f67d890b3..4118138c30 100644
--- a/libavfilter/opencl_source.h
+++ b/libavfilter/opencl_source.h
@@ -25,6 +25,7 @@ extern const char *ff_opencl_source_convolution;
extern const char *ff_opencl_source_neighbor;
extern const char *ff_opencl_source_overlay;
extern const char *ff_opencl_source_tonemap;
+extern const char *ff_opencl_source_transpose;
extern const char *ff_opencl_source_unsharp;
#endif /* AVFILTER_OPENCL_SOURCE_H */
diff --git a/libavfilter/transpose.h b/libavfilter/transpose.h
new file mode 100644
index 0000000000..d4bb4da1ae
--- /dev/null
+++ b/libavfilter/transpose.h
@@ -0,0 +1,34 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVFILTER_TRANSPOSE_H
+#define AVFILTER_TRANSPOSE_H
+
+enum PassthroughType {
+ TRANSPOSE_PT_TYPE_NONE,
+ TRANSPOSE_PT_TYPE_LANDSCAPE,
+ TRANSPOSE_PT_TYPE_PORTRAIT,
+};
+
+enum TransposeDir {
+ TRANSPOSE_CCLOCK_FLIP,
+ TRANSPOSE_CLOCK,
+ TRANSPOSE_CCLOCK,
+ TRANSPOSE_CLOCK_FLIP,
+};
+
+#endif
diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c
index 74a4bbcf58..dd54947bd9 100644
--- a/libavfilter/vf_transpose.c
+++ b/libavfilter/vf_transpose.c
@@ -38,19 +38,7 @@
#include "formats.h"
#include "internal.h"
#include "video.h"
-
-typedef enum {
- TRANSPOSE_PT_TYPE_NONE,
- TRANSPOSE_PT_TYPE_LANDSCAPE,
- TRANSPOSE_PT_TYPE_PORTRAIT,
-} PassthroughType;
-
-enum TransposeDir {
- TRANSPOSE_CCLOCK_FLIP,
- TRANSPOSE_CLOCK,
- TRANSPOSE_CCLOCK,
- TRANSPOSE_CLOCK_FLIP,
-};
+#include "transpose.h"
typedef struct TransVtable {
void (*transpose_8x8)(uint8_t *src, ptrdiff_t src_linesize,
diff --git a/libavfilter/vf_transpose_opencl.c b/libavfilter/vf_transpose_opencl.c
new file mode 100644
index 0000000000..dd678e91cd
--- /dev/null
+++ b/libavfilter/vf_transpose_opencl.c
@@ -0,0 +1,288 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <float.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "internal.h"
+#include "opencl.h"
+#include "opencl_source.h"
+#include "video.h"
+#include "transpose.h"
+
+typedef struct TransposeOpenCLContext {
+ OpenCLFilterContext ocf;
+ int initialised;
+ int passthrough; ///< PassthroughType, landscape passthrough mode enabled
+ int dir; ///< TransposeDir
+ cl_kernel kernel;
+ cl_command_queue command_queue;
+} TransposeOpenCLContext;
+
+static int transpose_opencl_init(AVFilterContext *avctx)
+{
+ TransposeOpenCLContext *ctx = avctx->priv;
+ cl_int cle;
+ int err;
+
+ err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_transpose, 1);
+ if (err < 0)
+ goto fail;
+
+ ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
+ ctx->ocf.hwctx->device_id,
+ 0, &cle);
+ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
+ "command queue %d.\n", cle);
+
+ ctx->kernel = clCreateKernel(ctx->ocf.program, "transpose", &cle);
+ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);
+
+
+ ctx->initialised = 1;
+ return 0;
+
+fail:
+ if (ctx->command_queue)
+ clReleaseCommandQueue(ctx->command_queue);
+ if (ctx->kernel)
+ clReleaseKernel(ctx->kernel);
+ return err;
+}
+
+static int transpose_opencl_config_output(AVFilterLink *outlink)
+{
+ AVFilterContext *avctx = outlink->src;
+ TransposeOpenCLContext *s = avctx->priv;
+ AVFilterLink *inlink = avctx->inputs[0];
+ const AVPixFmtDescriptor *desc_in = av_pix_fmt_desc_get(inlink->format);
+ int ret;
+
+ if ((inlink->w >= inlink->h &&
+ s->passthrough == TRANSPOSE_PT_TYPE_LANDSCAPE) ||
+ (inlink->w <= inlink->h &&
+ s->passthrough == TRANSPOSE_PT_TYPE_PORTRAIT)) {
+ if (inlink->hw_frames_ctx) {
+ outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx);
+ if (!outlink->hw_frames_ctx)
+ return AVERROR(ENOMEM);
+ }
+ av_log(avctx, AV_LOG_VERBOSE,
+ "w:%d h:%d -> w:%d h:%d (passthrough mode)\n",
+ inlink->w, inlink->h, inlink->w, inlink->h);
+
+ return 0;
+ } else {
+ s->passthrough = TRANSPOSE_PT_TYPE_NONE;
+ }
+
+ if (desc_in->log2_chroma_w != desc_in->log2_chroma_h) {
+ av_log(avctx, AV_LOG_ERROR, "Input format %s not supported.\n",
+ desc_in->name);
+ return AVERROR(EINVAL);
+ }
+
+ s->ocf.output_width = inlink->h;
+ s->ocf.output_height = inlink->w;
+ ret = ff_opencl_filter_config_output(outlink);
+ if (ret < 0)
+ return ret;
+
+ if (inlink->sample_aspect_ratio.num)
+ outlink->sample_aspect_ratio = av_div_q((AVRational) { 1, 1 },
+ inlink->sample_aspect_ratio);
+ else
+ outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
+
+ av_log(avctx, AV_LOG_VERBOSE,
+ "w:%d h:%d dir:%d -> w:%d h:%d rotation:%s vflip:%d\n",
+ inlink->w, inlink->h, s->dir, outlink->w, outlink->h,
+ s->dir == 1 || s->dir == 3 ? "clockwise" : "counterclockwise",
+ s->dir == 0 || s->dir == 3);
+ return 0;
+}
+
+static AVFrame *get_video_buffer(AVFilterLink *inlink, int w, int h)
+{
+ TransposeOpenCLContext *s = inlink->dst->priv;
+
+ return s->passthrough ?
+ ff_null_get_video_buffer (inlink, w, h) :
+ ff_default_get_video_buffer(inlink, w, h);
+}
+
+static int transpose_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
+{
+ AVFilterContext *avctx = inlink->dst;
+ AVFilterLink *outlink = avctx->outputs[0];
+ TransposeOpenCLContext *ctx = avctx->priv;
+ AVFrame *output = NULL;
+ size_t global_work[2];
+ cl_mem src, dst;
+ cl_int cle;
+ int err, p;
+
+ av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+ av_get_pix_fmt_name(input->format),
+ input->width, input->height, input->pts);
+
+ if (!input->hw_frames_ctx)
+ return AVERROR(EINVAL);
+
+ if (ctx->passthrough)
+ return ff_filter_frame(outlink, input);
+
+ output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+ if (!output) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ err = av_frame_copy_props(output, input);
+ if (err < 0)
+ goto fail;
+
+ if (input->sample_aspect_ratio.num == 0) {
+ output->sample_aspect_ratio = input->sample_aspect_ratio;
+ } else {
+ output->sample_aspect_ratio.num = input->sample_aspect_ratio.den;
+ output->sample_aspect_ratio.den = input->sample_aspect_ratio.num;
+ }
+
+ if (!ctx->initialised) {
+ err = transpose_opencl_init(avctx);
+ if (err < 0)
+ goto fail;
+ }
+
+ for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
+ src = (cl_mem) input->data[p];
+ dst = (cl_mem) output->data[p];
+
+ if (!dst)
+ break;
+ CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem, &dst);
+ CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem, &src);
+ CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_int, &ctx->dir);
+
+ err = ff_opencl_filter_work_size_from_image(avctx, global_work, output,
+ p, 16);
+
+ cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
+ global_work, NULL,
+ 0, NULL, NULL);
+ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle);
+ }
+ cle = clFinish(ctx->command_queue);
+ CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);
+
+ av_frame_free(&input);
+
+ av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+ av_get_pix_fmt_name(output->format),
+ output->width, output->height, output->pts);
+
+ return ff_filter_frame(outlink, output);
+
+fail:
+ clFinish(ctx->command_queue);
+ av_frame_free(&input);
+ av_frame_free(&output);
+ return err;
+}
+
+static av_cold void transpose_opencl_uninit(AVFilterContext *avctx)
+{
+ TransposeOpenCLContext *ctx = avctx->priv;
+ cl_int cle;
+
+ if (ctx->kernel) {
+ cle = clReleaseKernel(ctx->kernel);
+ if (cle != CL_SUCCESS)
+ av_log(avctx, AV_LOG_ERROR, "Failed to release "
+ "kernel: %d.\n", cle);
+ }
+
+ if (ctx->command_queue) {
+ cle = clReleaseCommandQueue(ctx->command_queue);
+ if (cle != CL_SUCCESS)
+ av_log(avctx, AV_LOG_ERROR, "Failed to release "
+ "command queue: %d.\n", cle);
+ }
+
+ ff_opencl_filter_uninit(avctx);
+}
+
+#define OFFSET(x) offsetof(TransposeOpenCLContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption transpose_opencl_options[] = {
+ { "dir", "set transpose direction", OFFSET(dir), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 3, FLAGS, "dir" },
+ { "cclock_flip", "rotate counter-clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, .flags=FLAGS, .unit = "dir" },
+ { "clock", "rotate clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK }, .flags=FLAGS, .unit = "dir" },
+ { "cclock", "rotate counter-clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK }, .flags=FLAGS, .unit = "dir" },
+ { "clock_flip", "rotate clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP }, .flags=FLAGS, .unit = "dir" },
+
+ { "passthrough", "do not apply transposition if the input matches the specified geometry",
+ OFFSET(passthrough), AV_OPT_TYPE_INT, {.i64=TRANSPOSE_PT_TYPE_NONE}, 0, INT_MAX, FLAGS, "passthrough" },
+ { "none", "always apply transposition", 0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_NONE}, INT_MIN, INT_MAX, FLAGS, "passthrough" },
+ { "portrait", "preserve portrait geometry", 0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_PORTRAIT}, INT_MIN, INT_MAX, FLAGS, "passthrough" },
+ { "landscape", "preserve landscape geometry", 0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_LANDSCAPE}, INT_MIN, INT_MAX, FLAGS, "passthrough" },
+
+ { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(transpose_opencl);
+
+static const AVFilterPad transpose_opencl_inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .get_video_buffer = get_video_buffer,
+ .filter_frame = &transpose_opencl_filter_frame,
+ .config_props = &ff_opencl_filter_config_input,
+ },
+ { NULL }
+};
+
+static const AVFilterPad transpose_opencl_outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = &transpose_opencl_config_output,
+ },
+ { NULL }
+};
+
+AVFilter ff_vf_transpose_opencl = {
+ .name = "transpose_opencl",
+ .description = NULL_IF_CONFIG_SMALL("Transpose input video"),
+ .priv_size = sizeof(TransposeOpenCLContext),
+ .priv_class = &transpose_opencl_class,
+ .init = &ff_opencl_filter_init,
+ .uninit = &transpose_opencl_uninit,
+ .query_formats = &ff_opencl_filter_query_formats,
+ .inputs = transpose_opencl_inputs,
+ .outputs = transpose_opencl_outputs,
+ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
More information about the ffmpeg-cvslog
mailing list