[FFmpeg-cvslog] avfilter/vf_gblur_vulkan: add sizeV option
Wu Jianhua
git at videolan.org
Tue Feb 22 07:15:01 EET 2022
ffmpeg | branch: master | Wu Jianhua <jianhua.wu at intel.com> | Mon Jan 10 15:53:22 2022 +0800| [82ef4c708e64b7e41df1dd6e0440f5ac0d385c95] | committer: Lynne
avfilter/vf_gblur_vulkan: add sizeV option
This commit added a sizeV option, moved the operations shared by the
horizontal and vertical passes into a separate function, and updated the
CGS for the horizontal and vertical passes respectively.
The following command shows how to use the sizeV option:
ffmpeg -init_hw_device vulkan -i input.264 -vf \
hwupload,gblur_vulkan=size=127:sigma=20:sizeV=3:sigmaV=0.5,hwdownload,format=yuv420p \
-y out.264
Signed-off-by: Wu Jianhua <jianhua.wu at intel.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=82ef4c708e64b7e41df1dd6e0440f5ac0d385c95
---
libavfilter/vf_gblur_vulkan.c | 284 ++++++++++++++++++++----------------------
1 file changed, 134 insertions(+), 150 deletions(-)
diff --git a/libavfilter/vf_gblur_vulkan.c b/libavfilter/vf_gblur_vulkan.c
index a2e33d1c90..d61f3c778c 100644
--- a/libavfilter/vf_gblur_vulkan.c
+++ b/libavfilter/vf_gblur_vulkan.c
@@ -1,5 +1,5 @@
/*
- * copyright (c) 2021 Wu Jianhua <jianhua.wu at intel.com>
+ * copyright (c) 2021-2022 Wu Jianhua <jianhua.wu at intel.com>
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -42,39 +42,25 @@ typedef struct GBlurVulkanContext {
int initialized;
int size;
+ int sizeV;
int planes;
- int kernel_size;
float sigma;
float sigmaV;
AVFrame *tmpframe;
} GBlurVulkanContext;
-static const char gblur_horizontal[] = {
- C(0, void gblur(const ivec2 pos, const int index) )
- C(0, { )
- C(1, vec4 sum = texture(input_image[index], pos) * kernel[0]; )
- C(0, )
- C(1, for(int i = 1; i < kernel.length(); i++) { )
- C(2, sum += texture(input_image[index], pos + vec2(i, 0.0)) * kernel[i]; )
- C(2, sum += texture(input_image[index], pos - vec2(i, 0.0)) * kernel[i]; )
- C(1, } )
- C(0, )
- C(1, imageStore(output_image[index], pos, sum); )
- C(0, } )
-};
-
-static const char gblur_vertical[] = {
- C(0, void gblur(const ivec2 pos, const int index) )
- C(0, { )
- C(1, vec4 sum = texture(input_image[index], pos) * kernel[0]; )
- C(0, )
- C(1, for(int i = 1; i < kernel.length(); i++) { )
- C(2, sum += texture(input_image[index], pos + vec2(0.0, i)) * kernel[i]; )
- C(2, sum += texture(input_image[index], pos - vec2(0.0, i)) * kernel[i]; )
- C(1, } )
- C(0, )
- C(1, imageStore(output_image[index], pos, sum); )
- C(0, } )
+static const char gblur_func[] = {
+ C(0, void gblur(const ivec2 pos, const int index) )
+ C(0, { )
+ C(1, vec4 sum = texture(input_images[index], pos) * kernel[0]; )
+ C(0, )
+ C(1, for(int i = 1; i < kernel.length(); i++) { )
+ C(2, sum += texture(input_images[index], pos + OFFSET) * kernel[i]; )
+ C(2, sum += texture(input_images[index], pos - OFFSET) * kernel[i]; )
+ C(1, } )
+ C(0, )
+ C(1, imageStore(output_images[index], pos, sum); )
+ C(0, } )
};
static inline float gaussian(float sigma, float x)
@@ -109,38 +95,120 @@ static void init_gaussian_kernel(float *kernel, float sigma, float kernel_size)
}
}
-static av_cold void init_gaussian_params(GBlurVulkanContext *s)
+static inline void init_kernel_size(GBlurVulkanContext *s, int *out_size)
{
- if (!(s->size & 1)) {
- av_log(s, AV_LOG_WARNING, "kernel size should be odd\n");
- s->size++;
+ int size = *out_size;
+
+ if (!(size & 1)) {
+ av_log(s, AV_LOG_WARNING, "The kernel size should be odd\n");
+ size++;
}
+
+ *out_size = (size >> 1) + 1;
+}
+
+static av_cold void init_gaussian_params(GBlurVulkanContext *s)
+{
if (s->sigmaV <= 0)
s->sigmaV = s->sigma;
- s->kernel_size = (s->size >> 1) + 1;
+ init_kernel_size(s, &s->size);
+
+ if (s->sizeV <= 0)
+ s->sizeV = s->size;
+ else
+ init_kernel_size(s, &s->sizeV);
+
s->tmpframe = NULL;
}
-static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
+static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVkSPIRVShader *shd,
+ FFVkBuffer *params_buf, VkDescriptorBufferInfo *params_desc,
+ int ksize, float sigma)
{
int err = 0;
- char *kernel_def;
uint8_t *kernel_mapped;
- FFVkSPIRVShader *shd;
+
+ const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+ FFVulkanDescriptorSetBinding buf_desc = {
+ .name = "data",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .mem_quali = "readonly",
+ .mem_layout = "std430",
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .updater = NULL,
+ .buf_content = NULL,
+ };
+
+ char *kernel_def = av_asprintf("float kernel[%i];", ksize);
+ if (!kernel_def)
+ return AVERROR(ENOMEM);
+
+ buf_desc.updater = params_desc;
+ buf_desc.buf_content = kernel_def;
+
+ RET(ff_vk_add_descriptor_set(&s->vkctx, pl, shd, &buf_desc, 1, 0));
+
+ GLSLD( gblur_func );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 size; );
+ GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ for (int i = 0; i < planes; i++) {
+ GLSLC(0, );
+ GLSLF(1, size = imageSize(output_images[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ if (s->planes & (1 << i)) {
+ GLSLF(2, gblur(pos, %i); ,i);
+ } else {
+ GLSLF(2, vec4 res = texture(input_images[%i], pos); ,i);
+ GLSLF(2, imageStore(output_images[%i], pos, res); ,i);
+ }
+ GLSLC(1, } );
+ }
+ GLSLC(0, } );
+
+ RET(ff_vk_compile_shader(&s->vkctx, shd, "main"));
+
+ RET(ff_vk_init_pipeline_layout(&s->vkctx, pl));
+ RET(ff_vk_init_compute_pipeline(&s->vkctx, pl));
+
+ RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+ RET(ff_vk_map_buffers(&s->vkctx, params_buf, &kernel_mapped, 1, 0));
+
+ init_gaussian_kernel((float *)kernel_mapped, sigma, ksize);
+
+ RET(ff_vk_unmap_buffers(&s->vkctx, params_buf, 1, 1));
+
+ params_desc->buffer = params_buf->buf;
+ params_desc->range = VK_WHOLE_SIZE;
+
+ ff_vk_update_descriptor_set(&s->vkctx, pl, 1);
+
+fail:
+ av_free(kernel_def);
+ return err;
+}
+
+static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
+{
+ int err = 0;
GBlurVulkanContext *s = ctx->priv;
+ FFVkSPIRVShader *shd;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
FFVulkanDescriptorSetBinding image_descs[] = {
{
- .name = "input_image",
+ .name = "input_images",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
{
- .name = "output_image",
+ .name = "output_images",
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
.mem_quali = "writeonly",
@@ -150,34 +218,18 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
},
};
- FFVulkanDescriptorSetBinding buf_desc = {
- .name = "data",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .mem_quali = "readonly",
- .mem_layout = "std430",
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = NULL,
- .buf_content = NULL,
- };
-
image_descs[0].sampler = ff_vk_init_sampler(&s->vkctx, 1, VK_FILTER_LINEAR);
if (!image_descs[0].sampler)
- return AVERROR_EXTERNAL;
+ return AVERROR_EXTERNAL;
init_gaussian_params(s);
- kernel_def = av_asprintf("float kernel[%i];", s->kernel_size);
- if (!kernel_def)
- return AVERROR(ENOMEM);
-
- buf_desc.buf_content = kernel_def;
-
ff_vk_qf_init(&s->vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
- { /* Create shader for the horizontal pass */
+ {
+ /* Create shader for the horizontal pass */
image_descs[0].updater = s->input_images;
image_descs[1].updater = s->tmp_images;
- buf_desc.updater = &s->params_desc_hor;
s->pl_hor = ff_vk_create_pipeline(&s->vkctx, &s->qf);
if (!s->pl_hor) {
@@ -191,52 +243,18 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
goto fail;
}
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, CGS, 1 });
+ ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 });
RET(ff_vk_add_descriptor_set(&s->vkctx, s->pl_hor, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
- RET(ff_vk_add_descriptor_set(&s->vkctx, s->pl_hor, shd, &buf_desc, 1, 0));
-
- GLSLD( gblur_horizontal );
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- for (int i = 0; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_image[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- if (s->planes & (1 << i)) {
- GLSLF(2, gblur(pos, %i); ,i);
- } else {
- GLSLF(2, vec4 res = texture(input_image[%i], pos); ,i);
- GLSLF(2, imageStore(output_image[%i], pos, res); ,i);
- }
- GLSLC(1, } );
- }
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(&s->vkctx, shd, "main"));
- RET(ff_vk_init_pipeline_layout(&s->vkctx, s->pl_hor));
- RET(ff_vk_init_compute_pipeline(&s->vkctx, s->pl_hor));
-
- RET(ff_vk_create_buf(&s->vkctx, &s->params_buf_hor, sizeof(float) * s->kernel_size,
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
- RET(ff_vk_map_buffers(&s->vkctx, &s->params_buf_hor, &kernel_mapped, 1, 0));
-
- init_gaussian_kernel((float *)kernel_mapped, s->sigma, s->kernel_size);
-
- RET(ff_vk_unmap_buffers(&s->vkctx, &s->params_buf_hor, 1, 1));
-
- s->params_desc_hor.buffer = s->params_buf_hor.buf;
- s->params_desc_hor.range = VK_WHOLE_SIZE;
-
- ff_vk_update_descriptor_set(&s->vkctx, s->pl_hor, 1);
+ GLSLC(0, #define OFFSET (vec2(i, 0.0)));
+ RET(init_gblur_pipeline(s, s->pl_hor, shd, &s->params_buf_hor, &s->params_desc_hor,
+ s->size, s->sigma));
}
- { /* Create shader for the vertical pass */
+ {
+ /* Create shader for the vertical pass */
image_descs[0].updater = s->tmp_images;
image_descs[1].updater = s->output_images;
- buf_desc.updater = &s->params_desc_ver;
s->pl_ver = ff_vk_create_pipeline(&s->vkctx, &s->qf);
if (!s->pl_ver) {
@@ -250,46 +268,12 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
goto fail;
}
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, CGS, 1 });
+ ff_vk_set_compute_shader_sizes(shd, (int [3]){ 1, CGS, 1 });
RET(ff_vk_add_descriptor_set(&s->vkctx, s->pl_ver, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
- RET(ff_vk_add_descriptor_set(&s->vkctx, s->pl_ver, shd, &buf_desc, 1, 0));
-
- GLSLD( gblur_vertical );
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- for (int i = 0; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_image[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- if (s->planes & (1 << i)) {
- GLSLF(2, gblur(pos, %i); ,i);
- } else {
- GLSLF(2, vec4 res = texture(input_image[%i], pos); ,i);
- GLSLF(2, imageStore(output_image[%i], pos, res); ,i);
- }
- GLSLC(1, } );
- }
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(&s->vkctx, shd, "main"));
-
- RET(ff_vk_init_pipeline_layout(&s->vkctx, s->pl_ver));
- RET(ff_vk_init_compute_pipeline(&s->vkctx, s->pl_ver));
-
- RET(ff_vk_create_buf(&s->vkctx, &s->params_buf_ver, sizeof(float) * s->kernel_size,
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
- RET(ff_vk_map_buffers(&s->vkctx, &s->params_buf_ver, &kernel_mapped, 1, 0));
-
- init_gaussian_kernel((float *)kernel_mapped, s->sigmaV, s->kernel_size);
-
- RET(ff_vk_unmap_buffers(&s->vkctx, &s->params_buf_ver, 1, 1));
- s->params_desc_ver.buffer = s->params_buf_ver.buf;
- s->params_desc_ver.range = VK_WHOLE_SIZE;
-
- ff_vk_update_descriptor_set(&s->vkctx, s->pl_ver, 1);
+ GLSLC(0, #define OFFSET (vec2(0.0, i)));
+ RET(init_gblur_pipeline(s, s->pl_ver, shd, &s->params_buf_ver, &s->params_desc_ver,
+ s->sizeV, s->sigmaV));
}
RET(ff_vk_create_exec_ctx(&s->vkctx, &s->exec, &s->qf));
@@ -297,7 +281,6 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
s->initialized = 1;
fail:
- av_free(kernel_def);
return err;
}
@@ -318,22 +301,21 @@ static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *in
{
int err;
VkCommandBuffer cmd_buf;
-
- const VkFormat *input_formats = NULL;
- const VkFormat *output_formats = NULL;
GBlurVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkctx.vkfn;
- AVVkFrame *in = (AVVkFrame *)inframe->data[0];
- AVVkFrame *tmp = (AVVkFrame *)s->tmpframe->data[0];
+
+ const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+ AVVkFrame *in = (AVVkFrame *)inframe->data[0];
AVVkFrame *out = (AVVkFrame *)outframe->data[0];
+ AVVkFrame *tmp = (AVVkFrame *)s->tmpframe->data[0];
- int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+ const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
+ const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
ff_vk_start_exec_recording(&s->vkctx, s->exec);
cmd_buf = ff_vk_get_exec_buf(s->exec);
- input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
for (int i = 0; i < planes; i++) {
RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->input_images[i].imageView,
in->img[i],
@@ -418,11 +400,11 @@ static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *in
ff_vk_bind_pipeline_exec(&s->vkctx, s->exec, s->pl_hor);
vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
- FFALIGN(s->vkctx.output_height, CGS)/CGS, 1);
+ s->vkctx.output_height, 1);
ff_vk_bind_pipeline_exec(&s->vkctx, s->exec, s->pl_ver);
- vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
+ vk->CmdDispatch(cmd_buf,s->vkctx.output_width,
FFALIGN(s->vkctx.output_height, CGS)/CGS, 1);
ff_vk_add_exec_dep(&s->vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
@@ -435,6 +417,7 @@ static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *in
ff_vk_qf_rotate(&s->qf);
return 0;
+
fail:
ff_vk_discard_exec_deps(s->exec);
return err;
@@ -482,10 +465,11 @@ fail:
#define OFFSET(x) offsetof(GBlurVulkanContext, x)
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
static const AVOption gblur_vulkan_options[] = {
- { "sigma", "Set sigma", OFFSET(sigma), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0.01, 1024.0, FLAGS },
- { "sigmaV", "Set vertical sigma", OFFSET(sigmaV), AV_OPT_TYPE_FLOAT, {.dbl = 0}, 0.0, 1024.0, FLAGS },
- { "planes", "Set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT, {.i64 = 0xF}, 0, 0xF, FLAGS },
- { "size", "Set kernel size", OFFSET(size), AV_OPT_TYPE_INT, {.i64 = 19}, 1, GBLUR_MAX_KERNEL_SIZE, FLAGS },
+ { "sigma", "Set sigma", OFFSET(sigma), AV_OPT_TYPE_FLOAT, { .dbl = 0.5 }, 0.01, 1024.0, FLAGS },
+ { "sigmaV", "Set vertical sigma", OFFSET(sigmaV), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, 0.0, 1024.0, FLAGS },
+ { "planes", "Set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT, { .i64 = 0xF }, 0, 0xF, FLAGS },
+ { "size", "Set kernel size", OFFSET(size), AV_OPT_TYPE_INT, { .i64 = 19 }, 1, GBLUR_MAX_KERNEL_SIZE, FLAGS },
+ { "sizeV", "Set vertical kernel size", OFFSET(sizeV), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, GBLUR_MAX_KERNEL_SIZE, FLAGS },
{ NULL },
};
More information about the ffmpeg-cvslog
mailing list