FFmpeg
vf_nlmeans_vulkan.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) Lynne
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/mem.h"
22 #include "libavutil/random_seed.h"
23 #include "libavutil/vulkan_spirv.h"
24 #include "libavutil/opt.h"
25 #include "vulkan_filter.h"
26 
27 #include "filters.h"
28 #include "video.h"
29 
/* GLSL type of one integral-image element (emitted as DTYPE in the shader). */
#define TYPE_NAME  "vec4"
/* Number of lanes in TYPE_NAME; one search offset is processed per lane. */
#define TYPE_ELEMS 4
/* Size in bytes of one TYPE_NAME element (4 bytes per lane). */
#define TYPE_SIZE  (TYPE_ELEMS * 4)
33 
/* Private filter state. NOTE(review): this listing shows only part of the
 * member list; members referenced elsewhere (vkctx, e, qf, shaders, buffer
 * pools, nb_offsets, ...) are not visible here. */
34 typedef struct NLMeansVulkanContext {
36 
/* Sampler created with VK_FILTER_NEAREST at init time and bound to the
 * input image arrays of both shaders. */
40  VkSampler sampler;
41 
44 
46 
50 
/* X and Y components of every search offset in [-rad, rad]^2 except (0, 0);
 * filled at init time and copied, interleaved, into the offsets buffer. */
51  int *xoffsets;
52  int *yoffsets;
/* Per-component factor applied inside exp() when computing weights:
 * 255*255 / -(10*strength)^2. */
54  float strength[4];
/* Per-component patch size divided by two (after forcing it odd). */
55  int patch[4];
56 
/* User options as read by the initialization code. */
57  struct nlmeans_opts {
/* Research window size; forced odd at init time. */
58  int r;
/* Denoising strength used when no per-component value above 1.0 is set. */
59  double s;
/* Per-component strength; only used when greater than 1.0. */
60  double sc[4];
/* Patch size; forced odd at init time. */
61  int p;
/* Per-component patch size; 0 selects p. */
62  int pc[4];
/* Dispatch parallelism; clamped at init time and forced to 1 when the
 * device lacks float atomic adds. */
63  int t;
64  } opts;
66 
67 static void insert_first(FFVulkanShader *shd, int r, const char *off, int horiz, int plane, int comp)
68 {
69  GLSLF(4, s1 = texture(input_img[%i], pos + ivec2(%i + %s, %i + %s))[%i];
70  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
71 
72  GLSLF(4, s2[0] = texture(input_img[%i], pos + offs[0] + ivec2(%i + %s, %i + %s))[%i];
73  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
74  GLSLF(4, s2[1] = texture(input_img[%i], pos + offs[1] + ivec2(%i + %s, %i + %s))[%i];
75  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
76  GLSLF(4, s2[2] = texture(input_img[%i], pos + offs[2] + ivec2(%i + %s, %i + %s))[%i];
77  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
78  GLSLF(4, s2[3] = texture(input_img[%i], pos + offs[3] + ivec2(%i + %s, %i + %s))[%i];
79  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
80 
81  GLSLC(4, s2 = (s1 - s2) * (s1 - s2); );
82 }
83 
84 static void insert_horizontal_pass(FFVulkanShader *shd, int nb_rows, int first, int plane, int comp)
85 {
86  GLSLF(1, pos.y = int(gl_GlobalInvocationID.x) * %i; ,nb_rows);
87  if (!first)
88  GLSLC(1, barrier(); );
89  GLSLC(0, );
90  GLSLF(1, if (pos.y < height[%i]) { ,plane);
91  GLSLC(2, #pragma unroll(1) );
92  GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows);
93  GLSLC(3, prefix_sum = DTYPE(0); );
94  GLSLC(3, offset = int_stride * uint64_t(pos.y + r); );
95  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
96  GLSLC(0, );
97  GLSLF(3, for (pos.x = 0; pos.x < width[%i]; pos.x++) { ,plane);
98  if (first)
99  insert_first(shd, 0, "r", 0, plane, comp);
100  else
101  GLSLC(4, s2 = dst.v[pos.x]; );
102  GLSLC(4, dst.v[pos.x] = s2 + prefix_sum; );
103  GLSLC(4, prefix_sum += s2; );
104  GLSLC(3, } );
105  GLSLC(2, } );
106  GLSLC(1, } );
107  GLSLC(0, );
108 }
109 
110 static void insert_vertical_pass(FFVulkanShader *shd, int nb_rows, int first, int plane, int comp)
111 {
112  GLSLF(1, pos.x = int(gl_GlobalInvocationID.x) * %i; ,nb_rows);
113  GLSLC(1, #pragma unroll(1) );
114  GLSLF(1, for (r = 0; r < %i; r++) ,nb_rows);
115  GLSLC(2, psum[r] = DTYPE(0); );
116  GLSLC(0, );
117  if (!first)
118  GLSLC(1, barrier(); );
119  GLSLC(0, );
120  GLSLF(1, if (pos.x < width[%i]) { ,plane);
121  GLSLF(2, for (pos.y = 0; pos.y < height[%i]; pos.y++) { ,plane);
122  GLSLC(3, offset = int_stride * uint64_t(pos.y); );
123  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
124  GLSLC(0, );
125  GLSLC(3, #pragma unroll(1) );
126  GLSLF(3, for (r = 0; r < %i; r++) { ,nb_rows);
127  if (first)
128  insert_first(shd, 0, "r", 1, plane, comp);
129  else
130  GLSLC(4, s2 = dst.v[pos.x + r]; );
131  GLSLC(4, dst.v[pos.x + r] = s2 + psum[r]; );
132  GLSLC(4, psum[r] += s2; );
133  GLSLC(3, } );
134  GLSLC(2, } );
135  GLSLC(1, } );
136  GLSLC(0, );
137 }
138 
139 static void insert_weights_pass(FFVulkanShader *shd, int nb_rows, int vert,
140  int t, int dst_comp, int plane, int comp)
141 {
142  GLSLF(1, p = patch_size[%i]; ,dst_comp);
143  GLSLC(0, );
144  GLSLC(1, barrier(); );
145  GLSLC(0, );
146  if (!vert) {
147  GLSLF(1, for (pos.y = 0; pos.y < height[%i]; pos.y++) { ,plane);
148  GLSLF(2, if (gl_GlobalInvocationID.x*%i >= width[%i]) ,nb_rows, plane);
149  GLSLC(3, break; );
150  GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows);
151  GLSLF(3, pos.x = int(gl_GlobalInvocationID.x) * %i + r; ,nb_rows);
152  } else {
153  GLSLF(1, for (pos.x = 0; pos.x < width[%i]; pos.x++) { ,plane);
154  GLSLF(2, if (gl_GlobalInvocationID.x*%i >= height[%i]) ,nb_rows, plane);
155  GLSLC(3, break; );
156  GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows);
157  GLSLF(3, pos.y = int(gl_GlobalInvocationID.x) * %i + r; ,nb_rows);
158  }
159  GLSLC(0, );
160  GLSLC(3, a = DTYPE(0); );
161  GLSLC(3, b = DTYPE(0); );
162  GLSLC(3, c = DTYPE(0); );
163  GLSLC(3, d = DTYPE(0); );
164  GLSLC(0, );
165  GLSLC(3, lt = ((pos.x - p) < 0) || ((pos.y - p) < 0); );
166  GLSLC(0, );
167  GLSLF(3, src[0] = texture(input_img[%i], pos + offs[0])[%i]; ,plane, comp);
168  GLSLF(3, src[1] = texture(input_img[%i], pos + offs[1])[%i]; ,plane, comp);
169  GLSLF(3, src[2] = texture(input_img[%i], pos + offs[2])[%i]; ,plane, comp);
170  GLSLF(3, src[3] = texture(input_img[%i], pos + offs[3])[%i]; ,plane, comp);
171  GLSLC(0, );
172  GLSLC(3, if (lt == false) { );
173  GLSLC(3, offset = int_stride * uint64_t(pos.y - p); );
174  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
175  GLSLC(4, a = dst.v[pos.x - p]; );
176  GLSLC(4, c = dst.v[pos.x + p]; );
177  GLSLC(3, offset = int_stride * uint64_t(pos.y + p); );
178  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
179  GLSLC(4, b = dst.v[pos.x - p]; );
180  GLSLC(4, d = dst.v[pos.x + p]; );
181  GLSLC(3, } );
182  GLSLC(0, );
183  GLSLC(3, patch_diff = d + a - b - c; );
184  GLSLF(3, w = exp(patch_diff * strength[%i]); ,dst_comp);
185  GLSLC(3, w_sum = w[0] + w[1] + w[2] + w[3]; );
186  GLSLC(3, sum = dot(w, src*255); );
187  GLSLC(0, );
188  if (t > 1) {
189  GLSLF(3, atomicAdd(weights_%i[pos.y*ws_stride[%i] + pos.x], w_sum); ,dst_comp, dst_comp);
190  GLSLF(3, atomicAdd(sums_%i[pos.y*ws_stride[%i] + pos.x], sum); ,dst_comp, dst_comp);
191  } else {
192  GLSLF(3, weights_%i[pos.y*ws_stride[%i] + pos.x] += w_sum; ,dst_comp, dst_comp);
193  GLSLF(3, sums_%i[pos.y*ws_stride[%i] + pos.x] += sum; ,dst_comp, dst_comp);
194  }
195  GLSLC(2, } );
196  GLSLC(1, } );
197 }
198 
199 typedef struct HorizontalPushData {
200  uint32_t width[4];
201  uint32_t height[4];
202  uint32_t ws_stride[4];
203  int32_t patch_size[4];
204  float strength[4];
205  VkDeviceAddress integral_base;
206  uint64_t integral_size;
207  uint64_t int_stride;
208  uint32_t xyoffs_start;
209 } HorizontalPushData;
210 
211 static av_cold int init_weights_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec,
212  FFVulkanShader *shd,
213  VkSampler sampler, FFVkSPIRVCompiler *spv,
214  int width, int height, int t,
215  const AVPixFmtDescriptor *desc,
216  int planes, int *nb_rows)
217 {
218  int err;
219  uint8_t *spv_data;
220  size_t spv_len;
221  void *spv_opaque = NULL;
223  int max_dim = FFMAX(width, height);
224  uint32_t max_wg = vkctx->props.properties.limits.maxComputeWorkGroupSize[0];
225  int wg_size, wg_rows;
226 
227  /* Round the max workgroup size to the previous power of two */
228  wg_size = max_wg;
229  wg_rows = 1;
230 
231  if (max_wg > max_dim) {
232  wg_size = max_dim;
233  } else if (max_wg < max_dim) {
234  /* Make it fit */
235  while (wg_size*wg_rows < max_dim)
236  wg_rows++;
237  }
238 
239  RET(ff_vk_shader_init(vkctx, shd, "nlmeans_weights",
240  VK_SHADER_STAGE_COMPUTE_BIT,
241  (const char *[]) { "GL_EXT_buffer_reference",
242  "GL_EXT_buffer_reference2" }, 2,
243  wg_size, 1, 1,
244  0));
245 
246  *nb_rows = wg_rows;
247 
248  if (t > 1)
249  GLSLC(0, #extension GL_EXT_shader_atomic_float : require );
250  GLSLC(0, #extension GL_ARB_gpu_shader_int64 : require );
251  GLSLC(0, );
252  GLSLF(0, #define DTYPE %s ,TYPE_NAME);
253  GLSLF(0, #define T_ALIGN %i ,TYPE_SIZE);
254  GLSLC(0, );
255  GLSLC(0, layout(buffer_reference, buffer_reference_align = T_ALIGN) buffer DataBuffer { );
256  GLSLC(1, DTYPE v[]; );
257  GLSLC(0, }; );
258  GLSLC(0, );
259  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
260  GLSLC(1, uvec4 width; );
261  GLSLC(1, uvec4 height; );
262  GLSLC(1, uvec4 ws_stride; );
263  GLSLC(1, ivec4 patch_size; );
264  GLSLC(1, vec4 strength; );
265  GLSLC(1, DataBuffer integral_base; );
266  GLSLC(1, uint64_t integral_size; );
267  GLSLC(1, uint64_t int_stride; );
268  GLSLC(1, uint xyoffs_start; );
269  GLSLC(0, }; );
270  GLSLC(0, );
271 
272  ff_vk_shader_add_push_const(shd, 0, sizeof(HorizontalPushData),
273  VK_SHADER_STAGE_COMPUTE_BIT);
274 
275  desc_set = (FFVulkanDescriptorSetBinding []) {
276  {
277  .name = "input_img",
278  .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
279  .dimensions = 2,
280  .elems = planes,
281  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
282  .samplers = DUP_SAMPLER(sampler),
283  },
284  {
285  .name = "weights_buffer_0",
286  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
287  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
288  .buf_content = "float weights_0[];",
289  },
290  {
291  .name = "sums_buffer_0",
292  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
293  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
294  .buf_content = "float sums_0[];",
295  },
296  {
297  .name = "weights_buffer_1",
298  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
299  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
300  .buf_content = "float weights_1[];",
301  },
302  {
303  .name = "sums_buffer_1",
304  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
305  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
306  .buf_content = "float sums_1[];",
307  },
308  {
309  .name = "weights_buffer_2",
310  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
311  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
312  .buf_content = "float weights_2[];",
313  },
314  {
315  .name = "sums_buffer_2",
316  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
317  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
318  .buf_content = "float sums_2[];",
319  },
320  {
321  .name = "weights_buffer_3",
322  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
323  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
324  .buf_content = "float weights_3[];",
325  },
326  {
327  .name = "sums_buffer_3",
328  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
329  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
330  .buf_content = "float sums_3[];",
331  },
332  };
333  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1 + 2*desc->nb_components, 0, 0));
334 
335  desc_set = (FFVulkanDescriptorSetBinding []) {
336  {
337  .name = "xyoffsets_buffer",
338  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
339  .mem_quali = "readonly",
340  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
341  .buf_content = "ivec2 xyoffsets[];",
342  },
343  };
344  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1, 1, 0));
345 
346  GLSLC(0, );
347  GLSLC(0, void main() );
348  GLSLC(0, { );
349  GLSLC(1, uint64_t offset; );
350  GLSLC(1, DataBuffer dst; );
351  GLSLC(1, float s1; );
352  GLSLC(1, DTYPE s2; );
353  GLSLC(1, DTYPE prefix_sum; );
354  GLSLF(1, DTYPE psum[%i]; ,*nb_rows);
355  GLSLC(1, int r; );
356  GLSLC(1, ivec2 pos; );
357  GLSLC(1, int p; );
358  GLSLC(0, );
359  GLSLC(1, DataBuffer integral_data; );
360  GLSLF(1, ivec2 offs[%i]; ,TYPE_ELEMS);
361  GLSLC(0, );
362  GLSLC(1, int invoc_idx = int(gl_WorkGroupID.z); );
363  GLSLC(0, );
364  GLSLC(1, offset = integral_size * invoc_idx; );
365  GLSLC(1, integral_data = DataBuffer(uint64_t(integral_base) + offset); );
366  for (int i = 0; i < TYPE_ELEMS; i++)
367  GLSLF(1, offs[%i] = xyoffsets[xyoffs_start + %i*invoc_idx + %i]; ,i,TYPE_ELEMS,i);
368  GLSLC(0, );
369  GLSLC(1, DTYPE a; );
370  GLSLC(1, DTYPE b; );
371  GLSLC(1, DTYPE c; );
372  GLSLC(1, DTYPE d; );
373  GLSLC(0, );
374  GLSLC(1, DTYPE patch_diff; );
375  if (TYPE_ELEMS == 4) {
376  GLSLC(1, vec4 src; );
377  GLSLC(1, vec4 w; );
378  } else {
379  GLSLC(1, vec4 src[4]; );
380  GLSLC(1, vec4 w[4]; );
381  }
382  GLSLC(1, float w_sum; );
383  GLSLC(1, float sum; );
384  GLSLC(0, );
385  GLSLC(1, bool lt; );
386  GLSLC(1, bool gt; );
387  GLSLC(0, );
388 
389  for (int i = 0; i < desc->nb_components; i++) {
390  int off = desc->comp[i].offset / (FFALIGN(desc->comp[i].depth, 8)/8);
391  if (width >= height) {
392  insert_horizontal_pass(shd, *nb_rows, 1, desc->comp[i].plane, off);
393  insert_vertical_pass(shd, *nb_rows, 0, desc->comp[i].plane, off);
394  insert_weights_pass(shd, *nb_rows, 0, t, i, desc->comp[i].plane, off);
395  } else {
396  insert_vertical_pass(shd, *nb_rows, 1, desc->comp[i].plane, off);
397  insert_horizontal_pass(shd, *nb_rows, 0, desc->comp[i].plane, off);
398  insert_weights_pass(shd, *nb_rows, 1, t, i, desc->comp[i].plane, off);
399  }
400  }
401 
402  GLSLC(0, } );
403 
404  RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque));
405  RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));
406 
407  RET(ff_vk_shader_register_exec(vkctx, exec, shd));
408 
409 fail:
410  if (spv_opaque)
411  spv->free_shader(spv, &spv_opaque);
412 
413  return err;
414 }
415 
/*
 * Push constants of the denoise shader; mirrors the "pushConstants" block
 * emitted by init_denoise_pipeline().
 */
typedef struct DenoisePushData {
    uint32_t ws_stride[4]; /* row stride, in elements, of weights_N/sums_N */
} DenoisePushData;
419 
420 static av_cold int init_denoise_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec,
421  FFVulkanShader *shd,
422  VkSampler sampler, FFVkSPIRVCompiler *spv,
423  const AVPixFmtDescriptor *desc, int planes)
424 {
425  int err;
426  uint8_t *spv_data;
427  size_t spv_len;
428  void *spv_opaque = NULL;
430 
431  RET(ff_vk_shader_init(vkctx, shd, "nlmeans_denoise",
432  VK_SHADER_STAGE_COMPUTE_BIT,
433  (const char *[]) { "GL_EXT_buffer_reference",
434  "GL_EXT_buffer_reference2" }, 2,
435  32, 32, 1,
436  0));
437 
438  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
439  GLSLC(1, uvec4 ws_stride; );
440  GLSLC(0, }; );
441 
442  ff_vk_shader_add_push_const(shd, 0, sizeof(DenoisePushData),
443  VK_SHADER_STAGE_COMPUTE_BIT);
444 
445  desc_set = (FFVulkanDescriptorSetBinding []) {
446  {
447  .name = "input_img",
448  .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
449  .dimensions = 2,
450  .elems = planes,
451  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
452  .samplers = DUP_SAMPLER(sampler),
453  },
454  {
455  .name = "output_img",
456  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
457  .mem_layout = ff_vk_shader_rep_fmt(vkctx->output_format, FF_VK_REP_FLOAT),
458  .mem_quali = "writeonly",
459  .dimensions = 2,
460  .elems = planes,
461  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
462  },
463  };
464  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 2, 0, 0));
465 
466  desc_set = (FFVulkanDescriptorSetBinding []) {
467  {
468  .name = "weights_buffer_0",
469  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
470  .mem_quali = "readonly",
471  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
472  .buf_content = "float weights_0[];",
473  },
474  {
475  .name = "sums_buffer_0",
476  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
477  .mem_quali = "readonly",
478  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
479  .buf_content = "float sums_0[];",
480  },
481  {
482  .name = "weights_buffer_1",
483  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
484  .mem_quali = "readonly",
485  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
486  .buf_content = "float weights_1[];",
487  },
488  {
489  .name = "sums_buffer_1",
490  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
491  .mem_quali = "readonly",
492  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
493  .buf_content = "float sums_1[];",
494  },
495  {
496  .name = "weights_buffer_2",
497  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
498  .mem_quali = "readonly",
499  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
500  .buf_content = "float weights_2[];",
501  },
502  {
503  .name = "sums_buffer_2",
504  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
505  .mem_quali = "readonly",
506  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
507  .buf_content = "float sums_2[];",
508  },
509  {
510  .name = "weights_buffer_3",
511  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
512  .mem_quali = "readonly",
513  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
514  .buf_content = "float weights_3[];",
515  },
516  {
517  .name = "sums_buffer_3",
518  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
519  .mem_quali = "readonly",
520  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
521  .buf_content = "float sums_3[];",
522  },
523  };
524 
525  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 2*desc->nb_components, 0, 0));
526 
527  GLSLC(0, void main() );
528  GLSLC(0, { );
529  GLSLC(1, ivec2 size; );
530  GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
531  GLSLC(1, const uint plane = uint(gl_WorkGroupID.z); );
532  GLSLC(0, );
533  GLSLC(1, float w_sum; );
534  GLSLC(1, float sum; );
535  GLSLC(1, vec4 src; );
536  GLSLC(1, vec4 r; );
537  GLSLC(0, );
538  GLSLC(1, size = imageSize(output_img[plane]); );
539  GLSLC(1, if (!IS_WITHIN(pos, size)) );
540  GLSLC(2, return; );
541  GLSLC(0, );
542  GLSLC(1, src = texture(input_img[plane], pos); );
543  GLSLC(0, );
544  for (int c = 0; c < desc->nb_components; c++) {
545  int off = desc->comp[c].offset / (FFALIGN(desc->comp[c].depth, 8)/8);
546  GLSLF(1, if (plane == %i) { ,desc->comp[c].plane);
547  GLSLF(2, w_sum = weights_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c);
548  GLSLF(2, sum = sums_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c);
549  GLSLF(2, r[%i] = (sum + src[%i]*255) / (1.0 + w_sum) / 255; ,off, off);
550  GLSLC(1, } );
551  GLSLC(0, );
552  }
553  GLSLC(1, imageStore(output_img[plane], pos, r); );
554  GLSLC(0, } );
555 
556  RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque));
557  RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));
558 
559  RET(ff_vk_shader_register_exec(vkctx, exec, shd));
560 
561 fail:
562  if (spv_opaque)
563  spv->free_shader(spv, &spv_opaque);
564 
565  return err;
566 }
567 
569 {
570  int rad, err;
571  int xcnt = 0, ycnt = 0;
572  NLMeansVulkanContext *s = ctx->priv;
573  FFVulkanContext *vkctx = &s->vkctx;
574  const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
575  FFVkSPIRVCompiler *spv = NULL;
576  int *offsets_buf;
577  int offsets_dispatched = 0, nb_dispatches = 0;
578 
579  const AVPixFmtDescriptor *desc;
581  if (!desc)
582  return AVERROR(EINVAL);
583 
584  if (!(s->opts.r & 1)) {
585  s->opts.r |= 1;
586  av_log(ctx, AV_LOG_WARNING, "Research size should be odd, setting to %i",
587  s->opts.r);
588  }
589 
590  if (!(s->opts.p & 1)) {
591  s->opts.p |= 1;
592  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i",
593  s->opts.p);
594  }
595 
596  for (int i = 0; i < 4; i++) {
597  double str = (s->opts.sc[i] > 1.0) ? s->opts.sc[i] : s->opts.s;
598  int ps = (s->opts.pc[i] ? s->opts.pc[i] : s->opts.p);
599  str = 10.0f*str;
600  str *= -str;
601  str = 255.0*255.0 / str;
602  s->strength[i] = str;
603  if (!(ps & 1)) {
604  ps |= 1;
605  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i",
606  ps);
607  }
608  s->patch[i] = ps / 2;
609  }
610 
611  rad = s->opts.r/2;
612  s->nb_offsets = (2*rad + 1)*(2*rad + 1) - 1;
613  s->xoffsets = av_malloc(s->nb_offsets*sizeof(*s->xoffsets));
614  s->yoffsets = av_malloc(s->nb_offsets*sizeof(*s->yoffsets));
615  s->nb_offsets = 0;
616 
617  for (int x = -rad; x <= rad; x++) {
618  for (int y = -rad; y <= rad; y++) {
619  if (!x && !y)
620  continue;
621 
622  s->xoffsets[xcnt++] = x;
623  s->yoffsets[ycnt++] = y;
624  s->nb_offsets++;
625  }
626  }
627 
628  RET(ff_vk_create_buf(&s->vkctx, &s->xyoffsets_buf, 2*s->nb_offsets*sizeof(int32_t), NULL, NULL,
629  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
630  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
631  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
632  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
633  RET(ff_vk_map_buffer(&s->vkctx, &s->xyoffsets_buf, (uint8_t **)&offsets_buf, 0));
634 
635  for (int i = 0; i < 2*s->nb_offsets; i += 2) {
636  offsets_buf[i + 0] = s->xoffsets[i >> 1];
637  offsets_buf[i + 1] = s->yoffsets[i >> 1];
638  }
639 
640  RET(ff_vk_unmap_buffer(&s->vkctx, &s->xyoffsets_buf, 1));
641 
642  s->opts.t = FFMIN(s->opts.t, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS));
643  if (!vkctx->atomic_float_feats.shaderBufferFloat32AtomicAdd) {
644  av_log(ctx, AV_LOG_WARNING, "Device doesn't support atomic float adds, "
645  "disabling dispatch parallelism\n");
646  s->opts.t = 1;
647  }
648 
649  spv = ff_vk_spirv_init();
650  if (!spv) {
651  av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
652  return AVERROR_EXTERNAL;
653  }
654 
655  ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
656  RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, 1, 0, 0, 0, NULL));
657  RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST));
658 
659  RET(init_weights_pipeline(vkctx, &s->e, &s->shd_weights, s->sampler,
660  spv, s->vkctx.output_width, s->vkctx.output_height,
661  s->opts.t, desc, planes, &s->pl_weights_rows));
662 
663  RET(init_denoise_pipeline(vkctx, &s->e, &s->shd_denoise, s->sampler,
664  spv, desc, planes));
665 
666  RET(ff_vk_shader_update_desc_buffer(vkctx, &s->e.contexts[0], &s->shd_weights,
667  1, 0, 0,
668  &s->xyoffsets_buf, 0, s->xyoffsets_buf.size,
669  VK_FORMAT_UNDEFINED));
670 
671  do {
672  int wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
673  wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]);
674  offsets_dispatched += wg_invoc * TYPE_ELEMS;
675  nb_dispatches++;
676  } while (offsets_dispatched < s->nb_offsets);
677 
678  av_log(ctx, AV_LOG_VERBOSE, "Filter initialized, %i x/y offsets, %i dispatches\n",
679  s->nb_offsets, nb_dispatches);
680 
681  s->initialized = 1;
682 
683 fail:
684  if (spv)
685  spv->uninit(&spv);
686 
687  return err;
688 }
689 
690 static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec,
691  FFVkBuffer *ws_vk, uint32_t ws_stride[4])
692 {
693  FFVulkanContext *vkctx = &s->vkctx;
694  FFVulkanFunctions *vk = &vkctx->vkfn;
695  VkBufferMemoryBarrier2 buf_bar[8];
696  int nb_buf_bar = 0;
697 
698  DenoisePushData pd = {
699  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
700  };
701 
702  /* Denoise pass pipeline */
703  ff_vk_exec_bind_shader(vkctx, exec, &s->shd_denoise);
704 
705  /* Push data */
706  ff_vk_shader_update_push_const(vkctx, exec, &s->shd_denoise,
707  VK_SHADER_STAGE_COMPUTE_BIT,
708  0, sizeof(pd), &pd);
709 
710  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
711  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
712  .srcStageMask = ws_vk->stage,
713  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
714  .srcAccessMask = ws_vk->access,
715  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
716  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
717  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
718  .buffer = ws_vk->buf,
719  .size = ws_vk->size,
720  .offset = 0,
721  };
722 
723  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
724  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
725  .pBufferMemoryBarriers = buf_bar,
726  .bufferMemoryBarrierCount = nb_buf_bar,
727  });
728  ws_vk->stage = buf_bar[0].dstStageMask;
729  ws_vk->access = buf_bar[0].dstAccessMask;
730 
731  /* End of denoise pass */
732  vk->CmdDispatch(exec->buf,
733  FFALIGN(vkctx->output_width, s->shd_denoise.lg_size[0])/s->shd_denoise.lg_size[0],
734  FFALIGN(vkctx->output_height, s->shd_denoise.lg_size[1])/s->shd_denoise.lg_size[1],
735  av_pix_fmt_count_planes(s->vkctx.output_format));
736 
737  return 0;
738 }
739 
740 static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
741 {
742  int err;
743  AVFrame *out = NULL;
744  AVFilterContext *ctx = link->dst;
745  NLMeansVulkanContext *s = ctx->priv;
746  AVFilterLink *outlink = ctx->outputs[0];
747  FFVulkanContext *vkctx = &s->vkctx;
748  FFVulkanFunctions *vk = &vkctx->vkfn;
749 
750  const AVPixFmtDescriptor *desc;
751  int plane_widths[4];
752  int plane_heights[4];
753 
754  int offsets_dispatched = 0;
755 
756  /* Integral */
757  AVBufferRef *integral_buf = NULL;
758  FFVkBuffer *integral_vk;
759  size_t int_stride;
760  size_t int_size;
761 
762  /* Weights/sums */
763  AVBufferRef *ws_buf = NULL;
764  FFVkBuffer *ws_vk;
765  VkDeviceSize weights_offs[4];
766  VkDeviceSize sums_offs[4];
767  uint32_t ws_stride[4];
768  size_t ws_size[4];
769  size_t ws_total_size = 0;
770 
771  FFVkExecContext *exec;
772  VkImageView in_views[AV_NUM_DATA_POINTERS];
773  VkImageView out_views[AV_NUM_DATA_POINTERS];
774  VkImageMemoryBarrier2 img_bar[8];
775  int nb_img_bar = 0;
776  VkBufferMemoryBarrier2 buf_bar[8];
777  int nb_buf_bar = 0;
778 
779  if (!s->initialized)
780  RET(init_filter(ctx));
781 
783  if (!desc)
784  return AVERROR(EINVAL);
785 
786  /* Integral image */
787  int_stride = s->shd_weights.lg_size[0]*s->pl_weights_rows*TYPE_SIZE;
788  int_size = s->shd_weights.lg_size[0]*s->pl_weights_rows*int_stride;
789 
790  /* Plane dimensions */
791  for (int i = 0; i < desc->nb_components; i++) {
792  plane_widths[i] = !i || (i == 3) ? vkctx->output_width : AV_CEIL_RSHIFT(vkctx->output_width, desc->log2_chroma_w);
793  plane_heights[i] = !i || (i == 3) ? vkctx->output_height : AV_CEIL_RSHIFT(vkctx->output_height, desc->log2_chroma_w);
794  plane_widths[i] = FFALIGN(plane_widths[i], s->shd_denoise.lg_size[0]);
795  plane_heights[i] = FFALIGN(plane_heights[i], s->shd_denoise.lg_size[1]);
796 
797  ws_stride[i] = plane_widths[i];
798  ws_size[i] = ws_stride[i] * plane_heights[i] * sizeof(float);
799  ws_total_size += ws_size[i];
800  }
801 
802  /* Buffers */
803  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->integral_buf_pool, &integral_buf,
804  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
805  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
806  NULL,
807  s->opts.t * int_size,
808  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
809  if (err < 0)
810  return err;
811  integral_vk = (FFVkBuffer *)integral_buf->data;
812 
813  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->ws_buf_pool, &ws_buf,
814  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
815  VK_BUFFER_USAGE_TRANSFER_DST_BIT |
816  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
817  NULL,
818  ws_total_size * 2,
819  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
820  if (err < 0)
821  return err;
822  ws_vk = (FFVkBuffer *)ws_buf->data;
823 
824  weights_offs[0] = 0;
825  sums_offs[0] = ws_total_size;
826  for (int i = 1; i < desc->nb_components; i++) {
827  weights_offs[i] = weights_offs[i - 1] + ws_size[i - 1];
828  sums_offs[i] = sums_offs[i - 1] + ws_size[i - 1];
829  }
830 
831  /* Output frame */
832  out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
833  if (!out) {
834  err = AVERROR(ENOMEM);
835  goto fail;
836  }
837 
838  /* Execution context */
839  exec = ff_vk_exec_get(&s->vkctx, &s->e);
840  ff_vk_exec_start(vkctx, exec);
841 
842  /* Dependencies */
843  RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
844  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
845  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
846  RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
847  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
848  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
849 
850  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &integral_buf, 1, 0));
851  integral_buf = NULL;
852 
853  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &ws_buf, 1, 0));
854  ws_buf = NULL;
855 
856  /* Input frame prep */
857  RET(ff_vk_create_imageviews(vkctx, exec, in_views, in));
858  ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
859  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
860  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
861  VK_ACCESS_SHADER_READ_BIT,
862  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
863  VK_QUEUE_FAMILY_IGNORED);
864 
865  /* Output frame prep */
866  RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
867  ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
868  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
869  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
870  VK_ACCESS_SHADER_WRITE_BIT,
871  VK_IMAGE_LAYOUT_GENERAL,
872  VK_QUEUE_FAMILY_IGNORED);
873 
874  nb_buf_bar = 0;
875  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
876  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
877  .srcStageMask = ws_vk->stage,
878  .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
879  .srcAccessMask = ws_vk->access,
880  .dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
881  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
882  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
883  .buffer = ws_vk->buf,
884  .size = ws_vk->size,
885  .offset = 0,
886  };
887  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
888  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
889  .srcStageMask = integral_vk->stage,
890  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
891  .srcAccessMask = integral_vk->access,
892  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
893  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
894  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
895  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
896  .buffer = integral_vk->buf,
897  .size = integral_vk->size,
898  .offset = 0,
899  };
900 
901  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
902  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
903  .pImageMemoryBarriers = img_bar,
904  .imageMemoryBarrierCount = nb_img_bar,
905  .pBufferMemoryBarriers = buf_bar,
906  .bufferMemoryBarrierCount = nb_buf_bar,
907  });
908  ws_vk->stage = buf_bar[0].dstStageMask;
909  ws_vk->access = buf_bar[0].dstAccessMask;
910  integral_vk->stage = buf_bar[1].dstStageMask;
911  integral_vk->access = buf_bar[1].dstAccessMask;
912 
913  /* Buffer zeroing */
914  vk->CmdFillBuffer(exec->buf, ws_vk->buf, 0, ws_vk->size, 0x0);
915 
916  nb_buf_bar = 0;
917  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
918  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
919  .srcStageMask = ws_vk->stage,
920  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
921  .srcAccessMask = ws_vk->access,
922  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
923  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
924  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
925  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
926  .buffer = ws_vk->buf,
927  .size = ws_vk->size,
928  .offset = 0,
929  };
930 
931  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
932  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
933  .pBufferMemoryBarriers = buf_bar,
934  .bufferMemoryBarrierCount = nb_buf_bar,
935  });
936  ws_vk->stage = buf_bar[0].dstStageMask;
937  ws_vk->access = buf_bar[0].dstAccessMask;
938 
939  /* Update weights descriptors */
940  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_weights, in, in_views, 0, 0,
941  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
942  s->sampler);
943  for (int i = 0; i < desc->nb_components; i++) {
944  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_weights, 0, 1 + i*2 + 0, 0,
945  ws_vk, weights_offs[i], ws_size[i],
946  VK_FORMAT_UNDEFINED));
947  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_weights, 0, 1 + i*2 + 1, 0,
948  ws_vk, sums_offs[i], ws_size[i],
949  VK_FORMAT_UNDEFINED));
950  }
951 
952  /* Update denoise descriptors */
953  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_denoise, in, in_views, 0, 0,
954  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
955  s->sampler);
956  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_denoise, out, out_views, 0, 1,
957  VK_IMAGE_LAYOUT_GENERAL, s->sampler);
958  for (int i = 0; i < desc->nb_components; i++) {
959  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_denoise, 1, i*2 + 0, 0,
960  ws_vk, weights_offs[i], ws_size[i],
961  VK_FORMAT_UNDEFINED));
962  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_denoise, 1, i*2 + 1, 0,
963  ws_vk, sums_offs[i], ws_size[i],
964  VK_FORMAT_UNDEFINED));
965  }
966 
967  /* Weights pipeline */
968  ff_vk_exec_bind_shader(vkctx, exec, &s->shd_weights);
969 
970  do {
971  int wg_invoc;
972  HorizontalPushData pd = {
973  { plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] },
974  { plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] },
975  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
976  { s->patch[0], s->patch[1], s->patch[2], s->patch[3] },
977  { s->strength[0], s->strength[1], s->strength[2], s->strength[2], },
978  integral_vk->address,
979  (uint64_t)int_size,
980  (uint64_t)int_stride,
981  offsets_dispatched,
982  };
983 
984  /* Push data */
985  ff_vk_shader_update_push_const(vkctx, exec, &s->shd_weights,
986  VK_SHADER_STAGE_COMPUTE_BIT,
987  0, sizeof(pd), &pd);
988 
989  if (offsets_dispatched) {
990  nb_buf_bar = 0;
991  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
992  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
993  .srcStageMask = integral_vk->stage,
994  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
995  .srcAccessMask = integral_vk->access,
996  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
997  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
998  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
999  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1000  .buffer = integral_vk->buf,
1001  .size = integral_vk->size,
1002  .offset = 0,
1003  };
1004 
1005  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
1006  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
1007  .pBufferMemoryBarriers = buf_bar,
1008  .bufferMemoryBarrierCount = nb_buf_bar,
1009  });
1010  integral_vk->stage = buf_bar[1].dstStageMask;
1011  integral_vk->access = buf_bar[1].dstAccessMask;
1012  }
1013 
1014  wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
1015  wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]);
1016 
1017  /* End of horizontal pass */
1018  vk->CmdDispatch(exec->buf, 1, 1, wg_invoc);
1019 
1020  offsets_dispatched += wg_invoc * TYPE_ELEMS;
1021  } while (offsets_dispatched < s->nb_offsets);
1022 
1023  RET(denoise_pass(s, exec, ws_vk, ws_stride));
1024 
1025  err = ff_vk_exec_submit(vkctx, exec);
1026  if (err < 0)
1027  return err;
1028 
1029  err = av_frame_copy_props(out, in);
1030  if (err < 0)
1031  goto fail;
1032 
1033  av_frame_free(&in);
1034 
1035  return ff_filter_frame(outlink, out);
1036 
1037 fail:
1038  av_buffer_unref(&integral_buf);
1039  av_buffer_unref(&ws_buf);
1040  av_frame_free(&in);
1041  av_frame_free(&out);
1042  return err;
1043 }
1044 
1045 static void nlmeans_vulkan_uninit(AVFilterContext *avctx)
1046 {
1047  NLMeansVulkanContext *s = avctx->priv;
1048  FFVulkanContext *vkctx = &s->vkctx;
1049  FFVulkanFunctions *vk = &vkctx->vkfn;
1050 
1051  ff_vk_exec_pool_free(vkctx, &s->e);
1052  ff_vk_shader_free(vkctx, &s->shd_weights);
1053  ff_vk_shader_free(vkctx, &s->shd_denoise);
1054 
1055  av_buffer_pool_uninit(&s->integral_buf_pool);
1056  av_buffer_pool_uninit(&s->ws_buf_pool);
1057 
1058  if (s->sampler)
1059  vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
1060  vkctx->hwctx->alloc);
1061 
1062  ff_vk_uninit(&s->vkctx);
1063 
1064  av_freep(&s->xoffsets);
1065  av_freep(&s->yoffsets);
1066 
1067  s->initialized = 0;
1068 }
1069 
1070 #define OFFSET(x) offsetof(NLMeansVulkanContext, x)
1071 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
1072 static const AVOption nlmeans_vulkan_options[] = {
1073  { "s", "denoising strength for all components", OFFSET(opts.s), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1074  { "p", "patch size for all components", OFFSET(opts.p), AV_OPT_TYPE_INT, { .i64 = 3*2+1 }, 0, 99, FLAGS },
1075  { "r", "research window radius", OFFSET(opts.r), AV_OPT_TYPE_INT, { .i64 = 7*2+1 }, 0, 99, FLAGS },
1076  { "t", "parallelism", OFFSET(opts.t), AV_OPT_TYPE_INT, { .i64 = 36 }, 1, 168, FLAGS },
1077 
1078  { "s1", "denoising strength for component 1", OFFSET(opts.sc[0]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1079  { "s2", "denoising strength for component 2", OFFSET(opts.sc[1]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1080  { "s3", "denoising strength for component 3", OFFSET(opts.sc[2]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1081  { "s4", "denoising strength for component 4", OFFSET(opts.sc[3]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1082 
1083  { "p1", "patch size for component 1", OFFSET(opts.pc[0]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1084  { "p2", "patch size for component 2", OFFSET(opts.pc[1]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1085  { "p3", "patch size for component 3", OFFSET(opts.pc[2]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1086  { "p4", "patch size for component 4", OFFSET(opts.pc[3]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1087 
1088  { NULL }
1089 };
1090 
1091 AVFILTER_DEFINE_CLASS(nlmeans_vulkan);
1092 
1093 static const AVFilterPad nlmeans_vulkan_inputs[] = {
1094  {
1095  .name = "default",
1096  .type = AVMEDIA_TYPE_VIDEO,
1097  .filter_frame = &nlmeans_vulkan_filter_frame,
1098  .config_props = &ff_vk_filter_config_input,
1099  },
1100 };
1101 
1102 static const AVFilterPad nlmeans_vulkan_outputs[] = {
1103  {
1104  .name = "default",
1105  .type = AVMEDIA_TYPE_VIDEO,
1106  .config_props = &ff_vk_filter_config_output,
1107  },
1108 };
1109 
1111  .name = "nlmeans_vulkan",
1112  .description = NULL_IF_CONFIG_SMALL("Non-local means denoiser (Vulkan)"),
1113  .priv_size = sizeof(NLMeansVulkanContext),
1114  .init = &ff_vk_filter_init,
1115  .uninit = &nlmeans_vulkan_uninit,
1116  FILTER_INPUTS(nlmeans_vulkan_inputs),
1117  FILTER_OUTPUTS(nlmeans_vulkan_outputs),
1119  .priv_class = &nlmeans_vulkan_class,
1120  .flags = AVFILTER_FLAG_HWDEVICE,
1121  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
1122 };
ff_get_video_buffer
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:116
NLMeansVulkanContext::nlmeans_opts::p
int p
Definition: vf_nlmeans_vulkan.c:61
ff_vk_create_buf
int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext, void *alloc_pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
Definition: vulkan.c:928
AV_LOG_WARNING
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:215
FFVulkanContext::output_height
int output_height
Definition: vulkan.h:303
r
const char * r
Definition: vf_curves.c:127
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
NLMeansVulkanContext::opts
struct NLMeansVulkanContext::nlmeans_opts opts
NLMeansVulkanContext::sampler
VkSampler sampler
Definition: vf_nlmeans_vulkan.c:40
ff_vk_shader_free
void ff_vk_shader_free(FFVulkanContext *s, FFVulkanShader *shd)
Free a shader.
Definition: vulkan.c:2413
ff_vk_shader_init
int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name, VkPipelineStageFlags stage, const char *extensions[], int nb_extensions, int lg_x, int lg_y, int lg_z, uint32_t required_subgroup_size)
Initialize a shader object, with a specific set of extensions, type+bind, local group size,...
Definition: vulkan.c:1564
out
FILE * out
Definition: movenc.c:55
NLMeansVulkanContext::shd_weights
FFVulkanShader shd_weights
Definition: vf_nlmeans_vulkan.c:48
comp
static void comp(unsigned char *dst, ptrdiff_t dst_stride, unsigned char *src, ptrdiff_t src_stride, int add)
Definition: eamad.c:81
AVBufferPool
The buffer pool.
Definition: buffer_internal.h:88
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1062
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3025
AVBufferRef::data
uint8_t * data
The data buffer.
Definition: buffer.h:90
RET
#define RET(x)
Definition: vulkan.h:67
FFVkBuffer::access
VkAccessFlags2 access
Definition: vulkan.h:96
ff_vk_qf_init
int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, VkQueueFlagBits dev_family)
Chooses a QF and loads it into a context.
Definition: vulkan.c:228
FFVkBuffer::stage
VkPipelineStageFlags2 stage
Definition: vulkan.h:95
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:162
NLMeansVulkanContext::integral_buf_pool
AVBufferPool * integral_buf_pool
Definition: vf_nlmeans_vulkan.c:42
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: filters.h:262
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:389
ff_vk_filter_init
int ff_vk_filter_init(AVFilterContext *avctx)
General lavfi IO functions.
Definition: vulkan_filter.c:233
ff_vk_map_buffer
static int ff_vk_map_buffer(FFVulkanContext *s, FFVkBuffer *buf, uint8_t **mem, int invalidate)
Definition: vulkan.h:481
w
uint8_t w
Definition: llviddspenc.c:38
NLMeansVulkanContext::xoffsets
int * xoffsets
Definition: vf_nlmeans_vulkan.c:51
AVOption
AVOption.
Definition: opt.h:429
b
#define b
Definition: input.c:41
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:225
FFVkBuffer::address
VkDeviceAddress address
Definition: vulkan.h:92
ff_vk_exec_get
FFVkExecContext * ff_vk_exec_get(FFVulkanContext *s, FFVkExecPool *pool)
Retrieve an execution pool.
Definition: vulkan.c:485
NLMeansVulkanContext::strength
float strength[4]
Definition: vf_nlmeans_vulkan.c:54
ff_vk_uninit
void ff_vk_uninit(FFVulkanContext *s)
Frees main context.
Definition: vulkan.c:2452
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
FFVkSPIRVCompiler::uninit
void(* uninit)(struct FFVkSPIRVCompiler **ctx)
Definition: vulkan_spirv.h:32
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:205
NLMeansVulkanContext::initialized
int initialized
Definition: vf_nlmeans_vulkan.c:37
video.h
AV_PIX_FMT_VULKAN
@ AV_PIX_FMT_VULKAN
Vulkan hardware images.
Definition: pixfmt.h:379
ff_vk_exec_add_dep_frame
int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, VkPipelineStageFlagBits2 wait_stage, VkPipelineStageFlagBits2 signal_stage)
Definition: vulkan.c:692
FFVkBuffer::buf
VkBuffer buf
Definition: vulkan.h:88
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
NLMeansVulkanContext::yoffsets
int * yoffsets
Definition: vf_nlmeans_vulkan.c:52
av_pix_fmt_count_planes
int av_pix_fmt_count_planes(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3065
AVVulkanDeviceContext::alloc
const VkAllocationCallbacks * alloc
Custom memory allocator, else NULL.
Definition: hwcontext_vulkan.h:63
AVFilterContext::priv
void * priv
private data for use by the filter
Definition: avfilter.h:472
fail
#define fail()
Definition: checkasm.h:188
vulkan_filter.h
insert_first
static void insert_first(FFVulkanShader *shd, int r, const char *off, int horiz, int plane, int comp)
Definition: vf_nlmeans_vulkan.c:67
ff_vk_shader_update_img_array
void ff_vk_shader_update_img_array(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, AVFrame *f, VkImageView *views, int set, int binding, VkImageLayout layout, VkSampler sampler)
Update a descriptor in a buffer with an image array.
Definition: vulkan.c:2356
ff_vk_shader_register_exec
int ff_vk_shader_register_exec(FFVulkanContext *s, FFVkExecPool *pool, FFVulkanShader *shd)
Register a shader with an exec pool.
Definition: vulkan.c:2053
NLMeansVulkanContext::shd_denoise
FFVulkanShader shd_denoise
Definition: vf_nlmeans_vulkan.c:49
ff_vk_shader_add_descriptor_set
int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, FFVulkanShader *shd, FFVulkanDescriptorSetBinding *desc, int nb, int singular, int print_to_shader_only)
Add descriptor to a shader.
Definition: vulkan.c:1928
FFVulkanContext::atomic_float_feats
VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_feats
Definition: vulkan.h:285
AVFilterPad
A filter pad used for either input or output.
Definition: filters.h:38
first
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But first
Definition: rate_distortion.txt:12
GLSLC
#define GLSLC(N, S)
Definition: vulkan.h:44
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:209
av_cold
#define av_cold
Definition: attributes.h:90
main
int main
Definition: dovi_rpuenc.c:37
float
float
Definition: af_crystalizer.c:122
FFVulkanContext::output_width
int output_width
Definition: vulkan.h:302
NLMeansVulkanContext::ws_buf_pool
AVBufferPool * ws_buf_pool
Definition: vf_nlmeans_vulkan.c:43
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:60
AV_OPT_TYPE_DOUBLE
@ AV_OPT_TYPE_DOUBLE
Underlying C type is double.
Definition: opt.h:267
FLAGS
#define FLAGS
insert_weights_pass
static void insert_weights_pass(FFVulkanShader *shd, int nb_rows, int vert, int t, int dst_comp, int plane, int comp)
Definition: vf_nlmeans_vulkan.c:139
filters.h
FF_VK_REP_FLOAT
@ FF_VK_REP_FLOAT
Definition: vulkan.h:367
ctx
AVFormatContext * ctx
Definition: movenc.c:49
ff_vf_nlmeans_vulkan
const AVFilter ff_vf_nlmeans_vulkan
ff_vk_exec_add_dep_buf
int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef **deps, int nb_deps, int ref)
Execution dependency management.
Definition: vulkan.c:570
ff_vk_exec_pool_free
void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool)
Definition: vulkan.c:238
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: filters.h:263
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
opts
AVDictionary * opts
Definition: movenc.c:51
ff_vk_shader_rep_fmt
const char * ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt, enum FFVkShaderRepFormat rep_fmt)
Definition: vulkan.c:1290
NULL
#define NULL
Definition: coverity.c:32
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:713
av_buffer_unref
void av_buffer_unref(AVBufferRef **buf)
Free a given reference and automatically free the buffer if there are no more references to it.
Definition: buffer.c:139
DUP_SAMPLER
#define DUP_SAMPLER(x)
Definition: vulkan.h:73
av_buffer_pool_uninit
void av_buffer_pool_uninit(AVBufferPool **ppool)
Mark the pool as being available for freeing.
Definition: buffer.c:328
ff_vk_filter_config_output
int ff_vk_filter_config_output(AVFilterLink *outlink)
Definition: vulkan_filter.c:209
FFVkBuffer::size
size_t size
Definition: vulkan.h:91
NLMeansVulkanContext::nlmeans_opts
Definition: vf_nlmeans_vulkan.c:57
ff_vk_exec_pool_init
int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, FFVkExecPool *pool, int nb_contexts, int nb_queries, VkQueryType query_type, int query_64bit, const void *query_create_pnext)
Allocates/frees an execution pool.
Definition: vulkan.c:300
FFVulkanContext
Definition: vulkan.h:263
AVFILTER_DEFINE_CLASS
#define AVFILTER_DEFINE_CLASS(fname)
Definition: filters.h:273
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
FF_FILTER_FLAG_HWFRAME_AWARE
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: filters.h:206
NLMeansVulkanContext::qf
FFVkQueueFamilyCtx qf
Definition: vf_nlmeans_vulkan.c:39
NLMeansVulkanContext::patch
int patch[4]
Definition: vf_nlmeans_vulkan.c:55
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts.c:368
NLMeansVulkanContext
Definition: vf_nlmeans_vulkan.c:34
ff_vk_shader_update_push_const
void ff_vk_shader_update_push_const(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, VkShaderStageFlagBits stage, int offset, size_t size, void *src)
Update push constant in a shader.
Definition: vulkan.c:2369
NLMeansVulkanContext::xyoffsets_buf
FFVkBuffer xyoffsets_buf
Definition: vf_nlmeans_vulkan.c:45
FFVulkanDescriptorSetBinding
Definition: vulkan.h:75
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:94
height
#define height
Definition: dsp.h:85
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:425
AVFILTER_FLAG_HWDEVICE
#define AVFILTER_FLAG_HWDEVICE
The filter can create hardware frames using AVFilterContext.hw_device_ctx.
Definition: avfilter.h:173
NLMeansVulkanContext::nlmeans_opts::t
int t
Definition: vf_nlmeans_vulkan.c:63
size
int size
Definition: twinvq_data.h:10344
AV_NUM_DATA_POINTERS
#define AV_NUM_DATA_POINTERS
Definition: frame.h:390
FFVkQueueFamilyCtx
Definition: vulkan.h:102
FFVulkanShader
Definition: vulkan.h:179
FFVulkanContext::output_format
enum AVPixelFormat output_format
Definition: vulkan.h:304
FFVkSPIRVCompiler::compile_shader
int(* compile_shader)(FFVulkanContext *s, struct FFVkSPIRVCompiler *ctx, FFVulkanShader *shd, uint8_t **data, size_t *size, const char *entrypoint, void **opaque)
Definition: vulkan_spirv.h:28
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
insert_horizontal_pass
static void insert_horizontal_pass(FFVulkanShader *shd, int nb_rows, int first, int plane, int comp)
Definition: vf_nlmeans_vulkan.c:84
AVERROR_EXTERNAL
#define AVERROR_EXTERNAL
Generic error in an external library.
Definition: error.h:59
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
FFVkExecContext
Definition: vulkan.h:107
ff_vk_shader_update_desc_buffer
int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, int set, int bind, int elem, FFVkBuffer *buf, VkDeviceSize offset, VkDeviceSize len, VkFormat fmt)
Update a descriptor in a buffer with a buffer.
Definition: vulkan.c:2290
FFVulkanDescriptorSetBinding::name
const char * name
Definition: vulkan.h:76
TYPE_SIZE
#define TYPE_SIZE
Definition: vf_nlmeans_vulkan.c:32
FFVkSPIRVCompiler
Definition: vulkan_spirv.h:26
layout
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel layout
Definition: filter_design.txt:18
uninit
static void uninit(AVBSFContext *ctx)
Definition: pcm_rechunk.c:68
NLMeansVulkanContext::pl_weights_rows
int pl_weights_rows
Definition: vf_nlmeans_vulkan.c:47
ff_vk_exec_start
int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e)
Start/submit/wait an execution.
Definition: vulkan.c:508
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
init_filter
static int init_filter(FilteringContext *fctx, AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, const char *filter_spec)
Definition: transcode.c:260
ff_vk_frame_barrier
void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlagBits new_access, VkImageLayout new_layout, uint32_t new_qf)
Definition: vulkan.c:1521
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
ff_vk_shader_link
int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd, uint8_t *spirv, size_t spirv_len, const char *entrypoint)
Link a shader into an executable.
Definition: vulkan.c:1853
ff_vk_unmap_buffer
static int ff_vk_unmap_buffer(FFVulkanContext *s, FFVkBuffer *buf, int flush)
Definition: vulkan.h:488
insert_vertical_pass
static void insert_vertical_pass(FFVulkanShader *shd, int nb_rows, int first, int plane, int comp)
Definition: vf_nlmeans_vulkan.c:110
vulkan_spirv.h
FFVulkanContext::props
VkPhysicalDeviceProperties2 props
Definition: vulkan.h:269
AVFilterPad::name
const char * name
Pad name.
Definition: filters.h:44
FFVkSPIRVCompiler::free_shader
void(* free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque)
Definition: vulkan_spirv.h:31
ff_vk_exec_bind_shader
void ff_vk_exec_bind_shader(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd)
Bind a shader.
Definition: vulkan.c:2379
NLMeansVulkanContext::nb_offsets
int nb_offsets
Definition: vf_nlmeans_vulkan.c:53
AVFilter
Filter definition.
Definition: avfilter.h:201
TYPE_NAME
#define TYPE_NAME
Definition: vf_nlmeans_vulkan.c:30
FFVulkanContext::vkfn
FFVulkanFunctions vkfn
Definition: vulkan.h:267
FFVkExecPool
Definition: vulkan.h:241
pos
unsigned int pos
Definition: spdifenc.c:414
ff_vk_shader_add_push_const
int ff_vk_shader_add_push_const(FFVulkanShader *shd, int offset, int size, VkShaderStageFlagBits stage)
Add/update push constants for execution.
Definition: vulkan.c:1223
OFFSET
#define OFFSET(x)
FFVkExecContext::buf
VkCommandBuffer buf
Definition: vulkan.h:119
NLMeansVulkanContext::nlmeans_opts::s
double s
Definition: vf_nlmeans_vulkan.c:59
NLMeansVulkanContext::nlmeans_opts::pc
int pc[4]
Definition: vf_nlmeans_vulkan.c:62
random_seed.h
buffer
the frame and frame reference mechanism is intended to as much as expensive copies of that data while still allowing the filters to produce correct results The data is stored in buffers represented by AVFrame structures Several references can point to the same frame buffer
Definition: filter_design.txt:49
GLSLF
#define GLSLF(N, S,...)
Definition: vulkan.h:54
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Underlying C type is int.
Definition: opt.h:259
AVFilterContext
An instance of a filter.
Definition: avfilter.h:457
NLMeansVulkanContext::nlmeans_opts::sc
double sc[4]
Definition: vf_nlmeans_vulkan.c:60
desc
const char * desc
Definition: libsvtav1.c:79
ff_vk_filter_config_input
int ff_vk_filter_config_input(AVFilterLink *inlink)
Definition: vulkan_filter.c:176
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
FFVulkanContext::hwctx
AVVulkanDeviceContext * hwctx
Definition: vulkan.h:291
mem.h
AVBufferRef
A reference to a data buffer.
Definition: buffer.h:82
NLMeansVulkanContext::e
FFVkExecPool e
Definition: vf_nlmeans_vulkan.c:38
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
AVVulkanDeviceContext::act_dev
VkDevice act_dev
Active device.
Definition: hwcontext_vulkan.h:84
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
ff_vk_init_sampler
int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler, int unnorm_coords, VkFilter filt)
Create a sampler.
Definition: vulkan.c:1244
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
TYPE_ELEMS
#define TYPE_ELEMS
Definition: vf_nlmeans_vulkan.c:31
FFVkBuffer
Definition: vulkan.h:87
int32_t
int32_t
Definition: audioconvert.c:56
ff_vk_exec_submit
int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
Definition: vulkan.c:807
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
ff_vk_create_imageviews
int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, VkImageView views[AV_NUM_DATA_POINTERS], AVFrame *f)
Create an imageview and add it as a dependency to an execution.
Definition: vulkan.c:1446
width
#define width
Definition: dsp.h:85
NLMeansVulkanContext::nlmeans_opts::r
int r
Definition: vf_nlmeans_vulkan.c:58
FILTER_SINGLE_PIXFMT
#define FILTER_SINGLE_PIXFMT(pix_fmt_)
Definition: filters.h:252
FFVulkanFunctions
Definition: vulkan_functions.h:263
ff_vk_get_pooled_buffer
int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, AVBufferRef **buf, VkBufferUsageFlags usage, void *create_pNext, size_t size, VkMemoryPropertyFlagBits mem_props)
Initialize a pool and create AVBufferRefs containing FFVkBuffer.
Definition: vulkan.c:1171
src
#define src
Definition: vp8dsp.c:248
planes
static const struct @458 planes[]
NLMeansVulkanContext::vkctx
FFVulkanContext vkctx
Definition: vf_nlmeans_vulkan.c:35