FFmpeg
inter_template.c
Go to the documentation of this file.
1 /*
2  * VVC inter prediction DSP
3  *
4  * Copyright (C) 2022 Nuo Mi
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
24 #include "libavutil/imgutils.h"
25 
26 #define TMP_STRIDE EDGE_EMU_BUFFER_STRIDE
27 static void av_always_inline FUNC(put_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
28  const uint8_t *const _src, ptrdiff_t _src_stride, const int src_height,
29  const int _x, const int _y, const int dx, const int dy,
30  const int height, const int8_t *hf, const int8_t *vf, const int width, const int is_uni, const int is_chroma)
31 {
32  int16_t tmp_array[TMP_STRIDE * MAX_PB_SIZE];
33  int16_t *tmp = tmp_array;
34  pixel *dst = (pixel*)_dst;
35  int16_t *dst16 = (int16_t*)_dst;
36  const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
37  const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
38  const int shift = FFMAX(2, 14 - BIT_DEPTH);
39  const int offset = 1 << (shift - 1);
40  const int taps = is_chroma ? VVC_INTER_CHROMA_TAPS : VVC_INTER_LUMA_TAPS;
41  const int extra = is_chroma ? CHROMA_EXTRA : LUMA_EXTRA;
42  const int extra_before = is_chroma ? CHROMA_EXTRA_BEFORE : LUMA_EXTRA_BEFORE;
43  const int shift1 = 6 - is_chroma;
44  const int shift2 = 4 + is_chroma;
45  const int x0 = SCALED_INT(_x);
46  const int y0 = SCALED_INT(_y);
47 
48  for (int i = 0; i < width; i++) {
49  const int tx = _x + dx * i;
50  const int x = SCALED_INT(tx) - x0;
51  const int mx = av_zero_extend(tx >> shift1, shift2);
52  const int8_t *filter = hf + mx * taps;
53  const pixel *src = (pixel*)_src - extra_before * src_stride;
54 
55  for (int j = 0; j < src_height + extra; j++) {
56  tmp[j] = (is_chroma ? CHROMA_FILTER(src, 1) : LUMA_FILTER(src, 1)) >> (BIT_DEPTH - 8);
57  src += src_stride;
58  }
59  tmp += TMP_STRIDE;
60  }
61 
62  for (int i = 0; i < height; i++) {
63  const int ty = _y + dy * i;
64  const int x = SCALED_INT(ty) - y0;
65  const int mx = av_zero_extend(ty >> shift1, shift2);
66  const int8_t *filter = vf + mx * taps;
67 
68  tmp = tmp_array + extra_before;
69  for (int j = 0; j < width; j++) {
70  const int val = (is_chroma ? CHROMA_FILTER(tmp, 1) : LUMA_FILTER(tmp, 1)) >> 6;
71  if (is_uni)
72  dst[j] = av_clip_pixel((val + offset) >> shift);
73  else
74  dst16[j] = val;
75  tmp += TMP_STRIDE;
76  }
77  if (is_uni)
78  dst += dst_stride;
79  else
80  dst16 += dst_stride;
81  }
82 }
83 
84 static void FUNC(put_luma_scaled)(int16_t *_dst,
85  const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
86  const int x, const int y, const int dx, const int dy,
87  const int height, const int8_t *hf, const int8_t *vf, const int width)
88 {
89  FUNC(put_scaled)((uint8_t *)_dst, MAX_PB_SIZE * sizeof(pixel), _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 0, 0);
90 }
91 
92 static void FUNC(put_chroma_scaled)(int16_t *_dst,
93  const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
94  const int x, const int y, const int dx, const int dy,
95  const int height, const int8_t *hf, const int8_t *vf, const int width)
96 {
97  FUNC(put_scaled)((uint8_t *)_dst, MAX_PB_SIZE * sizeof(pixel), _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 0, 1);
98 }
99 
100 static void FUNC(put_uni_luma_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
101  const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
102  const int x, const int y, const int dx, const int dy,
103  const int height, const int8_t *hf, const int8_t *vf, const int width)
104 {
105  FUNC(put_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 1, 0);
106 }
107 
108 static void FUNC(put_uni_chroma_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
109  const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
110  const int x, const int y, const int dx, const int dy,
111  const int height, const int8_t *hf, const int8_t *vf, const int width)
112 {
113  FUNC(put_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 1, 1);
114 }
115 
116 static void av_always_inline FUNC(put_uni_w_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
117  const uint8_t *const _src, ptrdiff_t _src_stride, const int src_height,
118  const int _x, const int _y, const int dx, const int dy, const int denom, const int wx, const int _ox,
119  const int height, const int8_t *hf, const int8_t *vf, const int width, const int is_chroma)
120 {
121  int16_t tmp_array[TMP_STRIDE * MAX_PB_SIZE];
122  int16_t *tmp = tmp_array;
123  pixel *dst = (pixel*)_dst;
124  const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
125  const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
126  const int shift = FFMAX(2, 14 - BIT_DEPTH);
127  const int offset = 1 << (shift - 1);
128  const int ox = _ox * (1 << (BIT_DEPTH - 8));
129  const int taps = is_chroma ? VVC_INTER_CHROMA_TAPS : VVC_INTER_LUMA_TAPS;
130  const int extra = is_chroma ? CHROMA_EXTRA : LUMA_EXTRA;
131  const int extra_before = is_chroma ? CHROMA_EXTRA_BEFORE : LUMA_EXTRA_BEFORE;
132  const int shift1 = 6 - is_chroma;
133  const int shift2 = 4 + is_chroma;
134  const int x0 = SCALED_INT(_x);
135  const int y0 = SCALED_INT(_y);
136 
137  for (int i = 0; i < width; i++) {
138  const int tx = _x + dx * i;
139  const int x = SCALED_INT(tx) - x0;
140  const int mx = av_zero_extend(tx >> shift1, shift2);
141  const int8_t *filter = hf + mx * taps;
142  const pixel *src = (pixel*)_src - extra_before * src_stride;
143 
144  for (int j = 0; j < src_height + extra; j++) {
145  tmp[j] = (is_chroma ? CHROMA_FILTER(src, 1) : LUMA_FILTER(src, 1)) >> (BIT_DEPTH - 8);
146  src += src_stride;
147  }
148  tmp += TMP_STRIDE;
149  }
150 
151  for (int i = 0; i < height; i++) {
152  const int ty = _y + dy * i;
153  const int x = SCALED_INT(ty) - y0;
154  const int mx = av_zero_extend(ty >> shift1, shift2);
155  const int8_t *filter = vf + mx * taps;
156 
157  tmp = tmp_array + extra_before;
158  for (int j = 0; j < width; j++) {
159  const int val = (is_chroma ? CHROMA_FILTER(tmp, 1) : LUMA_FILTER(tmp, 1)) >> 6;
160  dst[j] = av_clip_pixel(((wx * val + offset) >> shift) + ox);
161  tmp += TMP_STRIDE;
162  }
163  dst += dst_stride;
164  }
165 }
166 
167 static void FUNC(put_uni_luma_w_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
168  const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
169  const int x, const int y, const int dx, const int dy, const int denom, const int wx, const int ox,
170  const int height, const int8_t *hf, const int8_t *vf, const int width)
171 {
172  FUNC(put_uni_w_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, denom, wx, ox, height, hf, vf, width, 0);
173 }
174 
175 static void FUNC(put_uni_chroma_w_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
176  const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
177  const int x, const int y, const int dx, const int dy, const int denom, const int wx, const int ox,
178  const int height, const int8_t *hf, const int8_t *vf, const int width)
179 {
180  FUNC(put_uni_w_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, denom, wx, ox, height, hf, vf, width, 1);
181 }
182 
183 #undef TMP_STRIDE
184 
185 static void FUNC(avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
186  const int16_t *src0, const int16_t *src1, const int width, const int height)
187 {
188  pixel *dst = (pixel*)_dst;
189  const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
190  const int shift = FFMAX(3, 15 - BIT_DEPTH);
191  const int offset = 1 << (shift - 1);
192 
193  for (int y = 0; y < height; y++) {
194  for (int x = 0; x < width; x++)
195  dst[x] = av_clip_pixel((src0[x] + src1[x] + offset) >> shift);
196  src0 += MAX_PB_SIZE;
197  src1 += MAX_PB_SIZE;
198  dst += dst_stride;
199  }
200 }
201 
202 static void FUNC(w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
203  const int16_t *src0, const int16_t *src1, const int width, const int height,
204  const int denom, const int w0, const int w1, const int o0, const int o1)
205 {
206  pixel *dst = (pixel*)_dst;
207  const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
208  const int shift = denom + FFMAX(3, 15 - BIT_DEPTH);
209  const int offset = ((o0 + o1) * (1 << (BIT_DEPTH - 8)) + 1) * (1 << (shift - 1));
210 
211  for (int y = 0; y < height; y++) {
212  for (int x = 0; x < width; x++)
213  dst[x] = av_clip_pixel((src0[x] * w0 + src1[x] * w1 + offset) >> shift);
214  src0 += MAX_PB_SIZE;
215  src1 += MAX_PB_SIZE;
216  dst += dst_stride;
217  }
218 }
219 
220 static void FUNC(put_ciip)(uint8_t *_dst, const ptrdiff_t _dst_stride,
221  const int width, const int height,
222  const uint8_t *_inter, const ptrdiff_t _inter_stride, const int intra_weight)
223 {
224  pixel *dst = (pixel *)_dst;
225  pixel *inter = (pixel *)_inter;
226  const size_t dst_stride = _dst_stride / sizeof(pixel);
227  const size_t inter_stride = _inter_stride / sizeof(pixel);
228  const int inter_weight = 4 - intra_weight;
229 
230  for (int y = 0; y < height; y++) {
231  for (int x = 0; x < width; x++)
232  dst[x] = (dst[x] * intra_weight + inter[x] * inter_weight + 2) >> 2;
233  dst += dst_stride;
234  inter += inter_stride;
235  }
236 }
237 
238 static void FUNC(put_gpm)(uint8_t *_dst, ptrdiff_t dst_stride,
239  const int width, const int height,
240  const int16_t *src0, const int16_t *src1,
241  const uint8_t *weights, const int step_x, const int step_y)
242 {
243  const int shift = FFMAX(5, 17 - BIT_DEPTH);
244  const int offset = 1 << (shift - 1);
245  pixel *dst = (pixel *)_dst;
246 
247  dst_stride /= sizeof(pixel);
248  for (int y = 0; y < height; y++) {
249  for (int x = 0; x < width; x++) {
250  const uint8_t w = weights[x * step_x];
251  dst[x] = av_clip_pixel((src0[x] * w + src1[x] * (8 - w) + offset) >> shift);
252  }
253  dst += dst_stride;
254  src0 += MAX_PB_SIZE;
255  src1 += MAX_PB_SIZE;
256  weights += step_y;
257  }
258 }
259 
260 //8.5.6.3.3 Luma integer sample fetching process, add one extra pad line
261 static void FUNC(bdof_fetch_samples)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride,
262  const int x_frac, const int y_frac, const int width, const int height)
263 {
264  const int x_off = (x_frac >> 3) - 1;
265  const int y_off = (y_frac >> 3) - 1;
266  const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
267  const pixel *src = (pixel*)_src + (x_off) + y_off * src_stride;
268  int16_t *dst = _dst - 1 - MAX_PB_SIZE;
269  const int shift = 14 - BIT_DEPTH;
270  const int bdof_width = width + 2 * BDOF_BORDER_EXT;
271 
272  // top
273  for (int i = 0; i < bdof_width; i++)
274  dst[i] = src[i] << shift;
275 
276  dst += MAX_PB_SIZE;
277  src += src_stride;
278 
279  for (int i = 0; i < height; i++) {
280  dst[0] = src[0] << shift;
281  dst[1 + width] = src[1 + width] << shift;
282  dst += MAX_PB_SIZE;
283  src += src_stride;
284  }
285  for (int i = 0; i < bdof_width; i++)
286  dst[i] = src[i] << shift;
287 }
288 
289 //8.5.6.3.3 Luma integer sample fetching process
290 static void FUNC(fetch_samples)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int x_frac, const int y_frac)
291 {
293 }
294 
295 static void FUNC(prof_grad_filter)(int16_t *gradient_h, int16_t *gradient_v, const ptrdiff_t gradient_stride,
296  const int16_t *_src, const ptrdiff_t src_stride, const int width, const int height)
297 {
298  const int shift = 6;
299  const int16_t *src = _src;
300 
301  for (int y = 0; y < height; y++) {
302  const int16_t *p = src;
303  for (int x = 0; x < width; x++) {
304  gradient_h[x] = (p[1] >> shift) - (p[-1] >> shift);
305  gradient_v[x] = (p[src_stride] >> shift) - (p[-src_stride] >> shift);
306  p++;
307  }
308  gradient_h += gradient_stride;
309  gradient_v += gradient_stride;
310  src += src_stride;
311  }
312 }
313 
314 static void FUNC(apply_prof)(int16_t *dst, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y)
315 {
316  const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit
317 
318  int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
319  int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
321 
322  for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) {
323  for (int x = 0; x < AFFINE_MIN_BLOCK_SIZE; x++) {
324  const int o = y * AFFINE_MIN_BLOCK_SIZE + x;
325  const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o];
326  const int val = src[x] + av_clip(di, -limit, limit - 1);
327  dst[x] = val;
328 
329  }
330  src += MAX_PB_SIZE;
331  dst += MAX_PB_SIZE;
332  }
333 }
334 
335 static void FUNC(apply_prof_uni)(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y)
336 {
337  const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit
338  pixel *dst = (pixel*)_dst;
339  const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
340  const int shift = 14 - BIT_DEPTH;
341 #if BIT_DEPTH < 14
342  const int offset = 1 << (shift - 1);
343 #else
344  const int offset = 0;
345 #endif
346  int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
347  int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
348 
350 
351  for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) {
352  for (int x = 0; x < AFFINE_MIN_BLOCK_SIZE; x++) {
353  const int o = y * AFFINE_MIN_BLOCK_SIZE + x;
354  const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o];
355  const int val = src[x] + av_clip(di, -limit, limit - 1);
356  dst[x] = av_clip_pixel((val + offset) >> shift);
357 
358  }
359  src += MAX_PB_SIZE;
360  dst += dst_stride;
361  }
362 }
363 
364 static void FUNC(apply_prof_uni_w)(uint8_t *_dst, const ptrdiff_t _dst_stride,
365  const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y,
366  const int denom, const int wx, const int _ox)
367 {
368  const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit
369  pixel *dst = (pixel*)_dst;
370  const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
371  const int shift = denom + FFMAX(2, 14 - BIT_DEPTH);
372  const int offset = 1 << (shift - 1);
373  const int ox = _ox * (1 << (BIT_DEPTH - 8));
374  int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
375  int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE];
376 
378 
379  for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) {
380  for (int x = 0; x < AFFINE_MIN_BLOCK_SIZE; x++) {
381  const int o = y * AFFINE_MIN_BLOCK_SIZE + x;
382  const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o];
383  const int val = src[x] + av_clip(di, -limit, limit - 1);
384  dst[x] = av_clip_pixel(((val * wx + offset) >> shift) + ox);
385  }
386  src += MAX_PB_SIZE;
387  dst += dst_stride;
388  }
389 }
390 
391 static void FUNC(derive_bdof_vx_vy)(const int16_t *_src0, const int16_t *_src1,
392  const int pad_left, const int pad_top, const int pad_right, const int pad_bottom,
393  const int16_t **gradient_h, const int16_t **gradient_v,
394  int* vx, int* vy)
395 {
396  const int shift2 = 4;
397  const int shift3 = 1;
398  const int thres = 1 << 4;
399  int sgx2 = 0, sgy2 = 0, sgxgy = 0, sgxdi = 0, sgydi = 0;
400 
401  for (int y = -1; y < BDOF_MIN_BLOCK_SIZE + 1; y++) {
402  const int dy = y + (pad_top && y < 0) - (pad_bottom && y == BDOF_MIN_BLOCK_SIZE); // we pad for the first and last row
403  const int16_t *src0 = _src0 + dy * MAX_PB_SIZE;
404  const int16_t *src1 = _src1 + dy * MAX_PB_SIZE;
405 
406  for (int x = -1; x < BDOF_MIN_BLOCK_SIZE + 1; x++) {
407  const int dx = x + (pad_left && x < 0) - (pad_right && x == BDOF_MIN_BLOCK_SIZE); // we pad for the first and last col
408  const int diff = (src0[dx] >> shift2) - (src1[dx] >> shift2);
409  const int idx = BDOF_BLOCK_SIZE * dy + dx;
410  const int temph = (gradient_h[0][idx] + gradient_h[1][idx]) >> shift3;
411  const int tempv = (gradient_v[0][idx] + gradient_v[1][idx]) >> shift3;
412 
413  sgx2 += FFABS(temph);
414  sgy2 += FFABS(tempv);
415  sgxgy += VVC_SIGN(tempv) * temph;
416  sgxdi += -VVC_SIGN(temph) * diff;
417  sgydi += -VVC_SIGN(tempv) * diff;
418  }
419  }
420  *vx = sgx2 > 0 ? av_clip((sgxdi * (1 << 2)) >> av_log2(sgx2) , -thres + 1, thres - 1) : 0;
421  *vy = sgy2 > 0 ? av_clip(((sgydi * (1 << 2)) - ((*vx * sgxgy) >> 1)) >> av_log2(sgy2), -thres + 1, thres - 1) : 0;
422 }
423 
424 static void FUNC(apply_bdof_min_block)(pixel* dst, const ptrdiff_t dst_stride, const int16_t *src0, const int16_t *src1,
425  const int16_t **gh, const int16_t **gv, const int vx, const int vy)
426 {
427  const int shift4 = 15 - BIT_DEPTH;
428  const int offset4 = 1 << (shift4 - 1);
429 
430  for (int y = 0; y < BDOF_MIN_BLOCK_SIZE; y++) {
431  for (int x = 0; x < BDOF_MIN_BLOCK_SIZE; x++) {
432  const int idx = y * BDOF_BLOCK_SIZE + x;
433  const int bdof_offset = vx * (gh[0][idx] - gh[1][idx]) + vy * (gv[0][idx] - gv[1][idx]);
434  dst[x] = av_clip_pixel((src0[x] + offset4 + src1[x] + bdof_offset) >> shift4);
435  }
436  dst += dst_stride;
437  src0 += MAX_PB_SIZE;
438  src1 += MAX_PB_SIZE;
439  }
440 }
441 
442 static void FUNC(apply_bdof)(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *_src0, const int16_t *_src1,
443  const int block_w, const int block_h)
444 {
445  int16_t gradient_h[2][BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE];
446  int16_t gradient_v[2][BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE];
447  int vx, vy;
448  const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
449  pixel* dst = (pixel*)_dst;
450 
451  FUNC(prof_grad_filter)(gradient_h[0], gradient_v[0], BDOF_BLOCK_SIZE,
452  _src0, MAX_PB_SIZE, block_w, block_h);
453  FUNC(prof_grad_filter)(gradient_h[1], gradient_v[1], BDOF_BLOCK_SIZE,
454  _src1, MAX_PB_SIZE, block_w, block_h);
455 
456  for (int y = 0; y < block_h; y += BDOF_MIN_BLOCK_SIZE) {
457  for (int x = 0; x < block_w; x += BDOF_MIN_BLOCK_SIZE) {
458  const int16_t* src0 = _src0 + y * MAX_PB_SIZE + x;
459  const int16_t* src1 = _src1 + y * MAX_PB_SIZE + x;
460  pixel *d = dst + x;
461  const int idx = BDOF_BLOCK_SIZE * y + x;
462  const int16_t* gh[] = { gradient_h[0] + idx, gradient_h[1] + idx };
463  const int16_t* gv[] = { gradient_v[0] + idx, gradient_v[1] + idx };
464  FUNC(derive_bdof_vx_vy)(src0, src1, !x, !y, x + BDOF_MIN_BLOCK_SIZE == block_w, y + BDOF_MIN_BLOCK_SIZE == block_h, gh, gv, &vx, &vy);
465  FUNC(apply_bdof_min_block)(d, dst_stride, src0, src1, gh, gv, vx, vy);
466  }
467  dst += BDOF_MIN_BLOCK_SIZE * dst_stride;
468  }
469 }
470 
/*
 * Two-tap filter helpers.
 *
 * DMVR_FILTER(src, stride) deliberately picks up `filter` and `x` from the
 * enclosing scope: it weights src[x] and src[x + stride] with filter[0] and
 * filter[1]. DMVR_FILTER2(filter, src0, src1) takes the filter and the two
 * sample values explicitly.
 *
 * All macro arguments are parenthesized so that expression arguments
 * (e.g. `base + 1`, `a + b`) expand with the intended precedence.
 */
#define DMVR_FILTER(src, stride)                                               \
    (filter[0] * (src)[x] +                                                    \
     filter[1] * (src)[x + (stride)])

#define DMVR_FILTER2(filter, src0, src1)                                       \
    ((filter)[0] * (src0) + (filter)[1] * (src1))
477 
478 //8.5.3.2.2 Luma sample bilinear interpolation process
479 static void FUNC(dmvr)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
480  const int height, const intptr_t mx, const intptr_t my, const int width)
481 {
482 #if BIT_DEPTH != 10
483  const pixel *src = (const pixel *)_src;
484  const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
485 #if BIT_DEPTH > 10
486  const int shift4 = BIT_DEPTH - 10;
487  const int offset4 = 1 << (shift4 - 1);
488  #define DMVR_SHIFT(s) (((s) + offset4) >> shift4)
489 #else
490  #define DMVR_SHIFT(s) ((s) << (10 - BIT_DEPTH))
491 #endif // BIT_DEPTH > 10
492 
493  for (int y = 0; y < height; y++) {
494  for (int x = 0; x < width; x++)
495  dst[x] = DMVR_SHIFT(src[x]);
496  src += src_stride;
497  dst += MAX_PB_SIZE;
498  }
499 #undef DMVR_SHIFT
500 #else
501  av_image_copy_plane((uint8_t*)dst, sizeof(int16_t) * MAX_PB_SIZE, _src, _src_stride,
502  width * sizeof(pixel), height);
503 #endif // BIT_DEPTH != 10
504 }
505 
506 //8.5.3.2.2 Luma sample bilinear interpolation process
507 static void FUNC(dmvr_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
508  const int height, const intptr_t mx, const intptr_t my, const int width)
509 {
510  const pixel *src = (const pixel*)_src;
511  const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
512  const int8_t *filter = ff_vvc_inter_luma_dmvr_filters[mx];
513  const int shift1 = BIT_DEPTH - 6;
514  const int offset1 = 1 << (shift1 - 1);
515 
516  for (int y = 0; y < height; y++) {
517  for (int x = 0; x < width; x++)
518  dst[x] = (DMVR_FILTER(src, 1) + offset1) >> shift1;
519  src += src_stride;
520  dst += MAX_PB_SIZE;
521  }
522 }
523 
524 //8.5.3.2.2 Luma sample bilinear interpolation process
525 static void FUNC(dmvr_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
526  const int height, const intptr_t mx, const intptr_t my, const int width)
527 {
528  const pixel *src = (pixel*)_src;
529  const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
530  const int8_t *filter = ff_vvc_inter_luma_dmvr_filters[my];
531  const int shift1 = BIT_DEPTH - 6;
532  const int offset1 = 1 << (shift1 - 1);
533 
534  for (int y = 0; y < height; y++) {
535  for (int x = 0; x < width; x++)
536  dst[x] = (DMVR_FILTER(src, src_stride) + offset1) >> shift1;
537  src += src_stride;
538  dst += MAX_PB_SIZE;
539  }
540 
541 }
542 
543 //8.5.3.2.2 Luma sample bilinear interpolation process
544 static void FUNC(dmvr_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
545  const int height, const intptr_t mx, const intptr_t my, const int width)
546 {
547  int16_t tmp_array[MAX_PB_SIZE * 2];
548  int16_t *tmp0 = tmp_array;
549  int16_t *tmp1 = tmp_array + MAX_PB_SIZE;
550  const pixel *src = (const pixel*)_src;
551  const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
552  const int8_t *filter_x = ff_vvc_inter_luma_dmvr_filters[mx];
553  const int8_t *filter_y = ff_vvc_inter_luma_dmvr_filters[my];
554  const int shift1 = BIT_DEPTH - 6;
555  const int offset1 = 1 << (shift1 - 1);
556  const int shift2 = 4;
557  const int offset2 = 1 << (shift2 - 1);
558 
559  src -= BILINEAR_EXTRA_BEFORE * src_stride;
560  for (int x = 0; x < width; x++)
561  tmp0[x] = (DMVR_FILTER2(filter_x, src[x], src[x + 1]) + offset1) >> shift1;
562  src += src_stride;
563 
564  for (int y = 1; y < height + BILINEAR_EXTRA; y++) {
565  for (int x = 0; x < width; x++) {
566  tmp1[x] = (DMVR_FILTER2(filter_x, src[x], src[x + 1]) + offset1) >> shift1;
567  dst[x] = (DMVR_FILTER2(filter_y, tmp0[x], tmp1[x]) + offset2) >> shift2;
568  }
569  src += src_stride;
570  dst += MAX_PB_SIZE;
571  FFSWAP(int16_t *, tmp0, tmp1);
572  }
573 }
574 
575 #define PEL_FUNC(dst, C, idx1, idx2, a) \
576  do { \
577  for (int w = 0; w < 7; w++) \
578  inter->dst[C][w][idx1][idx2] = FUNC(a); \
579  } while (0) \
580 
581 #define DIR_FUNCS(d, C, c) \
582  PEL_FUNC(put_##d, C, 0, 0, put_##d##_pixels); \
583  PEL_FUNC(put_##d, C, 0, 1, put_##d##_##c##_h); \
584  PEL_FUNC(put_##d, C, 1, 0, put_##d##_##c##_v); \
585  PEL_FUNC(put_##d, C, 1, 1, put_##d##_##c##_hv); \
586  PEL_FUNC(put_##d##_w, C, 0, 0, put_##d##_w_pixels); \
587  PEL_FUNC(put_##d##_w, C, 0, 1, put_##d##_##c##_w_h); \
588  PEL_FUNC(put_##d##_w, C, 1, 0, put_##d##_##c##_w_v); \
589  PEL_FUNC(put_##d##_w, C, 1, 1, put_##d##_##c##_w_hv);
590 
591 #define FUNCS(C, c) \
592  PEL_FUNC(put, C, 0, 0, put_pixels); \
593  PEL_FUNC(put, C, 0, 1, put_##c##_h); \
594  PEL_FUNC(put, C, 1, 0, put_##c##_v); \
595  PEL_FUNC(put, C, 1, 1, put_##c##_hv); \
596  DIR_FUNCS(uni, C, c); \
597 
599 {
600  FUNCS(LUMA, luma);
601  FUNCS(CHROMA, chroma);
602 
603  for (int i = 0; i < FF_ARRAY_ELEMS(inter->put_scaled[LUMA]); i++) {
604  inter->put_scaled[LUMA][i] = FUNC(put_luma_scaled);
605  inter->put_scaled[CHROMA][i] = FUNC(put_chroma_scaled);
606  inter->put_uni_scaled[LUMA][i] = FUNC(put_uni_luma_scaled);
607  inter->put_uni_scaled[CHROMA][i] = FUNC(put_uni_chroma_scaled);
608  inter->put_uni_w_scaled[LUMA][i] = FUNC(put_uni_luma_w_scaled);
609  inter->put_uni_w_scaled[CHROMA][i] = FUNC(put_uni_chroma_w_scaled);
610  }
611 
612  inter->avg = FUNC(avg);
613  inter->w_avg = FUNC(w_avg);
614 
615  inter->dmvr[0][0] = FUNC(dmvr);
616  inter->dmvr[0][1] = FUNC(dmvr_h);
617  inter->dmvr[1][0] = FUNC(dmvr_v);
618  inter->dmvr[1][1] = FUNC(dmvr_hv);
619 
620  inter->put_ciip = FUNC(put_ciip);
621  inter->put_gpm = FUNC(put_gpm);
622 
623  inter->fetch_samples = FUNC(fetch_samples);
624  inter->bdof_fetch_samples = FUNC(bdof_fetch_samples);
625  inter->apply_prof = FUNC(apply_prof);
626  inter->apply_prof_uni = FUNC(apply_prof_uni);
627  inter->apply_prof_uni_w = FUNC(apply_prof_uni_w);
628  inter->apply_bdof = FUNC(apply_bdof);
629  inter->sad = vvc_sad;
630 }
631 
632 #undef FUNCS
633 #undef PEL_FUNC
634 #undef DMVR_FUNCS
_dst
uint8_t * _dst
Definition: dsp.h:52
dmvr_hv
static void FUNC() dmvr_hv(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const intptr_t mx, const intptr_t my, const int width)
Definition: inter_template.c:544
av_clip
#define av_clip
Definition: common.h:100
TMP_STRIDE
#define TMP_STRIDE
Definition: inter_template.c:26
LUMA
#define LUMA
Definition: filter.c:31
VVC_INTER_LUMA_TAPS
#define VVC_INTER_LUMA_TAPS
Definition: data.h:52
ff_vvc_inter_luma_dmvr_filters
const int8_t ff_vvc_inter_luma_dmvr_filters[VVC_INTER_LUMA_DMVR_FACTS][VVC_INTER_LUMA_DMVR_TAPS]
Definition: data.c:1985
src1
const pixel * src1
Definition: h264pred_template.c:421
BILINEAR_EXTRA
#define BILINEAR_EXTRA
Definition: ctu.h:62
CHROMA_FILTER
#define CHROMA_FILTER(src, stride)
Definition: h2656_inter_template.c:336
ff_vvc_inter_dsp_init
static void FUNC() ff_vvc_inter_dsp_init(VVCInterDSPContext *const inter)
Definition: inter_template.c:598
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
w
uint8_t w
Definition: llviddspenc.c:38
CHROMA_EXTRA_BEFORE
#define CHROMA_EXTRA_BEFORE
Definition: h2656_inter_template.c:24
apply_bdof
static void FUNC() apply_bdof(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *_src0, const int16_t *_src1, const int block_w, const int block_h)
Definition: inter_template.c:442
chroma
static av_always_inline void chroma(WaveformContext *s, AVFrame *in, AVFrame *out, int component, int intensity, int offset_y, int offset_x, int column, int mirror, int jobnr, int nb_jobs)
Definition: vf_waveform.c:1639
bdof_fetch_samples
static void FUNC() bdof_fetch_samples(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int x_frac, const int y_frac, const int width, const int height)
Definition: inter_template.c:261
prof_grad_filter
static void FUNC() prof_grad_filter(int16_t *gradient_h, int16_t *gradient_v, const ptrdiff_t gradient_stride, const int16_t *_src, const ptrdiff_t src_stride, const int width, const int height)
Definition: inter_template.c:295
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
vvc_sad
static int vvc_sad(const int16_t *src0, const int16_t *src1, int dx, int dy, const int block_w, const int block_h)
Definition: dsp.c:29
dmvr
static void FUNC() dmvr(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const intptr_t mx, const intptr_t my, const int width)
Definition: inter_template.c:479
put_scaled
static void av_always_inline FUNC() put_scaled(uint8_t *_dst, const ptrdiff_t _dst_stride, const uint8_t *const _src, ptrdiff_t _src_stride, const int src_height, const int _x, const int _y, const int dx, const int dy, const int height, const int8_t *hf, const int8_t *vf, const int width, const int is_uni, const int is_chroma)
Definition: inter_template.c:27
_src
uint8_t ptrdiff_t const uint8_t * _src
Definition: dsp.h:52
av_image_copy_plane
void av_image_copy_plane(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int bytewidth, int height)
Copy image plane from src to dst.
Definition: imgutils.c:374
apply_bdof_min_block
static void FUNC() apply_bdof_min_block(pixel *dst, const ptrdiff_t dst_stride, const int16_t *src0, const int16_t *src1, const int16_t **gh, const int16_t **gv, const int vx, const int vy)
Definition: inter_template.c:424
BDOF_BLOCK_SIZE
#define BDOF_BLOCK_SIZE
Definition: dsp.c:62
mx
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t mx
Definition: dsp.h:53
put_uni_chroma_scaled
static void FUNC() put_uni_chroma_scaled(uint8_t *_dst, const ptrdiff_t _dst_stride, const uint8_t *_src, ptrdiff_t _src_stride, const int src_height, const int x, const int y, const int dx, const int dy, const int height, const int8_t *hf, const int8_t *vf, const int width)
Definition: inter_template.c:108
h2656_inter_template.c
VVC_SIGN
#define VVC_SIGN(v)
Definition: dsp.c:27
val
static double val(void *priv, double ch)
Definition: aeval.c:77
SCALED_INT
#define SCALED_INT(pos)
Definition: ctu.h:64
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
VVC_INTER_CHROMA_TAPS
#define VVC_INTER_CHROMA_TAPS
Definition: data.h:54
apply_prof
static void FUNC() apply_prof(int16_t *dst, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y)
Definition: inter_template.c:314
LUMA_EXTRA
#define LUMA_EXTRA
Definition: h2656_inter_template.c:27
put_ciip
static void FUNC() put_ciip(uint8_t *_dst, const ptrdiff_t _dst_stride, const int width, const int height, const uint8_t *_inter, const ptrdiff_t _inter_stride, const int intra_weight)
Definition: inter_template.c:220
my
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t my
Definition: dsp.h:53
put_uni_w_scaled
static void av_always_inline FUNC() put_uni_w_scaled(uint8_t *_dst, const ptrdiff_t _dst_stride, const uint8_t *const _src, ptrdiff_t _src_stride, const int src_height, const int _x, const int _y, const int dx, const int dy, const int denom, const int wx, const int _ox, const int height, const int8_t *hf, const int8_t *vf, const int width, const int is_chroma)
Definition: inter_template.c:116
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:74
FUNCS
#define FUNCS(C, c)
Definition: inter_template.c:591
pixel
uint8_t pixel
Definition: tiny_ssim.c:41
put_uni_luma_w_scaled
static void FUNC() put_uni_luma_w_scaled(uint8_t *_dst, const ptrdiff_t _dst_stride, const uint8_t *_src, ptrdiff_t _src_stride, const int src_height, const int x, const int y, const int dx, const int dy, const int denom, const int wx, const int ox, const int height, const int8_t *hf, const int8_t *vf, const int width)
Definition: inter_template.c:167
LUMA_EXTRA_BEFORE
#define LUMA_EXTRA_BEFORE
Definition: h2656_inter_template.c:26
dmvr_v
static void FUNC() dmvr_v(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const intptr_t mx, const intptr_t my, const int width)
Definition: inter_template.c:525
BDOF_BORDER_EXT
#define BDOF_BORDER_EXT
Definition: dsp.c:61
apply_prof_uni
static void FUNC() apply_prof_uni(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y)
Definition: inter_template.c:335
height
#define height
Definition: dsp.h:85
shift
static int shift(int a, int b)
Definition: bonk.c:261
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
diff
static av_always_inline int diff(const struct color_info *a, const struct color_info *b, const int trans_thresh)
Definition: vf_paletteuse.c:166
AFFINE_MIN_BLOCK_SIZE
#define AFFINE_MIN_BLOCK_SIZE
Definition: ctu.h:68
offset
it's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this; just let it vf offset
Definition: writing_filters.txt:86
av_zero_extend
#define av_zero_extend
Definition: common.h:151
put_uni_luma_scaled
static void FUNC() put_uni_luma_scaled(uint8_t *_dst, const ptrdiff_t _dst_stride, const uint8_t *_src, ptrdiff_t _src_stride, const int src_height, const int x, const int y, const int dx, const int dy, const int height, const int8_t *hf, const int8_t *vf, const int width)
Definition: inter_template.c:100
DMVR_FILTER
#define DMVR_FILTER(src, stride)
Definition: inter_template.c:471
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
BDOF_MIN_BLOCK_SIZE
#define BDOF_MIN_BLOCK_SIZE
Definition: dsp.c:63
DMVR_SHIFT
#define DMVR_SHIFT(s)
weights
static const int weights[]
Definition: hevc_pel.c:32
derive_bdof_vx_vy
static void FUNC() derive_bdof_vx_vy(const int16_t *_src0, const int16_t *_src1, const int pad_left, const int pad_top, const int pad_right, const int pad_bottom, const int16_t **gradient_h, const int16_t **gradient_v, int *vx, int *vy)
Definition: inter_template.c:391
av_always_inline
#define av_always_inline
Definition: attributes.h:49
put_gpm
static void FUNC() put_gpm(uint8_t *_dst, ptrdiff_t dst_stride, const int width, const int height, const int16_t *src0, const int16_t *src1, const uint8_t *weights, const int step_x, const int step_y)
Definition: inter_template.c:238
hf
uint8_t ptrdiff_t const uint8_t ptrdiff_t int const int8_t * hf
Definition: dsp.h:249
avg
static void FUNC() avg(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *src0, const int16_t *src1, const int width, const int height)
Definition: inter_template.c:185
shift2
static const uint8_t shift2[6]
Definition: dxa.c:49
limit
static double limit(double x)
Definition: vf_pseudocolor.c:142
MAX_PB_SIZE
#define MAX_PB_SIZE
Definition: dsp.h:32
dmvr_h
static void FUNC() dmvr_h(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const intptr_t mx, const intptr_t my, const int width)
Definition: inter_template.c:507
CHROMA_EXTRA
#define CHROMA_EXTRA
Definition: h2656_inter_template.c:25
FFSWAP
#define FFSWAP(type, a, b)
Definition: macros.h:52
DMVR_FILTER2
#define DMVR_FILTER2(filter, src0, src1)
Definition: inter_template.c:475
w_avg
static void FUNC() w_avg(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *src0, const int16_t *src1, const int width, const int height, const int denom, const int w0, const int w1, const int o0, const int o1)
Definition: inter_template.c:202
CHROMA
@ CHROMA
Definition: vf_waveform.c:49
av_clip_pixel
#define av_clip_pixel(a)
Definition: bit_depth_template.c:98
FUNC
#define FUNC(a)
Definition: bit_depth_template.c:104
BILINEAR_EXTRA_BEFORE
#define BILINEAR_EXTRA_BEFORE
Definition: ctu.h:60
VVCInterDSPContext
Definition: dsp.h:47
src0
const pixel *const src0
Definition: h264pred_template.c:420
put_luma_scaled
static void FUNC() put_luma_scaled(int16_t *_dst, const uint8_t *_src, ptrdiff_t _src_stride, const int src_height, const int x, const int y, const int dx, const int dy, const int height, const int8_t *hf, const int8_t *vf, const int width)
Definition: inter_template.c:84
BIT_DEPTH
#define BIT_DEPTH
Definition: dsp_init.c:38
put_chroma_scaled
static void FUNC() put_chroma_scaled(int16_t *_dst, const uint8_t *_src, ptrdiff_t _src_stride, const int src_height, const int x, const int y, const int dx, const int dy, const int height, const int8_t *hf, const int8_t *vf, const int width)
Definition: inter_template.c:92
imgutils.h
width
#define width
Definition: dsp.h:85
vf
uint8_t ptrdiff_t const uint8_t ptrdiff_t int const int8_t const int8_t * vf
Definition: dsp.h:249
put_uni_chroma_w_scaled
static void FUNC() put_uni_chroma_w_scaled(uint8_t *_dst, const ptrdiff_t _dst_stride, const uint8_t *_src, ptrdiff_t _src_stride, const int src_height, const int x, const int y, const int dx, const int dy, const int denom, const int wx, const int ox, const int height, const int8_t *hf, const int8_t *vf, const int width)
Definition: inter_template.c:175
shift1
static const uint8_t shift1[6]
Definition: dxa.c:48
av_log2
int av_log2(unsigned v)
Definition: intmath.c:26
src
#define src
Definition: vp8dsp.c:248
apply_prof_uni_w
static void FUNC() apply_prof_uni_w(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y, const int denom, const int wx, const int _ox)
Definition: inter_template.c:364
LUMA_FILTER
#define LUMA_FILTER(src, stride)
Definition: h2656_inter_template.c:87
fetch_samples
static void FUNC() fetch_samples(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int x_frac, const int y_frac)
Definition: inter_template.c:290