Go to the documentation of this file.
/*
 * Declares a single "put" routine named ff_vvc_put_<name>_<depth>_<opt>.
 * The declared function returns void and takes: an int16_t destination, a
 * const uint8_t source with its stride, the block height, two int8_t filter
 * pointers (hf, vf -- presumably horizontal/vertical coefficients; confirm
 * against the implementation) and the block width.
 */
33 #define PUT_PROTOTYPE(name, depth, opt) \
34 void ff_vvc_put_ ## name ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, int height, const int8_t *hf, const int8_t *vf, int width);
/*
 * Declares all width variants of one put routine for a single bit depth by
 * appending the suffixes 2, 4, 8, 12, 16, 24, 32, 48, 64 and 128 to `name`
 * and expanding PUT_PROTOTYPE once per suffix.
 */
36 #define PUT_PROTOTYPES(name, bitd, opt) \
37 PUT_PROTOTYPE(name##2, bitd, opt) \
38 PUT_PROTOTYPE(name##4, bitd, opt) \
39 PUT_PROTOTYPE(name##8, bitd, opt) \
40 PUT_PROTOTYPE(name##12, bitd, opt) \
41 PUT_PROTOTYPE(name##16, bitd, opt) \
42 PUT_PROTOTYPE(name##24, bitd, opt) \
43 PUT_PROTOTYPE(name##32, bitd, opt) \
44 PUT_PROTOTYPE(name##48, bitd, opt) \
45 PUT_PROTOTYPE(name##64, bitd, opt) \
46 PUT_PROTOTYPE(name##128, bitd, opt)
/*
 * Expands PUT_PROTOTYPES for the three depth tokens 8, 10 and 12, so one
 * invocation declares every width variant of `name` at each of those depths.
 */
48 #define PUT_BPC_PROTOTYPES(name, opt) \
49 PUT_PROTOTYPES(name, 8, opt) \
50 PUT_PROTOTYPES(name, 10, opt) \
51 PUT_PROTOTYPES(name, 12, opt)
/*
 * Declares the three direction families of an n-tap filter: <n>tap_h,
 * <n>tap_v and <n>tap_hv, each through PUT_BPC_PROTOTYPES.
 * `n` is pasted directly in front of the literal "tap_..." token.
 */
53 #define PUT_TAP_PROTOTYPES(n, opt) \
54 PUT_BPC_PROTOTYPES(n##tap_h, opt) \
55 PUT_BPC_PROTOTYPES(n##tap_v, opt) \
56 PUT_BPC_PROTOTYPES(n##tap_hv, opt)
/*
 * Name-building helpers used by the prototype and wrapper macros in this file.
 *   bf(fn, bd, opt)  -> fn_<bd>_<opt>        (e.g. fn_10_avx2)
 *   BF(fn, bpc, opt) -> fn_<bpc>bpc_<opt>    (e.g. fn_16bpc_avx2)
 * In BF the second "bpc" is not the parameter: it is part of the literal
 * token "bpc_", so only the first occurrence is substituted.
 */
66 #define bf(fn, bd, opt) fn##_##bd##_##opt
67 #define BF(fn, bpc, opt) fn##_##bpc##bpc_##opt
/*
 * Declares the BF-named averaging routines ff_vvc_avg_<bpc>bpc_<opt> and
 * ff_vvc_w_avg_<bpc>bpc_<opt>.
 * Unlike the bf-named declarations from AVG_PROTOTYPES, these take their
 * integer arguments as intptr_t and receive an explicit trailing pixel_max.
 * The weighted variant additionally takes denom, w0, w1, o0 and o1.
 */
69 #define AVG_BPC_PROTOTYPES(bpc, opt) \
70 void BF(ff_vvc_avg, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
71 const int16_t *src0, const int16_t *src1, intptr_t width, intptr_t height, intptr_t pixel_max); \
72 void BF(ff_vvc_w_avg, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
73 const int16_t *src0, const int16_t *src1, intptr_t width, intptr_t height, \
74 intptr_t denom, intptr_t w0, intptr_t w1, intptr_t o0, intptr_t o1, intptr_t pixel_max);
/*
 * Declares the bf-named averaging entry points ff_vvc_avg_<bd>_<opt> and
 * ff_vvc_w_avg_<bd>_<opt>.
 * These use plain int arguments and have no pixel_max parameter.
 */
76 #define AVG_PROTOTYPES(bd, opt) \
77 void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
78 const int16_t *src0, const int16_t *src1, int width, int height); \
79 void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
80 const int16_t *src0, const int16_t *src1, int width, int height, \
81 int denom, int w0, int w1, int o0, int o1);
/*
 * Declares the four DMVR routines for one bit depth and instruction set:
 * ff_vvc_dmvr_<bd>_<opt> plus the _h_, _v_ and _hv_ variants.
 * All four share one signature: int16_t destination, const uint8_t source
 * with stride, height, mx, my (intptr_t) and width.
 * NOTE(review): the last line ends with a continuation backslash, so the
 * line that follows the macro is spliced into its body; keep it blank.
 */
91 #define DMVR_PROTOTYPES(bd, opt) \
92 void ff_vvc_dmvr_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
93 int height, intptr_t mx, intptr_t my, int width); \
94 void ff_vvc_dmvr_h_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
95 int height, intptr_t mx, intptr_t my, int width); \
96 void ff_vvc_dmvr_v_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
97 int height, intptr_t mx, intptr_t my, int width); \
98 void ff_vvc_dmvr_hv_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
99 int height, intptr_t mx, intptr_t my, int width); \
106 const int16_t *
src0,
const int16_t *
src1,
int w,
int h,
int pixel_max); \
/*
 * Defines a static per-bit-depth wrapper ff_vvc_apply_bdof_<bd>_<opt>.
 * The wrapper forwards its six arguments unchanged to ff_vvc_apply_bdof_<opt>
 * and appends the maximum sample value for the depth, (1 << bd) - 1.
 * NOTE(review): the lines holding the wrapper's braces are not present in
 * this listing (numbering jumps 110 -> 112); confirm against the real file.
 */
108 #define OF_PROTOTYPES(bd, opt) \
109 static void ff_vvc_apply_bdof_##bd##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
110 const int16_t *src0, const int16_t *src1, int w, int h) \
112 ff_vvc_apply_bdof##_##opt(dst, dst_stride, src0, src1, w, h, (1 << bd) - 1); \
/*
 * Declares the BF-named ALF routines for one bytes-per-component class:
 *   ff_vvc_alf_filter_luma_<bpc>bpc_<opt>
 *   ff_vvc_alf_filter_chroma_<bpc>bpc_<opt>
 *   ff_vvc_alf_classify_grad_<bpc>bpc_<opt>
 *   ff_vvc_alf_classify_<bpc>bpc_<opt>
 * The two filter routines take an extra `stride` argument and a trailing
 * pixel_max; classify takes a trailing bit_depth.
 * NOTE(review): the last line ends with a continuation backslash, so the
 * line that follows the macro is spliced into its body; keep it blank.
 */
119 #define ALF_BPC_PROTOTYPES(bpc, opt) \
120 void BF(ff_vvc_alf_filter_luma, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
121 const uint8_t *src, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, \
122 const int16_t *filter, const int16_t *clip, ptrdiff_t stride, ptrdiff_t vb_pos, ptrdiff_t pixel_max); \
123 void BF(ff_vvc_alf_filter_chroma, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
124 const uint8_t *src, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, \
125 const int16_t *filter, const int16_t *clip, ptrdiff_t stride, ptrdiff_t vb_pos, ptrdiff_t pixel_max); \
126 void BF(ff_vvc_alf_classify_grad, bpc, opt)(int *gradient_sum, \
127 const uint8_t *src, ptrdiff_t src_stride, intptr_t width, intptr_t height, intptr_t vb_pos); \
128 void BF(ff_vvc_alf_classify, bpc, opt)(int *class_idx, int *transpose_idx, const int *gradient_sum, \
129 intptr_t width, intptr_t height, intptr_t vb_pos, intptr_t bit_depth); \
/*
 * Declares the bf-named ALF entry points for one bit depth:
 * ff_vvc_alf_filter_luma_<bd>_<opt>, ff_vvc_alf_filter_chroma_<bd>_<opt>
 * and ff_vvc_alf_classify_<bd>_<opt>.
 * The `bpc` parameter is accepted but not used in the expansion.
 * NOTE(review): the last line ends with a continuation backslash, so the
 * line that follows the macro is spliced into its body; keep it blank.
 */
131 #define ALF_PROTOTYPES(bpc, bd, opt) \
132 void bf(ff_vvc_alf_filter_luma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, \
133 int width, int height, const int16_t *filter, const int16_t *clip, const int vb_pos); \
134 void bf(ff_vvc_alf_filter_chroma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, \
135 int width, int height, const int16_t *filter, const int16_t *clip, const int vb_pos); \
136 void bf(ff_vvc_alf_classify, bd, opt)(int *class_idx, int *transpose_idx, \
137 const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos, int *gradient_tmp); \
147 #if HAVE_SSE4_EXTERNAL
/*
 * Defines the forwarding function ff_vvc_put_<name>_<depth>_<opt>.
 * It calls ff_h2656_put_<name>_<depth>_<opt> with the same arguments, and
 * inserts a fixed destination stride of 2 * MAX_PB_SIZE after dst (the
 * ff_vvc_put_ signature has no destination stride parameter).
 * NOTE(review): the lines holding the function's braces are not present in
 * this listing (numbering jumps 150 -> 152); confirm against the real file.
 */
148 #define FW_PUT(name, depth, opt) \
149 void ff_vvc_put_ ## name ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \
150 int height, const int8_t *hf, const int8_t *vf, int width) \
152 ff_h2656_put_## name ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); \
/*
 * Instantiates FW_PUT for the width suffixes 4, 8, 16, 32, 64 and 128 of
 * `fname` (no width-2 variant here; FW_PUT_4TAP adds that one).
 * NOTE(review): the last line ends with a continuation backslash, so the
 * line that follows the macro is spliced into its body; keep it blank.
 */
155 #define FW_PUT_TAP(fname, bitd, opt ) \
156 FW_PUT(fname##4, bitd, opt ) \
157 FW_PUT(fname##8, bitd, opt ) \
158 FW_PUT(fname##16, bitd, opt ) \
159 FW_PUT(fname##32, bitd, opt ) \
160 FW_PUT(fname##64, bitd, opt ) \
161 FW_PUT(fname##128, bitd, opt ) \
/*
 * Same as FW_PUT_TAP but also instantiates the width-2 variant, giving the
 * suffixes 2, 4, 8, 16, 32, 64 and 128.
 */
163 #define FW_PUT_4TAP(fname, bitd, opt) \
164 FW_PUT(fname ## 2, bitd, opt) \
165 FW_PUT_TAP(fname, bitd, opt)
/*
 * Emits the SSE4 forwarding wrappers that include a width-2 variant:
 * the plain-copy "pixels" family and the 4-tap h, v and hv families.
 */
167 #define FW_PUT_4TAP_SSE4(bitd) \
168 FW_PUT_4TAP(pixels, bitd, sse4) \
169 FW_PUT_4TAP(4tap_h, bitd, sse4) \
170 FW_PUT_4TAP(4tap_v, bitd, sse4) \
171 FW_PUT_4TAP(4tap_hv, bitd, sse4)
/*
 * Emits the SSE4 forwarding wrappers for the 8-tap h, v and hv families.
 * Uses FW_PUT_TAP, so widths start at 4; there is no "pixels" family here
 * (that one is emitted by FW_PUT_4TAP_SSE4).
 */
173 #define FW_PUT_8TAP_SSE4(bitd) \
174 FW_PUT_TAP(8tap_h, bitd, sse4) \
175 FW_PUT_TAP(8tap_v, bitd, sse4) \
176 FW_PUT_TAP(8tap_hv, bitd, sse4)
/*
 * Emits every SSE4 forwarding wrapper for one bit depth: the 4-tap set
 * (including "pixels") followed by the 8-tap set.
 */
178 #define FW_PUT_SSE4(bitd) \
179 FW_PUT_4TAP_SSE4(bitd) \
180 FW_PUT_8TAP_SSE4(bitd)
187 #if HAVE_AVX2_EXTERNAL
/*
 * Emits AVX2 forwarding wrappers for an n-tap filter in the h and v
 * directions at widths 32, 64 and 128. No hv variants are emitted here.
 */
188 #define FW_PUT_TAP_AVX2(n, bitd) \
189 FW_PUT(n ## tap_h32, bitd, avx2) \
190 FW_PUT(n ## tap_h64, bitd, avx2) \
191 FW_PUT(n ## tap_h128, bitd, avx2) \
192 FW_PUT(n ## tap_v32, bitd, avx2) \
193 FW_PUT(n ## tap_v64, bitd, avx2) \
194 FW_PUT(n ## tap_v128, bitd, avx2)
/*
 * Emits the AVX2 forwarding wrappers for one bit depth: "pixels" at widths
 * 32, 64 and 128, plus the 4-tap and 8-tap h/v sets from FW_PUT_TAP_AVX2.
 * NOTE(review): the last line ends with a continuation backslash, so the
 * line that follows the macro is spliced into its body; keep it blank.
 */
196 #define FW_PUT_AVX2(bitd) \
197 FW_PUT(pixels32, bitd, avx2) \
198 FW_PUT(pixels64, bitd, avx2) \
199 FW_PUT(pixels128, bitd, avx2) \
200 FW_PUT_TAP_AVX2(4, bitd) \
201 FW_PUT_TAP_AVX2(8, bitd) \
/*
 * Emits the additional AVX2 forwarding wrappers used for the 16-bit-sample
 * depths: h, v and hv at width 16, plus hv at widths 32, 64 and 128.
 */
207 #define FW_PUT_TAP_16BPC_AVX2(n, bitd) \
208 FW_PUT(n ## tap_h16, bitd, avx2) \
209 FW_PUT(n ## tap_v16, bitd, avx2) \
210 FW_PUT(n ## tap_hv16, bitd, avx2) \
211 FW_PUT(n ## tap_hv32, bitd, avx2) \
212 FW_PUT(n ## tap_hv64, bitd, avx2) \
213 FW_PUT(n ## tap_hv128, bitd, avx2)
/*
 * Emits "pixels16" plus the 4-tap and 8-tap sets of FW_PUT_TAP_16BPC_AVX2
 * for one bit depth.
 */
215 #define FW_PUT_16BPC_AVX2(bitd) \
216 FW_PUT(pixels16, bitd, avx2) \
217 FW_PUT_TAP_16BPC_AVX2(4, bitd) \
218 FW_PUT_TAP_16BPC_AVX2(8, bitd)
/* Instantiate those wrappers for the 10-bit and 12-bit depths only. */
220 FW_PUT_16BPC_AVX2(10)
221 FW_PUT_16BPC_AVX2(12)
/*
 * Defines the bf-named averaging entry points ff_vvc_avg_<bd>_<opt> and
 * ff_vvc_w_avg_<bd>_<opt> as thin wrappers around the BF-named routines
 * ff_vvc_avg_<bpc>bpc_<opt> / ff_vvc_w_avg_<bpc>bpc_<opt>.
 * Each wrapper passes its arguments through unchanged and appends the
 * maximum sample value for the depth, (1 << bd) - 1, as pixel_max.
 * NOTE(review): the brace lines of both wrapper bodies are not present in
 * this listing (numbering skips 226, 228, 232, 235); confirm against the
 * real file.
 */
223 #define AVG_FUNCS(bpc, bd, opt) \
224 void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
225 const int16_t *src0, const int16_t *src1, int width, int height) \
227 BF(ff_vvc_avg, bpc, opt)(dst, dst_stride, src0, src1, width, height, (1 << bd) - 1); \
229 void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
230 const int16_t *src0, const int16_t *src1, int width, int height, \
231 int denom, int w0, int w1, int o0, int o1) \
233 BF(ff_vvc_w_avg, bpc, opt)(dst, dst_stride, src0, src1, width, height, \
234 denom, w0, w1, o0, o1, (1 << bd) - 1); \
/* 8-bit depth maps to the 8bpc routines; 10- and 12-bit share the 16bpc ones. */
237 AVG_FUNCS(8, 8, avx2)
238 AVG_FUNCS(16, 10, avx2)
239 AVG_FUNCS(16, 12, avx2)
/*
 * Defines the bf-named ALF entry points for one bit depth as wrappers
 * around the BF-named routines:
 *   - filter_luma: computes param_stride = (width >> 2) * ALF_NUM_COEFF_LUMA
 *     and passes it as the `stride` argument, plus pixel_max (1 << bd) - 1.
 *   - filter_chroma: passes 0 as the `stride` argument, plus pixel_max.
 *   - classify: first calls ff_vvc_alf_classify_grad_<bpc>bpc_<opt> to fill
 *     gradient_tmp, then ff_vvc_alf_classify_<bpc>bpc_<opt>, which reads
 *     gradient_tmp and receives bd as its bit_depth argument.
 * NOTE(review): the brace lines of the wrapper bodies are not present in
 * this listing (numbering skips 244, 248, 251, 254, 257, 260); confirm
 * against the real file.
 */
241 #define ALF_FUNCS(bpc, bd, opt) \
242 void bf(ff_vvc_alf_filter_luma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, \
243 int width, int height, const int16_t *filter, const int16_t *clip, const int vb_pos) \
245 const int param_stride = (width >> 2) * ALF_NUM_COEFF_LUMA; \
246 BF(ff_vvc_alf_filter_luma, bpc, opt)(dst, dst_stride, src, src_stride, width, height, \
247 filter, clip, param_stride, vb_pos, (1 << bd) - 1); \
249 void bf(ff_vvc_alf_filter_chroma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, \
250 int width, int height, const int16_t *filter, const int16_t *clip, const int vb_pos) \
252 BF(ff_vvc_alf_filter_chroma, bpc, opt)(dst, dst_stride, src, src_stride, width, height, \
253 filter, clip, 0, vb_pos,(1 << bd) - 1); \
255 void bf(ff_vvc_alf_classify, bd, opt)(int *class_idx, int *transpose_idx, \
256 const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos, int *gradient_tmp) \
258 BF(ff_vvc_alf_classify_grad, bpc, opt)(gradient_tmp, src, src_stride, width, height, vb_pos); \
259 BF(ff_vvc_alf_classify, bpc, opt)(class_idx, transpose_idx, gradient_tmp, width, height, vb_pos, bd); \
/* 8-bit depth maps to the 8bpc routines; 10- and 12-bit share the 16bpc ones. */
262 ALF_FUNCS(8, 8, avx2)
263 ALF_FUNCS(16, 10, avx2)
264 ALF_FUNCS(16, 12, avx2)
/*
 * Stores one pair of function pointers into two parallel 4-D tables at
 * index [C][W][idx1][idx2]:
 *   dst      <- ff_vvc_put_<name>_<D>_<opt>
 *   dst_uni  <- ff_h2656_put_uni_<name>_<D>_<opt>
 * `dst ## _uni` pastes onto the LAST token of the dst argument, so passing
 * c->inter.put addresses c->inter.put_uni for the second assignment.
 * NOTE(review): the last line ends with a continuation backslash, so the
 * line that follows the macro is spliced into its body; keep it blank.
 */
268 #define PEL_LINK(dst, C, W, idx1, idx2, name, D, opt) \
269 dst[C][W][idx1][idx2] = ff_vvc_put_## name ## _ ## D ## _##opt; \
270 dst ## _uni[C][W][idx1][idx2] = ff_h2656_put_uni_ ## name ## _ ## D ## _##opt; \
/*
 * Fills table slots W = 1..6 of component C at [my][mx] with the width
 * variants of `fname`: W 1 -> 4, 2 -> 8, 3 -> 16, 4 -> 32, 5 -> 64,
 * 6 -> 128. Slot W = 0 is not touched here.
 */
272 #define MC_TAP_LINKS(pointer, C, my, mx, fname, bitd, opt ) \
273 PEL_LINK(pointer, C, 1, my , mx , fname##4 , bitd, opt ); \
274 PEL_LINK(pointer, C, 2, my , mx , fname##8 , bitd, opt ); \
275 PEL_LINK(pointer, C, 3, my , mx , fname##16, bitd, opt ); \
276 PEL_LINK(pointer, C, 4, my , mx , fname##32, bitd, opt ); \
277 PEL_LINK(pointer, C, 5, my , mx , fname##64, bitd, opt ); \
278 PEL_LINK(pointer, C, 6, my , mx , fname##128, bitd, opt );
/* MC_TAP_LINKS with the component index fixed to LUMA. */
280 #define MC_8TAP_LINKS(pointer, my, mx, fname, bitd, opt) \
281 MC_TAP_LINKS(pointer, LUMA, my, mx, fname, bitd, opt)
/*
 * Installs the SSE4 LUMA entries of c->inter.put for one bit depth.
 * The (my, mx) index pair selects the routine family:
 *   (0, 0) pixels, (0, 1) 8tap_h, (1, 0) 8tap_v, (1, 1) 8tap_hv.
 * Requires a variable named `c` in scope at the expansion site.
 */
283 #define MC_8TAP_LINKS_SSE4(bd) \
284 MC_8TAP_LINKS(c->inter.put, 0, 0, pixels, bd, sse4); \
285 MC_8TAP_LINKS(c->inter.put, 0, 1, 8tap_h, bd, sse4); \
286 MC_8TAP_LINKS(c->inter.put, 1, 0, 8tap_v, bd, sse4); \
287 MC_8TAP_LINKS(c->inter.put, 1, 1, 8tap_hv, bd, sse4)
/*
 * CHROMA counterpart of MC_8TAP_LINKS: fills slot W = 0 with the width-2
 * variant of `fname`, then slots W = 1..6 through MC_TAP_LINKS.
 * NOTE(review): the last line ends with a continuation backslash, so the
 * line that follows the macro is spliced into its body; keep it blank.
 */
289 #define MC_4TAP_LINKS(pointer, my, mx, fname, bitd, opt) \
290 PEL_LINK(pointer, CHROMA, 0, my , mx , fname##2 , bitd, opt ); \
291 MC_TAP_LINKS(pointer, CHROMA, my, mx, fname, bitd, opt) \
/*
 * Installs the SSE4 CHROMA entries of c->inter.put for one bit depth.
 * The (my, mx) index pair selects the routine family:
 *   (0, 0) pixels, (0, 1) 4tap_h, (1, 0) 4tap_v, (1, 1) 4tap_hv.
 * Requires a variable named `c` in scope at the expansion site.
 */
293 #define MC_4TAP_LINKS_SSE4(bd) \
294 MC_4TAP_LINKS(c->inter.put, 0, 0, pixels, bd, sse4); \
295 MC_4TAP_LINKS(c->inter.put, 0, 1, 4tap_h, bd, sse4); \
296 MC_4TAP_LINKS(c->inter.put, 1, 0, 4tap_v, bd, sse4); \
297 MC_4TAP_LINKS(c->inter.put, 1, 1, 4tap_hv, bd, sse4)
/*
 * Installs all SSE4 put entries for one bit depth: the CHROMA (4-tap) set
 * followed by the LUMA (8-tap) set.
 */
299 #define MC_LINK_SSE4(bd) \
300 MC_4TAP_LINKS_SSE4(bd) \
301 MC_8TAP_LINKS_SSE4(bd)
/*
 * Overrides the W = 4, 5, 6 slots (widths 32, 64, 128) of component C in
 * c->inter.put with AVX2 routines for index pairs (0,0) pixels,
 * (0,1) <tap>tap_h and (1,0) <tap>tap_v. The (1,1) hv slots are not set.
 * In `tap##tap_h32` only the first `tap` is the macro parameter; the second
 * is part of the literal token "tap_h32", giving e.g. 8tap_h32.
 * NOTE(review): the macro opens a `do {` block, but its closing line(s) are
 * not present in this listing (numbering jumps 312 -> 315); confirm against
 * the real file.
 */
303 #define MC_TAP_LINKS_AVX2(C,tap,bd) do { \
304 PEL_LINK(c->inter.put, C, 4, 0, 0, pixels32, bd, avx2) \
305 PEL_LINK(c->inter.put, C, 5, 0, 0, pixels64, bd, avx2) \
306 PEL_LINK(c->inter.put, C, 6, 0, 0, pixels128, bd, avx2) \
307 PEL_LINK(c->inter.put, C, 4, 0, 1, tap##tap_h32, bd, avx2) \
308 PEL_LINK(c->inter.put, C, 5, 0, 1, tap##tap_h64, bd, avx2) \
309 PEL_LINK(c->inter.put, C, 6, 0, 1, tap##tap_h128, bd, avx2) \
310 PEL_LINK(c->inter.put, C, 4, 1, 0, tap##tap_v32, bd, avx2) \
311 PEL_LINK(c->inter.put, C, 5, 1, 0, tap##tap_v64, bd, avx2) \
312 PEL_LINK(c->inter.put, C, 6, 1, 0, tap##tap_v128, bd, avx2) \
/*
 * Applies MC_TAP_LINKS_AVX2 to both components: LUMA with the 8-tap
 * routines and CHROMA with the 4-tap routines.
 * The expansion already ends with a semicolon.
 */
315 #define MC_LINKS_AVX2(bd) \
316 MC_TAP_LINKS_AVX2(LUMA, 8, bd); \
317 MC_TAP_LINKS_AVX2(CHROMA, 4, bd);
/*
 * Installs the extra AVX2 entries of component C in c->inter.put:
 *   W = 3 (width 16): (0,0) pixels16, (0,1) h16, (1,0) v16, (1,1) hv16
 *   W = 4, 5, 6 (widths 32, 64, 128): (1,1) hv only
 * NOTE(review): the macro opens a `do {` block, but its closing line(s) are
 * not present in this listing (numbering jumps 326 -> 329); confirm against
 * the real file.
 */
319 #define MC_TAP_LINKS_16BPC_AVX2(C, tap, bd) do { \
320 PEL_LINK(c->inter.put, C, 3, 0, 0, pixels16, bd, avx2) \
321 PEL_LINK(c->inter.put, C, 3, 0, 1, tap##tap_h16, bd, avx2) \
322 PEL_LINK(c->inter.put, C, 3, 1, 0, tap##tap_v16, bd, avx2) \
323 PEL_LINK(c->inter.put, C, 3, 1, 1, tap##tap_hv16, bd, avx2) \
324 PEL_LINK(c->inter.put, C, 4, 1, 1, tap##tap_hv32, bd, avx2) \
325 PEL_LINK(c->inter.put, C, 5, 1, 1, tap##tap_hv64, bd, avx2) \
326 PEL_LINK(c->inter.put, C, 6, 1, 1, tap##tap_hv128, bd, avx2) \
/*
 * Applies MC_TAP_LINKS_16BPC_AVX2 to both components: LUMA with the 8-tap
 * routines and CHROMA with the 4-tap routines.
 * The expansion already ends with a semicolon.
 */
329 #define MC_LINKS_16BPC_AVX2(bd) \
330 MC_TAP_LINKS_16BPC_AVX2(LUMA, 8, bd); \
331 MC_TAP_LINKS_16BPC_AVX2(CHROMA, 4, bd);
/*
 * Points c->inter.avg and c->inter.w_avg at the bf-named wrappers
 * ff_vvc_avg_<bd>_<opt> and ff_vvc_w_avg_<bd>_<opt>.
 * Requires a variable named `c` in scope at the expansion site.
 * NOTE(review): the closing line of the `do {` block is not present in this
 * listing (numbering jumps 335 -> 338); confirm against the real file.
 */
333 #define AVG_INIT(bd, opt) do { \
334 c->inter.avg = bf(ff_vvc_avg, bd, opt); \
335 c->inter.w_avg = bf(ff_vvc_w_avg, bd, opt); \
/*
 * Fills the 2x2 table c->inter.dmvr with the AVX2 DMVR routines for one
 * bit depth: [0][0] plain, [0][1] h, [1][0] v, [1][1] hv.
 * The instruction-set suffix is hard-coded to avx2.
 * NOTE(review): the closing line of the `do {` block is not present in this
 * listing (numbering jumps 342 -> 345); confirm against the real file.
 */
338 #define DMVR_INIT(bd) do { \
339 c->inter.dmvr[0][0] = ff_vvc_dmvr_##bd##_avx2; \
340 c->inter.dmvr[0][1] = ff_vvc_dmvr_h_##bd##_avx2; \
341 c->inter.dmvr[1][0] = ff_vvc_dmvr_v_##bd##_avx2; \
342 c->inter.dmvr[1][1] = ff_vvc_dmvr_hv_##bd##_avx2; \
/*
 * Points c->inter.apply_bdof at the per-depth AVX2 wrapper
 * ff_vvc_apply_bdof_<bd>_avx2.
 * NOTE(review): the closing line of the `do {` block is not present in this
 * listing (numbering jumps 346 -> 349); confirm against the real file.
 */
345 #define OF_INIT(bd) do { \
346 c->inter.apply_bdof = ff_vvc_apply_bdof_##bd##_avx2; \
/*
 * Installs the AVX2 ALF routines for one bit depth: the luma and chroma
 * filters into c->alf.filter[LUMA] / [CHROMA], and the classifier into
 * c->alf.classify.
 * NOTE(review): the closing line of the `do {` block is not present in this
 * listing (numbering jumps 352 -> 355); confirm against the real file.
 */
349 #define ALF_INIT(bd) do { \
350 c->alf.filter[LUMA] = ff_vvc_alf_filter_luma_##bd##_avx2; \
351 c->alf.filter[CHROMA] = ff_vvc_alf_filter_chroma_##bd##_avx2; \
352 c->alf.classify = ff_vvc_alf_classify_##bd##_avx2; \
/*
 * Prototype of the AVX2 SAD routine. It takes two int16_t sample buffers,
 * a (dx, dy) pair and the block width/height, and returns an int.
 * The body is defined elsewhere; what dx/dy offset is not visible here.
 * The declaration is split one token group per line by the listing.
 */
355 int ff_vvc_sad_avx2(
const int16_t *
src0,
const int16_t *
src1,
int dx,
int dy,
int block_w,
int block_h);
/* Points c->inter.sad at the routine declared above; needs `c` in scope. */
356 #define SAD_INIT() c->inter.sad = ff_vvc_sad_avx2
386 MC_LINKS_16BPC_AVX2(10);
400 MC_LINKS_16BPC_AVX2(12);
void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
void ff_vvc_apply_bdof_avx2(uint8_t *dst, ptrdiff_t dst_stride, const int16_t *src0, const int16_t *src1, int w, int h, int pixel_max)
#define ALF_PROTOTYPES(bpc, bd, opt)
#define EXTERNAL_AVX2_FAST(flags)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static atomic_int cpu_flags
#define ALF_BPC_PROTOTYPES(bpc, opt)
#define AVG_PROTOTYPES(bd, opt)
#define OF_PROTOTYPES(bd, opt)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can, and in some cases has, led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c[... text truncated in the source]
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
#define PUT_BPC_PROTOTYPES(name, opt)
#define DMVR_PROTOTYPES(bd, opt)
#define EXTERNAL_SSE4(flags)
#define AVG_BPC_PROTOTYPES(bpc, opt)
#define PUT_TAP_PROTOTYPES(n, opt)