FFmpeg
hevc_deblock.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17  */
18 
19 #include <string.h>
20 
21 #include "libavutil/intreadwrite.h"
22 #include "libavutil/macros.h"
23 #include "libavutil/mem_internal.h"
24 
25 #include "libavcodec/hevcdsp.h"
26 
27 #include "checkasm.h"
28 
// AND masks applied to each random 32-bit word so the samples packed in it
// stay within range. Indexed by (bit_depth - 8) >> 1:
//   [0] 8-bit  (all bits kept),
//   [1] 10-bit (two 16-bit samples per word),
//   [2] 12-bit (two 16-bit samples per word).
static const uint32_t pixel_mask[3] = {
    0xffffffff,
    0x03ff03ff,
    0x0fff0fff,
};

// Bytes per sample; expands against an `int bit_depth` in the using scope.
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)

// Scratch buffer geometry in bytes. The sizes are chosen for the high bit
// depths (two bytes per sample).
#define BUF_STRIDE (16 * 2)
#define BUF_LINES (16)
// Byte offset into a scratch buffer of the pixel handed to the filters.
#define BUF_OFFSET (2 * BUF_STRIDE * BUF_LINES)
// Total size of one scratch buffer.
#define BUF_SIZE (2 * BUF_STRIDE * BUF_LINES + BUF_OFFSET * 2)
37 
// Fill buf0 and buf1 with identical random data, one 32-bit word at a time,
// masking each word with the pixel_mask[] entry for the current bit depth.
//
// Expands against an `int bit_depth` in the using scope. Whole words are
// stored, so `size` should be a multiple of 4; otherwise the last store
// extends past `size` bytes. The stores go through AV_WN32A, so both buffers
// are presumably required to be 32-bit aligned -- TODO confirm against
// libavutil/intreadwrite.h.
//
// Every macro parameter is parenthesised so that expression arguments
// (e.g. `a + off`, `n / 2`) keep their intended grouping.
#define randomize_buffers(buf0, buf1, size)                     \
    do {                                                        \
        uint32_t mask = pixel_mask[(bit_depth - 8) >> 1];       \
        int k;                                                  \
        for (k = 0; k < (size); k += 4) {                       \
            uint32_t r = rnd() & mask;                          \
            AV_WN32A((buf0) + k, r);                            \
            AV_WN32A((buf1) + k, r);                            \
        }                                                       \
    } while (0)
48 
50 {
51  // see tctable[] in hevc_filter.c, we check full range
52  int32_t tc[2] = { rnd() % 25, rnd() % 25 };
53  // no_p, no_q can only be { 0,0 } for the simpler assembly (non *_c
54  // variant) functions, see deblocking_filter_CTB() in hevc_filter.c
55  uint8_t no_p[2] = { rnd() & c, rnd() & c };
56  uint8_t no_q[2] = { rnd() & c, rnd() & c };
57  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
58  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
59 
60  declare_func(void, uint8_t *pix, ptrdiff_t stride, int32_t *tc, uint8_t *no_p, uint8_t *no_q);
61 
62  if (check_func(c ? h->hevc_h_loop_filter_chroma_c : h->hevc_h_loop_filter_chroma,
63  "hevc_h_loop_filter_chroma%d%s", bit_depth, c ? "_full" : ""))
64  {
65  randomize_buffers(buf0, buf1, BUF_SIZE);
66 
67  call_ref(buf0 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);
68  call_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);
69  if (memcmp(buf0, buf1, BUF_SIZE))
70  fail();
71  bench_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);
72  }
73 
74  if (check_func(c ? h->hevc_v_loop_filter_chroma_c : h->hevc_v_loop_filter_chroma,
75  "hevc_v_loop_filter_chroma%d%s", bit_depth, c ? "_full" : ""))
76  {
77  randomize_buffers(buf0, buf1, BUF_SIZE);
78 
79  call_ref(buf0 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);
80  call_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);
81  if (memcmp(buf0, buf1, BUF_SIZE))
82  fail();
83  bench_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);
84  }
85 }
86 
// Taps of one line across the edge: P3..P0 lie before the edge, Q0..Q3 after
// it. These expand against a local byte pointer `buf` (addressing Q0) and a
// local `xstride` (byte step from one tap to the next).
#define P3 buf[-4 * xstride]
#define P2 buf[-3 * xstride]
#define P1 buf[-2 * xstride]
#define P0 buf[-1 * xstride]
#define Q0 buf[0 * xstride]
#define Q1 buf[1 * xstride]
#define Q2 buf[2 * xstride]
#define Q3 buf[3 * xstride]

// 2.5 * tc[x], rounded up; expands against a local `tc` array.
#define TC25(x) ((tc[x] * 5 + 1) >> 1)
// Keep the low `bit_depth` bits of x. The argument is parenthesised so that
// operators binding looser than `&` (|, ?:, ...) cannot regroup it.
#define MASK(x) ((uint16_t)((x) & ((1 << (bit_depth)) - 1)))
// Read the sample that starts at tap x: one byte when SIZEOF_PIXEL is 1,
// otherwise a 16-bit value.
#define GET(x) ((SIZEOF_PIXEL == 1) ? *(uint8_t*)(&(x)) : *(uint16_t*)(&(x)))
// Store y, masked to the bit depth, into the sample that starts at tap x.
#define SET(x, y) do { \
    uint16_t z = MASK(y); \
    if (SIZEOF_PIXEL == 1) \
        *(uint8_t*)(&(x)) = z; \
    else \
        *(uint16_t*)(&(x)) = z; \
} while (0)
// Random value near the sample at x: the sample minus diff is clipped to the
// valid range, then a random amount in [0, max(2 * diff, 1)) is added. The
// sum itself is not clipped again; SET() masks it when it is stored.
// The whole expansion is parenthesised so it can be used inside larger
// expressions.
#define RANDCLIP(x, diff) (av_clip(GET(x) - (diff), 0, \
    (1 << (bit_depth)) - 1) + rnd() % FFMAX(2 * (diff), 1))
108 
109 // NOTE: this function doesn't work 'correctly' in that it won't always choose
110 // strong/strong or weak/weak, in most cases it tends to but will sometimes mix
111 // weak/strong or even skip sometimes. This is more useful to test correctness
112 // for these functions, though it does make benching them difficult. The easiest
113 // way to bench these functions is to check an overall decode since there are too
114 // many paths and ways to trigger the deblock: we would have to bench all
115 // permutations of weak/strong/skip/nd_q/nd_p/no_q/no_p and it quickly becomes
116 // too much.
117 static void randomize_luma_buffers(int type, int *beta, int32_t tc[2],
118  uint8_t *buf, ptrdiff_t xstride, ptrdiff_t ystride, int bit_depth)
119 {
120  int i, j, b3, tc25, tc25diff, b3diff;
121  // both tc & beta are unscaled inputs
122  // minimum useful value is 1, full range 0-24
123  tc[0] = (rnd() % 25) + 1;
124  tc[1] = (rnd() % 25) + 1;
125  // minimum useful value for 8bit is 8
126  *beta = (rnd() % 57) + 8;
127 
128  switch (type) {
129  case 0: // strong
130  for (j = 0; j < 2; j++) {
131  tc25 = TC25(j) << (bit_depth - 8);
132  tc25diff = FFMAX(tc25 - 1, 0);
133  // 4 lines per tc
134  for (i = 0; i < 4; i++) {
135  b3 = (*beta << (bit_depth - 8)) >> 3;
136 
137  SET(P0, rnd() % (1 << bit_depth));
138  SET(Q0, RANDCLIP(P0, tc25diff));
139 
140  // p3 - p0 up to beta3 budget
141  b3diff = rnd() % b3;
142  SET(P3, RANDCLIP(P0, b3diff));
143  // q3 - q0, reduced budget
144  b3diff = rnd() % FFMAX(b3 - b3diff, 1);
145  SET(Q3, RANDCLIP(Q0, b3diff));
146 
147  // same concept, budget across 4 pixels
148  b3 -= b3diff = rnd() % FFMAX(b3, 1);
149  SET(P2, RANDCLIP(P0, b3diff));
150  b3 -= b3diff = rnd() % FFMAX(b3, 1);
151  SET(Q2, RANDCLIP(Q0, b3diff));
152 
153  // extra reduced budget for weighted pixels
154  b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);
155  SET(P1, RANDCLIP(P0, b3diff));
156  b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);
157  SET(Q1, RANDCLIP(Q0, b3diff));
158 
159  buf += ystride;
160  }
161  }
162  break;
163  case 1: // weak
164  for (j = 0; j < 2; j++) {
165  tc25 = TC25(j) << (bit_depth - 8);
166  tc25diff = FFMAX(tc25 - 1, 0);
167  // 4 lines per tc
168  for (i = 0; i < 4; i++) {
169  // Weak filtering is signficantly simpler to activate as
170  // we only need to satisfy d0 + d3 < beta, which
171  // can be simplified to d0 + d0 < beta. Using the above
172  // derivations but substiuting b3 for b1 and ensuring
173  // that P0/Q0 are at least 1/2 tc25diff apart (tending
174  // towards 1/2 range).
175  b3 = (*beta << (bit_depth - 8)) >> 1;
176 
177  SET(P0, rnd() % (1 << bit_depth));
178  SET(Q0, RANDCLIP(P0, tc25diff >> 1) +
179  (tc25diff >> 1) * (P0 < (1 << (bit_depth - 1))) ? 1 : -1);
180 
181  // p3 - p0 up to beta3 budget
182  b3diff = rnd() % b3;
183  SET(P3, RANDCLIP(P0, b3diff));
184  // q3 - q0, reduced budget
185  b3diff = rnd() % FFMAX(b3 - b3diff, 1);
186  SET(Q3, RANDCLIP(Q0, b3diff));
187 
188  // same concept, budget across 4 pixels
189  b3 -= b3diff = rnd() % FFMAX(b3, 1);
190  SET(P2, RANDCLIP(P0, b3diff));
191  b3 -= b3diff = rnd() % FFMAX(b3, 1);
192  SET(Q2, RANDCLIP(Q0, b3diff));
193 
194  // extra reduced budget for weighted pixels
195  b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);
196  SET(P1, RANDCLIP(P0, b3diff));
197  b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);
198  SET(Q1, RANDCLIP(Q0, b3diff));
199 
200  buf += ystride;
201  }
202  }
203  break;
204  case 2: // none
205  *beta = 0; // ensure skip
206  for (i = 0; i < 8; i++) {
207  // we can just fill with completely random data, nothing should be touched.
208  SET(P3, rnd()); SET(P2, rnd()); SET(P1, rnd()); SET(P0, rnd());
209  SET(Q0, rnd()); SET(Q1, rnd()); SET(Q2, rnd()); SET(Q3, rnd());
210  buf += ystride;
211  }
212  break;
213  }
214 }
215 
217 {
218  const char *type;
219  const char *types[3] = { "strong", "weak", "skip" };
220  int beta;
221  int32_t tc[2] = {0};
222  uint8_t no_p[2] = { rnd() & c, rnd() & c };
223  uint8_t no_q[2] = { rnd() & c, rnd() & c };
224  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
225  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
226  uint8_t *ptr0 = buf0 + BUF_OFFSET,
227  *ptr1 = buf1 + BUF_OFFSET;
228 
229  declare_func(void, uint8_t *pix, ptrdiff_t stride, int beta, int32_t *tc, uint8_t *no_p, uint8_t *no_q);
230 
231  for (int j = 0; j < 3; j++) {
232  type = types[j];
233  if (check_func(c ? h->hevc_h_loop_filter_luma_c : h->hevc_h_loop_filter_luma,
234  "hevc_h_loop_filter_luma%d_%s%s", bit_depth, type, c ? "_full" : ""))
235  {
237  memcpy(buf1, buf0, BUF_SIZE);
238 
239  call_ref(ptr0, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);
240  call_new(ptr1, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);
241  if (memcmp(buf0, buf1, BUF_SIZE))
242  fail();
243  bench_new(ptr1, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);
244  }
245 
246  if (check_func(c ? h->hevc_v_loop_filter_luma_c : h->hevc_v_loop_filter_luma,
247  "hevc_v_loop_filter_luma%d_%s%s", bit_depth, type, c ? "_full" : ""))
248  {
250  memcpy(buf1, buf0, BUF_SIZE);
251 
252  call_ref(ptr0, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);
253  call_new(ptr1, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);
254  if (memcmp(buf0, buf1, BUF_SIZE))
255  fail();
256  bench_new(ptr1, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);
257  }
258  }
259 }
260 
262 {
264  int bit_depth;
265  for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
268  }
269  report("chroma");
270  for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
273  }
274  report("chroma_full");
275  for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
278  }
279  report("luma");
280  for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
283  }
284  report("luma_full");
285 }
SIZEOF_PIXEL
#define SIZEOF_PIXEL
Definition: hevc_deblock.c:31
SET
#define SET(x, y)
Definition: hevc_deblock.c:99
mem_internal.h
check_func
#define check_func(func,...)
Definition: checkasm.h:170
Q2
#define Q2
Definition: hevc_deblock.c:93
Q3
#define Q3
Definition: hevc_deblock.c:94
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
call_ref
#define call_ref(...)
Definition: checkasm.h:185
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:245
macros.h
fail
#define fail()
Definition: checkasm.h:179
checkasm.h
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
rnd
#define rnd()
Definition: checkasm.h:163
b3
static double b3(void *priv, double x, double y)
Definition: vf_xfade.c:2037
intreadwrite.h
Q0
#define Q0
Definition: hevc_deblock.c:91
BUF_STRIDE
#define BUF_STRIDE
Definition: hevc_deblock.c:32
P2
#define P2
Definition: hevc_deblock.c:88
randomize_buffers
#define randomize_buffers(buf0, buf1, size)
Definition: hevc_deblock.c:38
hevcdsp.h
P1
#define P1
Definition: hevc_deblock.c:89
call_new
#define call_new(...)
Definition: checkasm.h:288
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:156
P3
#define P3
Definition: hevc_deblock.c:87
BUF_SIZE
#define BUF_SIZE
Definition: hevc_deblock.c:36
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
check_deblock_luma
static void check_deblock_luma(HEVCDSPContext *h, int bit_depth, int c)
Definition: hevc_deblock.c:216
TC25
#define TC25(x)
Definition: hevc_deblock.c:96
HEVCDSPContext
Definition: hevcdsp.h:47
report
#define report
Definition: checkasm.h:182
bench_new
#define bench_new(...)
Definition: checkasm.h:358
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
P0
#define P0
Definition: hevc_deblock.c:90
randomize_luma_buffers
static void randomize_luma_buffers(int type, int *beta, int32_t tc[2], uint8_t *buf, ptrdiff_t xstride, ptrdiff_t ystride, int bit_depth)
Definition: hevc_deblock.c:117
stride
#define stride
Definition: h264pred_template.c:537
pixel_mask
static const uint32_t pixel_mask[3]
Definition: hevc_deblock.c:29
ff_hevc_dsp_init
void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
Definition: hevcdsp.c:128
checkasm_check_hevc_deblock
void checkasm_check_hevc_deblock(void)
Definition: hevc_deblock.c:261
tc
#define tc
Definition: regdef.h:69
check_deblock_chroma
static void check_deblock_chroma(HEVCDSPContext *h, int bit_depth, int c)
Definition: hevc_deblock.c:49
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:174
BUF_OFFSET
#define BUF_OFFSET
Definition: hevc_deblock.c:35
int32_t
int32_t
Definition: audioconvert.c:56
h
h
Definition: vp9dsp_template.c:2038
RANDCLIP
#define RANDCLIP(x, diff)
Definition: hevc_deblock.c:106
Q1
#define Q1
Definition: hevc_deblock.c:92