FFmpeg
vp8dsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016 Martin Storsjo
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
21 #include <stdbool.h>
22 #include <string.h>
23 
24 #include "config_components.h"
25 #include "libavcodec/vp8dsp.h"
26 
27 #include "libavutil/common.h"
28 #include "libavutil/intreadwrite.h"
29 #include "libavutil/mem_internal.h"
30 
31 #include "checkasm.h"
32 
#define PIXEL_STRIDE 16

/*
 * Fill a 4x4 pixel block in both src and dst with values from rnd() (one
 * AV_WN32A store per row, rows being `stride` bytes apart) and store the
 * per-pixel difference src - dst into coef as 16 row-major values.
 */
#define randomize_buffers(src, dst, stride, coef)                          \
    do {                                                                   \
        int rb_row, rb_col;                                                \
        for (rb_row = 0; rb_row < 4; rb_row++) {                           \
            AV_WN32A((src) + rb_row * (stride), rnd());                    \
            AV_WN32A((dst) + rb_row * (stride), rnd());                    \
            for (rb_col = 0; rb_col < 4; rb_col++)                         \
                (coef)[rb_row * 4 + rb_col] =                              \
                    (src)[rb_row * (stride) + rb_col] -                    \
                    (dst)[rb_row * (stride) + rb_col];                     \
        }                                                                  \
    } while (0)
46 
/*
 * In-place forward 4x4 transform of the 16 row-major values in coef.
 *
 * A pass over every row is followed by a pass over every column. Within a
 * pass all four inputs of a row/column are read before any of them is
 * overwritten.
 */
static void dct4x4(int16_t *coef)
{
    int n;

    /* Row pass: sums and differences are pre-scaled by 8. */
    for (n = 0; n < 4; n++) {
        int16_t *row = coef + n * 4;
        const int sum_outer  = (row[0] + row[3]) * 8;
        const int sum_inner  = (row[1] + row[2]) * 8;
        const int diff_inner = (row[1] - row[2]) * 8;
        const int diff_outer = (row[0] - row[3]) * 8;

        row[0] = sum_outer + sum_inner;
        row[1] = (diff_inner * 2217 + diff_outer * 5352 + 14500) >> 12;
        row[2] = sum_outer - sum_inner;
        row[3] = (diff_outer * 2217 - diff_inner * 5352 + 7500) >> 12;
    }

    /* Column pass: elements of one column are 4 entries apart. */
    for (n = 0; n < 4; n++) {
        int16_t *col = coef + n;
        const int sum_outer  = col[0] + col[12];
        const int sum_inner  = col[4] + col[8];
        const int diff_inner = col[4] - col[8];
        const int diff_outer = col[0] - col[12];

        col[0]  = (sum_outer + sum_inner + 7) >> 4;
        /* One is added whenever the outer difference is non-zero. */
        col[4]  = ((diff_inner * 2217 + diff_outer * 5352 + 12000) >> 16) +
                  (diff_outer != 0);
        col[8]  = (sum_outer - sum_inner + 7) >> 4;
        col[12] = (diff_outer * 2217 - diff_inner * 5352 + 51000) >> 16;
    }
}
71 
/*
 * In-place forward 4x4 Walsh-Hadamard style transform of the 16 row-major
 * values in coef.
 *
 * Pass 0 works on columns (elements 4 apart) and stores the results as-is;
 * pass 1 works on rows (adjacent elements) and stores the results doubled.
 */
static void wht4x4(int16_t *coef)
{
    int pass, n;

    for (pass = 0; pass < 2; pass++) {
        const int step  = pass ? 1 : 4; /* distance between the 4 inputs */
        const int pitch = pass ? 4 : 1; /* distance between two vectors */
        const int gain  = pass ? 2 : 1; /* output scale of this pass */

        for (n = 0; n < 4; n++) {
            int16_t *v = coef + n * pitch;
            /* Read all four inputs before anything is written back. */
            const int sum  = v[0] + v[step];
            const int diff = v[3 * step] - v[2 * step];
            const int half = (sum - diff) >> 1;
            const int lo   = half - v[step];
            const int hi   = half - v[2 * step];

            v[0]        = (sum - hi) * gain;
            v[step]     = hi * gain;
            v[2 * step] = (diff + lo) * gain;
            v[3 * step] = lo * gain;
        }
    }
}
112 
113 static void check_idct(VP8DSPContext *d, bool is_vp7)
114 {
115  LOCAL_ALIGNED_16(uint8_t, src, [4 * 4]);
116  LOCAL_ALIGNED_16(uint8_t, dst, [4 * 4]);
117  LOCAL_ALIGNED_16(uint8_t, dst0, [4 * 4]);
118  LOCAL_ALIGNED_16(uint8_t, dst1, [4 * 4]);
119  LOCAL_ALIGNED_16(int16_t, coef, [4 * 4]);
120  LOCAL_ALIGNED_16(int16_t, subcoef0, [4 * 4]);
121  LOCAL_ALIGNED_16(int16_t, subcoef1, [4 * 4]);
122  int dc;
123  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, ptrdiff_t stride);
124 
125  randomize_buffers(src, dst, 4, coef);
126 
127  dct4x4(coef);
128 
129  for (dc = 0; dc <= 1; dc++) {
130  void (*idct)(uint8_t *, int16_t *, ptrdiff_t) = dc ? d->vp8_idct_dc_add : d->vp8_idct_add;
131 
132  if (check_func(idct, "vp%d_idct_%sadd", 8 - is_vp7, dc ? "dc_" : "")) {
133  if (dc) {
134  memset(subcoef0, 0, 4 * 4 * sizeof(int16_t));
135  subcoef0[0] = coef[0];
136  } else {
137  memcpy(subcoef0, coef, 4 * 4 * sizeof(int16_t));
138  }
139  memcpy(dst0, dst, 4 * 4);
140  memcpy(dst1, dst, 4 * 4);
141  memcpy(subcoef1, subcoef0, 4 * 4 * sizeof(int16_t));
142  // Note, this uses a pixel stride of 4, even though the real decoder uses a stride as a
143  // multiple of 16. If optimizations want to take advantage of that, this test needs to be
144  // updated to make it more like the h264dsp tests.
145  call_ref(dst0, subcoef0, 4);
146  call_new(dst1, subcoef1, 4);
147  if (memcmp(dst0, dst1, 4 * 4) ||
148  memcmp(subcoef0, subcoef1, 4 * 4 * sizeof(int16_t)))
149  fail();
150 
151  bench_new(dst1, subcoef1, 4);
152  }
153  }
154 }
155 
156 static void check_idct_dc4(VP8DSPContext *d, bool is_vp7)
157 {
158  LOCAL_ALIGNED_16(uint8_t, src, [4 * 4 * 4]);
159  LOCAL_ALIGNED_16(uint8_t, dst, [4 * 4 * 4]);
160  LOCAL_ALIGNED_16(uint8_t, dst0, [4 * 4 * 4]);
161  LOCAL_ALIGNED_16(uint8_t, dst1, [4 * 4 * 4]);
162  LOCAL_ALIGNED_16(int16_t, coef, [4], [4 * 4]);
163  LOCAL_ALIGNED_16(int16_t, subcoef0, [4], [4 * 4]);
164  LOCAL_ALIGNED_16(int16_t, subcoef1, [4], [4 * 4]);
165  int i, chroma;
166  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
167 
168  for (chroma = 0; chroma <= 1; chroma++) {
169  void (*idct4dc)(uint8_t *, int16_t[4][16], ptrdiff_t) = chroma ? d->vp8_idct_dc_add4uv : d->vp8_idct_dc_add4y;
170  if (check_func(idct4dc, "vp%d_idct_dc_add4%s", 8 - is_vp7, chroma ? "uv" : "y")) {
171  ptrdiff_t stride = chroma ? 8 : 16;
172  int w = chroma ? 2 : 4;
173  for (i = 0; i < 4; i++) {
174  int blockx = 4 * (i % w);
175  int blocky = 4 * (i / w);
176  randomize_buffers(src + stride * blocky + blockx, dst + stride * blocky + blockx, stride, coef[i]);
177  dct4x4(coef[i]);
178  memset(&coef[i][1], 0, 15 * sizeof(int16_t));
179  }
180 
181  memcpy(dst0, dst, 4 * 4 * 4);
182  memcpy(dst1, dst, 4 * 4 * 4);
183  memcpy(subcoef0, coef, 4 * 4 * 4 * sizeof(int16_t));
184  memcpy(subcoef1, coef, 4 * 4 * 4 * sizeof(int16_t));
185  call_ref(dst0, subcoef0, stride);
186  call_new(dst1, subcoef1, stride);
187  if (memcmp(dst0, dst1, 4 * 4 * 4) ||
188  memcmp(subcoef0, subcoef1, 4 * 4 * 4 * sizeof(int16_t)))
189  fail();
190  bench_new(dst1, subcoef1, stride);
191  }
192  }
193 
194 }
195 
196 static void check_luma_dc_wht(VP8DSPContext *d, bool is_vp7)
197 {
198  LOCAL_ALIGNED_16(int16_t, dc, [4 * 4]);
199  LOCAL_ALIGNED_16(int16_t, dc0, [4 * 4]);
200  LOCAL_ALIGNED_16(int16_t, dc1, [4 * 4]);
201  int16_t block[4][4][16];
202  LOCAL_ALIGNED_16(int16_t, block0, [4], [4][16]);
203  LOCAL_ALIGNED_16(int16_t, block1, [4], [4][16]);
204  int dc_only;
205  int blockx, blocky;
206  declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t block[4][4][16], int16_t dc[16]);
207 
208  for (blocky = 0; blocky < 4; blocky++) {
209  for (blockx = 0; blockx < 4; blockx++) {
210  uint8_t src[16], dst[16];
211  randomize_buffers(src, dst, 4, block[blocky][blockx]);
212 
213  dct4x4(block[blocky][blockx]);
214  dc[blocky * 4 + blockx] = block[blocky][blockx][0];
215  block[blocky][blockx][0] = rnd();
216  }
217  }
218  wht4x4(dc);
219 
220  for (dc_only = 0; dc_only <= 1; dc_only++) {
221  void (*idct)(int16_t [4][4][16], int16_t [16]) = dc_only ? d->vp8_luma_dc_wht_dc : d->vp8_luma_dc_wht;
222 
223  if (check_func(idct, "vp%d_luma_dc_wht%s", 8 - is_vp7, dc_only ? "_dc" : "")) {
224  if (dc_only) {
225  memset(dc0, 0, 16 * sizeof(int16_t));
226  dc0[0] = dc[0];
227  } else {
228  memcpy(dc0, dc, 16 * sizeof(int16_t));
229  }
230  memcpy(dc1, dc0, 16 * sizeof(int16_t));
231  memcpy(block0, block, 4 * 4 * 16 * sizeof(int16_t));
232  memcpy(block1, block, 4 * 4 * 16 * sizeof(int16_t));
233  call_ref(block0, dc0);
234  call_new(block1, dc1);
235  if (memcmp(block0, block1, 4 * 4 * 16 * sizeof(int16_t)) ||
236  memcmp(dc0, dc1, 16 * sizeof(int16_t)))
237  fail();
238  bench_new(block1, dc1);
239  }
240  }
241 }
242 
/* Row pitch of the motion compensation source buffer. */
#define SRC_BUF_STRIDE 32
/* Bytes of the source buffer in use: `size` rows (doubled when size is
 * below 16) plus 5 extra rows. Reads `size` from the enclosing scope. */
#define SRC_BUF_SIZE (((size << (size < 16)) + 5) * SRC_BUF_STRIDE)
// The subpixel interpolation filters of the mc functions read the 2 pixels
// preceding the block in each direction; the additional +1 ensures that the
// addresses actually loaded from are always unaligned.
#define src (buf + 2 * SRC_BUF_STRIDE + 2 + 1)

#undef randomize_buffers
/* Fill the first SRC_BUF_SIZE bytes of `buf` with values from rnd(), four
 * bytes per AV_WN32A store. Reads `buf` and `size` from the enclosing scope. */
#define randomize_buffers()                                                \
    do {                                                                   \
        int fill_pos;                                                      \
        for (fill_pos = 0; fill_pos < SRC_BUF_SIZE; fill_pos += 4)         \
            AV_WN32A(buf + fill_pos, rnd());                               \
    } while (0)
258 
259 static void check_mc(VP8DSPContext *d)
260 {
261  LOCAL_ALIGNED_16(uint8_t, buf, [32 * 32]);
262  LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16]);
263  LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16]);
264  int type, k, dx, dy;
265  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t,
266  const uint8_t *, ptrdiff_t, int, int, int);
267 
268  for (type = 0; type < 2; type++) {
269  for (k = 1; k < 8; k++) {
270  int hsize = k / 3;
271  int size = 16 >> hsize;
272  int height = (size << 1) >> (k % 3);
273  for (dy = 0; dy < 3; dy++) {
274  for (dx = 0; dx < 3; dx++) {
275  char str[100];
277 
278  if (dx || dy) {
279  if (type == 0) {
280  static const char *dx_names[] = { "", "h4", "h6" };
281  static const char *dy_names[] = { "", "v4", "v6" };
282  snprintf(str, sizeof(str), "epel%d_%s%s", size, dx_names[dx], dy_names[dy]);
283  } else {
284  snprintf(str, sizeof(str), "bilin%d_%s%s", size, dx ? "h" : "", dy ? "v" : "");
285  }
286  } else {
287  snprintf(str, sizeof(str), "pixels%d", size);
288  }
289 
290  if (check_func(func, "vp8_put_%s", str)) {
291  int mx, my;
292  int i;
293  if (type == 0) {
294  mx = dx == 2 ? 2 + 2 * (rnd() % 3) : dx == 1 ? 1 + 2 * (rnd() % 4) : 0;
295  my = dy == 2 ? 2 + 2 * (rnd() % 3) : dy == 1 ? 1 + 2 * (rnd() % 4) : 0;
296  } else {
297  mx = dx ? 1 + (rnd() % 7) : 0;
298  my = dy ? 1 + (rnd() % 7) : 0;
299  }
301  for (i = -2; i <= 3; i++) {
302  int val = (i == -1 || i == 2) ? 0 : 0xff;
303  // Set pixels in the first row and column to the maximum pattern,
304  // to test for potential overflows in the filter.
305  src[i ] = val;
306  src[i * SRC_BUF_STRIDE] = val;
307  }
308  call_ref(dst0, size, src, SRC_BUF_STRIDE, height, mx, my);
309  call_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, my);
310  if (memcmp(dst0, dst1, size * height))
311  fail();
312  bench_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, my);
313  }
314  }
315  }
316  }
317  }
318 }
319 
#undef randomize_buffers

// Store c, clipped to 0..255, b pixel steps away from position a. Uses `buf`
// and `jstride` of the enclosing scope. c is deliberately not parenthesized:
// callers pass assignments such as `q1 = q0` so that the variable receives
// the complete expression built by setdx().
#define setpx(a, b, c) buf[(a) + (b) * jstride] = av_clip_uint8(c)
// Store c offset by a value in [-d, d]
#define setdx(a, b, c, d) setpx(a, b, c - (d) + (rnd() % ((d) * 2 + 1)))
// Store c moved by a distance in [d, d + e - 1] towards the middle of the
// value range (so that it will not be clipped) and remember the result in o
#define setdx2(a, b, o, c, d, e) setpx(a, b, o = c + ((d) + (rnd() % (e))) * (c >= 128 ? -1 : 1))

/*
 * Write pixel patterns across an edge on every second line of an 8-line
 * group, starting at line `lineoff`.
 *
 * buf points at the first pixel after the edge (q0); positions -4..3 around
 * it are written. dir == 0 steps `str` bytes between lines and 1 byte
 * between pixels, dir != 0 the other way round.
 *
 * With force_hev == 0, line 0 gets a q0/q1 step above hev_thresh, line 2 a
 * p0/p1 step above hev_thresh and lines 4 and 6 get neither. force_hev > 0
 * produces both steps on every line, force_hev < 0 on none.
 */
static void randomize_loopfilter_buffers(int lineoff, int str,
                                         int dir, int flim_E, int flim_I,
                                         int hev_thresh, uint8_t *buf,
                                         int force_hev)
{
    const uint32_t mask = 0xff;
    const int istride   = dir ? 1 : str;
    const int jstride   = dir ? str : 1;
    const int first     = dir ? lineoff : lineoff * str;
    int line;

    for (line = 0; line < 8; line += 2) {
        const int hev_q = force_hev > 0 || (force_hev >= 0 && line == 0);
        const int hev_p = force_hev > 0 || (force_hev >= 0 && line == 2);
        const int idx   = first + line * istride;
        int p2, p1, p0, q0, q1, q2;

        /* q side: q0 is free, each following pixel is derived from the
         * previous value. */
        setpx(idx, 0, q0 = rnd() & mask);
        if (hev_q) {
            setdx2(idx, 1, q1, q0, hev_thresh + 1, flim_I - hev_thresh - 1);
        } else {
            setdx(idx, 1, q1 = q0, hev_thresh);
        }
        setdx(idx, 2, q2 = q1, flim_I);
        setdx(idx, 3, q2, flim_I);

        /* p side: p0 stays within flim_E >> 2 of q0. */
        setdx(idx, -1, p0 = q0, flim_E >> 2);
        if (hev_p) {
            setdx2(idx, -2, p1, p0, hev_thresh + 1, flim_I - hev_thresh - 1);
        } else {
            setdx(idx, -2, p1 = p0, hev_thresh);
        }
        setdx(idx, -3, p2 = p1, flim_I);
        setdx(idx, -4, p2, flim_I);
    }
}
360 
// Write rnd() & 0xff into every pixel of a w x h area whose rows are
// `stride` bytes apart, row by row from left to right
static void fill_loopfilter_buffers(uint8_t *buf, ptrdiff_t stride, int w, int h)
{
    int row, col;

    for (row = 0; row < h; row++) {
        uint8_t *line = buf + row * stride;

        for (col = 0; col < w; col++)
            line[col] = rnd() & 0xff;
    }
}
369 
/* Shorthand for randomize_loopfilter_buffers() that picks up dir, flim_E,
 * flim_I and hev_thresh from the scope of the caller. */
#define randomize_buffers(pixels, first_line, pitch, hev_mode) \
    randomize_loopfilter_buffers(first_line, pitch, dir, flim_E, flim_I, hev_thresh, pixels, hev_mode)
372 
373 static void check_loopfilter_16y(VP8DSPContext *d, bool is_vp7)
374 {
375  LOCAL_ALIGNED_16(uint8_t, base0, [32 + 16 * 16]);
376  LOCAL_ALIGNED_16(uint8_t, base1, [32 + 16 * 16]);
377  int dir, edge, force_hev;
378  int flim_E = 20, flim_I = 10, hev_thresh = 7;
379  declare_func(void, uint8_t *, ptrdiff_t, int, int, int);
380 
381  for (dir = 0; dir < 2; dir++) {
382  int midoff = dir ? 4 * 16 : 4;
383  int midoff_aligned = dir ? 4 * 16 : 16;
384  uint8_t *buf0 = base0 + midoff_aligned;
385  uint8_t *buf1 = base1 + midoff_aligned;
386  for (edge = 0; edge < 2; edge++) {
387  void (*func)(uint8_t *, ptrdiff_t, int, int, int) = NULL;
388  switch (dir << 1 | edge) {
389  case (0 << 1) | 0: func = d->vp8_h_loop_filter16y; break;
390  case (1 << 1) | 0: func = d->vp8_v_loop_filter16y; break;
391  case (0 << 1) | 1: func = d->vp8_h_loop_filter16y_inner; break;
392  case (1 << 1) | 1: func = d->vp8_v_loop_filter16y_inner; break;
393  }
394  if (check_func(func, "vp%d_loop_filter16y%s_%s", 8 - is_vp7, edge ? "_inner" : "", dir ? "v" : "h")) {
395  for (force_hev = -1; force_hev <= 1; force_hev++) {
396  fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
397  randomize_buffers(buf0, 0, 16, force_hev);
398  randomize_buffers(buf0, 8, 16, force_hev);
399  memcpy(buf1 - midoff, buf0 - midoff, 16 * 16);
400  call_ref(buf0, 16, flim_E, flim_I, hev_thresh);
401  call_new(buf1, 16, flim_E, flim_I, hev_thresh);
402  if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16))
403  fail();
404  }
405  fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
406  randomize_buffers(buf0, 0, 16, 0);
407  randomize_buffers(buf0, 8, 16, 0);
408  bench_new(buf0, 16, flim_E, flim_I, hev_thresh);
409  }
410  }
411  }
412 }
413 
414 static void check_loopfilter_8uv(VP8DSPContext *d, bool is_vp7)
415 {
416  LOCAL_ALIGNED_16(uint8_t, base0u, [32 + 16 * 16]);
417  LOCAL_ALIGNED_16(uint8_t, base0v, [32 + 16 * 16]);
418  LOCAL_ALIGNED_16(uint8_t, base1u, [32 + 16 * 16]);
419  LOCAL_ALIGNED_16(uint8_t, base1v, [32 + 16 * 16]);
420  int dir, edge, force_hev;
421  int flim_E = 20, flim_I = 10, hev_thresh = 7;
422  declare_func(void, uint8_t *, uint8_t *, ptrdiff_t, int, int, int);
423 
424  for (dir = 0; dir < 2; dir++) {
425  int midoff = dir ? 4 * 16 : 4;
426  int midoff_aligned = dir ? 4 * 16 : 16;
427  uint8_t *buf0u = base0u + midoff_aligned;
428  uint8_t *buf0v = base0v + midoff_aligned;
429  uint8_t *buf1u = base1u + midoff_aligned;
430  uint8_t *buf1v = base1v + midoff_aligned;
431  for (edge = 0; edge < 2; edge++) {
432  void (*func)(uint8_t *, uint8_t *, ptrdiff_t, int, int, int) = NULL;
433  switch (dir << 1 | edge) {
434  case (0 << 1) | 0: func = d->vp8_h_loop_filter8uv; break;
435  case (1 << 1) | 0: func = d->vp8_v_loop_filter8uv; break;
436  case (0 << 1) | 1: func = d->vp8_h_loop_filter8uv_inner; break;
437  case (1 << 1) | 1: func = d->vp8_v_loop_filter8uv_inner; break;
438  }
439  if (check_func(func, "vp%d_loop_filter8uv%s_%s", 8 - is_vp7, edge ? "_inner" : "", dir ? "v" : "h")) {
440  for (force_hev = -1; force_hev <= 1; force_hev++) {
441  fill_loopfilter_buffers(buf0u - midoff, 16, 16, 16);
442  fill_loopfilter_buffers(buf0v - midoff, 16, 16, 16);
443  randomize_buffers(buf0u, 0, 16, force_hev);
444  randomize_buffers(buf0v, 0, 16, force_hev);
445  memcpy(buf1u - midoff, buf0u - midoff, 16 * 16);
446  memcpy(buf1v - midoff, buf0v - midoff, 16 * 16);
447 
448  call_ref(buf0u, buf0v, 16, flim_E, flim_I, hev_thresh);
449  call_new(buf1u, buf1v, 16, flim_E, flim_I, hev_thresh);
450  if (memcmp(buf0u - midoff, buf1u - midoff, 16 * 16) ||
451  memcmp(buf0v - midoff, buf1v - midoff, 16 * 16))
452  fail();
453  }
454  fill_loopfilter_buffers(buf0u - midoff, 16, 16, 16);
455  fill_loopfilter_buffers(buf0v - midoff, 16, 16, 16);
456  randomize_buffers(buf0u, 0, 16, 0);
457  randomize_buffers(buf0v, 0, 16, 0);
458  bench_new(buf0u, buf0v, 16, flim_E, flim_I, hev_thresh);
459  }
460  }
461  }
462 }
463 
464 static void check_loopfilter_simple(VP8DSPContext *d, bool is_vp7)
465 {
466  LOCAL_ALIGNED_16(uint8_t, base0, [32 + 16 * 16]);
467  LOCAL_ALIGNED_16(uint8_t, base1, [32 + 16 * 16]);
468  int dir;
469  int flim_E = 20, flim_I = 30, hev_thresh = 0;
470  declare_func(void, uint8_t *, ptrdiff_t, int);
471 
472  for (dir = 0; dir < 2; dir++) {
473  int midoff = dir ? 4 * 16 : 4;
474  int midoff_aligned = dir ? 4 * 16 : 16;
475  uint8_t *buf0 = base0 + midoff_aligned;
476  uint8_t *buf1 = base1 + midoff_aligned;
477  void (*func)(uint8_t *, ptrdiff_t, int) = dir ? d->vp8_v_loop_filter_simple : d->vp8_h_loop_filter_simple;
478  if (check_func(func, "vp%d_loop_filter_simple_%s", 8 - is_vp7, dir ? "v" : "h")) {
479  fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);
480  randomize_buffers(buf0, 0, 16, -1);
481  randomize_buffers(buf0, 8, 16, -1);
482  memcpy(buf1 - midoff, buf0 - midoff, 16 * 16);
483  call_ref(buf0, 16, flim_E);
484  call_new(buf1, 16, flim_E);
485  if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16))
486  fail();
487  bench_new(buf0, 16, flim_E);
488  }
489  }
490 }
491 
492 static void checkasm_check_vp78dsp(VP8DSPContext *d, bool is_vp7)
493 {
494 #if CONFIG_VP7_DECODER
495  if (is_vp7)
496  ff_vp7dsp_init(d);
497  else
498 #endif
499  ff_vp8dsp_init(d);
500  check_idct(d, is_vp7);
501  check_idct_dc4(d, is_vp7);
502  check_luma_dc_wht(d, is_vp7);
503  report("idct");
504  check_loopfilter_16y(d, is_vp7);
505  check_loopfilter_8uv(d, is_vp7);
506  check_loopfilter_simple(d, is_vp7);
507  report("loopfilter");
508 }
509 
511 {
512  VP8DSPContext d;
513 
514  ff_vp78dsp_init(&d);
515  check_mc(&d);
516  report("mc");
517  checkasm_check_vp78dsp(&d, false);
518 #if CONFIG_VP7_DECODER
519  checkasm_check_vp78dsp(&d, true);
520 #endif
521 }
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:68
check_loopfilter_8uv
static void check_loopfilter_8uv(VP8DSPContext *d, bool is_vp7)
Definition: vp8dsp.c:414
VP8DSPContext::vp8_h_loop_filter8uv
void(* vp8_h_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:54
declare_func_emms
#define declare_func_emms(cpu_flags, ret,...)
Definition: checkasm.h:185
VP8DSPContext::vp8_h_loop_filter8uv_inner
void(* vp8_h_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:65
q1
static const uint8_t q1[256]
Definition: twofish.c:100
mem_internal.h
VP8DSPContext::vp8_v_loop_filter8uv
void(* vp8_v_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:52
mask
int mask
Definition: mediacodecdec_common.c:154
check_luma_dc_wht
static void check_luma_dc_wht(VP8DSPContext *d, bool is_vp7)
Definition: vp8dsp.c:196
check_idct_dc4
static void check_idct_dc4(VP8DSPContext *d, bool is_vp7)
Definition: vp8dsp.c:156
w
uint8_t w
Definition: llviddspenc.c:38
check_func
#define check_func(func,...)
Definition: checkasm.h:179
VP8DSPContext::vp8_v_loop_filter16y
void(* vp8_v_loop_filter16y)(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:48
chroma
static av_always_inline void chroma(WaveformContext *s, AVFrame *in, AVFrame *out, int component, int intensity, int offset_y, int offset_x, int column, int mirror, int jobnr, int nb_jobs)
Definition: vf_waveform.c:1639
wht4x4
static void wht4x4(int16_t *coef)
Definition: vp8dsp.c:72
c1
static const uint64_t c1
Definition: murmur3.c:52
call_ref
#define call_ref(...)
Definition: checkasm.h:194
checkasm_check_vp78dsp
static void checkasm_check_vp78dsp(VP8DSPContext *d, bool is_vp7)
Definition: vp8dsp.c:492
b1
static double b1(void *priv, double x, double y)
Definition: vf_xfade.c:2034
randomize_buffers
#define randomize_buffers(src, dst, stride, coef)
Definition: vp8dsp.c:370
fail
#define fail()
Definition: checkasm.h:188
check_loopfilter_16y
static void check_loopfilter_16y(VP8DSPContext *d, bool is_vp7)
Definition: vp8dsp.c:373
checkasm.h
val
static double val(void *priv, double ch)
Definition: aeval.c:77
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
idct
static void idct(int16_t block[64])
Definition: 4xm.c:167
ff_vp7dsp_init
void ff_vp7dsp_init(VP8DSPContext *c)
rnd
#define rnd()
Definition: checkasm.h:172
ff_vp8dsp_init
void ff_vp8dsp_init(VP8DSPContext *c)
VP8DSPContext::vp8_v_loop_filter16y_inner
void(* vp8_v_loop_filter16y_inner)(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:58
vp8dsp.h
intreadwrite.h
randomize_loopfilter_buffers
static void randomize_loopfilter_buffers(int lineoff, int str, int dir, int flim_E, int flim_I, int hev_thresh, uint8_t *buf, int force_hev)
Definition: vp8dsp.c:328
LOCAL_ALIGNED_16
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:150
VP8DSPContext::vp8_h_loop_filter_simple
void(* vp8_h_loop_filter_simple)(uint8_t *dst, ptrdiff_t stride, int flim)
Definition: vp8dsp.h:70
check_loopfilter_simple
static void check_loopfilter_simple(VP8DSPContext *d, bool is_vp7)
Definition: vp8dsp.c:464
q0
static const uint8_t q0[256]
Definition: twofish.c:81
VP8DSPContext::vp8_v_loop_filter_simple
void(* vp8_v_loop_filter_simple)(uint8_t *dst, ptrdiff_t stride, int flim)
Definition: vp8dsp.h:69
if
if(ret)
Definition: filter_design.txt:179
checkasm_check_vp8dsp
void checkasm_check_vp8dsp(void)
Definition: vp8dsp.c:510
call_new
#define call_new(...)
Definition: checkasm.h:297
NULL
#define NULL
Definition: coverity.c:32
VP8DSPContext::vp8_h_loop_filter16y
void(* vp8_h_loop_filter16y)(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:50
src
#define src
Definition: vp8dsp.c:248
vp8_mc_func
void(* vp8_mc_func)(uint8_t *dst, ptrdiff_t dstStride, const uint8_t *src, ptrdiff_t srcStride, int h, int x, int y)
Definition: vp8dsp.h:33
VP8DSPContext::put_vp8_bilinear_pixels_tab
vp8_mc_func put_vp8_bilinear_pixels_tab[3][3][3]
Definition: vp8dsp.h:81
VP8DSPContext::vp8_h_loop_filter16y_inner
void(* vp8_h_loop_filter16y_inner)(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:60
VP8DSPContext::vp8_luma_dc_wht
void(* vp8_luma_dc_wht)(int16_t block[4][4][16], int16_t dc[16])
Definition: vp8dsp.h:38
check_mc
static void check_mc(VP8DSPContext *d)
Definition: vp8dsp.c:259
setdx
#define setdx(a, b, c, d)
Definition: vp8dsp.c:324
VP8DSPContext
Definition: vp8dsp.h:37
setdx2
#define setdx2(a, b, o, c, d, e)
Definition: vp8dsp.c:326
dc
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
size
int size
Definition: twinvq_data.h:10344
VP8DSPContext::vp8_idct_dc_add
void(* vp8_idct_dc_add)(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
Definition: vp8dsp.h:41
height
#define height
block1
static int16_t block1[64]
Definition: dct.c:120
VP8DSPContext::vp8_v_loop_filter8uv_inner
void(* vp8_v_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:62
report
#define report
Definition: checkasm.h:191
bench_new
#define bench_new(...)
Definition: checkasm.h:368
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
VP8DSPContext::put_vp8_epel_pixels_tab
vp8_mc_func put_vp8_epel_pixels_tab[3][3][3]
first dimension: 4-log2(width) second dimension: 0 if no vertical interpolation is needed; 1 4-tap ve...
Definition: vp8dsp.h:80
common.h
stride
#define stride
Definition: h264pred_template.c:537
dct4x4
static void dct4x4(int16_t *coef)
Definition: vp8dsp.c:47
fill_loopfilter_buffers
static void fill_loopfilter_buffers(uint8_t *buf, ptrdiff_t stride, int w, int h)
Definition: vp8dsp.c:362
VP8DSPContext::vp8_idct_dc_add4uv
void(* vp8_idct_dc_add4uv)(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
Definition: vp8dsp.h:44
SRC_BUF_STRIDE
#define SRC_BUF_STRIDE
Definition: vp8dsp.c:243
AV_CPU_FLAG_MMX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:30
VP8DSPContext::vp8_idct_dc_add4y
void(* vp8_idct_dc_add4y)(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
Definition: vp8dsp.h:42
VP8DSPContext::vp8_luma_dc_wht_dc
void(* vp8_luma_dc_wht_dc)(int16_t block[4][4][16], int16_t dc[16])
Definition: vp8dsp.h:39
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:183
VP8DSPContext::vp8_idct_add
void(* vp8_idct_add)(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
Definition: vp8dsp.h:40
setpx
#define setpx(a, b, c)
Definition: vp8dsp.c:322
check_idct
static void check_idct(VP8DSPContext *d, bool is_vp7)
Definition: vp8dsp.c:113
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
a1
static double a1(void *priv, double x, double y)
Definition: vf_xfade.c:2029
h
h
Definition: vp9dsp_template.c:2070
snprintf
#define snprintf
Definition: snprintf.h:34
ff_vp78dsp_init
av_cold void ff_vp78dsp_init(VP8DSPContext *dsp)
Definition: vp8dsp.c:668