FFmpeg
vp9block.c
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "libavutil/avassert.h"
25 
26 #include "vp56.h"
27 #include "vp9.h"
28 #include "vp9data.h"
29 #include "vp9dec.h"
30 
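/*
 * setctx_2d() splats one byte value over a w x h context region.
 * Multiplying an 8-bit value by 0x0101... replicates it into every byte
 * lane (e.g. v = 3, w = 4 writes 0x03030303 per row), so each row is
 * filled with a single aligned store instead of a per-byte loop.
 */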
31 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
32  ptrdiff_t stride, int v)
33 {
34  switch (w) {
35  case 1:
36  do {
37  *ptr = v;
38  ptr += stride;
39  } while (--h);
40  break;
41  case 2: {
42  int v16 = v * 0x0101;
43  do {
44  AV_WN16A(ptr, v16);
45  ptr += stride;
46  } while (--h);
47  break;
48  }
49  case 4: {
50  uint32_t v32 = v * 0x01010101;
51  do {
52  AV_WN32A(ptr, v32);
53  ptr += stride;
54  } while (--h);
55  break;
56  }
57  case 8: {
58 #if HAVE_FAST_64BIT
59  uint64_t v64 = v * 0x0101010101010101ULL;
60  do {
61  AV_WN64A(ptr, v64);
62  ptr += stride;
63  } while (--h);
64 #else
65  uint32_t v32 = v * 0x01010101;
66  do {
67  AV_WN32A(ptr, v32);
68  AV_WN32A(ptr + 4, v32);
69  ptr += stride;
70  } while (--h);
71 #endif
72  break;
73  }
74  }
75 }
76 
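/*
 * Parse all block-level mode syntax: segment id, skip flag, transform
 * size, then either intra prediction modes or reference frames plus
 * inter modes and motion vectors, and update the above/left contexts
 * and the per-frame segmentation/mv maps accordingly.
 */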
77 static void decode_mode(VP9TileData *td)
78 {
79  static const uint8_t left_ctx[N_BS_SIZES] = {
80  0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
81  };
82  static const uint8_t above_ctx[N_BS_SIZES] = {
83  0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
84  };
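/* largest transform size usable for each block size (BS_64x64 .. BS_4x4) */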
85  static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
86  TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
87  TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
88  };
89  VP9Context *s = td->s;
90  VP9Block *b = td->b;
91  int row = td->row, col = td->col, row7 = td->row7;
92  enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
93  int bw4 = ff_vp9_bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
94  int bh4 = ff_vp9_bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
95  int have_a = row > 0, have_l = col > td->tile_col_start;
96  int vref, filter_id;
97 
98  if (!s->s.h.segmentation.enabled) {
99  b->seg_id = 0;
100  } else if (s->s.h.keyframe || s->s.h.intraonly) {
101  b->seg_id = !s->s.h.segmentation.update_map ? 0 :
102  vp8_rac_get_tree(td->c, ff_vp9_segmentation_tree, s->s.h.segmentation.prob);
103  } else if (!s->s.h.segmentation.update_map ||
104  (s->s.h.segmentation.temporal &&
105  vp56_rac_get_prob_branchy(td->c,
106  s->s.h.segmentation.pred_prob[s->above_segpred_ctx[col] +
107  td->left_segpred_ctx[row7]]))) {
108  if (!s->s.h.errorres && s->s.frames[REF_FRAME_SEGMAP].segmentation_map) {
109  int pred = 8, x;
110  uint8_t *refsegmap = s->s.frames[REF_FRAME_SEGMAP].segmentation_map;
111 
112  if (!s->s.frames[REF_FRAME_SEGMAP].uses_2pass)
113  ff_thread_await_progress(&s->s.frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
114  for (y = 0; y < h4; y++) {
115  int idx_base = (y + row) * 8 * s->sb_cols + col;
116  for (x = 0; x < w4; x++)
117  pred = FFMIN(pred, refsegmap[idx_base + x]);
118  }
119  av_assert1(pred < 8);
120  b->seg_id = pred;
121  } else {
122  b->seg_id = 0;
123  }
124 
125  memset(&s->above_segpred_ctx[col], 1, w4);
126  memset(&td->left_segpred_ctx[row7], 1, h4);
127  } else {
128  b->seg_id = vp8_rac_get_tree(td->c, ff_vp9_segmentation_tree,
129  s->s.h.segmentation.prob);
130 
131  memset(&s->above_segpred_ctx[col], 0, w4);
132  memset(&td->left_segpred_ctx[row7], 0, h4);
133  }
134  if (s->s.h.segmentation.enabled &&
135  (s->s.h.segmentation.update_map || s->s.h.keyframe || s->s.h.intraonly)) {
136  setctx_2d(&s->s.frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
137  bw4, bh4, 8 * s->sb_cols, b->seg_id);
138  }
139 
140  b->skip = s->s.h.segmentation.enabled &&
141  s->s.h.segmentation.feat[b->seg_id].skip_enabled;
142  if (!b->skip) {
143  int c = td->left_skip_ctx[row7] + s->above_skip_ctx[col];
144  b->skip = vp56_rac_get_prob(td->c, s->prob.p.skip[c]);
145  td->counts.skip[c][b->skip]++;
146  }
147 
148  if (s->s.h.keyframe || s->s.h.intraonly) {
149  b->intra = 1;
150  } else if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
151  b->intra = !s->s.h.segmentation.feat[b->seg_id].ref_val;
152  } else {
153  int c, bit;
154 
155  if (have_a && have_l) {
156  c = s->above_intra_ctx[col] + td->left_intra_ctx[row7];
157  c += (c == 2);
158  } else {
159  c = have_a ? 2 * s->above_intra_ctx[col] :
160  have_l ? 2 * td->left_intra_ctx[row7] : 0;
161  }
162  bit = vp56_rac_get_prob(td->c, s->prob.p.intra[c]);
163  td->counts.intra[c][bit]++;
164  b->intra = !bit;
165  }
166 
167  if ((b->intra || !b->skip) && s->s.h.txfmmode == TX_SWITCHABLE) {
168  int c;
169  if (have_a) {
170  if (have_l) {
171  c = (s->above_skip_ctx[col] ? max_tx :
172  s->above_txfm_ctx[col]) +
173  (td->left_skip_ctx[row7] ? max_tx :
174  td->left_txfm_ctx[row7]) > max_tx;
175  } else {
176  c = s->above_skip_ctx[col] ? 1 :
177  (s->above_txfm_ctx[col] * 2 > max_tx);
178  }
179  } else if (have_l) {
180  c = td->left_skip_ctx[row7] ? 1 :
181  (td->left_txfm_ctx[row7] * 2 > max_tx);
182  } else {
183  c = 1;
184  }
185  switch (max_tx) {
186  case TX_32X32:
187  b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][0]);
188  if (b->tx) {
189  b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][1]);
190  if (b->tx == 2)
191  b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][2]);
192  }
193  td->counts.tx32p[c][b->tx]++;
194  break;
195  case TX_16X16:
196  b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx16p[c][0]);
197  if (b->tx)
198  b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx16p[c][1]);
199  td->counts.tx16p[c][b->tx]++;
200  break;
201  case TX_8X8:
202  b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx8p[c]);
203  td->counts.tx8p[c][b->tx]++;
204  break;
205  case TX_4X4:
206  b->tx = TX_4X4;
207  break;
208  }
209  } else {
210  b->tx = FFMIN(max_tx, s->s.h.txfmmode);
211  }
212 
213  if (s->s.h.keyframe || s->s.h.intraonly) {
214  uint8_t *a = &s->above_mode_ctx[col * 2];
215  uint8_t *l = &td->left_mode_ctx[(row7) << 1];
216 
217  b->comp = 0;
218  if (b->bs > BS_8x8) {
219  // FIXME the memory storage intermediates here aren't really
220  // necessary, they're just there to make the code slightly
221  // simpler for now
222  b->mode[0] =
223  vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
224  ff_vp9_default_kf_ymode_probs[a[0]][l[0]]);
225  if (b->bs != BS_8x4) {
226  b->mode[1] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
227  ff_vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
228  l[0] =
229  a[1] = b->mode[1];
230  } else {
231  l[0] =
232  a[1] =
233  b->mode[1] = b->mode[0];
234  }
235  if (b->bs != BS_4x8) {
236  b->mode[2] =
237  vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
238  ff_vp9_default_kf_ymode_probs[a[0]][l[1]]);
239  if (b->bs != BS_8x4) {
240  b->mode[3] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
241  ff_vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
242  l[1] =
243  a[1] = b->mode[3];
244  } else {
245  l[1] =
246  a[1] =
247  b->mode[3] = b->mode[2];
248  }
249  } else {
250  b->mode[2] = b->mode[0];
251  l[1] =
252  a[1] =
253  b->mode[3] = b->mode[1];
254  }
255  } else {
256  b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
257  ff_vp9_default_kf_ymode_probs[a[0]][l[0]]);
258  b->mode[3] =
259  b->mode[2] =
260  b->mode[1] = b->mode[0];
261  // FIXME this can probably be optimized
262  memset(a, b->mode[0], ff_vp9_bwh_tab[0][b->bs][0]);
263  memset(l, b->mode[0], ff_vp9_bwh_tab[0][b->bs][1]);
264  }
265  b->uvmode = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
266  ff_vp9_default_kf_uvmode_probs[b->mode[3]]);
267  } else if (b->intra) {
268  b->comp = 0;
269  if (b->bs > BS_8x8) {
270  b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
271  s->prob.p.y_mode[0]);
272  td->counts.y_mode[0][b->mode[0]]++;
273  if (b->bs != BS_8x4) {
274  b->mode[1] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
275  s->prob.p.y_mode[0]);
276  td->counts.y_mode[0][b->mode[1]]++;
277  } else {
278  b->mode[1] = b->mode[0];
279  }
280  if (b->bs != BS_4x8) {
281  b->mode[2] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
282  s->prob.p.y_mode[0]);
283  td->counts.y_mode[0][b->mode[2]]++;
284  if (b->bs != BS_8x4) {
285  b->mode[3] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
286  s->prob.p.y_mode[0]);
287  td->counts.y_mode[0][b->mode[3]]++;
288  } else {
289  b->mode[3] = b->mode[2];
290  }
291  } else {
292  b->mode[2] = b->mode[0];
293  b->mode[3] = b->mode[1];
294  }
295  } else {
296  static const uint8_t size_group[10] = {
297  3, 3, 3, 3, 2, 2, 2, 1, 1, 1
298  };
299  int sz = size_group[b->bs];
300 
301  b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
302  s->prob.p.y_mode[sz]);
303  b->mode[1] =
304  b->mode[2] =
305  b->mode[3] = b->mode[0];
306  td->counts.y_mode[sz][b->mode[3]]++;
307  }
308  b->uvmode = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
309  s->prob.p.uv_mode[b->mode[3]]);
310  td->counts.uv_mode[b->mode[3]][b->uvmode]++;
311  } else {
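/*
 * context LUT for the inter mode probabilities, indexed by the above
 * and left mode contexts; values 0-9 there are intra modes and 10-13
 * the inter modes (NEARESTMV..NEWMV), hence the uniform rows and
 * columns for intra neighbours.
 */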
312  static const uint8_t inter_mode_ctx_lut[14][14] = {
313  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
314  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
315  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
316  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
317  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
318  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
319  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
320  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
321  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
322  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
323  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
324  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
325  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
326  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
327  };
328 
329  if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
330  av_assert2(s->s.h.segmentation.feat[b->seg_id].ref_val != 0);
331  b->comp = 0;
332  b->ref[0] = s->s.h.segmentation.feat[b->seg_id].ref_val - 1;
333  } else {
334  // read comp_pred flag
335  if (s->s.h.comppredmode != PRED_SWITCHABLE) {
336  b->comp = s->s.h.comppredmode == PRED_COMPREF;
337  } else {
338  int c;
339 
340  // FIXME add intra as ref=0xff (or -1) to make these easier?
341  if (have_a) {
342  if (have_l) {
343  if (s->above_comp_ctx[col] && td->left_comp_ctx[row7]) {
344  c = 4;
345  } else if (s->above_comp_ctx[col]) {
346  c = 2 + (td->left_intra_ctx[row7] ||
347  td->left_ref_ctx[row7] == s->s.h.fixcompref);
348  } else if (td->left_comp_ctx[row7]) {
349  c = 2 + (s->above_intra_ctx[col] ||
350  s->above_ref_ctx[col] == s->s.h.fixcompref);
351  } else {
352  c = (!s->above_intra_ctx[col] &&
353  s->above_ref_ctx[col] == s->s.h.fixcompref) ^
354  (!td->left_intra_ctx[row7] &&
355  td->left_ref_ctx[row & 7] == s->s.h.fixcompref);
356  }
357  } else {
358  c = s->above_comp_ctx[col] ? 3 :
359  (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->s.h.fixcompref);
360  }
361  } else if (have_l) {
362  c = td->left_comp_ctx[row7] ? 3 :
363  (!td->left_intra_ctx[row7] && td->left_ref_ctx[row7] == s->s.h.fixcompref);
364  } else {
365  c = 1;
366  }
367  b->comp = vp56_rac_get_prob(td->c, s->prob.p.comp[c]);
368  td->counts.comp[c][b->comp]++;
369  }
370 
371  // read actual references
372  // FIXME probably cache a few variables here to prevent repetitive
373  // memory accesses below
374  if (b->comp) { /* two references */
375  int fix_idx = s->s.h.signbias[s->s.h.fixcompref], var_idx = !fix_idx, c, bit;
376 
377  b->ref[fix_idx] = s->s.h.fixcompref;
378  // FIXME can this codeblob be replaced by some sort of LUT?
379  if (have_a) {
380  if (have_l) {
381  if (s->above_intra_ctx[col]) {
382  if (td->left_intra_ctx[row7]) {
383  c = 2;
384  } else {
385  c = 1 + 2 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
386  }
387  } else if (td->left_intra_ctx[row7]) {
388  c = 1 + 2 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
389  } else {
390  int refl = td->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
391 
392  if (refl == refa && refa == s->s.h.varcompref[1]) {
393  c = 0;
394  } else if (!td->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
395  if ((refa == s->s.h.fixcompref && refl == s->s.h.varcompref[0]) ||
396  (refl == s->s.h.fixcompref && refa == s->s.h.varcompref[0])) {
397  c = 4;
398  } else {
399  c = (refa == refl) ? 3 : 1;
400  }
401  } else if (!td->left_comp_ctx[row7]) {
402  if (refa == s->s.h.varcompref[1] && refl != s->s.h.varcompref[1]) {
403  c = 1;
404  } else {
405  c = (refl == s->s.h.varcompref[1] &&
406  refa != s->s.h.varcompref[1]) ? 2 : 4;
407  }
408  } else if (!s->above_comp_ctx[col]) {
409  if (refl == s->s.h.varcompref[1] && refa != s->s.h.varcompref[1]) {
410  c = 1;
411  } else {
412  c = (refa == s->s.h.varcompref[1] &&
413  refl != s->s.h.varcompref[1]) ? 2 : 4;
414  }
415  } else {
416  c = (refl == refa) ? 4 : 2;
417  }
418  }
419  } else {
420  if (s->above_intra_ctx[col]) {
421  c = 2;
422  } else if (s->above_comp_ctx[col]) {
423  c = 4 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
424  } else {
425  c = 3 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
426  }
427  }
428  } else if (have_l) {
429  if (td->left_intra_ctx[row7]) {
430  c = 2;
431  } else if (td->left_comp_ctx[row7]) {
432  c = 4 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
433  } else {
434  c = 3 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
435  }
436  } else {
437  c = 2;
438  }
439  bit = vp56_rac_get_prob(td->c, s->prob.p.comp_ref[c]);
440  b->ref[var_idx] = s->s.h.varcompref[bit];
441  td->counts.comp_ref[c][bit]++;
442  } else /* single reference */ {
443  int bit, c;
444 
445  if (have_a && !s->above_intra_ctx[col]) {
446  if (have_l && !td->left_intra_ctx[row7]) {
447  if (td->left_comp_ctx[row7]) {
448  if (s->above_comp_ctx[col]) {
449  c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7] ||
450  !s->above_ref_ctx[col]);
451  } else {
452  c = (3 * !s->above_ref_ctx[col]) +
453  (!s->s.h.fixcompref || !td->left_ref_ctx[row7]);
454  }
455  } else if (s->above_comp_ctx[col]) {
456  c = (3 * !td->left_ref_ctx[row7]) +
457  (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
458  } else {
459  c = 2 * !td->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
460  }
461  } else if (s->above_intra_ctx[col]) {
462  c = 2;
463  } else if (s->above_comp_ctx[col]) {
464  c = 1 + (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
465  } else {
466  c = 4 * (!s->above_ref_ctx[col]);
467  }
468  } else if (have_l && !td->left_intra_ctx[row7]) {
469  if (td->left_intra_ctx[row7]) {
470  c = 2;
471  } else if (td->left_comp_ctx[row7]) {
472  c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7]);
473  } else {
474  c = 4 * (!td->left_ref_ctx[row7]);
475  }
476  } else {
477  c = 2;
478  }
479  bit = vp56_rac_get_prob(td->c, s->prob.p.single_ref[c][0]);
480  td->counts.single_ref[c][0][bit]++;
481  if (!bit) {
482  b->ref[0] = 0;
483  } else {
484  // FIXME can this codeblob be replaced by some sort of LUT?
485  if (have_a) {
486  if (have_l) {
487  if (td->left_intra_ctx[row7]) {
488  if (s->above_intra_ctx[col]) {
489  c = 2;
490  } else if (s->above_comp_ctx[col]) {
491  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
492  s->above_ref_ctx[col] == 1);
493  } else if (!s->above_ref_ctx[col]) {
494  c = 3;
495  } else {
496  c = 4 * (s->above_ref_ctx[col] == 1);
497  }
498  } else if (s->above_intra_ctx[col]) {
499  if (td->left_intra_ctx[row7]) {
500  c = 2;
501  } else if (td->left_comp_ctx[row7]) {
502  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
503  td->left_ref_ctx[row7] == 1);
504  } else if (!td->left_ref_ctx[row7]) {
505  c = 3;
506  } else {
507  c = 4 * (td->left_ref_ctx[row7] == 1);
508  }
509  } else if (s->above_comp_ctx[col]) {
510  if (td->left_comp_ctx[row7]) {
511  if (td->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
512  c = 3 * (s->s.h.fixcompref == 1 ||
513  td->left_ref_ctx[row7] == 1);
514  } else {
515  c = 2;
516  }
517  } else if (!td->left_ref_ctx[row7]) {
518  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
519  s->above_ref_ctx[col] == 1);
520  } else {
521  c = 3 * (td->left_ref_ctx[row7] == 1) +
522  (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
523  }
524  } else if (td->left_comp_ctx[row7]) {
525  if (!s->above_ref_ctx[col]) {
526  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
527  td->left_ref_ctx[row7] == 1);
528  } else {
529  c = 3 * (s->above_ref_ctx[col] == 1) +
530  (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1);
531  }
532  } else if (!s->above_ref_ctx[col]) {
533  if (!td->left_ref_ctx[row7]) {
534  c = 3;
535  } else {
536  c = 4 * (td->left_ref_ctx[row7] == 1);
537  }
538  } else if (!td->left_ref_ctx[row7]) {
539  c = 4 * (s->above_ref_ctx[col] == 1);
540  } else {
541  c = 2 * (td->left_ref_ctx[row7] == 1) +
542  2 * (s->above_ref_ctx[col] == 1);
543  }
544  } else {
545  if (s->above_intra_ctx[col] ||
546  (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
547  c = 2;
548  } else if (s->above_comp_ctx[col]) {
549  c = 3 * (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
550  } else {
551  c = 4 * (s->above_ref_ctx[col] == 1);
552  }
553  }
554  } else if (have_l) {
555  if (td->left_intra_ctx[row7] ||
556  (!td->left_comp_ctx[row7] && !td->left_ref_ctx[row7])) {
557  c = 2;
558  } else if (td->left_comp_ctx[row7]) {
559  c = 3 * (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1);
560  } else {
561  c = 4 * (td->left_ref_ctx[row7] == 1);
562  }
563  } else {
564  c = 2;
565  }
566  bit = vp56_rac_get_prob(td->c, s->prob.p.single_ref[c][1]);
567  td->counts.single_ref[c][1][bit]++;
568  b->ref[0] = 1 + bit;
569  }
570  }
571  }
572 
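/* blocks of 8x8 and larger carry a single inter mode; sub-8x8
 * partitions read per-sub-block modes (and mvs) further below */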
573  if (b->bs <= BS_8x8) {
574  if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].skip_enabled) {
575  b->mode[0] =
576  b->mode[1] =
577  b->mode[2] =
578  b->mode[3] = ZEROMV;
579  } else {
580  static const uint8_t off[10] = {
581  3, 0, 0, 1, 0, 0, 0, 0, 0, 0
582  };
583 
584  // FIXME this needs to use the LUT tables from find_ref_mvs
585  // because not all are -1,0/0,-1
586  int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
587  [td->left_mode_ctx[row7 + off[b->bs]]];
588 
589  b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
590  s->prob.p.mv_mode[c]);
591  b->mode[1] =
592  b->mode[2] =
593  b->mode[3] = b->mode[0];
594  td->counts.mv_mode[c][b->mode[0] - 10]++;
595  }
596  }
597 
598  if (s->s.h.filtermode == FILTER_SWITCHABLE) {
599  int c;
600 
601  if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
602  if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) {
603  c = s->above_filter_ctx[col] == td->left_filter_ctx[row7] ?
604  td->left_filter_ctx[row7] : 3;
605  } else {
606  c = s->above_filter_ctx[col];
607  }
608  } else if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) {
609  c = td->left_filter_ctx[row7];
610  } else {
611  c = 3;
612  }
613 
614  filter_id = vp8_rac_get_tree(td->c, ff_vp9_filter_tree,
615  s->prob.p.filter[c]);
616  td->counts.filter[c][filter_id]++;
617  b->filter = ff_vp9_filter_lut[filter_id];
618  } else {
619  b->filter = s->s.h.filtermode;
620  }
621 
622  if (b->bs > BS_8x8) {
623  int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][td->left_mode_ctx[row7]];
624 
625  b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
626  s->prob.p.mv_mode[c]);
627  td->counts.mv_mode[c][b->mode[0] - 10]++;
628  ff_vp9_fill_mv(td, b->mv[0], b->mode[0], 0);
629 
630  if (b->bs != BS_8x4) {
631  b->mode[1] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
632  s->prob.p.mv_mode[c]);
633  td->counts.mv_mode[c][b->mode[1] - 10]++;
634  ff_vp9_fill_mv(td, b->mv[1], b->mode[1], 1);
635  } else {
636  b->mode[1] = b->mode[0];
637  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
638  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
639  }
640 
641  if (b->bs != BS_4x8) {
642  b->mode[2] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
643  s->prob.p.mv_mode[c]);
644  td->counts.mv_mode[c][b->mode[2] - 10]++;
645  ff_vp9_fill_mv(td, b->mv[2], b->mode[2], 2);
646 
647  if (b->bs != BS_8x4) {
648  b->mode[3] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
649  s->prob.p.mv_mode[c]);
650  td->counts.mv_mode[c][b->mode[3] - 10]++;
651  ff_vp9_fill_mv(td, b->mv[3], b->mode[3], 3);
652  } else {
653  b->mode[3] = b->mode[2];
654  AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
655  AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
656  }
657  } else {
658  b->mode[2] = b->mode[0];
659  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
660  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
661  b->mode[3] = b->mode[1];
662  AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
663  AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
664  }
665  } else {
666  ff_vp9_fill_mv(td, b->mv[0], b->mode[0], -1);
667  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
668  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
669  AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
670  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
671  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
672  AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
673  }
674 
675  vref = b->ref[b->comp ? s->s.h.signbias[s->s.h.varcompref[0]] : 0];
676  }
677 
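/*
 * SPLAT_CTX stores n copies of an 8-bit value into a context array,
 * using the same byte-replication trick as setctx_2d(); the 64-bit
 * variant is only compiled in where HAVE_FAST_64BIT says wide stores
 * are cheap.
 */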
678 #if HAVE_FAST_64BIT
679 #define SPLAT_CTX(var, val, n) \
680  switch (n) { \
681  case 1: var = val; break; \
682  case 2: AV_WN16A(&var, val * 0x0101); break; \
683  case 4: AV_WN32A(&var, val * 0x01010101); break; \
684  case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
685  case 16: { \
686  uint64_t v64 = val * 0x0101010101010101ULL; \
687  AV_WN64A( &var, v64); \
688  AV_WN64A(&((uint8_t *) &var)[8], v64); \
689  break; \
690  } \
691  }
692 #else
693 #define SPLAT_CTX(var, val, n) \
694  switch (n) { \
695  case 1: var = val; break; \
696  case 2: AV_WN16A(&var, val * 0x0101); break; \
697  case 4: AV_WN32A(&var, val * 0x01010101); break; \
698  case 8: { \
699  uint32_t v32 = val * 0x01010101; \
700  AV_WN32A( &var, v32); \
701  AV_WN32A(&((uint8_t *) &var)[4], v32); \
702  break; \
703  } \
704  case 16: { \
705  uint32_t v32 = val * 0x01010101; \
706  AV_WN32A( &var, v32); \
707  AV_WN32A(&((uint8_t *) &var)[4], v32); \
708  AV_WN32A(&((uint8_t *) &var)[8], v32); \
709  AV_WN32A(&((uint8_t *) &var)[12], v32); \
710  break; \
711  } \
712  }
713 #endif
714 
715  switch (ff_vp9_bwh_tab[1][b->bs][0]) {
716 #define SET_CTXS(perf, dir, off, n) \
717  do { \
718  SPLAT_CTX(perf->dir##_skip_ctx[off], b->skip, n); \
719  SPLAT_CTX(perf->dir##_txfm_ctx[off], b->tx, n); \
720  SPLAT_CTX(perf->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
721  if (!s->s.h.keyframe && !s->s.h.intraonly) { \
722  SPLAT_CTX(perf->dir##_intra_ctx[off], b->intra, n); \
723  SPLAT_CTX(perf->dir##_comp_ctx[off], b->comp, n); \
724  SPLAT_CTX(perf->dir##_mode_ctx[off], b->mode[3], n); \
725  if (!b->intra) { \
726  SPLAT_CTX(perf->dir##_ref_ctx[off], vref, n); \
727  if (s->s.h.filtermode == FILTER_SWITCHABLE) { \
728  SPLAT_CTX(perf->dir##_filter_ctx[off], filter_id, n); \
729  } \
730  } \
731  } \
732  } while (0)
733  case 1: SET_CTXS(s, above, col, 1); break;
734  case 2: SET_CTXS(s, above, col, 2); break;
735  case 4: SET_CTXS(s, above, col, 4); break;
736  case 8: SET_CTXS(s, above, col, 8); break;
737  }
738  switch (ff_vp9_bwh_tab[1][b->bs][1]) {
739  case 1: SET_CTXS(td, left, row7, 1); break;
740  case 2: SET_CTXS(td, left, row7, 2); break;
741  case 4: SET_CTXS(td, left, row7, 4); break;
742  case 8: SET_CTXS(td, left, row7, 8); break;
743  }
744 #undef SPLAT_CTX
745 #undef SET_CTXS
746 
747  if (!s->s.h.keyframe && !s->s.h.intraonly) {
748  if (b->bs > BS_8x8) {
749  int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
750 
751  AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
752  AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
753  AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][0], mv0);
754  AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][1], mv1);
755  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
756  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
757  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
758  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
759  } else {
760  int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
761 
762  for (n = 0; n < w4 * 2; n++) {
763  AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
764  AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
765  }
766  for (n = 0; n < h4 * 2; n++) {
767  AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][0], mv0);
768  AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][1], mv1);
769  }
770  }
771  }
772 
773  // FIXME kinda ugly
774  for (y = 0; y < h4; y++) {
775  int x, o = (row + y) * s->sb_cols * 8 + col;
776  VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[o];
777 
778  if (b->intra) {
779  for (x = 0; x < w4; x++) {
780  mv[x].ref[0] =
781  mv[x].ref[1] = -1;
782  }
783  } else if (b->comp) {
784  for (x = 0; x < w4; x++) {
785  mv[x].ref[0] = b->ref[0];
786  mv[x].ref[1] = b->ref[1];
787  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
788  AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
789  }
790  } else {
791  for (x = 0; x < w4; x++) {
792  mv[x].ref[0] = b->ref[0];
793  mv[x].ref[1] = -1;
794  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
795  }
796  }
797  }
798 }
799 
800 // FIXME merge cnt/eob arguments?
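/*
 * Coefficient token decoding: per position, an end-of-block flag, a
 * zero flag, then a magnitude class (one, 2-4, cat1/2, cat3-6) whose
 * extra bits use fixed probabilities. cache[] keeps per-position
 * magnitudes clamped to <= 5, and nnz, the rounded average of the two
 * neighbouring cache entries, selects the next probability context.
 */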
801 static av_always_inline int
802 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
803  int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
804  unsigned (*eob)[6][2], uint8_t (*p)[6][11],
805  int nnz, const int16_t *scan, const int16_t (*nb)[2],
806  const int16_t *band_counts, int16_t *qmul)
807 {
808  int i = 0, band = 0, band_left = band_counts[band];
809  const uint8_t *tp = p[0][nnz];
810  uint8_t cache[1024];
811 
812  do {
813  int val, rc;
814 
815  val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
816  eob[band][nnz][val]++;
817  if (!val)
818  break;
819 
820 skip_eob:
821  if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
822  cnt[band][nnz][0]++;
823  if (!--band_left)
824  band_left = band_counts[++band];
825  cache[scan[i]] = 0;
826  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
827  tp = p[band][nnz];
828  if (++i == n_coeffs)
829  break; //invalid input; blocks should end with EOB
830  goto skip_eob;
831  }
832 
833  rc = scan[i];
834  if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
835  cnt[band][nnz][1]++;
836  val = 1;
837  cache[rc] = 1;
838  } else {
839  cnt[band][nnz][2]++;
840  if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
841  if (!vp56_rac_get_prob_branchy(c, tp[4])) {
842  cache[rc] = val = 2;
843  } else {
844  val = 3 + vp56_rac_get_prob(c, tp[5]);
845  cache[rc] = 3;
846  }
847  } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
848  cache[rc] = 4;
849  if (!vp56_rac_get_prob_branchy(c, tp[7])) {
850  val = vp56_rac_get_prob(c, 159) + 5;
851  } else {
852  val = (vp56_rac_get_prob(c, 165) << 1) + 7;
853  val += vp56_rac_get_prob(c, 145);
854  }
855  } else { // cat 3-6
856  cache[rc] = 5;
857  if (!vp56_rac_get_prob_branchy(c, tp[8])) {
858  if (!vp56_rac_get_prob_branchy(c, tp[9])) {
859  val = 11 + (vp56_rac_get_prob(c, 173) << 2);
860  val += (vp56_rac_get_prob(c, 148) << 1);
861  val += vp56_rac_get_prob(c, 140);
862  } else {
863  val = 19 + (vp56_rac_get_prob(c, 176) << 3);
864  val += (vp56_rac_get_prob(c, 155) << 2);
865  val += (vp56_rac_get_prob(c, 140) << 1);
866  val += vp56_rac_get_prob(c, 135);
867  }
868  } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
869  val = (vp56_rac_get_prob(c, 180) << 4) + 35;
870  val += (vp56_rac_get_prob(c, 157) << 3);
871  val += (vp56_rac_get_prob(c, 141) << 2);
872  val += (vp56_rac_get_prob(c, 134) << 1);
873  val += vp56_rac_get_prob(c, 130);
874  } else {
875  val = 67;
876  if (!is8bitsperpixel) {
877  if (bpp == 12) {
878  val += vp56_rac_get_prob(c, 255) << 17;
879  val += vp56_rac_get_prob(c, 255) << 16;
880  }
881  val += (vp56_rac_get_prob(c, 255) << 15);
882  val += (vp56_rac_get_prob(c, 255) << 14);
883  }
884  val += (vp56_rac_get_prob(c, 254) << 13);
885  val += (vp56_rac_get_prob(c, 254) << 12);
886  val += (vp56_rac_get_prob(c, 254) << 11);
887  val += (vp56_rac_get_prob(c, 252) << 10);
888  val += (vp56_rac_get_prob(c, 249) << 9);
889  val += (vp56_rac_get_prob(c, 243) << 8);
890  val += (vp56_rac_get_prob(c, 230) << 7);
891  val += (vp56_rac_get_prob(c, 196) << 6);
892  val += (vp56_rac_get_prob(c, 177) << 5);
893  val += (vp56_rac_get_prob(c, 153) << 4);
894  val += (vp56_rac_get_prob(c, 140) << 3);
895  val += (vp56_rac_get_prob(c, 133) << 2);
896  val += (vp56_rac_get_prob(c, 130) << 1);
897  val += vp56_rac_get_prob(c, 129);
898  }
899  }
900  }
901 #define STORE_COEF(c, i, v) do { \
902  if (is8bitsperpixel) { \
903  c[i] = v; \
904  } else { \
905  AV_WN32A(&c[i * 2], v); \
906  } \
907 } while (0)
908  if (!--band_left)
909  band_left = band_counts[++band];
910  if (is_tx32x32)
911  STORE_COEF(coef, rc, (int)((vp8_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]) / 2);
912  else
913  STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]);
914  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
915  tp = p[band][nnz];
916  } while (++i < n_coeffs);
917 
918  return i;
919 }
920 
921 static int decode_coeffs_b_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
922  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
923  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
924  const int16_t (*nb)[2], const int16_t *band_counts,
925  int16_t *qmul)
926 {
927  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
928  nnz, scan, nb, band_counts, qmul);
929 }
930 
931 static int decode_coeffs_b32_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
932  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
933  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
934  const int16_t (*nb)[2], const int16_t *band_counts,
935  int16_t *qmul)
936 {
937  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
938  nnz, scan, nb, band_counts, qmul);
939 }
940 
941 static int decode_coeffs_b_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
942  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
943  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
944  const int16_t (*nb)[2], const int16_t *band_counts,
945  int16_t *qmul)
946 {
947  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0, 0, td->s->s.h.bpp, cnt, eob, p,
948  nnz, scan, nb, band_counts, qmul);
949 }
950 
951 static int decode_coeffs_b32_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
952  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
953  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
954  const int16_t (*nb)[2], const int16_t *band_counts,
955  int16_t *qmul)
956 {
957  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1, 0, td->s->s.h.bpp, cnt, eob, p,
958  nnz, scan, nb, band_counts, qmul);
959 }
960 
961 static av_always_inline int decode_coeffs(VP9TileData *td, int is8bitsperpixel)
962 {
963  VP9Context *s = td->s;
964  VP9Block *b = td->b;
965  int row = td->row, col = td->col;
966  uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
967  unsigned (*c)[6][3] = td->counts.coef[b->tx][0 /* y */][!b->intra];
968  unsigned (*e)[6][2] = td->counts.eob[b->tx][0 /* y */][!b->intra];
969  int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1;
970  int end_x = FFMIN(2 * (s->cols - col), w4);
971  int end_y = FFMIN(2 * (s->rows - row), h4);
972  int n, pl, x, y, ret;
973  int16_t (*qmul)[2] = s->s.h.segmentation.feat[b->seg_id].qmul;
974  int tx = 4 * s->s.h.lossless + b->tx;
975  const int16_t * const *yscans = ff_vp9_scans[tx];
976  const int16_t (* const * ynbs)[2] = ff_vp9_scans_nb[tx];
977  const int16_t *uvscan = ff_vp9_scans[b->uvtx][DCT_DCT];
978  const int16_t (*uvnb)[2] = ff_vp9_scans_nb[b->uvtx][DCT_DCT];
979  uint8_t *a = &s->above_y_nnz_ctx[col * 2];
980  uint8_t *l = &td->left_y_nnz_ctx[(row & 7) << 1];
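/* number of coefficients in each of the 6 probability bands, per tx
 * size; the last band covers all remaining coefficients */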
981  static const int16_t band_counts[4][8] = {
982  { 1, 2, 3, 4, 3, 16 - 13 },
983  { 1, 2, 3, 4, 11, 64 - 21 },
984  { 1, 2, 3, 4, 11, 256 - 21 },
985  { 1, 2, 3, 4, 11, 1024 - 21 },
986  };
987  const int16_t *y_band_counts = band_counts[b->tx];
988  const int16_t *uv_band_counts = band_counts[b->uvtx];
989  int bytesperpixel = is8bitsperpixel ? 1 : 2;
990  int total_coeff = 0;
991 
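/*
 * MERGE_CTX collapses the per-4x4 nnz flags into one flag per transform
 * block before decoding; SPLAT_CTX fans the decoded flag back out so
 * the above/left nnz context arrays always stay at 4x4 granularity.
 */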
992 #define MERGE(la, end, step, rd) \
993  for (n = 0; n < end; n += step) \
994  la[n] = !!rd(&la[n])
995 #define MERGE_CTX(step, rd) \
996  do { \
997  MERGE(l, end_y, step, rd); \
998  MERGE(a, end_x, step, rd); \
999  } while (0)
1000 
1001 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
1002  for (n = 0, y = 0; y < end_y; y += step) { \
1003  for (x = 0; x < end_x; x += step, n += step * step) { \
1004  enum TxfmType txtp = ff_vp9_intra_txfm_type[b->mode[mode_index]]; \
1005  ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
1006  (td, td->block + 16 * n * bytesperpixel, 16 * step * step, \
1007  c, e, p, a[x] + l[y], yscans[txtp], \
1008  ynbs[txtp], y_band_counts, qmul[0]); \
1009  a[x] = l[y] = !!ret; \
1010  total_coeff |= !!ret; \
1011  if (step >= 4) { \
1012  AV_WN16A(&td->eob[n], ret); \
1013  } else { \
1014  td->eob[n] = ret; \
1015  } \
1016  } \
1017  }
1018 
1019 #define SPLAT(la, end, step, cond) \
1020  if (step == 2) { \
1021  for (n = 1; n < end; n += step) \
1022  la[n] = la[n - 1]; \
1023  } else if (step == 4) { \
1024  if (cond) { \
1025  for (n = 0; n < end; n += step) \
1026  AV_WN32A(&la[n], la[n] * 0x01010101); \
1027  } else { \
1028  for (n = 0; n < end; n += step) \
1029  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
1030  } \
1031  } else /* step == 8 */ { \
1032  if (cond) { \
1033  if (HAVE_FAST_64BIT) { \
1034  for (n = 0; n < end; n += step) \
1035  AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
1036  } else { \
1037  for (n = 0; n < end; n += step) { \
1038  uint32_t v32 = la[n] * 0x01010101; \
1039  AV_WN32A(&la[n], v32); \
1040  AV_WN32A(&la[n + 4], v32); \
1041  } \
1042  } \
1043  } else { \
1044  for (n = 0; n < end; n += step) \
1045  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
1046  } \
1047  }
1048 #define SPLAT_CTX(step) \
1049  do { \
1050  SPLAT(a, end_x, step, end_x == w4); \
1051  SPLAT(l, end_y, step, end_y == h4); \
1052  } while (0)
1053 
1054  /* y tokens */
1055  switch (b->tx) {
1056  case TX_4X4:
1057  DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
1058  break;
1059  case TX_8X8:
1060  MERGE_CTX(2, AV_RN16A);
1061  DECODE_Y_COEF_LOOP(2, 0,);
1062  SPLAT_CTX(2);
1063  break;
1064  case TX_16X16:
1065  MERGE_CTX(4, AV_RN32A);
1066  DECODE_Y_COEF_LOOP(4, 0,);
1067  SPLAT_CTX(4);
1068  break;
1069  case TX_32X32:
1070  MERGE_CTX(8, AV_RN64A);
1071  DECODE_Y_COEF_LOOP(8, 0, 32);
1072  SPLAT_CTX(8);
1073  break;
1074  }
1075 
1076 #define DECODE_UV_COEF_LOOP(step, v) \
1077  for (n = 0, y = 0; y < end_y; y += step) { \
1078  for (x = 0; x < end_x; x += step, n += step * step) { \
1079  ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
1080  (td, td->uvblock[pl] + 16 * n * bytesperpixel, \
1081  16 * step * step, c, e, p, a[x] + l[y], \
1082  uvscan, uvnb, uv_band_counts, qmul[1]); \
1083  a[x] = l[y] = !!ret; \
1084  total_coeff |= !!ret; \
1085  if (step >= 4) { \
1086  AV_WN16A(&td->uveob[pl][n], ret); \
1087  } else { \
1088  td->uveob[pl][n] = ret; \
1089  } \
1090  } \
1091  }
1092 
1093  p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
1094  c = td->counts.coef[b->uvtx][1 /* uv */][!b->intra];
1095  e = td->counts.eob[b->uvtx][1 /* uv */][!b->intra];
1096  w4 >>= s->ss_h;
1097  end_x >>= s->ss_h;
1098  h4 >>= s->ss_v;
1099  end_y >>= s->ss_v;
1100  for (pl = 0; pl < 2; pl++) {
1101  a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
1102  l = &td->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
1103  switch (b->uvtx) {
1104  case TX_4X4:
1105  DECODE_UV_COEF_LOOP(1,);
1106  break;
1107  case TX_8X8:
1108  MERGE_CTX(2, AV_RN16A);
1109  DECODE_UV_COEF_LOOP(2,);
1110  SPLAT_CTX(2);
1111  break;
1112  case TX_16X16:
1113  MERGE_CTX(4, AV_RN32A);
1114  DECODE_UV_COEF_LOOP(4,);
1115  SPLAT_CTX(4);
1116  break;
1117  case TX_32X32:
1118  MERGE_CTX(8, AV_RN64A);
1119  DECODE_UV_COEF_LOOP(8, 32);
1120  SPLAT_CTX(8);
1121  break;
1122  }
1123  }
1124 
1125  return total_coeff;
1126 }
1127 
1128 static int decode_coeffs_8bpp(VP9TileData *td)
1129 {
1130  return decode_coeffs(td, 1);
1131 }
1132 
1133 static int decode_coeffs_16bpp(VP9TileData *td)
1134 {
1135  return decode_coeffs(td, 0);
1136 }
1137 
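/*
 * Build the loopfilter edge masks for one block: the first mask index
 * separates the two edge directions, the middle index is the 8x8 row
 * within the superblock, and the last index selects the filter width
 * (widest first, [3] for the extra 4-pixel inner edges); each set bit
 * enables filtering of one 8-pixel unit.
 */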
1138 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
1139  int row_and_7, int col_and_7,
1140  int w, int h, int col_end, int row_end,
1141  enum TxfmMode tx, int skip_inter)
1142 {
1143  static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
1144  static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
1145 
1146  // FIXME I'm pretty sure all loops can be replaced by a single LUT if
1147  // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
1148  // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
1149  // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
1150 
1151  // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
1152  // edges. This means that for UV, we work on two subsampled blocks at
1153  // a time, and we only use the topleft block's mode information to set
1154  // things like block strength. Thus, for any block size smaller than
1155  // 16x16, ignore the odd portion of the block.
1156  if (tx == TX_4X4 && (ss_v | ss_h)) {
1157  if (h == ss_v) {
1158  if (row_and_7 & 1)
1159  return;
1160  if (!row_end)
1161  h += 1;
1162  }
1163  if (w == ss_h) {
1164  if (col_and_7 & 1)
1165  return;
1166  if (!col_end)
1167  w += 1;
1168  }
1169  }
1170 
1171  if (tx == TX_4X4 && !skip_inter) {
1172  int t = 1 << col_and_7, m_col = (t << w) - t, y;
1173  // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
1174  int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
1175 
1176  for (y = row_and_7; y < h + row_and_7; y++) {
1177  int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
1178 
1179  mask[0][y][1] |= m_row_8;
1180  mask[0][y][2] |= m_row_4;
1181  // for odd lines, if the odd col is not being filtered,
1182  // skip odd row also:
1183  // .---. <-- a
1184  // | |
1185  // |___| <-- b
1186  // ^ ^
1187  // c d
1188  //
1189  // if a/c are even row/col and b/d are odd, and d is skipped,
1190  // e.g. right edge of size-66x66.webm, then skip b also (bug)
1191  if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
1192  mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
1193  } else {
1194  mask[1][y][col_mask_id] |= m_col;
1195  }
1196  if (!ss_h)
1197  mask[0][y][3] |= m_col;
1198  if (!ss_v) {
1199  if (ss_h && (col_end & 1))
1200  mask[1][y][3] |= (t << (w - 1)) - t;
1201  else
1202  mask[1][y][3] |= m_col;
1203  }
1204  }
1205  } else {
1206  int y, t = 1 << col_and_7, m_col = (t << w) - t;
1207 
1208  if (!skip_inter) {
1209  int mask_id = (tx == TX_8X8);
1210  int l2 = tx + ss_h - 1, step1d;
1211  static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
1212  int m_row = m_col & masks[l2];
1213 
1214  // at odd UV col/row edges tx16/tx32 loopfilter edges, force
1215  // 8wd loopfilter to prevent going off the visible edge.
1216  if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
1217  int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
1218  int m_row_8 = m_row - m_row_16;
1219 
1220  for (y = row_and_7; y < h + row_and_7; y++) {
1221  mask[0][y][0] |= m_row_16;
1222  mask[0][y][1] |= m_row_8;
1223  }
1224  } else {
1225  for (y = row_and_7; y < h + row_and_7; y++)
1226  mask[0][y][mask_id] |= m_row;
1227  }
1228 
1229  l2 = tx + ss_v - 1;
1230  step1d = 1 << l2;
1231  if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
1232  for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
1233  mask[1][y][0] |= m_col;
1234  if (y - row_and_7 == h - 1)
1235  mask[1][y][1] |= m_col;
1236  } else {
1237  for (y = row_and_7; y < h + row_and_7; y += step1d)
1238  mask[1][y][mask_id] |= m_col;
1239  }
1240  } else if (tx != TX_4X4) {
1241  int mask_id;
1242 
1243  mask_id = (tx == TX_8X8) || (h == ss_v);
1244  mask[1][row_and_7][mask_id] |= m_col;
1245  mask_id = (tx == TX_8X8) || (w == ss_h);
1246  for (y = row_and_7; y < h + row_and_7; y++)
1247  mask[0][y][mask_id] |= t;
1248  } else {
1249  int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
1250 
1251  for (y = row_and_7; y < h + row_and_7; y++) {
1252  mask[0][y][2] |= t4;
1253  mask[0][y][1] |= t8;
1254  }
1255  mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
1256  }
1257  }
1258 }
1259 
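/*
 * Decode one block: mode/ref/mv syntax, residual coefficients,
 * reconstruction (with emulated-edge fallback for overhanging blocks),
 * and loopfilter mask setup. In two-pass operation, pass 1 only parses
 * and buffers block data, while pass 2 reconstructs from the buffers.
 */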
1260 void ff_vp9_decode_block(VP9TileData *td, int row, int col,
1261  VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
1262  enum BlockLevel bl, enum BlockPartition bp)
1263 {
1264  VP9Context *s = td->s;
1265  VP9Block *b = td->b;
1266  enum BlockSize bs = bl * 3 + bp;
1267  int bytesperpixel = s->bytesperpixel;
1268  int w4 = ff_vp9_bwh_tab[1][bs][0], h4 = ff_vp9_bwh_tab[1][bs][1], lvl;
1269  int emu[2];
1270  AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1271 
1272  td->row = row;
1273  td->row7 = row & 7;
1274  td->col = col;
1275  td->col7 = col & 7;
1276 
1277  td->min_mv.x = -(128 + col * 64);
1278  td->min_mv.y = -(128 + row * 64);
1279  td->max_mv.x = 128 + (s->cols - col - w4) * 64;
1280  td->max_mv.y = 128 + (s->rows - row - h4) * 64;
1281 
1282  if (s->pass < 2) {
1283  b->bs = bs;
1284  b->bl = bl;
1285  b->bp = bp;
1286  decode_mode(td);
1287  b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
1288  (s->ss_v && h4 * 2 == (1 << b->tx)));
1289 
1290  if (td->block_structure) {
1291  td->block_structure[td->nb_block_structure].row = row;
1292  td->block_structure[td->nb_block_structure].col = col;
1293  td->block_structure[td->nb_block_structure].block_size_idx_x = av_log2(w4);
1294  td->block_structure[td->nb_block_structure].block_size_idx_y = av_log2(h4);
1295  td->nb_block_structure++;
1296  }
1297 
1298  if (!b->skip) {
1299  int has_coeffs;
1300 
1301  if (bytesperpixel == 1) {
1302  has_coeffs = decode_coeffs_8bpp(td);
1303  } else {
1304  has_coeffs = decode_coeffs_16bpp(td);
1305  }
1306  if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
1307  b->skip = 1;
1308  memset(&s->above_skip_ctx[col], 1, w4);
1309  memset(&td->left_skip_ctx[td->row7], 1, h4);
1310  }
1311  } else {
1312  int row7 = td->row7;
1313 
1314 #define SPLAT_ZERO_CTX(v, n) \
1315  switch (n) { \
1316  case 1: v = 0; break; \
1317  case 2: AV_ZERO16(&v); break; \
1318  case 4: AV_ZERO32(&v); break; \
1319  case 8: AV_ZERO64(&v); break; \
1320  case 16: AV_ZERO128(&v); break; \
1321  }
1322 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
1323  do { \
1324  SPLAT_ZERO_CTX(dir##_y_##var[off * 2], n * 2); \
1325  if (s->ss_##dir2) { \
1326  SPLAT_ZERO_CTX(dir##_uv_##var[0][off], n); \
1327  SPLAT_ZERO_CTX(dir##_uv_##var[1][off], n); \
1328  } else { \
1329  SPLAT_ZERO_CTX(dir##_uv_##var[0][off * 2], n * 2); \
1330  SPLAT_ZERO_CTX(dir##_uv_##var[1][off * 2], n * 2); \
1331  } \
1332  } while (0)
1333 
1334  switch (w4) {
1335  case 1: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 1, h); break;
1336  case 2: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 2, h); break;
1337  case 4: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 4, h); break;
1338  case 8: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 8, h); break;
1339  }
1340  switch (h4) {
1341  case 1: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 1, v); break;
1342  case 2: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 2, v); break;
1343  case 4: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 4, v); break;
1344  case 8: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 8, v); break;
1345  }
1346  }
1347 
1348  if (s->pass == 1) {
1349  s->td[0].b++;
1350  s->td[0].block += w4 * h4 * 64 * bytesperpixel;
1351  s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
1352  s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
1353  s->td[0].eob += 4 * w4 * h4;
1354  s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
1355  s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
1356 
1357  return;
1358  }
1359  }
1360 
1361  // use the emulated-edge temp buffers if the target buffer's stride can't
1362  // hold the block; this makes it possible to support emu-edge and so on
1363  // even if we have large block overhangs
1364  emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] ||
1365  (row + h4) > s->rows;
1366  emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] ||
1367  (row + h4) > s->rows;
1368  if (emu[0]) {
1369  td->dst[0] = td->tmp_y;
1370  td->y_stride = 128;
1371  } else {
1372  td->dst[0] = f->data[0] + yoff;
1373  td->y_stride = f->linesize[0];
1374  }
1375  if (emu[1]) {
1376  td->dst[1] = td->tmp_uv[0];
1377  td->dst[2] = td->tmp_uv[1];
1378  td->uv_stride = 128;
1379  } else {
1380  td->dst[1] = f->data[1] + uvoff;
1381  td->dst[2] = f->data[2] + uvoff;
1382  td->uv_stride = f->linesize[1];
1383  }
1384  if (b->intra) {
1385  if (s->s.h.bpp > 8) {
1386  ff_vp9_intra_recon_16bpp(td, yoff, uvoff);
1387  } else {
1388  ff_vp9_intra_recon_8bpp(td, yoff, uvoff);
1389  }
1390  } else {
1391  if (s->s.h.bpp > 8) {
1392  ff_vp9_inter_recon_16bpp(td);
1393  } else {
1394  ff_vp9_inter_recon_8bpp(td);
1395  }
1396  }
1397  if (emu[0]) {
1398  int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
1399 
1400  for (n = 0; o < w; n++) {
1401  int bw = 64 >> n;
1402 
1403  av_assert2(n <= 4);
1404  if (w & bw) {
1405  s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o * bytesperpixel, f->linesize[0],
1406  td->tmp_y + o * bytesperpixel, 128, h, 0, 0);
1407  o += bw;
1408  }
1409  }
1410  }
1411  if (emu[1]) {
1412  int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
1413  int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
1414 
1415  for (n = s->ss_h; o < w; n++) {
1416  int bw = 64 >> n;
1417 
1418  av_assert2(n <= 4);
1419  if (w & bw) {
1420  s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o * bytesperpixel, f->linesize[1],
1421  td->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0);
1422  s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o * bytesperpixel, f->linesize[2],
1423  td->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0);
1424  o += bw;
1425  }
1426  }
1427  }
1428 
1429  // pick filter level and find edges to apply filter to
1430  if (s->s.h.filter.level &&
1431  (lvl = s->s.h.segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
1432  [b->mode[3] != ZEROMV]) > 0) {
1433  int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
1434  int skip_inter = !b->intra && b->skip, col7 = td->col7, row7 = td->row7;
1435 
1436  setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
1437  mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
1438  if (s->ss_h || s->ss_v)
1439  mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
1440  s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
1441  s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
1442  b->uvtx, skip_inter);
1443  }
1444 
1445  if (s->pass == 2) {
1446  s->td[0].b++;
1447  s->td[0].block += w4 * h4 * 64 * bytesperpixel;
1448  s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
1449  s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
1450  s->td[0].eob += 4 * w4 * h4;
1451  s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
1452  s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
1453  }
1454 }