FFmpeg
vp9recon.c
Go to the documentation of this file.
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "libavutil/avassert.h"
25 #include "libavutil/frame.h"
26 #include "libavutil/mem_internal.h"
27 
28 #include "progressframe.h"
29 #include "videodsp.h"
30 #include "vp9data.h"
31 #include "vp9dec.h"
32 
/**
 * Prepare the neighbouring samples for intra prediction of one transform
 * block and pick the prediction mode that matches the available edges.
 *
 * The requested mode is first remapped through mode_conv[] according to
 * whether a left and/or top neighbour exists. For the remapped mode the top
 * edge is then either referenced in place (by redirecting *a) or assembled
 * in the buffer *a points to, and the left edge is copied into l.
 *
 * @param td            tile context; td->s and td->tile_col_start are read
 * @param mode          requested intra mode, asserted to be in [0, 10)
 * @param a             in: buffer for the top edge, with room for one sample
 *                      before it (index -1 is written for the top-left);
 *                      out: may instead point directly at existing samples
 * @param dst_edge      destination used for the top row when y == 0 (for the
 *                      top-left also when x == 0) and for the left column
 *                      when x == 0
 * @param stride_edge   line size belonging to dst_edge
 * @param dst_inner     destination used in the remaining cases
 * @param stride_inner  line size belonging to dst_inner
 * @param l             output buffer for the left edge (4 << tx samples)
 * @param col           block column; compared with td->tile_col_start and s->cols
 * @param x             horizontal position of the transform block in the block
 * @param w             block width in the same units as x
 * @param row           block row; compared with s->rows
 * @param y             vertical position of the transform block in the block
 * @param tx            transform size; the edge length is 4 << tx samples
 * @param p             plane index into s->intra_pred_data[]
 * @param ss_h          horizontal subsampling flag of the plane
 * @param ss_v          vertical subsampling flag of the plane
 * @param bytesperpixel 1 for byte samples, otherwise samples are 16 bits wide
 * @return the remapped prediction mode
 */
33 static av_always_inline int check_intra_mode(VP9TileData *td, int mode, uint8_t **a,
34  uint8_t *dst_edge, ptrdiff_t stride_edge,
35  uint8_t *dst_inner, ptrdiff_t stride_inner,
36  uint8_t *l, int col, int x, int w,
37  int row, int y, enum TxfmMode tx,
38  int p, int ss_h, int ss_v, int bytesperpixel)
39 {
40  const VP9Context *s = td->s;
 // Edge availability: a top neighbour exists below the first row, a left
 // neighbour exists right of the tile's first column, and a right
 // neighbour exists unless this is the last transform block of the row.
41  int have_top = row > 0 || y > 0;
42  int have_left = col > td->tile_col_start || x > 0;
43  int have_right = x < w - 1;
44  int bpp = s->s.h.bpp;
 // Replacement mode for each requested mode, selected by edge availability.
45  static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
46  [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
47  { DC_127_PRED, VERT_PRED } },
48  [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
49  { HOR_PRED, HOR_PRED } },
50  [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
51  { LEFT_DC_PRED, DC_PRED } },
63  { HOR_UP_PRED, HOR_UP_PRED } },
65  { HOR_PRED, TM_VP8_PRED } },
66  };
 // Which neighbouring samples each (already remapped) mode reads.
 // invert_left selects top-to-bottom storage order for the left edge.
67  static const struct {
68  uint8_t needs_left:1;
69  uint8_t needs_top:1;
70  uint8_t needs_topleft:1;
71  uint8_t needs_topright:1;
72  uint8_t invert_left:1;
73  } edges[N_INTRA_PRED_MODES] = {
74  [VERT_PRED] = { .needs_top = 1 },
75  [HOR_PRED] = { .needs_left = 1 },
76  [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
77  [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
78  [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
79  .needs_topleft = 1 },
80  [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
81  .needs_topleft = 1 },
82  [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1,
83  .needs_topleft = 1 },
84  [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
85  [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
86  [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1,
87  .needs_topleft = 1 },
88  [LEFT_DC_PRED] = { .needs_left = 1 },
89  [TOP_DC_PRED] = { .needs_top = 1 },
90  [DC_128_PRED] = { 0 },
91  [DC_127_PRED] = { 0 },
92  [DC_129_PRED] = { 0 }
93  };
94 
95  av_assert2(mode >= 0 && mode < 10);
 // From here on "mode" is the remapped mode; it is also the return value.
96  mode = mode_conv[mode][have_left][have_top];
97  if (edges[mode].needs_top) {
98  uint8_t *top, *topleft;
 // n_px_need: samples the predictor reads from the top edge.
 // n_px_have: samples that exist from this position up to s->cols
 // (presumably the right frame edge - confirm against the caller).
99  int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
100  int n_px_need_tr = 0;
101 
102  if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
103  n_px_need_tr = 4;
104 
105  // if top of sb64-row, use s->intra_pred_data[] instead of
106  // dst[-stride] for intra prediction (it contains pre- instead of
107  // post-loopfilter data)
108  if (have_top) {
109  top = !(row & 7) && !y ?
110  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
111  y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
112  if (have_left)
113  topleft = !(row & 7) && !y ?
114  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
115  y == 0 || x == 0 ? &dst_edge[-stride_edge] :
116  &dst_inner[-stride_inner];
117  }
118 
 // Fast path: every sample the mode needs already exists contiguously
 // in one place, so *a is simply pointed at it. Otherwise the edge is
 // assembled in the buffer that *a refers to on entry.
119  if (have_top &&
120  (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
121  (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
122  n_px_need + n_px_need_tr <= n_px_have) {
123  *a = top;
124  } else {
125  if (have_top) {
126  if (n_px_need <= n_px_have) {
127  memcpy(*a, top, n_px_need * bytesperpixel);
128  } else {
 // memset_bpp: fill num samples of c starting at sample index i1 with the
 // sample found at index i2 of v (byte or 16-bit, per bytesperpixel).
129 #define memset_bpp(c, i1, v, i2, num) do { \
130  if (bytesperpixel == 1) { \
131  memset(&(c)[(i1)], (v)[(i2)], (num)); \
132  } else { \
133  int n, val = AV_RN16A(&(v)[(i2) * 2]); \
134  for (n = 0; n < (num); n++) { \
135  AV_WN16A(&(c)[((i1) + n) * 2], val); \
136  } \
137  } \
138 } while (0)
 // Copy what exists, then repeat the last existing sample.
139  memcpy(*a, top, n_px_have * bytesperpixel);
140  memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
141  }
142  } else {
 // memset_val: fill num samples of c with the constant val.
143 #define memset_val(c, val, num) do { \
144  if (bytesperpixel == 1) { \
145  memset((c), (val), (num)); \
146  } else { \
147  int n; \
148  for (n = 0; n < (num); n++) { \
149  AV_WN16A(&(c)[n * 2], (val)); \
150  } \
151  } \
152 } while (0)
 // No top neighbour: use the constant (128 << (bpp - 8)) - 1.
153  memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
154  }
155  if (edges[mode].needs_topleft) {
156  if (have_left && have_top) {
 // assign_bpp: copy the sample at index i2 of v to index i1 of c.
157 #define assign_bpp(c, i1, v, i2) do { \
158  if (bytesperpixel == 1) { \
159  (c)[(i1)] = (v)[(i2)]; \
160  } else { \
161  AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
162  } \
163 } while (0)
164  assign_bpp(*a, -1, topleft, -1);
165  } else {
 // assign_val: store the constant v at sample index i of c.
166 #define assign_val(c, i, v) do { \
167  if (bytesperpixel == 1) { \
168  (c)[(i)] = (v); \
169  } else { \
170  AV_WN16A(&(c)[(i) * 2], (v)); \
171  } \
172 } while (0)
 // Missing top-left: mid value +1 when a top row exists, -1 otherwise.
173  assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
174  }
175  }
 // Top-right for 4x4: copy the next four samples when they exist,
 // otherwise repeat sample 3 of the top edge.
176  if (tx == TX_4X4 && edges[mode].needs_topright) {
177  if (have_top && have_right &&
178  n_px_need + n_px_need_tr <= n_px_have) {
179  memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
180  } else {
181  memset_bpp(*a, 4, *a, 3, 4);
182  }
183  }
184  }
185  }
 // Left edge: read the sample immediately left of each row (index -1) and
 // store it in l. Without invert_left the column is stored bottom-up; rows
 // beyond n_px_have are filled with the last row that exists.
186  if (edges[mode].needs_left) {
187  if (have_left) {
188  int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
189  uint8_t *dst = x == 0 ? dst_edge : dst_inner;
190  ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
191 
192  if (edges[mode].invert_left) {
193  if (n_px_need <= n_px_have) {
194  for (i = 0; i < n_px_need; i++)
195  assign_bpp(l, i, &dst[i * stride], -1);
196  } else {
197  for (i = 0; i < n_px_have; i++)
198  assign_bpp(l, i, &dst[i * stride], -1);
199  memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
200  }
201  } else {
202  if (n_px_need <= n_px_have) {
203  for (i = 0; i < n_px_need; i++)
204  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
205  } else {
206  for (i = 0; i < n_px_have; i++)
207  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
208  memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
209  }
210  }
211  } else {
 // No left neighbour: use the constant (128 << (bpp - 8)) + 1.
212  memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
213  }
214  }
215 
216  return mode;
217 }
218 
219 static av_always_inline void intra_recon(VP9TileData *td, ptrdiff_t y_off,
220  ptrdiff_t uv_off, int bytesperpixel)
221 {
222  const VP9Context *s = td->s;
223  VP9Block *b = td->b;
224  int row = td->row, col = td->col;
225  int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
226  int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
227  int end_x = FFMIN(2 * (s->cols - col), w4);
228  int end_y = FFMIN(2 * (s->rows - row), h4);
229  int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
230  int uvstep1d = 1 << b->uvtx, p;
231  uint8_t *dst = td->dst[0], *dst_r = s->s.frames[CUR_FRAME].tf.f->data[0] + y_off;
232  LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
233  LOCAL_ALIGNED_32(uint8_t, l, [64]);
234 
235  for (n = 0, y = 0; y < end_y; y += step1d) {
236  uint8_t *ptr = dst, *ptr_r = dst_r;
237  for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
238  ptr_r += 4 * step1d * bytesperpixel, n += step) {
239  int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
240  y * 2 + x : 0];
241  uint8_t *a = &a_buf[32];
242  enum TxfmType txtp = ff_vp9_intra_txfm_type[mode];
243  int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&td->eob[n]) : td->eob[n];
244 
245  mode = check_intra_mode(td, mode, &a, ptr_r,
246  s->s.frames[CUR_FRAME].tf.f->linesize[0],
247  ptr, td->y_stride, l,
248  col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
249  s->dsp.intra_pred[b->tx][mode](ptr, td->y_stride, l, a);
250  if (eob)
251  s->dsp.itxfm_add[tx][txtp](ptr, td->y_stride,
252  td->block + 16 * n * bytesperpixel, eob);
253  }
254  dst_r += 4 * step1d * s->s.frames[CUR_FRAME].tf.f->linesize[0];
255  dst += 4 * step1d * td->y_stride;
256  }
257 
258  // U/V
259  w4 >>= s->ss_h;
260  end_x >>= s->ss_h;
261  end_y >>= s->ss_v;
262  step = 1 << (b->uvtx * 2);
263  for (p = 0; p < 2; p++) {
264  dst = td->dst[1 + p];
265  dst_r = s->s.frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
266  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
267  uint8_t *ptr = dst, *ptr_r = dst_r;
268  for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
269  ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
270  int mode = b->uvmode;
271  uint8_t *a = &a_buf[32];
272  int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&td->uveob[p][n]) : td->uveob[p][n];
273 
274  mode = check_intra_mode(td, mode, &a, ptr_r,
275  s->s.frames[CUR_FRAME].tf.f->linesize[1],
276  ptr, td->uv_stride, l, col, x, w4, row, y,
277  b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
278  s->dsp.intra_pred[b->uvtx][mode](ptr, td->uv_stride, l, a);
279  if (eob)
280  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, td->uv_stride,
281  td->uvblock[p] + 16 * n * bytesperpixel, eob);
282  }
283  dst_r += 4 * uvstep1d * s->s.frames[CUR_FRAME].tf.f->linesize[1];
284  dst += 4 * uvstep1d * td->uv_stride;
285  }
286  }
287 }
288 
289 void ff_vp9_intra_recon_8bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
290 {
291  intra_recon(td, y_off, uv_off, 1);
292 }
293 
294 void ff_vp9_intra_recon_16bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
295 {
296  intra_recon(td, y_off, uv_off, 2);
297 }
298 
300  uint8_t *dst, ptrdiff_t dst_stride,
301  const uint8_t *ref, ptrdiff_t ref_stride,
302  const ProgressFrame *ref_frame,
303  ptrdiff_t y, ptrdiff_t x, const VP9mv *mv,
304  int bw, int bh, int w, int h, int bytesperpixel)
305 {
306  const VP9Context *s = td->s;
307  int mx = mv->x, my = mv->y, th;
308 
309  y += my >> 3;
310  x += mx >> 3;
311  ref += y * ref_stride + x * bytesperpixel;
312  mx &= 7;
313  my &= 7;
314  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
315  // we use +7 because the last 7 pixels of each sbrow can be changed in
316  // the longest loopfilter of the next sbrow
317  th = (y + bh + 4 * !!my + 7) >> 6;
319  // The arm/aarch64 _hv filters read one more row than what actually is
320  // needed, so switch to emulated edge one pixel sooner vertically
321  // (!!my * 5) than horizontally (!!mx * 4).
322  if (x < !!mx * 3 || y < !!my * 3 ||
323  x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
324  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
325  ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
326  160, ref_stride,
327  bw + !!mx * 7, bh + !!my * 7,
328  x - !!mx * 3, y - !!my * 3, w, h);
329  ref = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
330  ref_stride = 160;
331  }
332  mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
333 }
334 
336  uint8_t *dst_u, uint8_t *dst_v,
337  ptrdiff_t dst_stride,
338  const uint8_t *ref_u, ptrdiff_t src_stride_u,
339  const uint8_t *ref_v, ptrdiff_t src_stride_v,
340  const ProgressFrame *ref_frame,
341  ptrdiff_t y, ptrdiff_t x, const VP9mv *mv,
342  int bw, int bh, int w, int h, int bytesperpixel)
343 {
344  const VP9Context *s = td->s;
345  int mx = mv->x * (1 << !s->ss_h), my = mv->y * (1 << !s->ss_v), th;
346 
347  y += my >> 4;
348  x += mx >> 4;
349  ref_u += y * src_stride_u + x * bytesperpixel;
350  ref_v += y * src_stride_v + x * bytesperpixel;
351  mx &= 15;
352  my &= 15;
353  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
354  // we use +7 because the last 7 pixels of each sbrow can be changed in
355  // the longest loopfilter of the next sbrow
356  th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
358  // The arm/aarch64 _hv filters read one more row than what actually is
359  // needed, so switch to emulated edge one pixel sooner vertically
360  // (!!my * 5) than horizontally (!!mx * 4).
361  if (x < !!mx * 3 || y < !!my * 3 ||
362  x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
363  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
364  ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
365  160, src_stride_u,
366  bw + !!mx * 7, bh + !!my * 7,
367  x - !!mx * 3, y - !!my * 3, w, h);
368  ref_u = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
369  mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
370 
371  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
372  ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
373  160, src_stride_v,
374  bw + !!mx * 7, bh + !!my * 7,
375  x - !!mx * 3, y - !!my * 3, w, h);
376  ref_v = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
377  mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
378  } else {
379  mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
380  mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
381  }
382 }
383 
/*
 * Unscaled instantiation of vp9_mc_template.c.
 *
 * mc_luma_dir()/mc_chroma_dir() are the hooks the template expands; here
 * they forward to mc_luma_unscaled()/mc_chroma_unscaled(). The px, py, pw,
 * ph and i arguments are accepted but not used. Because "mc" is a macro
 * parameter, the text "s->dsp.mc" in the replacement becomes "s->dsp."
 * followed by the caller's argument. "s" and "bytesperpixel" are not
 * parameters and must be in scope where the macro is expanded (presumably
 * provided by the template - confirm there).
 */
384 #define mc_luma_dir(td, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
385  px, py, pw, ph, bw, bh, w, h, i) \
386  mc_luma_unscaled(td, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
387  mv, bw, bh, w, h, bytesperpixel)
388 #define mc_chroma_dir(td, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
389  row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
390  mc_chroma_unscaled(td, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
391  row, col, mv, bw, bh, w, h, bytesperpixel)
392 #define SCALED 0
/* First pass: names get the _8bpp suffix, one byte per pixel. */
393 #define FN(x) x##_8bpp
394 #define BYTES_PER_PIXEL 1
395 #include "vp9_mc_template.c"
396 #undef FN
397 #undef BYTES_PER_PIXEL
/* Second pass: names get the _16bpp suffix, two bytes per pixel. */
398 #define FN(x) x##_16bpp
399 #define BYTES_PER_PIXEL 2
400 #include "vp9_mc_template.c"
/* Drop the hooks so they can be redefined for the scaled variant. */
401 #undef mc_luma_dir
402 #undef mc_chroma_dir
403 #undef FN
404 #undef BYTES_PER_PIXEL
405 #undef SCALED
406 
408  const vp9_mc_func (*mc)[2],
409  uint8_t *dst, ptrdiff_t dst_stride,
410  const uint8_t *ref, ptrdiff_t ref_stride,
411  const ProgressFrame *ref_frame,
412  ptrdiff_t y, ptrdiff_t x, const VP9mv *in_mv,
413  int px, int py, int pw, int ph,
414  int bw, int bh, int w, int h, int bytesperpixel,
415  const uint16_t *scale, const uint8_t *step)
416 {
417  const VP9Context *s = td->s;
418  if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
419  s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
420  mc_luma_unscaled(td, mc, dst, dst_stride, ref, ref_stride, ref_frame,
421  y, x, in_mv, bw, bh, w, h, bytesperpixel);
422  } else {
423 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
424  int mx, my;
425  int refbw_m1, refbh_m1;
426  int th;
427  VP9mv mv;
428 
429  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
430  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
431  // BUG libvpx seems to scale the two components separately. This introduces
432  // rounding errors but we have to reproduce them to be exactly compatible
433  // with the output from libvpx...
434  mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
435  my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
436 
437  y = my >> 4;
438  x = mx >> 4;
439  ref += y * ref_stride + x * bytesperpixel;
440  mx &= 15;
441  my &= 15;
442  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
443  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
444  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
445  // we use +7 because the last 7 pixels of each sbrow can be changed in
446  // the longest loopfilter of the next sbrow
447  th = (y + refbh_m1 + 4 + 7) >> 6;
449  // The arm/aarch64 _hv filters read one more row than what actually is
450  // needed, so switch to emulated edge one pixel sooner vertically
451  // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
452  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
453  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
454  ref - 3 * ref_stride - 3 * bytesperpixel,
455  288, ref_stride,
456  refbw_m1 + 8, refbh_m1 + 8,
457  x - 3, y - 3, w, h);
458  ref = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
459  ref_stride = 288;
460  }
461  smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
462  }
463 }
464 
466  const vp9_mc_func (*mc)[2],
467  uint8_t *dst_u, uint8_t *dst_v,
468  ptrdiff_t dst_stride,
469  const uint8_t *ref_u, ptrdiff_t src_stride_u,
470  const uint8_t *ref_v, ptrdiff_t src_stride_v,
471  const ProgressFrame *ref_frame,
472  ptrdiff_t y, ptrdiff_t x, const VP9mv *in_mv,
473  int px, int py, int pw, int ph,
474  int bw, int bh, int w, int h, int bytesperpixel,
475  const uint16_t *scale, const uint8_t *step)
476 {
477  const VP9Context *s = td->s;
478  if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
479  s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
480  mc_chroma_unscaled(td, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
481  ref_v, src_stride_v, ref_frame,
482  y, x, in_mv, bw, bh, w, h, bytesperpixel);
483  } else {
484  int mx, my;
485  int refbw_m1, refbh_m1;
486  int th;
487  VP9mv mv;
488 
489  if (s->ss_h) {
490  // BUG https://code.google.com/p/webm/issues/detail?id=820
491  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 16, (s->cols * 4 - x + px + 3) * 16);
492  mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
493  } else {
494  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
495  mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
496  }
497  if (s->ss_v) {
498  // BUG https://code.google.com/p/webm/issues/detail?id=820
499  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 16, (s->rows * 4 - y + py + 3) * 16);
500  my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
501  } else {
502  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
503  my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
504  }
505 #undef scale_mv
506  y = my >> 4;
507  x = mx >> 4;
508  ref_u += y * src_stride_u + x * bytesperpixel;
509  ref_v += y * src_stride_v + x * bytesperpixel;
510  mx &= 15;
511  my &= 15;
512  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
513  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
514  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
515  // we use +7 because the last 7 pixels of each sbrow can be changed in
516  // the longest loopfilter of the next sbrow
517  th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
519  // The arm/aarch64 _hv filters read one more row than what actually is
520  // needed, so switch to emulated edge one pixel sooner vertically
521  // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
522  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
523  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
524  ref_u - 3 * src_stride_u - 3 * bytesperpixel,
525  288, src_stride_u,
526  refbw_m1 + 8, refbh_m1 + 8,
527  x - 3, y - 3, w, h);
528  ref_u = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
529  smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
530 
531  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
532  ref_v - 3 * src_stride_v - 3 * bytesperpixel,
533  288, src_stride_v,
534  refbw_m1 + 8, refbh_m1 + 8,
535  x - 3, y - 3, w, h);
536  ref_v = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
537  smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
538  } else {
539  smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
540  smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
541  }
542  }
543 }
544 
/*
 * Scaled instantiation of vp9_mc_template.c.
 *
 * The hooks now forward to mc_luma_scaled()/mc_chroma_scaled() and pass all
 * of px, py, pw, ph through. "s->dsp.s##mc" pastes an "s" onto the first
 * token of the caller's mc argument, and "s->dsp.mc" is again followed by the
 * argument itself, so both a scaled and an unscaled function expression are
 * handed over. The scale and step tables are looked up with b->ref[i]; "s",
 * "b" and "bytesperpixel" must be in scope where the macro is expanded
 * (presumably provided by the template - confirm there).
 */
545 #define mc_luma_dir(td, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
546  px, py, pw, ph, bw, bh, w, h, i) \
547  mc_luma_scaled(td, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
548  mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
549  s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
550 #define mc_chroma_dir(td, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
551  row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
552  mc_chroma_scaled(td, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
553  row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
554  s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
555 #define SCALED 1
/* First pass: names get the _scaled_8bpp suffix, one byte per pixel. */
556 #define FN(x) x##_scaled_8bpp
557 #define BYTES_PER_PIXEL 1
558 #include "vp9_mc_template.c"
559 #undef FN
560 #undef BYTES_PER_PIXEL
/* Second pass: names get the _scaled_16bpp suffix, two bytes per pixel. */
561 #define FN(x) x##_scaled_16bpp
562 #define BYTES_PER_PIXEL 2
563 #include "vp9_mc_template.c"
/* Clean up every helper macro used to drive the template. */
564 #undef mc_luma_dir
565 #undef mc_chroma_dir
566 #undef FN
567 #undef BYTES_PER_PIXEL
568 #undef SCALED
569 
570 static av_always_inline void inter_recon(VP9TileData *td, int bytesperpixel)
571 {
572  const VP9Context *s = td->s;
573  VP9Block *b = td->b;
574  int row = td->row, col = td->col;
575 
576  if (s->mvscale[b->ref[0]][0] == REF_INVALID_SCALE ||
577  (b->comp && s->mvscale[b->ref[1]][0] == REF_INVALID_SCALE)) {
578  if (!s->td->error_info) {
579  s->td->error_info = AVERROR_INVALIDDATA;
580  av_log(NULL, AV_LOG_ERROR, "Bitstream not supported, "
581  "reference frame has invalid dimensions\n");
582  }
583  return;
584  }
585 
586  if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
587  if (bytesperpixel == 1) {
588  inter_pred_scaled_8bpp(td);
589  } else {
590  inter_pred_scaled_16bpp(td);
591  }
592  } else {
593  if (bytesperpixel == 1) {
594  inter_pred_8bpp(td);
595  } else {
596  inter_pred_16bpp(td);
597  }
598  }
599 
600  if (!b->skip) {
601  /* mostly copied intra_recon() */
602 
603  int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
604  int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
605  int end_x = FFMIN(2 * (s->cols - col), w4);
606  int end_y = FFMIN(2 * (s->rows - row), h4);
607  int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
608  int uvstep1d = 1 << b->uvtx, p;
609  uint8_t *dst = td->dst[0];
610 
611  // y itxfm add
612  for (n = 0, y = 0; y < end_y; y += step1d) {
613  uint8_t *ptr = dst;
614  for (x = 0; x < end_x; x += step1d,
615  ptr += 4 * step1d * bytesperpixel, n += step) {
616  int eob = b->tx > TX_8X8 ? AV_RN16A(&td->eob[n]) : td->eob[n];
617 
618  if (eob)
619  s->dsp.itxfm_add[tx][DCT_DCT](ptr, td->y_stride,
620  td->block + 16 * n * bytesperpixel, eob);
621  }
622  dst += 4 * td->y_stride * step1d;
623  }
624 
625  // uv itxfm add
626  end_x >>= s->ss_h;
627  end_y >>= s->ss_v;
628  step = 1 << (b->uvtx * 2);
629  for (p = 0; p < 2; p++) {
630  dst = td->dst[p + 1];
631  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
632  uint8_t *ptr = dst;
633  for (x = 0; x < end_x; x += uvstep1d,
634  ptr += 4 * uvstep1d * bytesperpixel, n += step) {
635  int eob = b->uvtx > TX_8X8 ? AV_RN16A(&td->uveob[p][n]) : td->uveob[p][n];
636 
637  if (eob)
638  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, td->uv_stride,
639  td->uvblock[p] + 16 * n * bytesperpixel, eob);
640  }
641  dst += 4 * uvstep1d * td->uv_stride;
642  }
643  }
644  }
645 }
646 
648 {
649  inter_recon(td, 1);
650 }
651 
653 {
654  inter_recon(td, 2);
655 }
av_clip
#define av_clip
Definition: common.h:100
mem_internal.h
DC_128_PRED
@ DC_128_PRED
Definition: vp9.h:58
mv
static const int8_t mv[256][2]
Definition: 4xm.c:81
VP9TileData::row
int row
Definition: vp9dec.h:171
ph
static int FUNC() ph(CodedBitstreamContext *ctx, RWContext *rw, H266RawPH *current)
Definition: cbs_h266_syntax_template.c:3032
TM_VP8_PRED
@ TM_VP8_PRED
Definition: vp9.h:55
step
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
Definition: rate_distortion.txt:58
w
uint8_t w
Definition: llviddspenc.c:38
DC_PRED
@ DC_PRED
Definition: vp9.h:48
b
#define b
Definition: input.c:41
VERT_LEFT_PRED
@ VERT_LEFT_PRED
Definition: vp9.h:53
VP9TileData::block
int16_t * block
Definition: vp9dec.h:226
inter_recon
static av_always_inline void inter_recon(VP9TileData *td, int bytesperpixel)
Definition: vp9recon.c:570
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
VP9TileData::b
VP9Block * b
Definition: vp9dec.h:174
ff_vp9_intra_recon_16bpp
void ff_vp9_intra_recon_16bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
Definition: vp9recon.c:294
VP9Block
Definition: vp9dec.h:84
mx
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t mx
Definition: dsp.h:53
DC_127_PRED
@ DC_127_PRED
Definition: vp9.h:59
VERT_PRED
@ VERT_PRED
Definition: vp9.h:46
assign_val
#define assign_val(c, i, v)
check_intra_mode
static av_always_inline int check_intra_mode(VP9TileData *td, int mode, uint8_t **a, uint8_t *dst_edge, ptrdiff_t stride_edge, uint8_t *dst_inner, ptrdiff_t stride_inner, uint8_t *l, int col, int x, int w, int row, int y, enum TxfmMode tx, int p, int ss_h, int ss_v, int bytesperpixel)
Definition: vp9recon.c:33
DIAG_DOWN_RIGHT_PRED
@ DIAG_DOWN_RIGHT_PRED
Definition: vp9.h:50
progressframe.h
VP9TileData::col
int col
Definition: vp9dec.h:171
avassert.h
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
HOR_PRED
@ HOR_PRED
Definition: vp9.h:47
mc_chroma_unscaled
static av_always_inline void mc_chroma_unscaled(VP9TileData *td, const vp9_mc_func(*mc)[2], uint8_t *dst_u, uint8_t *dst_v, ptrdiff_t dst_stride, const uint8_t *ref_u, ptrdiff_t src_stride_u, const uint8_t *ref_v, ptrdiff_t src_stride_v, const ProgressFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP9mv *mv, int bw, int bh, int w, int h, int bytesperpixel)
Definition: vp9recon.c:335
VP9mv::y
int16_t y
Definition: vp9shared.h:57
vp9_scaled_mc_func
void(* vp9_scaled_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, int h, int mx, int my, int dx, int dy)
Definition: vp9dsp.h:36
s
#define s(width, name)
Definition: cbs_vp9.c:198
vp9data.h
LEFT_DC_PRED
@ LEFT_DC_PRED
Definition: vp9.h:56
ff_progress_frame_await
the pkt_dts and pkt_pts fields in AVFrame will work as usual Restrictions on codec whose streams don t reset across will not work because their bitstreams cannot be decoded in parallel *The contents of buffers must not be read before ff_progress_frame_await() has been called on them. reget_buffer() and buffer age optimizations no longer work. *The contents of buffers must not be written to after ff_progress_frame_report() has been called on them. This includes draw_edges(). Porting codecs to frame threading
memset_val
#define memset_val(c, val, num)
ff_vp9_inter_recon_16bpp
void ff_vp9_inter_recon_16bpp(VP9TileData *td)
Definition: vp9recon.c:652
my
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t my
Definition: dsp.h:53
assign_bpp
#define assign_bpp(c, i1, v, i2)
NULL
#define NULL
Definition: coverity.c:32
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:156
VP9Context
Definition: vp9dec.h:96
mc_luma_scaled
static av_always_inline void mc_luma_scaled(VP9TileData *td, vp9_scaled_mc_func smc, const vp9_mc_func(*mc)[2], uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, const ProgressFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP9mv *in_mv, int px, int py, int pw, int ph, int bw, int bh, int w, int h, int bytesperpixel, const uint16_t *scale, const uint8_t *step)
Definition: vp9recon.c:407
VP9TileData::uv_stride
ptrdiff_t uv_stride
Definition: vp9dec.h:173
TX_8X8
@ TX_8X8
Definition: vp9.h:29
VP9mv
Definition: vp9shared.h:55
mc_chroma_scaled
static av_always_inline void mc_chroma_scaled(VP9TileData *td, vp9_scaled_mc_func smc, const vp9_mc_func(*mc)[2], uint8_t *dst_u, uint8_t *dst_v, ptrdiff_t dst_stride, const uint8_t *ref_u, ptrdiff_t src_stride_u, const uint8_t *ref_v, ptrdiff_t src_stride_v, const ProgressFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP9mv *in_mv, int px, int py, int pw, int ph, int bw, int bh, int w, int h, int bytesperpixel, const uint16_t *scale, const uint8_t *step)
Definition: vp9recon.c:465
VP9TileData::eob
unsigned eob[4][2][2][6][6][2]
Definition: vp9dec.h:203
vp9_mc_template.c
TxfmMode
TxfmMode
Definition: vp9.h:27
DCT_DCT
@ DCT_DCT
Definition: vp9.h:38
TxfmType
TxfmType
Definition: vp9.h:37
VP9TileData::edge_emu_buffer
uint8_t edge_emu_buffer[135 *144 *2]
Definition: vp9dec.h:207
N_INTRA_PRED_MODES
@ N_INTRA_PRED_MODES
Definition: vp9.h:61
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
REF_INVALID_SCALE
#define REF_INVALID_SCALE
Definition: vp9dec.h:42
VERT_RIGHT_PRED
@ VERT_RIGHT_PRED
Definition: vp9.h:51
ref_frame
static int ref_frame(VVCFrame *dst, const VVCFrame *src)
Definition: dec.c:596
BS_8x8
@ BS_8x8
Definition: vp9shared.h:92
scale_mv
#define scale_mv(n, dim)
TX_4X4
@ TX_4X4
Definition: vp9.h:28
frame.h
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
av_assert2
#define av_assert2(cond)
assert() equivalent that is intended for use in speed-critical code.
Definition: avassert.h:67
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
DC_129_PRED
@ DC_129_PRED
Definition: vp9.h:60
VP9TileData::uveob
uint8_t * uveob[2]
Definition: vp9dec.h:227
ff_vp9_intra_txfm_type
enum TxfmType ff_vp9_intra_txfm_type[14]
Definition: vp9data.c:437
av_always_inline
#define av_always_inline
Definition: attributes.h:49
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
stride
#define stride
Definition: h264pred_template.c:537
VP9TileData::s
const VP9Context * s
Definition: vp9dec.h:168
VP9TileData
Definition: vp9dec.h:167
HOR_UP_PRED
@ HOR_UP_PRED
Definition: vp9.h:54
mode
mode
Definition: ebur128.h:83
ff_vp9_bwh_tab
const uint8_t ff_vp9_bwh_tab[2][N_BS_SIZES][2]
Definition: vp9data.c:25
AV_RN16A
#define AV_RN16A(p)
Definition: intreadwrite.h:518
vp9_mc_func
void(* vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, int h, int mx, int my)
Definition: vp9dsp.h:33
ref
static int ref[MAX_W *MAX_W]
Definition: jpeg2000dwt.c:112
VP9TileData::dst
uint8_t * dst[3]
Definition: vp9dec.h:172
HOR_DOWN_PRED
@ HOR_DOWN_PRED
Definition: vp9.h:52
vp9dec.h
mc_luma_unscaled
static av_always_inline void mc_luma_unscaled(VP9TileData *td, const vp9_mc_func(*mc)[2], uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, const ProgressFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP9mv *mv, int bw, int bh, int w, int h, int bytesperpixel)
Definition: vp9recon.c:299
CUR_FRAME
#define CUR_FRAME
Definition: vp9shared.h:168
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:291
ProgressFrame
The ProgressFrame structure.
Definition: progressframe.h:73
TOP_DC_PRED
@ TOP_DC_PRED
Definition: vp9.h:57
videodsp.h
DIAG_DOWN_LEFT_PRED
@ DIAG_DOWN_LEFT_PRED
Definition: vp9.h:49
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
memset_bpp
#define memset_bpp(c, i1, v, i2, num)
ff_vp9_inter_recon_8bpp
void ff_vp9_inter_recon_8bpp(VP9TileData *td)
Definition: vp9recon.c:647
AVERROR_INVALIDDATA
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:61
VP9TileData::y_stride
ptrdiff_t y_stride
Definition: vp9dec.h:173
h
h
Definition: vp9dsp_template.c:2070
intra_recon
static av_always_inline void intra_recon(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off, int bytesperpixel)
Definition: vp9recon.c:219
VP9TileData::tile_col_start
unsigned tile_col_start
Definition: vp9dec.h:175
VP9mv::x
int16_t x
Definition: vp9shared.h:56
VP9TileData::uvblock
int16_t * uvblock[2]
Definition: vp9dec.h:226
ff_vp9_intra_recon_8bpp
void ff_vp9_intra_recon_8bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
Definition: vp9recon.c:289
mc
#define mc
Definition: vf_colormatrix.c:100