vp8.c
/*
 * VP7/VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Fiona Glaser
 * Copyright (C) 2012 Daniel Kang
 * Copyright (C) 2014 Peter Ross
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/imgutils.h"

#include "avcodec.h"
#include "internal.h"
#include "rectangle.h"
#include "thread.h"
#include "vp8.h"
#include "vp8data.h"

#if ARCH_ARM
#   include "arm/vp8.h"
#endif

#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
#elif CONFIG_VP7_DECODER
#define VPX(vp7, f) vp7_ ## f
#else // CONFIG_VP8_DECODER
#define VPX(vp7, f) vp8_ ## f
#endif
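
/* VPX(vp7, f) dispatches between the vp7_- and vp8_-prefixed variants of a
 * helper: a ternary when both decoders are compiled in, a direct reference
 * otherwise. Callers pass a compile-time constant (IS_VP7/IS_VP8) through
 * always-inlined functions, so the ternary is expected to fold away. */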

static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}

static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, &f->tf);
        return AVERROR(ENOMEM);
    }
    return 0;
}

static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
}

#if CONFIG_VP8_DECODER
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}
#endif /* CONFIG_VP8_DECODER */

static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}

static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}

static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *frame = NULL;
    int i;

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (frame->tf.f->data[0])
        vp8_release_frame(s, frame);

    return frame;
}

static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret;

    if (width  != s->avctx->width || ((width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) && s->macroblocks_base ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
                   FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1;
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        (!s->intra4x4_pred_mode_top && !s->mb_layout))
        return AVERROR(ENOMEM);

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}

static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP7);
}

static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP8);
}

static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}

static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}

static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}
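
/* Partition layout, as read above: for N = 1 << two_bit_code coefficient
 * partitions (N is 1, 2, 4 or 8), the buffer starts with N - 1 little-endian
 * 24-bit byte counts, one per partition except the last, which simply runs to
 * the end of the buffer. E.g. with N = 4, buf begins with 9 size bytes,
 * followed by the four partitions back to back. */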

static void vp7_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int yac_qi  = vp8_rac_get_uint(c, 7);
    int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;

    s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
}

static void vp8_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,              7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
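
/* Fixed-point check of the 155/100 comment above: 101581 / 65536 = 1.550003...,
 * so (x * 101581) >> 16 computes floor(x * 1.55) across the AC lookup range,
 * i.e. the spec's Y2 AC scale factor of 155/100, without a division. */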

/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
 *
 * Intra frames update all 3 references.
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set.
 * If the update (golden|altref) flag is set, it's updated with the current frame
 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *   0: no update
 *   1: VP56_FRAME_PREVIOUS
 *   2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}

static void vp78_reset_probability_tables(VP8Context *s)
{
    int i, j;
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
}

static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, j, k, l, m;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS - 1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }
}

#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19

static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *c = &s->c;
    int i, j;

    if (vp8_rac_get(c))
        for (i = 0; i < 4; i++)
            s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
    if (vp8_rac_get(c))
        for (i = 0; i < 3; i++)
            s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
}

static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}

static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
{
    int i, j;

    for (j = 1; j < 3; j++) {
        for (i = 0; i < height / 2; i++)
            memcpy(dst->data[j] + i * dst->linesize[j],
                   src->data[j] + i * src->linesize[j], width / 2);
    }
}

static void fade(uint8_t *dst, int dst_linesize,
                 const uint8_t *src, int src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int i, j;
    for (j = 0; j < height; j++) {
        for (i = 0; i < width; i++) {
            uint8_t y = src[j * src_linesize + i];
            dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}
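
/* In other words, each luma sample is mapped to clip(y * (256 + beta) / 256 + alpha)
 * (with the gain term truncated by the >> 8): a linear fade with signed 8-bit
 * fixed-point gain beta and bias alpha. */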

static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
{
    int alpha = (int8_t) vp8_rac_get_uint(c, 8);
    int beta  = (int8_t) vp8_rac_get_uint(c, 8);
    int ret;

    if (!s->keyframe && (alpha || beta)) {
        int width  = s->mb_width * 16;
        int height = s->mb_height * 16;
        AVFrame *src, *dst;

        if (!s->framep[VP56_FRAME_PREVIOUS] ||
            !s->framep[VP56_FRAME_GOLDEN]) {
            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
            return AVERROR_INVALIDDATA;
        }

        dst =
        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
                return ret;

            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

            copy_chroma(dst, src, width, height);
        }

        fade(dst->data[0], dst->linesize[0],
             src->data[0], src->linesize[0],
             width, height, alpha, beta);
    }

    return 0;
}

static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->invisible = 0;
    part1_size   = AV_RL24(buf) >> 4;

    if (buf_size < 4 - s->profile + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
        return AVERROR_INVALIDDATA;
    }

    buf      += 4 - s->profile;
    buf_size -= 4 - s->profile;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ff_vp56_init_range_decoder(c, buf, part1_size);
    buf      += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0, sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
        }
    }

    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    s->num_coeff_partitions = 1;
    ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last          = 1;
    s->update_probabilities = 1;
    s->fade_present         = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        if ((ret = vp7_fade_frame(s, c)) < 0)
            return ret;
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c))
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last  = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    return 0;
}

static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->keyframe  = !(buf[0] & 1);
    s->profile   = (buf[0] >> 1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf + 3) & 0x3fff;
        height = AV_RL16(buf + 5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        s->colorspace = vp8_rac_get(c);
        if (s->colorspace)
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    return 0;
}

static av_always_inline
void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
}

/**
 * Motion vector coding, 17.1.
 */
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x  += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2 * bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
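
/* Large-magnitude branch above: bits 0-2 are coded lowest first, then the top
 * bits (up to bit 7 for VP7, bit 9 for VP8) highest first. Bit 3 is special:
 * a "large" magnitude is always >= 8, so if no bit above bit 3 was set, bit 3
 * must be 1 and is not transmitted; otherwise it is read explicitly via
 * p[12]. The sign bit p[1] follows only for nonzero magnitudes. */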

static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 1);
}

static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 0);
}

static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
{
    if (is_vp7)
        return vp7_submv_prob;

    if (left == top)
        return vp8_submv_prob[4 - !!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
}

/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        else
            part_idx = VP8_SPLITMVMODE_8x8;
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}

/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}
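
/* Example of the straddling behaviour this keeps: on a 2-MB-wide frame
 * (vwidth = 3), MB (x=1, y=1) with offset (+1, -1) gives new = 2, which lands
 * in the padding column (new % 3 == 2) and is rejected; offset (+2, -1) gives
 * new = 3, which wraps past the padding to (x=0, y=1) and is accepted, as in
 * the buggy reference decoder. */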
911 
912 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
913 {
914  return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
915 }
916 
static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    VP56mv near_mv[3];
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;
    int i;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred *pred = &vp7_mv_pred[i];
        int edge_x, edge_y;

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                             ? s->macroblocks_base + 1 + edge_x +
                                               (s->mb_width + 1) * (edge_y + 1)
                                             : s->macroblocks + edge_x +
                                               (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            if (mv) {
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                        idx = CNT_NEAREST;
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                            continue;
                        idx = CNT_NEAR;
                    } else {
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                        idx = CNT_NEAR;
                    }
                } else {
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
                    idx = CNT_NEAREST;
                }
            } else {
                idx = CNT_ZERO;
            }
        } else {
            idx = CNT_ZERO;
        }
        cnt[idx] += vp7_mv_pred[i].score;
    }

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {

            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {

                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                else
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                } else {
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                mb->mv = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            }
        } else {
            mb->mv = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)                                      \
    {                                                         \
        VP8Macroblock *edge = mb_edge[n];                     \
        int edge_ref = edge->ref_frame;                       \
        if (edge_ref != VP56_FRAME_CURRENT) {                 \
            uint32_t mv = AV_RN32A(&edge->mv);                \
            if (mv) {                                         \
                if (cur_sign_bias != sign_bias[edge_ref]) {   \
                    /* SWAR negate of the values in mv. */    \
                    mv = ~mv;                                 \
                    mv = ((mv & 0x7fff7fff) +                 \
                          0x00010001) ^ (mv & 0x80008000);    \
                }                                             \
                if (!n || mv != AV_RN32A(&near_mv[idx]))      \
                    AV_WN32A(&near_mv[++idx], mv);            \
                cnt[idx] += 1 + (n != 2);                     \
            } else                                            \
                cnt[CNT_ZERO] += 1 + (n != 2);                \
        }                                                     \
    }
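
/* The SWAR negate flips both packed int16 halves of mv at once: two's
 * complement is ~v + 1 per half, but a plain add of 0x00010001 to ~mv could
 * carry out of the low half into the high half. So the sign bits are masked
 * off before the add and folded back in with XOR (addition without carry).
 * E.g. x = -1, y = +1 packed as 0x0001FFFF (little endian) becomes
 * 0xFFFF0001, i.e. x = +1, y = -1. */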

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                } else {
                    mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y]   = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
    }
}

static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
    VP56RangeCoder *c = &s->c;
    const char *vp7_feature_name[] = { "q-index",
                                       "lf-delta",
                                       "partial-golden-update",
                                       "blit-pitch" };
    if (is_vp7) {
        int i;
        *segment = 0;
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
                    int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                 s->feature_index_prob[i]);
                    av_log(s->avctx, AV_LOG_WARNING,
                           "Feature %s present in macroblock (value 0x%x)\n",
                           vp7_feature_name[i], s->feature_value[i][index]);
                }
            }
        }
    } else if (s->segmentation.update_map) {
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1 + bit]) + 2 * bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame        = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame =
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                                   : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        if (is_vp7)
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        else
            vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}

/**
 * @param r arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else {                                             // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff  = 3 + (8 << cat);
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}

static av_always_inline
int inter_predict_dc(int16_t block[16], int16_t pred[2])
{
    int16_t dc = block[0];
    int ret = 0;

    if (pred[1] > 3) {
        dc += pred[0];
        ret = 1;
    }

    if (!pred[0] | !dc | ((int32_t) pred[0] ^ (int32_t) dc) >> 31) {
        block[0] = pred[0] = dc;
        pred[1] = 0;
    } else {
        if (pred[0] == dc)
            pred[1]++;
        block[0] = pred[0] = dc;
    }

    return ret;
}
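
/* Condition note: "!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31"
 * is a branch-light test for "either value is zero, or their signs differ":
 * the XOR of two ints is negative exactly when their sign bits disagree, and
 * the arithmetic shift by 31 smears that sign bit into all-ones. Whenever the
 * test holds, the DC prediction run length pred[1] is reset. */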

static av_always_inline int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
                                                             int16_t block[16],
                                                             uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                                             int i, uint8_t *token_prob,
                                                             int16_t qmul[2],
                                                             const uint8_t scan[16])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, scan, IS_VP7);
}

#ifndef vp8_decode_block_coeffs_internal
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, zigzag_scan, IS_VP8);
}
#endif
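
/* The #ifndef guard above lets platform headers (e.g. arm/vp8.h, included at
 * the top of this file) substitute an assembly implementation of
 * vp8_decode_block_coeffs_internal by defining the name as a macro. */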

/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @param scan scan pattern (VP7 only)
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                        int i, int zero_nhood, int16_t qmul[2],
                        const uint8_t scan[16], int vp7)
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul, scan)
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul);
}

static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
                      int is_vp7)
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
                                  zigzag_scan, is_vp7);
        l_nnz[8] = t_nnz[8] = !!nnz;

        if (is_vp7 && mb->mode > MODE_I4x4) {
            nnz |= inter_predict_dc(td->block_dc,
                                    s->inter_dc_pred[mb->ref_frame - 1]);
        }

        if (nnz) {
            nnz_total += nnz;
            block_dc   = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx   = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x],
                                      s->prob->token[luma_ctx],
                                      luma_start, nnz_pred,
                                      s->qmat[segment].luma_qmul,
                                      s->prob[0].scan, is_vp7);
            /* nnz+block_dc may be one more than the actual last index,
             * but we don't care */
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
                                          s->prob->token[2], 0, nnz_pred,
                                          s->qmat[segment].chroma_qmul,
                                          s->prob[0].scan, is_vp7);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
1442 
1443 static av_always_inline
1444 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1445  uint8_t *src_cb, uint8_t *src_cr,
1446  int linesize, int uvlinesize, int simple)
1447 {
1448  AV_COPY128(top_border, src_y + 15 * linesize);
1449  if (!simple) {
1450  AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1451  AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1452  }
1453 }
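
/* Each macroblock's saved top border is packed into 32 bytes: 16 luma samples
 * at offset 0, 8 Cb at offset 16, 8 Cr at offset 24. xchg_mb_border below
 * relies on this layout when it reaches into the previous entry
 * (top_border - 32) for the top-left samples. */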

static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
                    uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
                    int mb_y, int mb_width, int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border - 32; // for TL prediction
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a, b, xchg)      \
    do {                      \
        if (xchg)             \
            AV_SWAP64(b, a);  \
        else                  \
            AV_COPY64(b, a);  \
    } while (0)

    XCHG(top_border_m1 + 8, src_y - 8, xchg);
    XCHG(top_border,        src_y,     xchg);
    XCHG(top_border + 8,    src_y + 8, 1);
    if (mb_x < mb_width - 1)
        XCHG(top_border + 32, src_y + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
        XCHG(top_border + 16, src_cb, 1);
        XCHG(top_border + 24, src_cr, 1);
    }
}

static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    else
        return mb_y ? mode : LEFT_DC_PRED8x8;
}

static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x)
        return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
    else
        return mb_y ? mode : HOR_PRED8x8;
}

static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
{
    switch (mode) {
    case DC_PRED8x8:
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    case VERT_PRED8x8:
        return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
    case HOR_PRED8x8:
        return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
    case PLANE_PRED8x8: /* TM */
        return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
    }
    return mode;
}

static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x) {
        return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
    } else {
        return mb_y ? mode : HOR_VP8_PRED;
    }
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
                                     int *copy_buf, int vp7)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
                   * as 16x16/8x8 DC */
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}

static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
{
    int x, y, mode, nnz;
    uint32_t tr;

    /* for the first row, we need to run xchg_mb_border to init the top edge
     * to 127; otherwise, skip it if we aren't going to deblock */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        const uint8_t lo = is_vp7 ? 128 : 127;
        const uint8_t hi = is_vp7 ? 128 : 129;
        uint8_t tr_top[4] = { lo, lo, lo, lo };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *) &tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
                                                        mb_y + y, &copy, is_vp7);
                if (copy) {
                    dst = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = lo;
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                        if (!(mb_x + x)) {
                            copy_dst[3] = hi;
                        } else {
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                        }
                    }
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = hi;
                    } else {
                        copy_dst[11] = ptr[4 * x                   - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
                                                  td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
                                               td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr      += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
                                            mb_x, mb_y, is_vp7);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}

static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
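
/* Reading the table: the index is the subpel phase (the low three fractional
 * mv bits). Odd phases use the short filter (1 extra pixel left/above, 2
 * right/below, 3 in total); nonzero even phases use the full six-tap filter
 * (2 left, 3 right, 5 in total). E.g. a half-pel (phase 4) fetch of a 4-wide
 * block reads 4 + 5 = 9 source pixels per row, which is what the edge
 * emulation below must provide near frame borders. */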

/**
 * luma MC function
 *
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {
        int src_linesize = linesize;

        int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx,
                                     width, height);
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            src_linesize = EDGE_EMU_LINESIZE;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
                      linesize, block_h, 0, 0);
    }
}

/**
 * chroma MC function
 *
 * @param s VP8 decoding context
 * @param dst1 target buffer for block data at block position (U plane)
 * @param dst2 target buffer for block data at block position (V plane)
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
                   uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
                   int x_off, int y_off, int block_w, int block_h,
                   int width, int height, ptrdiff_t linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
        int my = mv->y & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off, int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        /* this block only applies to VP8; it is safe to check
         * only the profile, as VP7 profile <= 1 */
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1;
    y_off   >>= 1;
    bx_off  >>= 1;
    by_off  >>= 1;
    width   >>= 1;
    height  >>= 1;
    block_w >>= 1;
    block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}

/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline
void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                     int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x >> 2) + x_off + 8;
        int my = (mb->mv.y >> 2) + y_off;
        uint8_t **src = s->framep[ref]->tf.f->data;
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
    }
}
1871 
1872 /**
1873  * Apply motion vectors to prediction buffer, chapter 18.
1874  */
1875 static av_always_inline
1877  VP8Macroblock *mb, int mb_x, int mb_y)
1878 {
1879  int x_off = mb_x << 4, y_off = mb_y << 4;
1880  int width = 16 * s->mb_width, height = 16 * s->mb_height;
1881  ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1882  VP56mv *bmv = mb->bmv;
1883 
1884  switch (mb->partitioning) {
1885  case VP8_SPLITMVMODE_NONE:
1886  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1887  0, 0, 16, 16, width, height, &mb->mv);
1888  break;
1889  case VP8_SPLITMVMODE_4x4: {
1890  int x, y;
1891  VP56mv uvmv;
1892 
1893  /* Y */
1894  for (y = 0; y < 4; y++) {
1895  for (x = 0; x < 4; x++) {
1896  vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1897  ref, &bmv[4 * y + x],
1898  4 * x + x_off, 4 * y + y_off, 4, 4,
1899  width, height, s->linesize,
1900  s->put_pixels_tab[2]);
1901  }
1902  }
1903 
1904  /* U/V */
1905  x_off >>= 1;
1906  y_off >>= 1;
1907  width >>= 1;
1908  height >>= 1;
1909  for (y = 0; y < 2; y++) {
1910  for (x = 0; x < 2; x++) {
1911  uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
1912  mb->bmv[2 * y * 4 + 2 * x + 1].x +
1913  mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
1914  mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
1915  uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
1916  mb->bmv[2 * y * 4 + 2 * x + 1].y +
1917  mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
1918  mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
1919  uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
1920  uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
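                    /* Annotation: the sum of the four luma MVs is divided by
                     * 4 with rounding to nearest, ties away from zero
                     * (FF_SIGNBIT is -1 for negative values): a sum of +6
                     * gives (6 + 2) >> 2 = 2, while -6 gives
                     * (-6 + 2 - 1) >> 2 = -2. */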
1921  if (s->profile == 3) {
1922  uvmv.x &= ~7;
1923  uvmv.y &= ~7;
1924  }
1925  vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
1926  dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
1927  &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
1928  width, height, s->uvlinesize,
1929  s->put_pixels_tab[2]);
1930  }
1931  }
1932  break;
1933  }
1934  case VP8_SPLITMVMODE_16x8:
1935  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1936  0, 0, 16, 8, width, height, &bmv[0]);
1937  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1938  0, 8, 16, 8, width, height, &bmv[1]);
1939  break;
1940  case VP8_SPLITMVMODE_8x16:
1941  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1942  0, 0, 8, 16, width, height, &bmv[0]);
1943  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1944  8, 0, 8, 16, width, height, &bmv[1]);
1945  break;
1946  case VP8_SPLITMVMODE_8x8:
1947  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1948  0, 0, 8, 8, width, height, &bmv[0]);
1949  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1950  8, 0, 8, 8, width, height, &bmv[1]);
1951  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1952  0, 8, 8, 8, width, height, &bmv[2]);
1953  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1954  8, 8, 8, 8, width, height, &bmv[3]);
1955  break;
1956  }
1957 }
1958 
1959 static av_always_inline
1960 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
1961 {
1962  int x, y, ch;
1963 
1964  if (mb->mode != MODE_I4x4) {
1965  uint8_t *y_dst = dst[0];
1966  for (y = 0; y < 4; y++) {
1967  uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
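            /* Annotation: nnz4 packs this row's four per-block
             * nonzero-coefficient counts, one per byte. Any bit outside
             * 0x01010101 means some block holds more than a DC coefficient
             * and needs the full IDCT add; otherwise the cheap DC-only
             * path suffices. */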
1968  if (nnz4) {
1969  if (nnz4 & ~0x01010101) {
1970  for (x = 0; x < 4; x++) {
1971  if ((uint8_t) nnz4 == 1)
1972  s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
1973  td->block[y][x],
1974  s->linesize);
1975  else if ((uint8_t) nnz4 > 1)
1976  s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
1977  td->block[y][x],
1978  s->linesize);
1979  nnz4 >>= 8;
1980  if (!nnz4)
1981  break;
1982  }
1983  } else {
1984  s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1985  }
1986  }
1987  y_dst += 4 * s->linesize;
1988  }
1989  }
1990 
1991  for (ch = 0; ch < 2; ch++) {
1992  uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
1993  if (nnz4) {
1994  uint8_t *ch_dst = dst[1 + ch];
1995  if (nnz4 & ~0x01010101) {
1996  for (y = 0; y < 2; y++) {
1997  for (x = 0; x < 2; x++) {
1998  if ((uint8_t) nnz4 == 1)
1999  s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2000  td->block[4 + ch][(y << 1) + x],
2001  s->uvlinesize);
2002  else if ((uint8_t) nnz4 > 1)
2003  s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2004  td->block[4 + ch][(y << 1) + x],
2005  s->uvlinesize);
2006  nnz4 >>= 8;
2007  if (!nnz4)
2008  goto chroma_idct_end;
2009  }
2010  ch_dst += 4 * s->uvlinesize;
2011  }
2012  } else {
2013  s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2014  }
2015  }
2016 chroma_idct_end:
2017  ;
2018  }
2019 }
2020 
2021 static av_always_inline
2022 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2023  VP8FilterStrength *f, int is_vp7)
2024 {
2025  int interior_limit, filter_level;
2026 
2027  if (s->segmentation.enabled) {
2028  filter_level = s->segmentation.filter_level[mb->segment];
2029  if (!s->segmentation.absolute_vals)
2030  filter_level += s->filter.level;
2031  } else
2032  filter_level = s->filter.level;
2033 
2034  if (s->lf_delta.enabled) {
2035  filter_level += s->lf_delta.ref[mb->ref_frame];
2036  filter_level += s->lf_delta.mode[mb->mode];
2037  }
2038 
2039  filter_level = av_clip_uintp2(filter_level, 6);
2040 
2041  interior_limit = filter_level;
2042  if (s->filter.sharpness) {
2043  interior_limit >>= (s->filter.sharpness + 3) >> 2;
2044  interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2045  }
2046  interior_limit = FFMAX(interior_limit, 1);
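    /* Worked example: filter level 32 with sharpness 4 gives
     * 32 >> ((4 + 3) >> 2) = 16, then FFMIN(16, 9 - 4) = 5,
     * so the interior limit ends up at 5. */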
2047 
2048  f->filter_level = filter_level;
2049  f->inner_limit = interior_limit;
2050  f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2051  mb->mode == VP8_MVMODE_SPLIT;
2052 }
2053 
2054 static av_always_inline
2055 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2056  int mb_x, int mb_y, int is_vp7)
2057 {
2058  int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2059  int filter_level = f->filter_level;
2060  int inner_limit = f->inner_limit;
2061  int inner_filter = f->inner_filter;
2062  int linesize = s->linesize;
2063  int uvlinesize = s->uvlinesize;
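    /* Annotation: high-edge-variance thresholds indexed by filter level;
     * row [0] is used on inter frames, row [1] (lower thresholds) on
     * keyframes. */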
2064  static const uint8_t hev_thresh_lut[2][64] = {
2065  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2066  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2067  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2068  3, 3, 3, 3 },
2069  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2070  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2071  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2072  2, 2, 2, 2 }
2073  };
2074 
2075  if (!filter_level)
2076  return;
2077 
2078  if (is_vp7) {
2079  bedge_lim_y = filter_level;
2080  bedge_lim_uv = filter_level * 2;
2081  mbedge_lim = filter_level + 2;
2082  } else {
2083  bedge_lim_y =
2084  bedge_lim_uv = filter_level * 2 + inner_limit;
2085  mbedge_lim = bedge_lim_y + 4;
2086  }
2087 
2088  hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2089 
2090  if (mb_x) {
2091  s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2092  mbedge_lim, inner_limit, hev_thresh);
2093  s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2094  mbedge_lim, inner_limit, hev_thresh);
2095  }
2096 
2097 #define H_LOOP_FILTER_16Y_INNER(cond) \
2098  if (cond && inner_filter) { \
2099  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2100  bedge_lim_y, inner_limit, \
2101  hev_thresh); \
2102  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2103  bedge_lim_y, inner_limit, \
2104  hev_thresh); \
2105  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2106  bedge_lim_y, inner_limit, \
2107  hev_thresh); \
2108  s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2109  uvlinesize, bedge_lim_uv, \
2110  inner_limit, hev_thresh); \
2111  }
2112 
2113  H_LOOP_FILTER_16Y_INNER(!is_vp7)
2114 
2115  if (mb_y) {
2116  s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2117  mbedge_lim, inner_limit, hev_thresh);
2118  s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2119  mbedge_lim, inner_limit, hev_thresh);
2120  }
2121 
2122  if (inner_filter) {
2123  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2124  linesize, bedge_lim_y,
2125  inner_limit, hev_thresh);
2126  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2127  linesize, bedge_lim_y,
2128  inner_limit, hev_thresh);
2129  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2130  linesize, bedge_lim_y,
2131  inner_limit, hev_thresh);
2132  s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2133  dst[2] + 4 * uvlinesize,
2134  uvlinesize, bedge_lim_uv,
2135  inner_limit, hev_thresh);
2136  }
2137 
2138  H_LOOP_FILTER_16Y_INNER(is_vp7)
2139 }
2140 
2141 static av_always_inline
2142 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2143  int mb_x, int mb_y)
2144 {
2145  int mbedge_lim, bedge_lim;
2146  int filter_level = f->filter_level;
2147  int inner_limit = f->inner_limit;
2148  int inner_filter = f->inner_filter;
2149  int linesize = s->linesize;
2150 
2151  if (!filter_level)
2152  return;
2153 
2154  bedge_lim = 2 * filter_level + inner_limit;
2155  mbedge_lim = bedge_lim + 4;
2156 
2157  if (mb_x)
2158  s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2159  if (inner_filter) {
2160  s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2161  s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2162  s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2163  }
2164 
2165  if (mb_y)
2166  s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2167  if (inner_filter) {
2168  s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2169  s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2170  s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2171  }
2172 }
2173 
2174 #define MARGIN (16 << 2)
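/* Annotation: MV clamp bounds are kept in quarter-pel units, so this is a
 * 16-pixel margin around the visible frame that motion vectors may still
 * point into. */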
2175 static av_always_inline
2176 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2177  VP8Frame *prev_frame, int is_vp7)
2178 {
2179  VP8Context *s = avctx->priv_data;
2180  int mb_x, mb_y;
2181 
2182  s->mv_min.y = -MARGIN;
2183  s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2184  for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2185  VP8Macroblock *mb = s->macroblocks_base +
2186  ((s->mb_width + 1) * (mb_y + 1) + 1);
2187  int mb_xy = mb_y * s->mb_width;
2188 
2189  AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2190 
2191  s->mv_min.x = -MARGIN;
2192  s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2193  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2194  if (mb_y == 0)
2195  AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2196  DC_PRED * 0x01010101);
2197  decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2198  prev_frame && prev_frame->seg_map ?
2199  prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2200  s->mv_min.x -= 64;
2201  s->mv_max.x -= 64;
2202  }
2203  s->mv_min.y -= 64;
2204  s->mv_max.y -= 64;
2205  }
2206 }
2207 
2208 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2209  VP8Frame *prev_frame)
2210 {
2211  vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
2212 }
2213 
2214 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2215  VP8Frame *prev_frame)
2216 {
2217  vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
2218 }
2219 
2220 #if HAVE_THREADS
2221 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2222  do { \
2223  int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2224  if (otd->thread_mb_pos < tmp) { \
2225  pthread_mutex_lock(&otd->lock); \
2226  td->wait_mb_pos = tmp; \
2227  do { \
2228  if (otd->thread_mb_pos >= tmp) \
2229  break; \
2230  pthread_cond_wait(&otd->cond, &otd->lock); \
2231  } while (1); \
2232  td->wait_mb_pos = INT_MAX; \
2233  pthread_mutex_unlock(&otd->lock); \
2234  } \
2235  } while (0);
2236 
2237 #define update_pos(td, mb_y, mb_x) \
2238  do { \
2239  int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2240  int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2241  (num_jobs > 1); \
2242  int is_null = !next_td || !prev_td; \
2243  int pos_check = (is_null) ? 1 \
2244  : (next_td != td && \
2245  pos >= next_td->wait_mb_pos) || \
2246  (prev_td != td && \
2247  pos >= prev_td->wait_mb_pos); \
2248  td->thread_mb_pos = pos; \
2249  if (sliced_threading && pos_check) { \
2250  pthread_mutex_lock(&td->lock); \
2251  pthread_cond_broadcast(&td->cond); \
2252  pthread_mutex_unlock(&td->lock); \
2253  } \
2254  } while (0);
2255 #else
2256 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
2257 #define update_pos(td, mb_y, mb_x)
2258 #endif
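/* Annotation: sliced-threading protocol. Each job publishes its decode
 * position, packed as (mb_y << 16) | mb_x, in thread_mb_pos via
 * update_pos(), and check_thread_pos() blocks until the neighbouring job
 * has advanced past the macroblocks the current one depends on. */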
2259 
2260 static av_always_inline void decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2261  int jobnr, int threadnr, int is_vp7)
2262 {
2263  VP8Context *s = avctx->priv_data;
2264  VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2265  int mb_y = td->thread_mb_pos >> 16;
2266  int mb_x, mb_xy = mb_y * s->mb_width;
2267  int num_jobs = s->num_jobs;
2268  VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
2269  VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2270  VP8Macroblock *mb;
2271  uint8_t *dst[3] = {
2272  curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2273  curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2274  curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2275  };
2276  if (mb_y == 0)
2277  prev_td = td;
2278  else
2279  prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2280  if (mb_y == s->mb_height - 1)
2281  next_td = td;
2282  else
2283  next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2284  if (s->mb_layout == 1)
2285  mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2286  else {
2287  // Make sure the previous frame has written its segmentation map
2288  // for this row, if we re-use the same map.
2289  if (prev_frame && s->segmentation.enabled &&
2290  !s->segmentation.update_map)
2291  ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2292  mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2293  memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2294  AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2295  }
2296 
2297  if (!is_vp7 || mb_y == 0)
2298  memset(td->left_nnz, 0, sizeof(td->left_nnz));
2299 
2300  s->mv_min.x = -MARGIN;
2301  s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2302 
2303  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2304  // Wait for the previous thread to decode past (mb_x+2, mb_y-2) for VP7, or (mb_x+1, mb_y-1) for VP8.
2305  if (prev_td != td) {
2306  if (threadnr != 0) {
2307  check_thread_pos(td, prev_td,
2308  mb_x + (is_vp7 ? 2 : 1),
2309  mb_y - (is_vp7 ? 2 : 1));
2310  } else {
2311  check_thread_pos(td, prev_td,
2312  mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2313  mb_y - (is_vp7 ? 2 : 1));
2314  }
2315  }
2316 
2317  s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2318  s->linesize, 4);
2319  s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2320  dst[2] - dst[1], 2);
2321 
2322  if (!s->mb_layout)
2323  decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2324  prev_frame && prev_frame->seg_map ?
2325  prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2326 
2327  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2328 
2329  if (!mb->skip)
2330  decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2331 
2332  if (mb->mode <= MODE_I4x4)
2333  intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2334  else
2335  inter_predict(s, td, dst, mb, mb_x, mb_y);
2336 
2337  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2338 
2339  if (!mb->skip) {
2340  idct_mb(s, td, dst, mb);
2341  } else {
2342  AV_ZERO64(td->left_nnz);
2343  AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2344 
2345  /* Reset the DC block predictors that would have existed
2346  * had the mb carried coefficients */
2347  if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2348  td->left_nnz[8] = 0;
2349  s->top_nnz[mb_x][8] = 0;
2350  }
2351  }
2352 
2353  if (s->deblock_filter)
2354  filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2355 
2356  if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2357  if (s->filter.simple)
2358  backup_mb_border(s->top_border[mb_x + 1], dst[0],
2359  NULL, NULL, s->linesize, 0, 1);
2360  else
2361  backup_mb_border(s->top_border[mb_x + 1], dst[0],
2362  dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2363  }
2364 
2365  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2366 
2367  dst[0] += 16;
2368  dst[1] += 8;
2369  dst[2] += 8;
2370  s->mv_min.x -= 64;
2371  s->mv_max.x -= 64;
2372 
2373  if (mb_x == s->mb_width + 1) {
2374  update_pos(td, mb_y, s->mb_width + 3);
2375  } else {
2376  update_pos(td, mb_y, mb_x);
2377  }
2378  }
2379 }
2380 
2381 static void vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2382  int jobnr, int threadnr)
2383 {
2384  decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
2385 }
2386 
2387 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2388  int jobnr, int threadnr)
2389 {
2390  decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
2391 }
2392 
2393 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2394  int jobnr, int threadnr, int is_vp7)
2395 {
2396  VP8Context *s = avctx->priv_data;
2397  VP8ThreadData *td = &s->thread_data[threadnr];
2398  int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
2399  AVFrame *curframe = s->curframe->tf.f;
2400  VP8Macroblock *mb;
2401  VP8ThreadData *prev_td, *next_td;
2402  uint8_t *dst[3] = {
2403  curframe->data[0] + 16 * mb_y * s->linesize,
2404  curframe->data[1] + 8 * mb_y * s->uvlinesize,
2405  curframe->data[2] + 8 * mb_y * s->uvlinesize
2406  };
2407 
2408  if (s->mb_layout == 1)
2409  mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2410  else
2411  mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2412 
2413  if (mb_y == 0)
2414  prev_td = td;
2415  else
2416  prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2417  if (mb_y == s->mb_height - 1)
2418  next_td = td;
2419  else
2420  next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2421 
2422  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2423  VP8FilterStrength *f = &td->filter_strength[mb_x];
2424  if (prev_td != td)
2425  check_thread_pos(td, prev_td,
2426  (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2427  if (next_td != td)
2428  if (next_td != &s->thread_data[0])
2429  check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
2430 
2431  if (num_jobs == 1) {
2432  if (s->filter.simple)
2433  backup_mb_border(s->top_border[mb_x + 1], dst[0],
2434  NULL, NULL, s->linesize, 0, 1);
2435  else
2436  backup_mb_border(s->top_border[mb_x + 1], dst[0],
2437  dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2438  }
2439 
2440  if (s->filter.simple)
2441  filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2442  else
2443  filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2444  dst[0] += 16;
2445  dst[1] += 8;
2446  dst[2] += 8;
2447 
2448  update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
2449  }
2450 }
2451 
2452 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2453  int jobnr, int threadnr)
2454 {
2455  filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
2456 }
2457 
2458 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2459  int jobnr, int threadnr)
2460 {
2461  filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
2462 }
2463 
2464 static av_always_inline
2465 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2466  int threadnr, int is_vp7)
2467 {
2468  VP8Context *s = avctx->priv_data;
2469  VP8ThreadData *td = &s->thread_data[jobnr];
2470  VP8ThreadData *next_td = NULL, *prev_td = NULL;
2471  VP8Frame *curframe = s->curframe;
2472  int mb_y, num_jobs = s->num_jobs;
2473 
2474  td->thread_nr = threadnr;
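    /* Annotation: rows are interleaved round-robin across jobs, so job n
     * takes rows n, n + num_jobs, n + 2 * num_jobs, ...; check_thread_pos()
     * and update_pos() keep adjacent rows correctly ordered between jobs. */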
2475  for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2476  if (mb_y >= s->mb_height)
2477  break;
2478  td->thread_mb_pos = mb_y << 16;
2479  s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2480  if (s->deblock_filter)
2481  s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2482  update_pos(td, mb_y, INT_MAX & 0xFFFF);
2483 
2484  s->mv_min.y -= 64;
2485  s->mv_max.y -= 64;
2486 
2487  if (avctx->active_thread_type == FF_THREAD_FRAME)
2488  ff_thread_report_progress(&curframe->tf, mb_y, 0);
2489  }
2490 
2491  return 0;
2492 }
2493 
2494 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2495  int jobnr, int threadnr)
2496 {
2497  return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2498 }
2499 
2500 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2501  int jobnr, int threadnr)
2502 {
2503  return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
2504 }
2505 
2506 
2507 static av_always_inline
2508 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2509  AVPacket *avpkt, int is_vp7)
2510 {
2511  VP8Context *s = avctx->priv_data;
2512  int ret, i, referenced, num_jobs;
2513  enum AVDiscard skip_thresh;
2514  VP8Frame *av_uninit(curframe), *prev_frame;
2515 
2516  if (is_vp7)
2517  ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2518  else
2519  ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2520 
2521  if (ret < 0)
2522  goto err;
2523 
2524  prev_frame = s->framep[VP56_FRAME_CURRENT];
2525 
2526  referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2527  s->update_altref == VP56_FRAME_CURRENT;
2528 
2529  skip_thresh = !referenced ? AVDISCARD_NONREF
2530  : !s->keyframe ? AVDISCARD_NONKEY
2531  : AVDISCARD_ALL;
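    /* Annotation: a frame that nothing references may be dropped at
     * AVDISCARD_NONREF, a referenced interframe at AVDISCARD_NONKEY, and a
     * keyframe only at AVDISCARD_ALL. */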
2532 
2533  if (avctx->skip_frame >= skip_thresh) {
2534  s->invisible = 1;
2535  memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2536  goto skip_decode;
2537  }
2538  s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2539 
2540  // release no longer referenced frames
2541  for (i = 0; i < 5; i++)
2542  if (s->frames[i].tf.f->data[0] &&
2543  &s->frames[i] != prev_frame &&
2544  &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2545  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2546  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2547  vp8_release_frame(s, &s->frames[i]);
2548 
2549  curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2550 
2551  if (!s->colorspace)
2552  avctx->colorspace = AVCOL_SPC_BT470BG;
2553  if (s->fullrange)
2554  avctx->color_range = AVCOL_RANGE_JPEG;
2555  else
2556  avctx->color_range = AVCOL_RANGE_MPEG;
2557 
2558  /* Given that arithmetic probabilities are updated on every frame, the
2559  * values we have on a random interframe are quite likely complete junk
2560  * if we didn't start decoding at a keyframe. So rather than displaying
2561  * junk, don't display anything. */
2562  if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2563  !s->framep[VP56_FRAME_GOLDEN] ||
2564  !s->framep[VP56_FRAME_GOLDEN2])) {
2565  av_log(avctx, AV_LOG_WARNING,
2566  "Discarding interframe without a prior keyframe!\n");
2567  ret = AVERROR_INVALIDDATA;
2568  goto err;
2569  }
2570 
2571  curframe->tf.f->key_frame = s->keyframe;
2572  curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2573  : AV_PICTURE_TYPE_P;
2574  if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2575  goto err;
2576 
2577  // check if golden and altref are swapped
2578  if (s->update_altref != VP56_FRAME_NONE)
2579  s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2580  else
2581  s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2582 
2583  if (s->update_golden != VP56_FRAME_NONE)
2584  s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2585  else
2586  s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2587 
2588  if (s->update_last)
2589  s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2590  else
2591  s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2592 
2593  s->next_framep[VP56_FRAME_CURRENT] = curframe;
2594 
2595  if (avctx->codec->update_thread_context)
2596  ff_thread_finish_setup(avctx);
2597 
2598  s->linesize = curframe->tf.f->linesize[0];
2599  s->uvlinesize = curframe->tf.f->linesize[1];
2600 
2601  memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2602  /* Zero macroblock structures for top/top-left prediction
2603  * from outside the frame. */
2604  if (!s->mb_layout)
2605  memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2606  (s->mb_width + 1) * sizeof(*s->macroblocks));
2607  if (!s->mb_layout && s->keyframe)
2608  memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2609 
2610  memset(s->ref_count, 0, sizeof(s->ref_count));
2611 
2612  if (s->mb_layout == 1) {
2613  // Make sure the previous frame has written its segmentation map,
2614  // if we re-use the same map.
2615  if (prev_frame && s->segmentation.enabled &&
2616  !s->segmentation.update_map)
2617  ff_thread_await_progress(&prev_frame->tf, 1, 0);
2618  if (is_vp7)
2619  vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2620  else
2621  vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2622  }
2623 
2624  if (avctx->active_thread_type == FF_THREAD_FRAME)
2625  num_jobs = 1;
2626  else
2627  num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
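    /* Annotation: frame threading decodes a whole frame per thread, so a
     * single job here; slice threading runs up to one job per coefficient
     * partition, synchronized through check_thread_pos()/update_pos(). */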
2628  s->num_jobs = num_jobs;
2629  s->curframe = curframe;
2630  s->prev_frame = prev_frame;
2631  s->mv_min.y = -MARGIN;
2632  s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2633  for (i = 0; i < MAX_THREADS; i++) {
2634  s->thread_data[i].thread_mb_pos = 0;
2635  s->thread_data[i].wait_mb_pos = INT_MAX;
2636  }
2637  if (is_vp7)
2638  avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2639  num_jobs);
2640  else
2641  avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2642  num_jobs);
2643 
2644  ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2645  memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2646 
2647 skip_decode:
2648  // if future frames don't use the updated probabilities,
2649  // reset them to the values we saved
2650  if (!s->update_probabilities)
2651  s->prob[0] = s->prob[1];
2652 
2653  if (!s->invisible) {
2654  if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2655  return ret;
2656  *got_frame = 1;
2657  }
2658 
2659  return avpkt->size;
2660 err:
2661  memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2662  return ret;
2663 }
2664 
2665 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2666  AVPacket *avpkt)
2667 {
2668  return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2669 }
2670 
2671 #if CONFIG_VP7_DECODER
2672 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2673  AVPacket *avpkt)
2674 {
2675  return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2676 }
2677 #endif /* CONFIG_VP7_DECODER */
2678 
2679 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2680 {
2681  VP8Context *s = avctx->priv_data;
2682  int i;
2683 
2684  vp8_decode_flush_impl(avctx, 1);
2685  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2686  av_frame_free(&s->frames[i].tf.f);
2687 
2688  return 0;
2689 }
2690 
2691 static av_cold int vp8_init_frames(VP8Context *s)
2692 {
2693  int i;
2694  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2695  s->frames[i].tf.f = av_frame_alloc();
2696  if (!s->frames[i].tf.f)
2697  return AVERROR(ENOMEM);
2698  }
2699  return 0;
2700 }
2701 
2702 static av_always_inline
2703 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2704 {
2705  VP8Context *s = avctx->priv_data;
2706  int ret;
2707 
2708  s->avctx = avctx;
2709  s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
2710  avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2711  avctx->internal->allocate_progress = 1;
2712 
2713  ff_videodsp_init(&s->vdsp, 8);
2714 
2715  ff_vp78dsp_init(&s->vp8dsp);
2716  if (CONFIG_VP7_DECODER && is_vp7) {
2717  ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2718  ff_vp7dsp_init(&s->vp8dsp);
2719  s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2720  s->filter_mb_row = vp7_filter_mb_row;
2721  } else if (CONFIG_VP8_DECODER && !is_vp7) {
2722  ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2723  ff_vp8dsp_init(&s->vp8dsp);
2724  s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2725  s->filter_mb_row = vp8_filter_mb_row;
2726  }
2727 
2728  /* does not change for VP8 */
2729  memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
2730 
2731  if ((ret = vp8_init_frames(s)) < 0) {
2732  ff_vp8_decode_free(avctx);
2733  return ret;
2734  }
2735 
2736  return 0;
2737 }
2738 
2739 #if CONFIG_VP7_DECODER
2740 static int vp7_decode_init(AVCodecContext *avctx)
2741 {
2742  return vp78_decode_init(avctx, IS_VP7);
2743 }
2744 #endif /* CONFIG_VP7_DECODER */
2745 
2746 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2747 {
2748  return vp78_decode_init(avctx, IS_VP8);
2749 }
2750 
2751 #if CONFIG_VP8_DECODER
2752 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2753 {
2754  VP8Context *s = avctx->priv_data;
2755  int ret;
2756 
2757  s->avctx = avctx;
2758 
2759  if ((ret = vp8_init_frames(s)) < 0) {
2760  ff_vp8_decode_free(avctx);
2761  return ret;
2762  }
2763 
2764  return 0;
2765 }
2766 
2767 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
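/* Annotation: translates a frame pointer from the source context's frames[]
 * array to the corresponding slot in this context's array. */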
2768 
2769 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2770  const AVCodecContext *src)
2771 {
2772  VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2773  int i;
2774 
2775  if (s->macroblocks_base &&
2776  (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2777  free_buffers(s);
2778  s->mb_width = s_src->mb_width;
2779  s->mb_height = s_src->mb_height;
2780  }
2781 
2782  s->prob[0] = s_src->prob[!s_src->update_probabilities];
2783  s->segmentation = s_src->segmentation;
2784  s->lf_delta = s_src->lf_delta;
2785  memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2786 
2787  for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2788  if (s_src->frames[i].tf.f->data[0]) {
2789  int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2790  if (ret < 0)
2791  return ret;
2792  }
2793  }
2794 
2795  s->framep[0] = REBASE(s_src->next_framep[0]);
2796  s->framep[1] = REBASE(s_src->next_framep[1]);
2797  s->framep[2] = REBASE(s_src->next_framep[2]);
2798  s->framep[3] = REBASE(s_src->next_framep[3]);
2799 
2800  return 0;
2801 }
2802 #endif /* CONFIG_VP8_DECODER */
2803 
2804 #if CONFIG_VP7_DECODER
2805 AVCodec ff_vp7_decoder = {
2806  .name = "vp7",
2807  .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2808  .type = AVMEDIA_TYPE_VIDEO,
2809  .id = AV_CODEC_ID_VP7,
2810  .priv_data_size = sizeof(VP8Context),
2811  .init = vp7_decode_init,
2812  .close = ff_vp8_decode_free,
2813  .decode = vp7_decode_frame,
2814  .capabilities = CODEC_CAP_DR1,
2815  .flush = vp8_decode_flush,
2816 };
2817 #endif /* CONFIG_VP7_DECODER */
2818 
2819 #if CONFIG_VP8_DECODER
2820 AVCodec ff_vp8_decoder = {
2821  .name = "vp8",
2822  .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2823  .type = AVMEDIA_TYPE_VIDEO,
2824  .id = AV_CODEC_ID_VP8,
2825  .priv_data_size = sizeof(VP8Context),
2826  .init = ff_vp8_decode_init,
2827  .close = ff_vp8_decode_free,
2828  .decode = ff_vp8_decode_frame,
2829  .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2830  .flush = vp8_decode_flush,
2831  .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2832  .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2833 };
2834 #endif /* CONFIG_VP8_DECODER */