24 #include "config_components.h"
/* 24-bit VP9 frame sync code: the bytes 0x49 0x83 0x42 that must appear in
 * the uncompressed header of keyframes/intra-only frames (per the VP9
 * bitstream specification's frame_sync_code). */
48 #define VP9_SYNCCODE 0x498342
67 for (
i = 0;
i < n;
i++)
105 f->segmentation_map =
NULL;
117 sz = 64 *
s->sb_cols *
s->sb_rows;
118 if (sz !=
s->frame_extradata_pool_size) {
122 if (!
s->frame_extradata_pool) {
123 s->frame_extradata_pool_size = 0;
127 s->frame_extradata_pool_size = sz;
135 f->segmentation_map =
f->extradata;
164 src->hwaccel_picture_private);
/* Upper bound on the number of hwaccel pixel formats this decoder can offer
 * in one ff_get_format() call: one slot per enabled hwaccel, except D3D11VA
 * which contributes two formats (hence the "* 2"). Disabled hwaccels
 * contribute 0 via their CONFIG_* macro, so the bound is exact for any
 * build configuration. */
171 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
172 CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
173 CONFIG_VP9_NVDEC_HWACCEL + \
174 CONFIG_VP9_VAAPI_HWACCEL + \
175 CONFIG_VP9_VDPAU_HWACCEL + \
176 CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)
180 int bytesperpixel =
s->bytesperpixel,
ret, cols, rows;
185 if (!(
s->pix_fmt ==
s->gf_fmt &&
w ==
s->w &&
h ==
s->h)) {
189 switch (
s->pix_fmt) {
192 #if CONFIG_VP9_DXVA2_HWACCEL
195 #if CONFIG_VP9_D3D11VA_HWACCEL
199 #if CONFIG_VP9_NVDEC_HWACCEL
202 #if CONFIG_VP9_VAAPI_HWACCEL
205 #if CONFIG_VP9_VDPAU_HWACCEL
208 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
213 #if CONFIG_VP9_NVDEC_HWACCEL
216 #if CONFIG_VP9_VAAPI_HWACCEL
219 #if CONFIG_VP9_VDPAU_HWACCEL
226 #if CONFIG_VP9_VAAPI_HWACCEL
233 #if CONFIG_VP9_VAAPI_HWACCEL
239 *fmtp++ =
s->pix_fmt;
247 s->gf_fmt =
s->pix_fmt;
255 if (
s->intra_pred_data[0] && cols ==
s->cols && rows ==
s->rows &&
s->pix_fmt ==
s->last_fmt)
258 s->last_fmt =
s->pix_fmt;
259 s->sb_cols = (
w + 63) >> 6;
260 s->sb_rows = (
h + 63) >> 6;
261 s->cols = (
w + 7) >> 3;
262 s->rows = (
h + 7) >> 3;
/* Carve a sub-buffer out of the single arena allocated at 'p': point 'var'
 * (cast to 'type') at the current position, then advance 'p' by
 * s->sb_cols * n elements. NOTE(review): this expands to TWO statements and
 * is not wrapped in do { } while (0), so it must never be used as the body
 * of an unbraced if/for — all visible call sites use it as a plain
 * statement, where this is safe. */
265 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
269 p =
av_malloc(
s->sb_cols * (128 + 192 * bytesperpixel +
270 lflvl_len *
sizeof(*
s->lflvl) + 16 *
sizeof(*
s->above_mv_ctx)));
273 assign(
s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
274 assign(
s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
275 assign(
s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
276 assign(
s->above_y_nnz_ctx, uint8_t *, 16);
277 assign(
s->above_mode_ctx, uint8_t *, 16);
279 assign(
s->above_uv_nnz_ctx[0], uint8_t *, 16);
280 assign(
s->above_uv_nnz_ctx[1], uint8_t *, 16);
281 assign(
s->above_partition_ctx, uint8_t *, 8);
282 assign(
s->above_skip_ctx, uint8_t *, 8);
283 assign(
s->above_txfm_ctx, uint8_t *, 8);
284 assign(
s->above_segpred_ctx, uint8_t *, 8);
285 assign(
s->above_intra_ctx, uint8_t *, 8);
286 assign(
s->above_comp_ctx, uint8_t *, 8);
287 assign(
s->above_ref_ctx, uint8_t *, 8);
288 assign(
s->above_filter_ctx, uint8_t *, 8);
293 for (
i = 0;
i <
s->active_tile_cols;
i++)
297 if (
s->s.h.bpp !=
s->last_bpp) {
300 s->last_bpp =
s->s.h.bpp;
310 int chroma_blocks, chroma_eobs, bytesperpixel =
s->bytesperpixel;
313 if (
td->b_base &&
td->block_base &&
s->block_alloc_using_2pass ==
s->s.frames[
CUR_FRAME].uses_2pass)
317 chroma_blocks = 64 * 64 >> (
s->ss_h +
s->ss_v);
318 chroma_eobs = 16 * 16 >> (
s->ss_h +
s->ss_v);
320 int sbs =
s->sb_cols *
s->sb_rows;
323 td->block_base =
av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
324 16 * 16 + 2 * chroma_eobs) * sbs);
325 if (!
td->b_base || !
td->block_base)
327 td->uvblock_base[0] =
td->block_base + sbs * 64 * 64 * bytesperpixel;
328 td->uvblock_base[1] =
td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
329 td->eob_base = (uint8_t *) (
td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
330 td->uveob_base[0] =
td->eob_base + 16 * 16 * sbs;
331 td->uveob_base[1] =
td->uveob_base[0] + chroma_eobs * sbs;
335 if (!
td->block_structure)
339 for (
i = 1;
i <
s->active_tile_cols;
i++)
342 for (
i = 0;
i <
s->active_tile_cols;
i++) {
344 s->td[
i].block_base =
av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
345 16 * 16 + 2 * chroma_eobs);
346 if (!
s->td[
i].b_base || !
s->td[
i].block_base)
348 s->td[
i].uvblock_base[0] =
s->td[
i].block_base + 64 * 64 * bytesperpixel;
349 s->td[
i].uvblock_base[1] =
s->td[
i].uvblock_base[0] + chroma_blocks * bytesperpixel;
350 s->td[
i].eob_base = (uint8_t *) (
s->td[
i].uvblock_base[1] + chroma_blocks * bytesperpixel);
351 s->td[
i].uveob_base[0] =
s->td[
i].eob_base + 16 * 16;
352 s->td[
i].uveob_base[1] =
s->td[
i].uveob_base[0] + chroma_eobs;
356 if (!
s->td[
i].block_structure)
361 s->block_alloc_using_2pass =
s->s.frames[
CUR_FRAME].uses_2pass;
378 return m - ((v + 1) >> 1);
385 static const uint8_t inv_map_table[255] = {
386 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
387 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
388 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
389 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
390 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
391 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
392 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
393 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
394 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
395 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
396 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
397 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
398 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
399 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
400 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
401 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
402 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
403 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
451 s->s.h.bpp = 8 +
bits * 2;
452 s->bytesperpixel = (7 +
s->s.h.bpp) >> 3;
458 s->ss_h =
s->ss_v = 0;
472 static const enum AVPixelFormat pix_fmt_for_ss[3][2 ][2 ] = {
484 s->pix_fmt = pix_fmt_for_ss[
bits][
s->ss_v][
s->ss_h];
495 s->ss_h =
s->ss_v = 1;
496 s->pix_fmt = pix_fmt_for_ss[
bits][1][1];
507 int c,
i, j, k, l, m, n,
w,
h,
max, size2,
ret, sharp;
509 const uint8_t *data2;
533 s->last_keyframe =
s->s.h.keyframe;
536 last_invisible =
s->s.h.invisible;
539 s->s.h.use_last_frame_mvs = !
s->s.h.errorres && !last_invisible;
541 if (
s->s.h.keyframe) {
549 s->s.h.refreshrefmask = 0xff;
555 s->s.h.intraonly =
s->s.h.invisible ?
get_bits1(&
s->gb) : 0;
556 s->s.h.resetctx =
s->s.h.errorres ? 0 :
get_bits(&
s->gb, 2);
557 if (
s->s.h.intraonly) {
566 s->ss_h =
s->ss_v = 1;
569 s->bytesperpixel = 1;
582 s->s.h.signbias[0] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
584 s->s.h.signbias[1] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
586 s->s.h.signbias[2] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
587 if (!
s->s.refs[
s->s.h.refidx[0]].f->buf[0] ||
588 !
s->s.refs[
s->s.h.refidx[1]].f->buf[0] ||
589 !
s->s.refs[
s->s.h.refidx[2]].f->buf[0]) {
594 w =
s->s.refs[
s->s.h.refidx[0]].f->width;
595 h =
s->s.refs[
s->s.h.refidx[0]].f->height;
597 w =
s->s.refs[
s->s.h.refidx[1]].f->width;
598 h =
s->s.refs[
s->s.h.refidx[1]].f->height;
600 w =
s->s.refs[
s->s.h.refidx[2]].f->width;
601 h =
s->s.refs[
s->s.h.refidx[2]].f->height;
609 s->s.h.use_last_frame_mvs &=
s->s.frames[
CUR_FRAME].tf.f->width ==
w &&
616 s->s.h.allowcompinter =
s->s.h.signbias[0] !=
s->s.h.signbias[1] ||
617 s->s.h.signbias[0] !=
s->s.h.signbias[2];
618 if (
s->s.h.allowcompinter) {
619 if (
s->s.h.signbias[0] ==
s->s.h.signbias[1]) {
620 s->s.h.fixcompref = 2;
621 s->s.h.varcompref[0] = 0;
622 s->s.h.varcompref[1] = 1;
623 }
else if (
s->s.h.signbias[0] ==
s->s.h.signbias[2]) {
624 s->s.h.fixcompref = 1;
625 s->s.h.varcompref[0] = 0;
626 s->s.h.varcompref[1] = 2;
628 s->s.h.fixcompref = 0;
629 s->s.h.varcompref[0] = 1;
630 s->s.h.varcompref[1] = 2;
635 s->s.h.refreshctx =
s->s.h.errorres ? 0 :
get_bits1(&
s->gb);
636 s->s.h.parallelmode =
s->s.h.errorres ? 1 :
get_bits1(&
s->gb);
638 if (
s->s.h.keyframe ||
s->s.h.intraonly)
639 s->s.h.framectxid = 0;
642 if (
s->s.h.keyframe ||
s->s.h.errorres ||
s->s.h.intraonly) {
644 s->s.h.lf_delta.ref[0] = 1;
645 s->s.h.lf_delta.ref[1] = 0;
646 s->s.h.lf_delta.ref[2] = -1;
647 s->s.h.lf_delta.ref[3] = -1;
648 s->s.h.lf_delta.mode[0] = 0;
649 s->s.h.lf_delta.mode[1] = 0;
650 memset(
s->s.h.segmentation.feat, 0,
sizeof(
s->s.h.segmentation.feat));
656 if (
s->s.h.filter.sharpness != sharp) {
657 for (
i = 1;
i <= 63;
i++) {
661 limit >>= (sharp + 3) >> 2;
666 s->filter_lut.lim_lut[
i] =
limit;
667 s->filter_lut.mblim_lut[
i] = 2 * (
i + 2) +
limit;
670 s->s.h.filter.sharpness = sharp;
671 if ((
s->s.h.lf_delta.enabled =
get_bits1(&
s->gb))) {
672 if ((
s->s.h.lf_delta.updated =
get_bits1(&
s->gb))) {
673 for (
i = 0;
i < 4;
i++)
676 for (
i = 0;
i < 2;
i++)
687 s->s.h.lossless =
s->s.h.yac_qi == 0 &&
s->s.h.ydc_qdelta == 0 &&
688 s->s.h.uvdc_qdelta == 0 &&
s->s.h.uvac_qdelta == 0;
693 if ((
s->s.h.segmentation.enabled =
get_bits1(&
s->gb))) {
694 if ((
s->s.h.segmentation.update_map =
get_bits1(&
s->gb))) {
695 for (
i = 0;
i < 7;
i++)
698 if ((
s->s.h.segmentation.temporal =
get_bits1(&
s->gb)))
699 for (
i = 0;
i < 3;
i++)
705 s->s.h.segmentation.absolute_vals =
get_bits1(&
s->gb);
706 for (
i = 0;
i < 8;
i++) {
707 if ((
s->s.h.segmentation.feat[
i].q_enabled =
get_bits1(&
s->gb)))
709 if ((
s->s.h.segmentation.feat[
i].lf_enabled =
get_bits1(&
s->gb)))
711 if ((
s->s.h.segmentation.feat[
i].ref_enabled =
get_bits1(&
s->gb)))
712 s->s.h.segmentation.feat[
i].ref_val =
get_bits(&
s->gb, 2);
713 s->s.h.segmentation.feat[
i].skip_enabled =
get_bits1(&
s->gb);
719 for (
i = 0;
i < (
s->s.h.segmentation.enabled ? 8 : 1);
i++) {
720 int qyac, qydc, quvac, quvdc, lflvl, sh;
722 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].q_enabled) {
723 if (
s->s.h.segmentation.absolute_vals)
728 qyac =
s->s.h.yac_qi;
740 sh =
s->s.h.filter.level >= 32;
741 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].lf_enabled) {
742 if (
s->s.h.segmentation.absolute_vals)
745 lflvl =
av_clip_uintp2(
s->s.h.filter.level +
s->s.h.segmentation.feat[
i].lf_val, 6);
747 lflvl =
s->s.h.filter.level;
749 if (
s->s.h.lf_delta.enabled) {
750 s->s.h.segmentation.feat[
i].lflvl[0][0] =
751 s->s.h.segmentation.feat[
i].lflvl[0][1] =
753 for (j = 1; j < 4; j++) {
754 s->s.h.segmentation.feat[
i].lflvl[j][0] =
756 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
757 s->s.h.segmentation.feat[
i].lflvl[j][1] =
759 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
762 memset(
s->s.h.segmentation.feat[
i].lflvl, lflvl,
763 sizeof(
s->s.h.segmentation.feat[
i].lflvl));
773 for (
s->s.h.tiling.log2_tile_cols = 0;
774 s->sb_cols > (64 <<
s->s.h.tiling.log2_tile_cols);
775 s->s.h.tiling.log2_tile_cols++) ;
776 for (
max = 0; (
s->sb_cols >>
max) >= 4;
max++) ;
778 while (
max >
s->s.h.tiling.log2_tile_cols) {
780 s->s.h.tiling.log2_tile_cols++;
785 s->s.h.tiling.tile_rows = 1 <<
s->s.h.tiling.log2_tile_rows;
786 if (
s->s.h.tiling.tile_cols != (1 <<
s->s.h.tiling.log2_tile_cols)) {
791 for (
i = 0;
i <
s->active_tile_cols;
i++)
796 s->s.h.tiling.tile_cols = 1 <<
s->s.h.tiling.log2_tile_cols;
798 s->s.h.tiling.tile_cols : 1;
803 n_range_coders =
s->s.h.tiling.tile_cols;
810 for (
i = 0;
i <
s->active_tile_cols;
i++) {
813 rc += n_range_coders;
818 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
819 int valid_ref_frame = 0;
820 for (
i = 0;
i < 3;
i++) {
822 int refw =
ref->width, refh =
ref->height;
826 "Ref pixfmt (%s) did not match current frame (%s)",
830 }
else if (refw ==
w && refh ==
h) {
831 s->mvscale[
i][0] =
s->mvscale[
i][1] = 0;
835 if (
w * 2 < refw ||
h * 2 < refh ||
w > 16 * refw ||
h > 16 * refh) {
837 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
842 s->mvscale[
i][0] = (refw << 14) /
w;
843 s->mvscale[
i][1] = (refh << 14) /
h;
844 s->mvstep[
i][0] = 16 *
s->mvscale[
i][0] >> 14;
845 s->mvstep[
i][1] = 16 *
s->mvscale[
i][1] >> 14;
849 if (!valid_ref_frame) {
850 av_log(avctx,
AV_LOG_ERROR,
"No valid reference frame is found, bitstream not supported\n");
855 if (
s->s.h.keyframe ||
s->s.h.errorres || (
s->s.h.intraonly &&
s->s.h.resetctx == 3)) {
856 s->prob_ctx[0].p =
s->prob_ctx[1].p =
s->prob_ctx[2].p =
866 }
else if (
s->s.h.intraonly &&
s->s.h.resetctx == 2) {
873 s->s.h.compressed_header_size = size2 =
get_bits(&
s->gb, 16);
877 if (size2 >
size - (data2 -
data)) {
890 for (
i = 0;
i <
s->active_tile_cols;
i++) {
891 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
892 memset(
s->td[
i].counts.coef, 0,
sizeof(
s->td[0].counts.coef));
893 memset(
s->td[
i].counts.eob, 0,
sizeof(
s->td[0].counts.eob));
895 memset(&
s->td[
i].counts, 0,
sizeof(
s->td[0].counts));
897 s->td[
i].nb_block_structure = 0;
903 s->prob.p =
s->prob_ctx[
c].p;
906 if (
s->s.h.lossless) {
910 if (
s->s.h.txfmmode == 3)
914 for (
i = 0;
i < 2;
i++)
917 for (
i = 0;
i < 2;
i++)
918 for (j = 0; j < 2; j++)
920 s->prob.p.tx16p[
i][j] =
922 for (
i = 0;
i < 2;
i++)
923 for (j = 0; j < 3; j++)
925 s->prob.p.tx32p[
i][j] =
931 for (
i = 0;
i < 4;
i++) {
932 uint8_t (*
ref)[2][6][6][3] =
s->prob_ctx[
c].coef[
i];
934 for (j = 0; j < 2; j++)
935 for (k = 0; k < 2; k++)
936 for (l = 0; l < 6; l++)
937 for (m = 0; m < 6; m++) {
938 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
939 uint8_t *
r =
ref[j][k][l][m];
940 if (m >= 3 && l == 0)
942 for (n = 0; n < 3; n++) {
951 for (j = 0; j < 2; j++)
952 for (k = 0; k < 2; k++)
953 for (l = 0; l < 6; l++)
954 for (m = 0; m < 6; m++) {
955 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
956 uint8_t *
r =
ref[j][k][l][m];
963 if (
s->s.h.txfmmode ==
i)
968 for (
i = 0;
i < 3;
i++)
971 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
972 for (
i = 0;
i < 7;
i++)
973 for (j = 0; j < 3; j++)
975 s->prob.p.mv_mode[
i][j] =
979 for (
i = 0;
i < 4;
i++)
980 for (j = 0; j < 2; j++)
982 s->prob.p.filter[
i][j] =
985 for (
i = 0;
i < 4;
i++)
989 if (
s->s.h.allowcompinter) {
991 if (
s->s.h.comppredmode)
994 for (
i = 0;
i < 5;
i++)
1003 for (
i = 0;
i < 5;
i++) {
1005 s->prob.p.single_ref[
i][0] =
1008 s->prob.p.single_ref[
i][1] =
1014 for (
i = 0;
i < 5;
i++)
1016 s->prob.p.comp_ref[
i] =
1020 for (
i = 0;
i < 4;
i++)
1021 for (j = 0; j < 9; j++)
1023 s->prob.p.y_mode[
i][j] =
1026 for (
i = 0;
i < 4;
i++)
1027 for (j = 0; j < 4; j++)
1028 for (k = 0; k < 3; k++)
1030 s->prob.p.partition[3 -
i][j][k] =
1032 s->prob.p.partition[3 -
i][j][k]);
1035 for (
i = 0;
i < 3;
i++)
1039 for (
i = 0;
i < 2;
i++) {
1041 s->prob.p.mv_comp[
i].sign =
1044 for (j = 0; j < 10; j++)
1046 s->prob.p.mv_comp[
i].classes[j] =
1050 s->prob.p.mv_comp[
i].class0 =
1053 for (j = 0; j < 10; j++)
1055 s->prob.p.mv_comp[
i].bits[j] =
1059 for (
i = 0;
i < 2;
i++) {
1060 for (j = 0; j < 2; j++)
1061 for (k = 0; k < 3; k++)
1063 s->prob.p.mv_comp[
i].class0_fp[j][k] =
1066 for (j = 0; j < 3; j++)
1068 s->prob.p.mv_comp[
i].fp[j] =
1072 if (
s->s.h.highprecisionmvs) {
1073 for (
i = 0;
i < 2;
i++) {
1075 s->prob.p.mv_comp[
i].class0_hp =
1079 s->prob.p.mv_comp[
i].hp =
1085 return (data2 -
data) + size2;
1089 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1092 int c = ((
s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1093 (((
td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1095 s->prob.p.partition[bl][
c];
1097 ptrdiff_t hbs = 4 >> bl;
1099 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1100 int bytesperpixel =
s->bytesperpixel;
1105 }
else if (col + hbs < s->cols) {
1106 if (row + hbs < s->rows) {
1114 yoff += hbs * 8 * y_stride;
1115 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1120 yoff += hbs * 8 * bytesperpixel;
1121 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1125 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1127 yoff + 8 * hbs * bytesperpixel,
1128 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1129 yoff += hbs * 8 * y_stride;
1130 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1131 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1133 yoff + 8 * hbs * bytesperpixel,
1134 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1141 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1143 yoff + 8 * hbs * bytesperpixel,
1144 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1149 }
else if (row + hbs < s->rows) {
1152 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1153 yoff += hbs * 8 * y_stride;
1154 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1155 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1162 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1164 td->counts.partition[bl][
c][bp]++;
1168 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1172 ptrdiff_t hbs = 4 >> bl;
1174 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1175 int bytesperpixel =
s->bytesperpixel;
1180 }
else if (
td->b->bl == bl) {
1183 yoff += hbs * 8 * y_stride;
1184 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1186 }
else if (
b->bp ==
PARTITION_V && col + hbs < s->cols) {
1187 yoff += hbs * 8 * bytesperpixel;
1188 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1193 if (col + hbs < s->cols) {
1194 if (row + hbs < s->rows) {
1195 decode_sb_mem(
td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1196 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1197 yoff += hbs * 8 * y_stride;
1198 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1201 yoff + 8 * hbs * bytesperpixel,
1202 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1204 yoff += hbs * 8 * bytesperpixel;
1205 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1208 }
else if (row + hbs < s->rows) {
1209 yoff += hbs * 8 * y_stride;
1210 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1218 int sb_start = ( idx * n) >> log2_n;
1219 int sb_end = ((idx + 1) * n) >> log2_n;
1220 *start =
FFMIN(sb_start, n) << 3;
1221 *end =
FFMIN(sb_end, n) << 3;
1229 for (
i = 0;
i <
s->active_tile_cols;
i++)
1238 for (
i = 0;
i < 3;
i++) {
1243 for (
i = 0;
i < 8;
i++) {
1264 int row, col, tile_row, tile_col,
ret;
1266 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1268 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1271 ls_y =
f->linesize[0];
1272 ls_uv =
f->linesize[1];
1273 bytesperpixel =
s->bytesperpixel;
1276 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1278 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1280 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1283 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1284 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1291 if (tile_size >
size)
1302 for (row = tile_row_start; row < tile_row_end;
1303 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1305 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1307 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1309 tile_col,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1310 td->tile_col_start = tile_col_start;
1312 memset(
td->left_partition_ctx, 0, 8);
1313 memset(
td->left_skip_ctx, 0, 8);
1314 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1319 memset(
td->left_y_nnz_ctx, 0, 16);
1320 memset(
td->left_uv_nnz_ctx, 0, 32);
1321 memset(
td->left_segpred_ctx, 0, 8);
1323 td->c = &
td->c_b[tile_col];
1326 for (col = tile_col_start;
1328 col += 8, yoff2 += 64 * bytesperpixel,
1329 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1333 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1354 if (row + 8 <
s->rows) {
1355 memcpy(
s->intra_pred_data[0],
1356 f->data[0] + yoff + 63 * ls_y,
1357 8 *
s->cols * bytesperpixel);
1358 memcpy(
s->intra_pred_data[1],
1359 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1360 8 *
s->cols * bytesperpixel >>
s->ss_h);
1361 memcpy(
s->intra_pred_data[2],
1362 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1363 8 *
s->cols * bytesperpixel >>
s->ss_h);
1367 if (
s->s.h.filter.level) {
1370 lflvl_ptr =
s->lflvl;
1371 for (col = 0; col <
s->cols;
1372 col += 8, yoff2 += 64 * bytesperpixel,
1373 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1390 int decode_tiles_mt(
AVCodecContext *avctx,
void *tdata,
int jobnr,
1395 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1396 int bytesperpixel =
s->bytesperpixel, row, col, tile_row;
1397 unsigned tile_cols_len;
1398 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1403 ls_y =
f->linesize[0];
1404 ls_uv =
f->linesize[1];
1407 jobnr,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1408 td->tile_col_start = tile_col_start;
1409 uvoff = (64 * bytesperpixel >>
s->ss_h)*(tile_col_start >> 3);
1410 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1411 lflvl_ptr_base =
s->lflvl+(tile_col_start >> 3);
1413 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1415 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1417 td->c = &
td->c_b[tile_row];
1418 for (row = tile_row_start; row < tile_row_end;
1419 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1420 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1421 VP9Filter *lflvl_ptr = lflvl_ptr_base+
s->sb_cols*(row >> 3);
1423 memset(
td->left_partition_ctx, 0, 8);
1424 memset(
td->left_skip_ctx, 0, 8);
1425 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1430 memset(
td->left_y_nnz_ctx, 0, 16);
1431 memset(
td->left_uv_nnz_ctx, 0, 32);
1432 memset(
td->left_segpred_ctx, 0, 8);
1434 for (col = tile_col_start;
1436 col += 8, yoff2 += 64 * bytesperpixel,
1437 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1440 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1447 tile_cols_len = tile_col_end - tile_col_start;
1448 if (row + 8 <
s->rows) {
1449 memcpy(
s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1450 f->data[0] + yoff + 63 * ls_y,
1451 8 * tile_cols_len * bytesperpixel);
1452 memcpy(
s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1453 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1454 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1455 memcpy(
s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1456 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1457 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1460 vp9_report_tile_progress(
s, row >> 3, 1);
1470 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1472 int bytesperpixel =
s->bytesperpixel, col,
i;
1476 ls_y =
f->linesize[0];
1477 ls_uv =
f->linesize[1];
1479 for (
i = 0;
i <
s->sb_rows;
i++) {
1480 vp9_await_tile_progress(
s,
i,
s->s.h.tiling.tile_cols);
1482 if (
s->s.h.filter.level) {
1483 yoff = (ls_y * 64)*
i;
1484 uvoff = (ls_uv * 64 >>
s->ss_v)*
i;
1485 lflvl_ptr =
s->lflvl+
s->sb_cols*
i;
1486 for (col = 0; col <
s->cols;
1487 col += 8, yoff += 64 * bytesperpixel,
1488 uvoff += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1501 unsigned int tile, nb_blocks = 0;
1503 if (
s->s.h.segmentation.enabled) {
1504 for (tile = 0; tile <
s->active_tile_cols; tile++)
1505 nb_blocks +=
s->td[tile].nb_block_structure;
1513 par->
qp =
s->s.h.yac_qi;
1514 par->
delta_qp[0][0] =
s->s.h.ydc_qdelta;
1515 par->
delta_qp[1][0] =
s->s.h.uvdc_qdelta;
1516 par->
delta_qp[2][0] =
s->s.h.uvdc_qdelta;
1517 par->
delta_qp[1][1] =
s->s.h.uvac_qdelta;
1518 par->
delta_qp[2][1] =
s->s.h.uvac_qdelta;
1521 unsigned int block = 0;
1522 unsigned int tile, block_tile;
1524 for (tile = 0; tile <
s->active_tile_cols; tile++) {
1527 for (block_tile = 0; block_tile <
td->nb_block_structure; block_tile++) {
1529 unsigned int row =
td->block_structure[block_tile].row;
1530 unsigned int col =
td->block_structure[block_tile].col;
1531 uint8_t seg_id =
frame->segmentation_map[row * 8 *
s->sb_cols + col];
1535 b->w = 1 << (3 +
td->block_structure[block_tile].block_size_idx_x);
1536 b->h = 1 << (3 +
td->block_structure[block_tile].block_size_idx_y);
1538 if (
s->s.h.segmentation.feat[seg_id].q_enabled) {
1539 b->delta_qp =
s->s.h.segmentation.feat[seg_id].q_val;
1540 if (
s->s.h.segmentation.absolute_vals)
1541 b->delta_qp -= par->
qp;
1558 (!
s->s.h.segmentation.enabled || !
s->s.h.segmentation.update_map);
1563 }
else if (
ret == 0) {
1564 if (!
s->s.refs[
ref].f->buf[0]) {
1572 for (
i = 0;
i < 8;
i++) {
1573 if (
s->next_refs[
i].f->buf[0])
1575 if (
s->s.refs[
i].f->buf[0] &&
1585 if (!retain_segmap_ref ||
s->s.h.keyframe ||
s->s.h.intraonly) {
1588 if (!
s->s.h.keyframe && !
s->s.h.intraonly && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1594 if (!
s->s.h.intraonly && !
s->s.h.keyframe && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1602 if (
s->s.h.keyframe)
1615 for (
i = 0;
i < 8;
i++) {
1616 if (
s->next_refs[
i].f->buf[0])
1618 if (
s->s.h.refreshrefmask & (1 <<
i)) {
1620 }
else if (
s->s.refs[
i].f->buf[0]) {
1642 memset(
s->above_partition_ctx, 0,
s->cols);
1643 memset(
s->above_skip_ctx, 0,
s->cols);
1644 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1645 memset(
s->above_mode_ctx,
DC_PRED,
s->cols * 2);
1649 memset(
s->above_y_nnz_ctx, 0,
s->sb_cols * 16);
1650 memset(
s->above_uv_nnz_ctx[0], 0,
s->sb_cols * 16 >>
s->ss_h);
1651 memset(
s->above_uv_nnz_ctx[1], 0,
s->sb_cols * 16 >>
s->ss_h);
1652 memset(
s->above_segpred_ctx, 0,
s->cols);
1657 "Failed to allocate block buffers\n");
1660 if (
s->s.h.refreshctx &&
s->s.h.parallelmode) {
1663 for (
i = 0;
i < 4;
i++) {
1664 for (j = 0; j < 2; j++)
1665 for (k = 0; k < 2; k++)
1666 for (l = 0; l < 6; l++)
1667 for (m = 0; m < 6; m++)
1668 memcpy(
s->prob_ctx[
s->s.h.framectxid].coef[
i][j][k][l][m],
1669 s->prob.coef[
i][j][k][l][m], 3);
1670 if (
s->s.h.txfmmode ==
i)
1673 s->prob_ctx[
s->s.h.framectxid].p =
s->prob.p;
1675 }
else if (!
s->s.h.refreshctx) {
1681 for (
i = 0;
i <
s->sb_rows;
i++)
1687 for (
i = 0;
i <
s->active_tile_cols;
i++) {
1688 s->td[
i].b =
s->td[
i].b_base;
1689 s->td[
i].block =
s->td[
i].block_base;
1690 s->td[
i].uvblock[0] =
s->td[
i].uvblock_base[0];
1691 s->td[
i].uvblock[1] =
s->td[
i].uvblock_base[1];
1692 s->td[
i].eob =
s->td[
i].eob_base;
1693 s->td[
i].uveob[0] =
s->td[
i].uveob_base[0];
1694 s->td[
i].uveob[1] =
s->td[
i].uveob_base[1];
1695 s->td[
i].error_info = 0;
1700 int tile_row, tile_col;
1704 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1705 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1708 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1709 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1716 if (tile_size >
size)
1741 for (
i = 1;
i <
s->s.h.tiling.tile_cols;
i++)
1742 for (j = 0; j <
sizeof(
s->td[
i].counts) /
sizeof(
unsigned); j++)
1743 ((
unsigned *)&
s->td[0].counts)[j] += ((
unsigned *)&
s->td[
i].counts)[j];
1745 if (
s->pass < 2 &&
s->s.h.refreshctx && !
s->s.h.parallelmode) {
1749 }
while (
s->pass++ == 1);
1752 if (
s->td->error_info < 0) {
1754 s->td->error_info = 0;
1765 for (
i = 0;
i < 8;
i++) {
1766 if (
s->s.refs[
i].f->buf[0])
1768 if (
s->next_refs[
i].f->buf[0] &&
1773 if (!
s->s.h.invisible) {
1787 for (
i = 0;
i < 3;
i++)
1789 for (
i = 0;
i < 8;
i++)
1802 s->s.h.filter.sharpness = -1;
1812 for (
int i = 0;
i < 3;
i++) {
1814 if (!
s->s.frames[
i].tf.f)
1817 for (
int i = 0;
i < 8;
i++) {
1820 if (!
s->s.refs[
i].f || !
s->next_refs[
i].f)
1832 for (
i = 0;
i < 3;
i++) {
1833 if (
s->s.frames[
i].tf.f->buf[0])
1835 if (ssrc->s.frames[
i].tf.f->buf[0]) {
1840 for (
i = 0;
i < 8;
i++) {
1841 if (
s->s.refs[
i].f->buf[0])
1843 if (ssrc->next_refs[
i].f->buf[0]) {
1849 s->frame_extradata_pool_size = ssrc->frame_extradata_pool_size;
1851 s->s.h.invisible = ssrc->s.h.invisible;
1852 s->s.h.keyframe = ssrc->s.h.keyframe;
1853 s->s.h.intraonly = ssrc->s.h.intraonly;
1854 s->ss_v = ssrc->ss_v;
1855 s->ss_h = ssrc->ss_h;
1856 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1857 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1858 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1859 s->bytesperpixel = ssrc->bytesperpixel;
1860 s->gf_fmt = ssrc->gf_fmt;
1863 s->s.h.bpp = ssrc->s.h.bpp;
1864 s->bpp_index = ssrc->bpp_index;
1865 s->pix_fmt = ssrc->pix_fmt;
1866 memcpy(&
s->prob_ctx, &ssrc->prob_ctx,
sizeof(
s->prob_ctx));
1867 memcpy(&
s->s.h.lf_delta, &ssrc->s.h.lf_delta,
sizeof(
s->s.h.lf_delta));
1868 memcpy(&
s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1869 sizeof(
s->s.h.segmentation.feat));
1891 .bsfs =
"vp9_superframe_split",
1893 #if CONFIG_VP9_DXVA2_HWACCEL
1896 #if CONFIG_VP9_D3D11VA_HWACCEL
1899 #if CONFIG_VP9_D3D11VA2_HWACCEL
1902 #if CONFIG_VP9_NVDEC_HWACCEL
1905 #if CONFIG_VP9_VAAPI_HWACCEL
1908 #if CONFIG_VP9_VDPAU_HWACCEL
1911 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL