38 #define VP9_SYNCCODE 0x498342
66 for (i = 0; i < n; i++)
101 f->segmentation_map = NULL;
102 f->hwaccel_picture_private = NULL;
114 sz = 64 * s->sb_cols * s->sb_rows;
120 f->segmentation_map = f->extradata->data;
128 if (!f->hwaccel_priv_buf)
130 f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
157 if (src->hwaccel_picture_private) {
173 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
174 CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
175 CONFIG_VP9_NVDEC_HWACCEL + \
176 CONFIG_VP9_VAAPI_HWACCEL)
180 int bytesperpixel = s->bytesperpixel, ret, cols, rows;
185 if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
189 switch (s->pix_fmt) {
192 #if CONFIG_VP9_DXVA2_HWACCEL
195 #if CONFIG_VP9_D3D11VA_HWACCEL
199 #if CONFIG_VP9_NVDEC_HWACCEL
202 #if CONFIG_VP9_VAAPI_HWACCEL
207 #if CONFIG_VP9_NVDEC_HWACCEL
210 #if CONFIG_VP9_VAAPI_HWACCEL
216 *fmtp++ = s->pix_fmt;
224 s->gf_fmt = s->pix_fmt;
232 if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
235 s->last_fmt = s->pix_fmt;
236 s->sb_cols = (w + 63) >> 6;
237 s->sb_rows = (h + 63) >> 6;
238 s->cols = (w + 7) >> 3;
239 s->rows = (h + 7) >> 3;
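/* Frame geometry: VP9 superblocks are 64x64 and most per-block context is
 * tracked at 8x8 granularity, so sb_cols/sb_rows round the frame size up to
 * multiples of 64 while cols/rows round up to multiples of 8. */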
242 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
246 p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
247                             lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
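/* One allocation backs the per-column "above" context arrays and the
 * per-superblock loop-filter state; the assign() macro above carves it into
 * typed pointers, advancing p by s->sb_cols * (n) elements per array. */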
270 for (i = 0; i < s->active_tile_cols; i++) {
276 if (s->s.h.bpp != s->last_bpp) {
279 s->last_bpp = s->s.h.bpp;
289 int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
292 if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
297 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
298 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
300 int sbs = s->sb_cols * s->sb_rows;
303 td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
304                              16 * 16 + 2 * chroma_eobs) * sbs);
305 if (!td->b_base || !td->block_base)
307 td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
308 td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
309 td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
310 td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
311 td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;
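/* Layout of the coefficient scratch buffer: 64x64 luma coefficients first,
 * then the two chroma planes (scaled down by the subsampling factors), then
 * the luma and chroma end-of-block arrays, all multiplied by the number of
 * superblocks when a whole-frame buffer is used. */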
313 for (i = 1; i < s->active_tile_cols; i++) {
314 if (s->td[i].b_base && s->td[i].block_base) {
319 for (i = 0; i < s->active_tile_cols; i++) {
321 s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
322                                  16 * 16 + 2 * chroma_eobs);
323 if (!s->td[i].b_base || !s->td[i].block_base)
325 s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
326 s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
327 s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
328 s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
329 s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;
332 s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
349 return m - ((v + 1) >> 1);
356 static const uint8_t inv_map_table[255] = {
357 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
358 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
359 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
360 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
361 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
362 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
363 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
364 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
365 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
366 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
367 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
368 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
369 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
370 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
371 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
372 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
373 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
374 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
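/* inv_map_table (together with inv_recenter_nonneg above) appears to be the
 * remapping used when the compressed header updates a probability: the
 * decoded index is remapped so that small deltas around the current
 * probability use the shortest codes. */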
422 s->s.h.bpp = 8 + bits * 2;
423 s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
429 s->ss_h = s->ss_v = 0;
443 static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
455 s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
466 s->ss_h = s->ss_v = 1;
467 s->pix_fmt = pix_fmt_for_ss[bits][1][1];
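/* Pixel-format selection: the bit-depth bits choose 8/10/12 bpp, and the
 * chroma subsampling flags (ss_v, ss_h) index pix_fmt_for_ss[]; profiles
 * that do not signal subsampling explicitly default to 4:2:0 here. */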
478 int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
504 s->last_keyframe = s->s.h.keyframe;
507 last_invisible = s->s.h.invisible;
510 s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
512 if (s->s.h.keyframe) {
520 s->s.h.refreshrefmask = 0xff;
526 s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
527 s->s.h.resetctx = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
528 if (s->s.h.intraonly) {
537 s->ss_h = s->ss_v = 1;
540 s->bytesperpixel = 1;
553 s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres;
555 s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres;
557 s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres;
558 if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
559     !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
560     !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
565 w = s->s.refs[s->s.h.refidx[0]].f->width;
566 h = s->s.refs[s->s.h.refidx[0]].f->height;
568 w = s->s.refs[s->s.h.refidx[1]].f->width;
569 h = s->s.refs[s->s.h.refidx[1]].f->height;
571 w = s->s.refs[s->s.h.refidx[2]].f->width;
572 h = s->s.refs[s->s.h.refidx[2]].f->height;
580 s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
587 s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
588                         s->s.h.signbias[0] != s->s.h.signbias[2];
589 if (s->s.h.allowcompinter) {
590 if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
591 s->s.h.fixcompref = 2;
592 s->s.h.varcompref[0] = 0;
593 s->s.h.varcompref[1] = 1;
594 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
595 s->s.h.fixcompref = 1;
596 s->s.h.varcompref[0] = 0;
597 s->s.h.varcompref[1] = 2;
599 s->s.h.fixcompref = 0;
600 s->s.h.varcompref[0] = 1;
601 s->s.h.varcompref[1] = 2;
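/* Compound prediction setup: the reference whose sign bias differs from the
 * other two becomes the fixed reference (fixcompref), and the two references
 * sharing a bias become the variable pair (varcompref[0]/varcompref[1]). */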
606 s->s.h.refreshctx = s->s.h.errorres ? 0 : get_bits1(&s->gb);
607 s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
609 if (s->s.h.keyframe || s->s.h.intraonly)
610 s->s.h.framectxid = 0;
613 if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
615 s->s.h.lf_delta.ref[0] = 1;
616 s->s.h.lf_delta.ref[1] = 0;
617 s->s.h.lf_delta.ref[2] = -1;
618 s->s.h.lf_delta.ref[3] = -1;
619 s->s.h.lf_delta.mode[0] = 0;
620 s->s.h.lf_delta.mode[1] = 0;
621 memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
627 if (s->s.h.filter.sharpness != sharp) {
628 for (i = 1; i <= 63; i++) {
632 limit >>= (sharp + 3) >> 2;
633 limit = FFMIN(limit, 9 - sharp);
635 limit = FFMAX(limit, 1);
637 s->filter_lut.lim_lut[i] = limit;
638 s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
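/* Loop-filter limit LUTs: for each filter level i the base limit is narrowed
 * by the sharpness setting but never drops below 1; mblim_lut stores the
 * wider threshold 2 * (i + 2) + limit. */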
641 s->s.h.filter.sharpness = sharp;
642 if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
643 if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
644 for (i = 0; i < 4; i++)
647 for (i = 0; i < 2; i++)
658 s->s.h.lossless = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
659                   s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
664 if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
665 if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
666 for (i = 0; i < 7; i++)
669 if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
670 for (i = 0; i < 3; i++)
676 s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
677 for (i = 0; i < 8; i++) {
678 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
680 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
682 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
683 s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
684 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
690 for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
691 int qyac, qydc, quvac, quvdc, lflvl, sh;
693 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
694 if (s->s.h.segmentation.absolute_vals)
695 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
697 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
699 qyac = s->s.h.yac_qi;
701 qydc = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
702 quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
703 quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
704 qyac = av_clip_uintp2(qyac, 8);
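/* Per-segment quantizers: the segment q_val is applied to the base yac_qi
 * either as an absolute value or as a delta, the DC/AC luma and chroma
 * indices are then offset by the frame-level qdeltas, and everything is
 * clipped to the 8-bit quantizer index range. */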
711 sh = s->s.h.filter.level >= 32;
712 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
713 if (s->s.h.segmentation.absolute_vals)
714 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
716 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
718 lflvl = s->s.h.filter.level;
720 if (s->s.h.lf_delta.enabled) {
721 s->s.h.segmentation.feat[i].lflvl[0][0] =
722 s->s.h.segmentation.feat[i].lflvl[0][1] =
723     av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
724 for (j = 1; j < 4; j++) {
725 s->s.h.segmentation.feat[i].lflvl[j][0] =
726     av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
727                              s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
728 s->s.h.segmentation.feat[i].lflvl[j][1] =
729     av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
730                              s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
733 memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
734        sizeof(s->s.h.segmentation.feat[i].lflvl));
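/* Per-segment loop-filter levels: with lf_delta enabled, each (reference,
 * mode) pair gets the segment level adjusted by the matching ref/mode delta,
 * scaled by 2 when the filter level is >= 32 (sh); otherwise the whole lflvl
 * table is filled with the plain segment level. */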
744 for (s->s.h.tiling.log2_tile_cols = 0;
745      s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
746      s->s.h.tiling.log2_tile_cols++) ;
747 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
749 while (max > s->s.h.tiling.log2_tile_cols) {
751 s->s.h.tiling.log2_tile_cols++;
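/* Tile columns: the first loop finds the minimum log2_tile_cols such that no
 * tile column spans more than 64 superblocks, the second derives the maximum
 * from the requirement that columns stay at least 4 superblocks wide, and
 * additional increment bits are read while the maximum allows it. */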
756 s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
757 if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
762 for (i = 0; i < s->active_tile_cols; i++) {
769 s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
772 s->s.h.tiling.tile_cols : 1;
777 n_range_coders = s->s.h.tiling.tile_cols;
784 for (i = 0; i < s->active_tile_cols; i++) {
787 rc += n_range_coders;
792 if (!s->s.h.keyframe && !s->s.h.intraonly) {
793 for (i = 0; i < 3; i++) {
795 int refw = ref->width, refh = ref->height;
799 "Ref pixfmt (%s) did not match current frame (%s)",
803 } else if (refw == w && refh == h) {
804 s->mvscale[i][0] = s->mvscale[i][1] = 0;
806 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
808 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
812 s->mvscale[i][0] = (refw << 14) / w;
813 s->mvscale[i][1] = (refh << 14) / h;
814 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
815 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
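/* Reference scaling: mvscale[] holds the ref-to-current size ratio in Q14
 * fixed point (1.0 == 1 << 14) and mvstep[] the source step per 16 output
 * pixels; references more than 2x larger or more than 16x smaller than the
 * current frame are rejected above as invalid. */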
820 if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
821 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
831 } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
838 s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
842 if (size2 > size - (data2 - data)) {
855 for (i = 0; i < s->active_tile_cols; i++) {
856 if (s->s.h.keyframe || s->s.h.intraonly) {
857 memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
858 memset(s->td[i].counts.eob, 0, sizeof(s->td[0].counts.eob));
860 memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
867 s->prob.p = s->prob_ctx[c].p;
870 if (s->s.h.lossless) {
874 if (s->s.h.txfmmode == 3)
878 for (i = 0; i < 2; i++)
881 for (i = 0; i < 2; i++)
882 for (j = 0; j < 2; j++)
884 s->prob.p.tx16p[i][j] =
886 for (i = 0; i < 2; i++)
887 for (j = 0; j < 3; j++)
889 s->prob.p.tx32p[i][j] =
895 for (i = 0; i < 4; i++) {
898 for (j = 0; j < 2; j++)
899 for (k = 0; k < 2; k++)
900 for (l = 0; l < 6; l++)
901 for (m = 0; m < 6; m++) {
902 uint8_t *p = s->prob.coef[i][j][k][l][m];
904 if (m >= 3 && l == 0)
906 for (n = 0; n < 3; n++) {
915 for (j = 0; j < 2; j++)
916 for (k = 0; k < 2; k++)
917 for (l = 0; l < 6; l++)
918 for (m = 0; m < 6; m++) {
919 uint8_t *p = s->prob.coef[i][j][k][l][m];
927 if (s->s.h.txfmmode == i)
932 for (i = 0; i < 3; i++)
935 if (!s->s.h.keyframe && !s->s.h.intraonly) {
936 for (i = 0; i < 7; i++)
937 for (j = 0; j < 3; j++)
939 s->prob.p.mv_mode[i][j] =
943 for (i = 0; i < 4; i++)
944 for (j = 0; j < 2; j++)
946 s->prob.p.filter[i][j] =
949 for (i = 0; i < 4; i++)
953 if (s->s.h.allowcompinter) {
955 if (s->s.h.comppredmode)
958 for (i = 0; i < 5; i++)
967 for (i = 0; i < 5; i++) {
969 s->prob.p.single_ref[i][0] =
972 s->prob.p.single_ref[i][1] =
978 for (i = 0; i < 5; i++)
980 s->prob.p.comp_ref[i] =
984 for (i = 0; i < 4; i++)
985 for (j = 0; j < 9; j++)
987 s->prob.p.y_mode[i][j] =
990 for (i = 0; i < 4; i++)
991 for (j = 0; j < 4; j++)
992 for (k = 0; k < 3; k++)
994 s->prob.p.partition[3 - i][j][k] =
996     s->prob.p.partition[3 - i][j][k]);
999 for (i = 0; i < 3; i++)
1003 for (i = 0; i < 2; i++) {
1005 s->prob.p.mv_comp[i].sign =
1008 for (j = 0; j < 10; j++)
1010 s->prob.p.mv_comp[i].classes[j] =
1014 s->prob.p.mv_comp[i].class0 =
1017 for (j = 0; j < 10; j++)
1019 s->prob.p.mv_comp[i].bits[j] =
1023 for (i = 0; i < 2; i++) {
1024 for (j = 0; j < 2; j++)
1025 for (k = 0; k < 3; k++)
1027 s->prob.p.mv_comp[i].class0_fp[j][k] =
1030 for (j = 0; j < 3; j++)
1032 s->prob.p.mv_comp[i].fp[j] =
1036 if (s->s.h.highprecisionmvs) {
1037 for (i = 0; i < 2; i++) {
1039 s->prob.p.mv_comp[i].class0_hp =
1043 s->prob.p.mv_comp[i].hp =
1049 return (data2 - data) + size2;
1053 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1056 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1057         (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1059 s->prob.p.partition[bl][c];
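/* The partition probability context c mixes one bit of "above" context with
 * one bit of "left" context for the current block level; line 1059 is the
 * adapted-probability side of the keyframe/inter probability selection. */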
1061 ptrdiff_t hbs = 4 >> bl;
1063 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1064 int bytesperpixel = s->bytesperpixel;
1069 } else if (col + hbs < s->cols) {
1070 if (row + hbs < s->rows) {
1078 yoff += hbs * 8 * y_stride;
1079 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1084 yoff += hbs * 8 * bytesperpixel;
1085 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1089 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1091 yoff + 8 * hbs * bytesperpixel,
1092 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1093 yoff += hbs * 8 * y_stride;
1094 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1095 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1097 yoff + 8 * hbs * bytesperpixel,
1098 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1105 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1107 yoff + 8 * hbs * bytesperpixel,
1108 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1113 } else if (row + hbs < s->rows) {
1116 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1117 yoff += hbs * 8 * y_stride;
1118 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1119 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1126 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1128 td->counts.partition[bl][c][bp]++;
1132 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1136 ptrdiff_t hbs = 4 >> bl;
1138 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1139 int bytesperpixel = s->bytesperpixel;
1144 } else if (td->b->bl == bl) {
1147 yoff += hbs * 8 * y_stride;
1148 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1150 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
1151 yoff += hbs * 8 * bytesperpixel;
1152 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1157 if (col + hbs < s->cols) {
1158 if (row + hbs < s->rows) {
1159 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1160               uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1161 yoff += hbs * 8 * y_stride;
1162 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1165 yoff + 8 * hbs * bytesperpixel,
1166 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1168 yoff += hbs * 8 * bytesperpixel;
1169 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1172 } else if (row + hbs < s->rows) {
1173 yoff += hbs * 8 * y_stride;
1174 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1182 int sb_start = ( idx * n) >> log2_n;
1183 int sb_end = ((idx + 1) * n) >> log2_n;
1193 for (i = 0; i < s->active_tile_cols; i++) {
1204 for (i = 0; i < 3; i++) {
1205 if (s->s.frames[i].tf.f->buf[0])
1209 for (i = 0; i < 8; i++) {
1210 if (s->s.refs[i].f->buf[0])
1213 if (s->next_refs[i].f->buf[0])
1229 int row, col, tile_row, tile_col, ret;
1231 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1233 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1236 ls_y = f->linesize[0];
1237 ls_uv = f->linesize[1];
1238 bytesperpixel = s->bytesperpixel;
1241 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1243 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1245 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1248 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1249     tile_row == s->s.h.tiling.tile_rows - 1) {
1256 if (tile_size > size) {
1271 for (row = tile_row_start; row < tile_row_end;
1272      row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1274 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1276 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1278 tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1279 td->tile_col_start = tile_col_start;
1281 memset(td->left_partition_ctx, 0, 8);
1282 memset(td->left_skip_ctx, 0, 8);
1283 if (s->s.h.keyframe || s->s.h.intraonly) {
1288 memset(td->left_y_nnz_ctx, 0, 16);
1289 memset(td->left_uv_nnz_ctx, 0, 32);
1290 memset(td->left_segpred_ctx, 0, 8);
1292 td->c = &td->c_b[tile_col];
1295 for (col = tile_col_start;
1297 col += 8, yoff2 += 64 * bytesperpixel,
1298 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1302 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1323 if (row + 8 < s->rows) {
1324 memcpy(s->intra_pred_data[0],
1325        f->data[0] + yoff + 63 * ls_y,
1326        8 * s->cols * bytesperpixel);
1327 memcpy(s->intra_pred_data[1],
1328        f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1329        8 * s->cols * bytesperpixel >> s->ss_h);
1330 memcpy(s->intra_pred_data[2],
1331        f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1332        8 * s->cols * bytesperpixel >> s->ss_h);
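/* After finishing a superblock row, the bottom pixel row of each plane is
 * saved into intra_pred_data[] so it can serve as the "above" reference for
 * intra prediction in the next superblock row. */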
1336 if (s->s.h.filter.level) {
1339 lflvl_ptr = s->lflvl;
1340 for (col = 0; col < s->cols;
1341      col += 8, yoff2 += 64 * bytesperpixel,
1342      uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1359 int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
1364 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1365 int bytesperpixel = s->bytesperpixel, row, col, tile_row;
1366 unsigned tile_cols_len;
1367 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1372 ls_y = f->linesize[0];
1373 ls_uv = f->linesize[1];
1376 jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1377 td->tile_col_start = tile_col_start;
1378 uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
1379 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1380 lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);
1382 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1384 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1386 td->c = &td->c_b[tile_row];
1387 for (row = tile_row_start; row < tile_row_end;
1388      row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1389 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1390 VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);
1392 memset(td->left_partition_ctx, 0, 8);
1393 memset(td->left_skip_ctx, 0, 8);
1394 if (s->s.h.keyframe || s->s.h.intraonly) {
1399 memset(td->left_y_nnz_ctx, 0, 16);
1400 memset(td->left_uv_nnz_ctx, 0, 32);
1401 memset(td->left_segpred_ctx, 0, 8);
1403 for (col = tile_col_start;
1405 col += 8, yoff2 += 64 * bytesperpixel,
1406 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1409 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1416 tile_cols_len = tile_col_end - tile_col_start;
1417 if (row + 8 < s->rows) {
1418 memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1419        f->data[0] + yoff + 63 * ls_y,
1420        8 * tile_cols_len * bytesperpixel);
1421 memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1422        f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1423        8 * tile_cols_len * bytesperpixel >> s->ss_h);
1424 memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1425        f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1426        8 * tile_cols_len * bytesperpixel >> s->ss_h);
1429 vp9_report_tile_progress(s, row >> 3, 1);
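/* In the slice-threaded path each tile-column job reports per-superblock-row
 * progress so the separate loop-filter job can follow behind it. */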
1439 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1441 int bytesperpixel = s->bytesperpixel, col, i;
1445 ls_y = f->linesize[0];
1446 ls_uv = f->linesize[1];
1448 for (i = 0; i < s->sb_rows; i++) {
1449 vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);
1451 if (s->s.h.filter.level) {
1452 yoff = (ls_y * 64)*i;
1453 uvoff = (ls_uv * 64 >> s->ss_v)*i;
1454 lflvl_ptr = s->lflvl+s->sb_cols*i;
1455 for (col = 0; col < s->cols;
1456      col += 8, yoff += 64 * bytesperpixel,
1457      uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
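/* The loop-filter job waits until every tile column has completed a
 * superblock row before filtering that row across the full frame width. */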
1475 (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
1480 } else if (ret == 0) {
1481 if (!s->s.refs[ref].f->buf[0]) {
1494 for (i = 0; i < 8; i++) {
1495 if (s->next_refs[i].f->buf[0])
1497 if (s->s.refs[i].f->buf[0] &&
1507 if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
1510 if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1516 if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1524 f->key_frame = s->s.h.keyframe;
1534 for (i = 0; i < 8; i++) {
1535 if (s->next_refs[i].f->buf[0])
1537 if (s->s.h.refreshrefmask & (1 << i)) {
1539 } else if (s->s.refs[i].f->buf[0]) {
1560 memset(s->above_partition_ctx, 0, s->cols);
1561 memset(s->above_skip_ctx, 0, s->cols);
1562 if (s->s.h.keyframe || s->s.h.intraonly) {
1563 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
1567 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
1568 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
1569 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
1570 memset(s->above_segpred_ctx, 0, s->cols);
1575 "Failed to allocate block buffers\n");
1578 if (s->s.h.refreshctx && s->s.h.parallelmode) {
1581 for (i = 0; i < 4; i++) {
1582 for (j = 0; j < 2; j++)
1583 for (k = 0; k < 2; k++)
1584 for (l = 0; l < 6; l++)
1585 for (m = 0; m < 6; m++)
1586 memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
1587        s->prob.coef[i][j][k][l][m], 3);
1588 if (s->s.h.txfmmode == i)
1591 s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
1593 } else if (!s->s.h.refreshctx) {
1599 for (i = 0; i < s->sb_rows; i++)
1605 for (i = 0; i < s->active_tile_cols; i++) {
1606 s->td[i].b = s->td[i].b_base;
1607 s->td[i].block = s->td[i].block_base;
1608 s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
1609 s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
1610 s->td[i].eob = s->td[i].eob_base;
1611 s->td[i].uveob[0] = s->td[i].uveob_base[0];
1612 s->td[i].uveob[1] = s->td[i].uveob_base[1];
1617 int tile_row, tile_col;
1621 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1622 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1625 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1626     tile_row == s->s.h.tiling.tile_rows - 1) {
1633 if (tile_size > size)
1658 for (i = 1; i < s->s.h.tiling.tile_cols; i++)
1659 for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
1660 ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];
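/* Backward adaptation input: per-tile symbol counts are summed into
 * s->td[0].counts by treating the counts struct as a flat array of unsigned
 * counters, which relies on it containing only unsigned fields. */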
1662 if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
1666 } while (s->pass++ == 1);
1671 for (i = 0; i < 8; i++) {
1672 if (s->s.refs[i].f->buf[0])
1674 if (s->next_refs[i].f->buf[0] &&
1679 if (!s->s.h.invisible) {
1693 for (i = 0; i < 3; i++)
1695 for (i = 0; i < 8; i++)
1704 for (i = 0; i < 3; i++) {
1706 if (!s->s.frames[i].tf.f) {
1712 for (i = 0; i < 8; i++) {
1715 if (!s->s.refs[i].f || !s->next_refs[i].f) {
1731 s->s.h.filter.sharpness = -1;
1747 for (i = 0; i < 3; i++) {
1748 if (s->s.frames[i].tf.f->buf[0])
1750 if (ssrc->s.frames[i].tf.f->buf[0]) {
1755 for (i = 0; i < 8; i++) {
1756 if (s->s.refs[i].f->buf[0])
1758 if (ssrc->next_refs[i].f->buf[0]) {
1764 s->s.h.invisible = ssrc->s.h.invisible;
1765 s->s.h.keyframe = ssrc->s.h.keyframe;
1766 s->s.h.intraonly = ssrc->s.h.intraonly;
1767 s->ss_v = ssrc->ss_v;
1768 s->ss_h = ssrc->ss_h;
1769 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1770 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1771 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1772 s->bytesperpixel = ssrc->bytesperpixel;
1773 s->gf_fmt = ssrc->gf_fmt;
1776 s->s.h.bpp = ssrc->s.h.bpp;
1777 s->bpp_index = ssrc->bpp_index;
1778 s->pix_fmt = ssrc->pix_fmt;
1779 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
1780 memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
1781 memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1782        sizeof(s->s.h.segmentation.feat));
1803 .bsfs = "vp9_superframe_split",
1805 #if CONFIG_VP9_DXVA2_HWACCEL
1808 #if CONFIG_VP9_D3D11VA_HWACCEL
1811 #if CONFIG_VP9_D3D11VA2_HWACCEL
1814 #if CONFIG_VP9_NVDEC_HWACCEL
1817 #if CONFIG_VP9_VAAPI_HWACCEL