#define VP9_SYNCCODE 0x498342
unsigned coef[4][2][2][6][6][3];
unsigned eob[4][2][2][6][6][2];
    { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
    { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
    { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
    { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
s->cols = (w + 7) >> 3;
s->rows = (h + 7) >> 3;
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
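/* inv_recenter_nonneg() undoes the "recentering" used when coding
 * probability deltas: decoded values alternate around the reference m
 * (v=1 -> m-1, v=2 -> m+1, v=3 -> m-2, ...), while v > 2*m passes through
 * unchanged. For example, with m = 10, v = 5 yields 10 - 3 = 7 and v = 25
 * stays 25. */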
static const int inv_map_table[254] = {
      7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
    189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
     10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
     25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
     40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
     55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
     70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
     86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
    101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
    116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
    131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
    146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
    161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
    207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
    222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
    237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
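/* Maps the decoded update index back to a probability delta: the 20 coarse
 * values 7, 20, ..., 254 (step 13) come first so the cheapest indices give
 * large steps, followed by the remaining deltas in ascending order.
 * update_prob() presumably recenters the mapped delta around the current
 * probability using inv_recenter_nonneg() above. */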
int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
for (i = 0; i < 4; i++)
for (i = 0; i < 2; i++)
for (i = 0; i < 7; i++)
for (i = 0; i < 3; i++)
       "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
for (i = 0; i < 8; i++) {
int qyac, qydc, quvac, quvdc, lflvl, sh;
qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
qyac = av_clip_uintp2(qyac, 8);
    av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
for (j = 1; j < 4; j++) {
for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
max = FFMAX(0, max - 1);
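/* Derives the maximum tile-columns log2: the count is halved until a tile
 * column would span fewer than four 64x64 superblocks, which appears to
 * enforce the spec's minimum tile width of 256 pixels. */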
if (size2 > size - (data2 - data)) {
for (i = 0; i < 2; i++)
for (i = 0; i < 2; i++)
    for (j = 0; j < 2; j++)
for (i = 0; i < 2; i++)
    for (j = 0; j < 3; j++)
for (i = 0; i < 4; i++) {
    for (j = 0; j < 2; j++)
        for (k = 0; k < 2; k++)
            for (l = 0; l < 6; l++)
                for (m = 0; m < 6; m++) {
                    if (m >= 3 && l == 0)
                    for (n = 0; n < 3; n++) {
    for (j = 0; j < 2; j++)
        for (k = 0; k < 2; k++)
            for (l = 0; l < 6; l++)
                for (m = 0; m < 6; m++) {
for (i = 0; i < 3; i++)
for (i = 0; i < 7; i++)
    for (j = 0; j < 3; j++)
for (i = 0; i < 4; i++)
    for (j = 0; j < 2; j++)
for (i = 0; i < 4; i++)
for (i = 0; i < 5; i++)
for (i = 0; i < 5; i++) {
for (i = 0; i < 5; i++)
for (i = 0; i < 4; i++)
    for (j = 0; j < 9; j++)
for (i = 0; i < 4; i++)
    for (j = 0; j < 4; j++)
        for (k = 0; k < 3; k++)
for (i = 0; i < 3; i++)
for (i = 0; i < 2; i++) {
    for (j = 0; j < 10; j++)
    for (j = 0; j < 10; j++)
for (i = 0; i < 2; i++) {
    for (j = 0; j < 2; j++)
        for (k = 0; k < 3; k++)
    for (j = 0; j < 3; j++)
for (i = 0; i < 2; i++) {
return (data2 - data) + size2;
                   VP56mv *pmv, int ref, int z, int idx, int sb)
static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
    [BS_64x64] = { {  3, -1 }, { -1,  3 }, {  4, -1 }, { -1,  4 },
                   { -1, -1 }, {  0, -1 }, { -1,  0 }, {  6, -1 } },
    [BS_64x32] = { {  0, -1 }, { -1,  0 }, {  4, -1 }, { -1,  2 },
                   { -1, -1 }, {  0, -3 }, { -3,  0 }, {  2, -1 } },
    [BS_32x64] = { { -1,  0 }, {  0, -1 }, { -1,  4 }, {  2, -1 },
                   { -1, -1 }, { -3,  0 }, {  0, -3 }, { -1,  2 } },
    [BS_32x32] = { {  1, -1 }, { -1,  1 }, {  2, -1 }, { -1,  2 },
                   { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 } },
    [BS_32x16] = { {  0, -1 }, { -1,  0 }, {  2, -1 }, { -1, -1 },
                   { -1,  1 }, {  0, -3 }, { -3,  0 }, { -3, -3 } },
    [BS_16x32] = { { -1,  0 }, {  0, -1 }, { -1,  2 }, { -1, -1 },
                   {  1, -1 }, { -3,  0 }, {  0, -3 }, { -3, -3 } },
    [BS_16x16] = { {  0, -1 }, { -1,  0 }, {  1, -1 }, { -1,  1 },
                   { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 } },
    [BS_16x8]  = { {  0, -1 }, { -1,  0 }, {  1, -1 }, { -1, -1 },
                   {  0, -2 }, { -2,  0 }, { -2, -1 }, { -1, -2 } },
    [BS_8x16]  = { { -1,  0 }, {  0, -1 }, { -1,  1 }, { -1, -1 },
                   { -2,  0 }, {  0, -2 }, { -1, -2 }, { -2, -1 } },
    [BS_8x8]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                   { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
    [BS_8x4]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                   { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
    [BS_4x8]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                   { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
    [BS_4x4]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                   { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
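/* Per-block-size scan order for motion-vector candidates: each entry is a
 * (column, row) offset in 8x8-block units relative to the current block
 * (see "p[i][0] + col, p[i][1] + row" below), ordered so the immediate
 * left/above neighbours are probed before more distant ones. */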
int row = s->row, col = s->col, row7 = s->row7;
const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
#define INVALID_MV 0x80008000U
#define RETURN_DIRECT_MV(mv) \
        uint32_t m = AV_RN32A(&mv); \
        } else if (mem == INVALID_MV) { \
        } else if (m != mem) { \
if (sb == 2 || sb == 1) {
} else if (sb == 3) {
#define RETURN_MV(mv) \
        clamp_mv(&tmp, &mv, s); \
        m = AV_RN32A(&tmp); \
        } else if (mem == INVALID_MV) { \
        } else if (m != mem) { \
        uint32_t m = AV_RN32A(&mv); \
        clamp_mv(pmv, &mv, s); \
        } else if (mem == INVALID_MV) { \
        } else if (m != mem) { \
            clamp_mv(pmv, &mv, s); \
if (mv->ref[0] == ref) {
} else if (mv->ref[1] == ref) {
if (mv->ref[0] == ref) {
} else if (mv->ref[1] == ref) {
for (; i < 8; i++) {
    int c = p[i][0] + col, r = p[i][1] + row;
if (mv->ref[0] == ref) {
} else if (mv->ref[1] == ref) {
if (mv->ref[0] == ref) {
} else if (mv->ref[1] == ref) {
#define RETURN_SCALE_MV(mv, scale) \
        VP56mv mv_temp = { -mv.x, -mv.y }; \
        RETURN_MV(mv_temp); \
for (i = 0; i < 8; i++) {
    int c = p[i][0] + col, r = p[i][1] + row;
if (mv->ref[0] != ref && mv->ref[0] >= 0) {
if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
if (mv->ref[0] != ref && mv->ref[0] >= 0) {
if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
#undef RETURN_SCALE_MV
for (n = 0, m = 0; m < c; m++) {
n = (n << 3) | (bit << 1);
return sign ? -(n + 1) : (n + 1);
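/* Motion-vector component decoding: the class bits gathered above form the
 * integer magnitude; (n << 3) then makes room for the three fractional-pel
 * bits (the fractional value lands in bits 2:1, the eighth-pel flag in
 * bit 0), and the final ±(n + 1) bias exists because a zero component is
 * signalled elsewhere. */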
            mode == NEWMV ? -1 : sb);
if ((mode == NEWMV || sb == -1) &&
if (mode == NEWMV) {
            mode == NEWMV ? -1 : sb);
if ((mode == NEWMV || sb == -1) &&
if (mode == NEWMV) {
int v16 = v * 0x0101;
uint32_t v32 = v * 0x01010101;
uint64_t v64 = v * 0x0101010101010101ULL;
uint32_t v32 = v * 0x01010101;
    0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
    0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
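/* Per-block-size bit patterns splatted into the above/left partition
 * context (via dir##_ctx[b->bs] below); the set bits record how the 8x8
 * units of this block were partitioned, which later partition-symbol
 * decoding uses as its context. */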
int row = s->row, col = s->col, row7 = s->row7;
enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
int vref, filter_id;
for (y = 0; y < h4; y++)
    for (x = 0; x < w4; x++)
        pred = FFMIN(pred, refsegmap[(y + row) * 8 * s->sb_cols + x + col]);
if (have_a && have_l) {
} else if (have_l) {
l[0] = a[1] = b->mode[1];
l[0] = a[1] = b->mode[1] = b->mode[0];
l[1] = a[1] = b->mode[3];
l[1] = a[1] = b->mode[3] = b->mode[2];
l[1] = a[1] = b->mode[3] = b->mode[1];
} else if (b->intra) {
static const uint8_t size_group[10] = {
    3, 3, 3, 3, 2, 2, 2, 1, 1, 1
int sz = size_group[b->bs];
static const uint8_t inter_mode_ctx_lut[14][14] = {
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
    { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
    { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
    { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
} else if (have_l) {
if (refl == refa && refa == s->varcompref[1]) {
c = (refa == refl) ? 3 : 1;
c = (refl == refa) ? 4 : 2;
} else if (have_l) {
} else if (have_l) {
b->ref[0] = 1 + bit;
static const uint8_t off[10] = {
    3, 0, 0, 1, 0, 0, 0, 0, 0, 0
#define SPLAT_CTX(var, val, n) \
    case 1:  var = val;                                    break; \
    case 2:  AV_WN16A(&var, val * 0x0101);                 break; \
    case 4:  AV_WN32A(&var, val * 0x01010101);             break; \
    case 8:  AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
        uint64_t v64 = val * 0x0101010101010101ULL; \
        AV_WN64A(              &var,     v64); \
        AV_WN64A(&((uint8_t *) &var)[8], v64); \
#define SPLAT_CTX(var, val, n) \
    case 1:  var = val;                        break; \
    case 2:  AV_WN16A(&var, val * 0x0101);     break; \
    case 4:  AV_WN32A(&var, val * 0x01010101); break; \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A(              &var,      v32); \
        AV_WN32A(&((uint8_t *) &var)[4],  v32); \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A(              &var,      v32); \
        AV_WN32A(&((uint8_t *) &var)[4],  v32); \
        AV_WN32A(&((uint8_t *) &var)[8],  v32); \
        AV_WN32A(&((uint8_t *) &var)[12], v32); \
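/* Two SPLAT_CTX variants, presumably selected by an #if on fast 64-bit
 * store support: with it, the 16-byte case is written as two 8-byte
 * splats; without it, everything above 4 bytes falls back to 4-byte
 * stores. Both fill n context entries with the same byte so later blocks
 * can read their above/left context with one aligned load. */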
#define SET_CTXS(dir, off, n) \
        SPLAT_CTX(s->dir##_skip_ctx[off],      b->skip,          n); \
        SPLAT_CTX(s->dir##_txfm_ctx[off],      b->tx,            n); \
        SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
        if (!s->keyframe && !s->intraonly) { \
            SPLAT_CTX(s->dir##_intra_ctx[off], b->intra,   n); \
            SPLAT_CTX(s->dir##_comp_ctx[off],  b->comp,    n); \
            SPLAT_CTX(s->dir##_mode_ctx[off],  b->mode[3], n); \
            SPLAT_CTX(s->dir##_ref_ctx[off],   vref,       n); \
            if (s->filtermode == FILTER_SWITCHABLE) { \
                SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
case 1: SET_CTXS(above, col, 1); break;
case 2: SET_CTXS(above, col, 2); break;
case 4: SET_CTXS(above, col, 4); break;
case 8: SET_CTXS(above, col, 8); break;
case 1: SET_CTXS(left, row7, 1); break;
case 2: SET_CTXS(left, row7, 2); break;
case 4: SET_CTXS(left, row7, 4); break;
case 8: SET_CTXS(left, row7, 8); break;
for (n = 0; n < w4 * 2; n++) {
for (n = 0; n < h4 * 2; n++) {
for (y = 0; y < h4; y++) {
    int x, o = (row + y) * s->sb_cols * 8 + col;
    for (x = 0; x < w4; x++) {
    } else if (b->comp) {
        for (x = 0; x < w4; x++) {
            mv[x].ref[0] = b->ref[0];
            mv[x].ref[1] = b->ref[1];
        for (x = 0; x < w4; x++) {
            mv[x].ref[0] = b->ref[0];
        int is_tx32x32, unsigned (*cnt)[6][3],
        unsigned (*eob)[6][2], uint8_t (*p)[6][11],
        int nnz, const int16_t *scan, const int16_t (*nb)[2],
        const int16_t *band_counts, const int16_t *qmul)
int i = 0, band = 0, band_left = band_counts[band];
cnt[band][nnz][0]++;
band_left = band_counts[++band];
nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
if (++i == n_coeffs)
cnt[band][nnz][1]++;
cnt[band][nnz][2]++;
cache[rc] = val = 2;
band_left = band_counts[++band];
nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
} while (++i < n_coeffs);
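/* Coefficient decoding tracks two context dimensions: the scan position's
 * band (band_counts[] holds the size of each band) and a neighbour-based
 * nonzero context recomputed as (1 + nnz(left) + nnz(above)) >> 1 after
 * each coefficient. The cnt[band][nnz][token]++ updates feed the backward
 * probability adaptation once the frame is decoded. */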
        unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
        uint8_t (*p)[6][11], int nnz, const int16_t *scan,
        const int16_t (*nb)[2], const int16_t *band_counts,
        const int16_t *qmul)
    nnz, scan, nb, band_counts, qmul);
        unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
        uint8_t (*p)[6][11], int nnz, const int16_t *scan,
        const int16_t (*nb)[2], const int16_t *band_counts,
        const int16_t *qmul)
    nnz, scan, nb, band_counts, qmul);
int row = s->row, col = s->col;
int end_x = FFMIN(2 * (s->cols - col), w4);
int end_y = FFMIN(2 * (s->rows - row), h4);
int n, pl, x, y, res;
const int16_t *const *yscans = vp9_scans[tx];
static const int16_t band_counts[4][8] = {
    { 1, 2, 3, 4,  3,   16 - 13 },
    { 1, 2, 3, 4, 11,   64 - 21 },
    { 1, 2, 3, 4, 11,  256 - 21 },
    { 1, 2, 3, 4, 11, 1024 - 21 },
const int16_t *y_band_counts  = band_counts[b->tx];
const int16_t *uv_band_counts = band_counts[b->uvtx];
#define MERGE(la, end, step, rd) \
    for (n = 0; n < end; n += step) \
        la[n] = !!rd(&la[n])
#define MERGE_CTX(step, rd) \
        MERGE(l, end_y, step, rd); \
        MERGE(a, end_x, step, rd); \
#define DECODE_Y_COEF_LOOP(step, mode_index, v) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
            res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
                                     c, e, p, a[x] + l[y], yscans[txtp], \
                                     ynbs[txtp], y_band_counts, qmul[0]); \
            a[x] = l[y] = !!res; \
            AV_WN16A(&s->eob[n], res); \
#define SPLAT(la, end, step, cond) \
        for (n = 1; n < end; n += step) \
            la[n] = la[n - 1]; \
    } else if (step == 4) { \
            for (n = 0; n < end; n += step) \
                AV_WN32A(&la[n], la[n] * 0x01010101); \
            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
        if (HAVE_FAST_64BIT) { \
            for (n = 0; n < end; n += step) \
                AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
        } else { \
            for (n = 0; n < end; n += step) { \
                uint32_t v32 = la[n] * 0x01010101; \
                AV_WN32A(&la[n], v32); \
                AV_WN32A(&la[n + 4], v32); \
            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
#define SPLAT_CTX(step) \
        SPLAT(a, end_x, step, end_x == w4); \
        SPLAT(l, end_y, step, end_y == h4); \
#define DECODE_UV_COEF_LOOP(step) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n, \
                                  16 * step * step, c, e, p, a[x] + l[y], \
                                  uvscan, uvnb, uv_band_counts, qmul[1]); \
            a[x] = l[y] = !!res; \
            AV_WN16A(&s->uveob[pl][n], res); \
            s->uveob[pl][n] = res; \
for (pl = 0; pl < 2; pl++) {
                          1024, c, e, p, a[0] + l[0],
                          uvscan, uvnb, uv_band_counts, qmul[1]);
    a[0] = l[0] = !!res;
                   uint8_t *dst_edge, ptrdiff_t stride_edge,
                   uint8_t *dst_inner, ptrdiff_t stride_inner,
                   uint8_t *l, int col, int x, int w,
int have_top   = row > 0 || y > 0;
int have_right = x < w - 1;
static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
static const struct {
    [DC_PRED]         = { .needs_top = 1, .needs_left = 1 },
    [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
    [HOR_DOWN_PRED]   = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
    [TM_VP8_PRED]     = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
mode = mode_conv[mode][have_left][have_top];
if (edges[mode].needs_top) {
    int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
    int n_px_need_tr = 0;
    if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
    top = !(row & 7) && !y ?
          y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
    topleft = !(row & 7) && !y ?
              y == 0 || x == 0 ? &dst_edge[-stride_edge] :
              &dst_inner[-stride_inner];
        (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
        (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
        n_px_need + n_px_need_tr <= n_px_have) {
        if (n_px_need <= n_px_have) {
            memcpy(*a, top, n_px_need);
            memcpy(*a, top, n_px_have);
            memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
                   n_px_need - n_px_have);
        memset(*a, 127, n_px_need);
    if (edges[mode].needs_topleft) {
        if (have_left && have_top) {
            (*a)[-1] = topleft[-1];
            (*a)[-1] = have_top ? 129 : 127;
    if (tx == TX_4X4 && edges[mode].needs_topright) {
        if (have_top && have_right &&
            n_px_need + n_px_need_tr <= n_px_have) {
            memcpy(&(*a)[4], &top[4], 4);
            memset(&(*a)[4], (*a)[3], 4);
if (edges[mode].needs_left) {
    int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
    uint8_t *dst = x == 0 ? dst_edge : dst_inner;
    ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
    if (n_px_need <= n_px_have) {
        for (i = 0; i < n_px_need; i++)
            l[n_px_need - 1 - i] = dst[i * stride - 1];
        for (i = 0; i < n_px_have; i++)
            l[n_px_need - 1 - i] = dst[i * stride - 1];
        memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
    memset(l, 129, 4 << tx);
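/* When neighbouring pixels are unavailable, fixed substitutes are used:
 * 127 for a missing top row, 129 for a missing left column, and 129/127
 * for the top-left corner depending on which edge exists. The
 * memcpy/memset pairs above extend partially available edges by repeating
 * the last valid pixel. */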
int row = s->row, col = s->col;
int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
int end_x = FFMIN(2 * (s->cols - col), w4);
int end_y = FFMIN(2 * (s->rows - row), h4);
int uvstep1d = 1 << b->uvtx, p;
for (n = 0, y = 0; y < end_y; y += step1d) {
    uint8_t *ptr = dst, *ptr_r = dst_r;
    for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
         ptr_r += 4 * step1d, n += step) {
                         col, x, w4, row, y, b->tx, 0);
step = 1 << (b->uvtx * 2);
for (p = 0; p < 2; p++) {
    dst = s->dst[1 + p];
    for (n = 0, y = 0; y < end_y; y += uvstep1d) {
        uint8_t *ptr = dst, *ptr_r = dst_r;
        for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
             ptr_r += 4 * uvstep1d, n += step) {
                             col, x, w4, row, y, b->uvtx, p + 1);
            uint8_t *dst, ptrdiff_t dst_stride,
            const uint8_t *ref, ptrdiff_t ref_stride,
            ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
            int bw, int bh, int w, int h)
int mx = mv->x, my = mv->y, th;
ref += y * ref_stride + x;
th = (y + bh + 4 * !!my + 7) >> 6;
if (x < !!mx * 3 || y < !!my * 3 ||
    x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
        ref - !!my * 3 * ref_stride - !!mx * 3,
        bw + !!mx * 7, bh + !!my * 7,
        x - !!mx * 3, y - !!my * 3, w, h);
mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
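/* If the motion vector reaches within 3 pixels of (or past) the frame
 * border, the reference block is first copied into a scratch buffer with
 * edge emulation (the subpel filters need extra context pixels, hence the
 * +7 enlargement when mx/my are nonzero) and filtering runs on that copy.
 * th is the last source row needed, presumably used to synchronize with
 * the reference frame's decoding thread. */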
            ptrdiff_t dst_stride,
            const uint8_t *ref_u, ptrdiff_t src_stride_u,
            const uint8_t *ref_v, ptrdiff_t src_stride_v,
            ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
            int bw, int bh, int w, int h)
int mx = mv->x, my = mv->y, th;
ref_u += y * src_stride_u + x;
ref_v += y * src_stride_v + x;
th = (y + bh + 4 * !!my + 7) >> 5;
if (x < !!mx * 3 || y < !!my * 3 ||
    x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
        ref_u - !!my * 3 * src_stride_u - !!mx * 3,
        bw + !!mx * 7, bh + !!my * 7,
        x - !!mx * 3, y - !!my * 3, w, h);
    mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
        ref_v - !!my * 3 * src_stride_v - !!mx * 3,
        bw + !!mx * 7, bh + !!my * 7,
        x - !!mx * 3, y - !!my * 3, w, h);
    mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
    mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
    mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
    { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
    { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
int row = s->row, col = s->col;
            row << 3, col << 3, &b->mv[0][0], 8, 4, w1, h1);
            s->dst[0] + 4 * ls_y, ls_y,
            (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1);
            ref2->data[0], ref2->linesize[0], tref2,
            row << 3, col << 3, &b->mv[0][1], 8, 4, w2, h2);
            s->dst[0] + 4 * ls_y, ls_y,
            ref2->data[0], ref2->linesize[0], tref2,
            (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2);
            row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1);
            row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1);
            ref2->data[0], ref2->linesize[0], tref2,
            row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2);
            ref2->data[0], ref2->linesize[0], tref2,
            row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2);
            row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1);
            row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1);
            s->dst[0] + 4 * ls_y, ls_y,
            (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1);
            s->dst[0] + 4 * ls_y + 4, ls_y,
            (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1);
            ref2->data[0], ref2->linesize[0], tref2,
            row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2);
            ref2->data[0], ref2->linesize[0], tref2,
            row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2);
            s->dst[0] + 4 * ls_y, ls_y,
            ref2->data[0], ref2->linesize[0], tref2,
            (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2);
            s->dst[0] + 4 * ls_y + 4, ls_y,
            ref2->data[0], ref2->linesize[0], tref2,
            (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2);
int bwl = bwlog_tab[0][b->bs];
            row << 3, col << 3, &b->mv[0][0], bw, bh, w1, h1);
            ref2->data[0], ref2->linesize[0], tref2,
            row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2);
int bwl = bwlog_tab[1][b->bs];
              s->dst[1], s->dst[2], ls_uv,
              row << 2, col << 2, &mvuv, bw, bh, w1, h1);
              s->dst[1], s->dst[2], ls_uv,
              ref2->data[1], ref2->linesize[1],
              ref2->data[2], ref2->linesize[2], tref2,
              row << 2, col << 2, &mvuv, bw, bh, w2, h2);
int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
int end_x = FFMIN(2 * (s->cols - col), w4);
int end_y = FFMIN(2 * (s->rows - row), h4);
int uvstep1d = 1 << b->uvtx, p;
for (n = 0, y = 0; y < end_y; y += step1d) {
    for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
step = 1 << (b->uvtx * 2);
for (p = 0; p < 2; p++) {
    dst = s->dst[p + 1];
    for (n = 0, y = 0; y < end_y; y += uvstep1d) {
        for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
           int row_and_7, int col_and_7,
           int w, int h, int col_end, int row_end,
if (tx == TX_4X4 && is_uv) {
if (tx == TX_4X4 && !skip_inter) {
    int t = 1 << col_and_7, m_col = (t << w) - t, y;
    int m_col_odd = (t << (w - 1)) - t;
        int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
        for (y = row_and_7; y < h + row_and_7; y++) {
            int col_mask_id = 2 - !(y & 7);
            lflvl->mask[is_uv][0][y][1] |= m_row_8;
            lflvl->mask[is_uv][0][y][2] |= m_row_4;
            if ((col_end & 1) && (y & 1)) {
                lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
                lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
        int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
        for (y = row_and_7; y < h + row_and_7; y++) {
            int col_mask_id = 2 - !(y & 3);
            lflvl->mask[is_uv][0][y][1] |= m_row_8;
            lflvl->mask[is_uv][0][y][2] |= m_row_4;
            lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
            lflvl->mask[is_uv][0][y][3] |= m_col;
            lflvl->mask[is_uv][1][y][3] |= m_col;
    int y, t = 1 << col_and_7, m_col = (t << w) - t;
    int mask_id = (tx == TX_8X8);
    int l2 = tx + is_uv - 1, step1d = 1 << l2;
    static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
    int m_row = m_col & masks[l2];
    if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
        int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
        int m_row_8 = m_row - m_row_16;
        for (y = row_and_7; y < h + row_and_7; y++) {
            lflvl->mask[is_uv][0][y][0] |= m_row_16;
            lflvl->mask[is_uv][0][y][1] |= m_row_8;
        for (y = row_and_7; y < h + row_and_7; y++)
            lflvl->mask[is_uv][0][y][mask_id] |= m_row;
    if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
        for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
            lflvl->mask[is_uv][1][y][0] |= m_col;
        if (y - row_and_7 == h - 1)
            lflvl->mask[is_uv][1][y][1] |= m_col;
        for (y = row_and_7; y < h + row_and_7; y += step1d)
            lflvl->mask[is_uv][1][y][mask_id] |= m_col;
} else if (tx != TX_4X4) {
    mask_id = (tx == TX_8X8) || (is_uv && h == 1);
    lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
    mask_id = (tx == TX_8X8) || (is_uv && w == 1);
    for (y = row_and_7; y < h + row_and_7; y++)
        lflvl->mask[is_uv][0][y][mask_id] |= t;
        int t8 = t & 0x01, t4 = t - t8;
        for (y = row_and_7; y < h + row_and_7; y++) {
            lflvl->mask[is_uv][0][y][2] |= t4;
            lflvl->mask[is_uv][0][y][1] |= t8;
        lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
        int t8 = t & 0x11, t4 = t - t8;
        for (y = row_and_7; y < h + row_and_7; y++) {
            lflvl->mask[is_uv][0][y][2] |= t4;
            lflvl->mask[is_uv][0][y][1] |= t8;
        lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
                    struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
s->min_mv.x = -(128 + col * 64);
s->min_mv.y = -(128 + row * 64);
b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
#define SPLAT_ZERO_CTX(v, n) \
    case 1:  v = 0;          break; \
    case 2:  AV_ZERO16(&v);  break; \
    case 4:  AV_ZERO32(&v);  break; \
    case 8:  AV_ZERO64(&v);  break; \
    case 16: AV_ZERO128(&v); break; \
#define SPLAT_ZERO_YUV(dir, var, off, n) \
        SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
        SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
        SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
s->block      += w4 * h4 * 64;
s->uvblock[0] += w4 * h4 * 16;
s->uvblock[1] += w4 * h4 * 16;
s->eob        += 4 * w4 * h4;
s->uveob[0]   += w4 * h4;
s->uveob[1]   += w4 * h4;
emu[0] = (col + w4) * 8 > f->linesize[0] ||
         (row + h4) > s->rows;
emu[1] = (col + w4) * 4 > f->linesize[1] ||
         (row + h4) > s->rows;
s->dst[0] = f->data[0] + yoff;
s->dst[1] = f->data[1] + uvoff;
s->dst[2] = f->data[2] + uvoff;
for (n = 0; o < w; n++) {
                          s->tmp_y + o, 64, h, 0, 0);
for (n = 1; o < w; n++) {
                          s->tmp_uv[0] + o, 32, h, 0, 0);
                          s->tmp_uv[1] + o, 32, h, 0, 0);
mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
mask_edges(lflvl, 1, row7, col7, x_end, y_end,
           b->uvtx, skip_inter);
limit >>= (sharp + 3) >> 2;
limit = FFMIN(limit, 9 - sharp);
limit = FFMAX(limit, 1);
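/* Derives the loop-filter limit for each filter level from the frame's
 * sharpness setting: nonzero sharpness both shifts the limit down and
 * caps it at 9 - sharp, with a floor of 1 so filtering never turns into
 * a no-op by accident. */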
s->block      += w4 * h4 * 64;
s->uvblock[0] += w4 * h4 * 16;
s->uvblock[1] += w4 * h4 * 16;
s->eob        += 4 * w4 * h4;
s->uveob[0]   += w4 * h4;
s->uveob[1]   += w4 * h4;
                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
ptrdiff_t hbs = 4 >> bl;
    decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
} else if (col + hbs < s->cols) {
    if (row + hbs < s->rows) {
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 4 * uv_stride;
            decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
            decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
                      yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 4 * uv_stride;
            decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
            decode_sb(ctx, row + hbs, col + hbs, lflvl,
                      yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
        decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
                  yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
        decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
} else if (row + hbs < s->rows) {
        decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
        yoff  += hbs * 8 * y_stride;
        uvoff += hbs * 4 * uv_stride;
        decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
ptrdiff_t hbs = 4 >> bl;
    decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
} else if (s->b->bl == bl) {
    decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
        yoff  += hbs * 8 * y_stride;
        uvoff += hbs * 4 * uv_stride;
        decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
        decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
    if (col + hbs < s->cols) {
        if (row + hbs < s->rows) {
                          uvoff + 4 * hbs, bl + 1);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 4 * uv_stride;
            decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                          yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
            decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
    } else if (row + hbs < s->rows) {
        yoff  += hbs * 8 * y_stride;
        uvoff += hbs * 4 * uv_stride;
        decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
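/* Second-pass variant: decode_sb_mem() replays the block structure stored
 * during the first pass (b->bl / b->bp) instead of re-reading partition
 * symbols from the bitstream, so only the reconstruction work is redone. */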
              int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
    uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
    unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
    unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
    unsigned hm = hm1 | hm2 | hm13 | hm23;
    for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
        int L = *l, H = L >> 4;
        if (hmask1[0] & x) {
            if (hmask2[0] & x) {
            } else if (hm2 & x) {
                [0](ptr, ls_y, E, I, H);
                [0](ptr, ls_y, E, I, H);
        } else if (hm2 & x) {
            int L = l[8], H = L >> 4;
            [0](ptr + 8 * ls_y, ls_y, E, I, H);
            int L = *l, H = L >> 4;
        } else if (hm23 & x) {
            int L = l[8], H = L >> 4;
dst = f->data[0] + yoff;
for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
    uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
    unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
    for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
        int L = *l, H = L >> 4;
        if (vmask[0] & (x << 1)) {
        } else if (vm & (x << 1)) {
            [!!(vmask[1] & (x << 1))]
            [1](ptr, ls_y, E, I, H);
            [1](ptr, ls_y, E, I, H);
        } else if (vm & (x << 1)) {
            int L = l[1], H = L >> 4;
            [1](ptr + 8, ls_y, E, I, H);
            int L = *l, H = L >> 4;
            if (vm3 & (x << 1)) {
        } else if (vm3 & (x << 1)) {
            int L = l[1], H = L >> 4;
for (p = 0; p < 2; p++) {
    dst = f->data[1 + p] + uvoff;
    for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
        uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
        unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
        unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
        for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
            int L = *l, H = L >> 4;
            if (hmask1[0] & x) {
                if (hmask2[0] & x) {
                } else if (hm2 & x) {
                    [0](ptr, ls_uv, E, I, H);
                    [0](ptr, ls_uv, E, I, H);
            } else if (hm2 & x) {
                int L = l[16], H = L >> 4;
                [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
    dst = f->data[1 + p] + uvoff;
    for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
        uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
        unsigned vm = vmask[0] | vmask[1] | vmask[2];
        for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
            int L = *l, H = L >> 4;
            if (vmask[0] & (x << 2)) {
            } else if (vm & (x << 2)) {
                [!!(vmask[1] & (x << 2))]
                [1](ptr, ls_uv, E, I, H);
                [1](ptr, ls_uv, E, I, H);
            } else if (vm & (x << 2)) {
                int L = l[2], H = L >> 4;
                [1](ptr + 8, ls_uv, E, I, H);
int sb_start = ( idx      * n) >> log2_n;
int sb_end   = ((idx + 1) * n) >> log2_n;
*start = FFMIN(sb_start, n) << 3;
*end   = FFMIN(sb_end,   n) << 3;
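/* Evenly splits n superblock columns (or rows) among 1 << log2_n tiles
 * and converts to 8x8-block units. E.g. with n = 20 and log2_n = 2, tile
 * boundaries fall at superblocks 0, 5, 10, 15, 20, i.e. start/end pairs
 * of 0/40, 40/80, 80/120, 120/160 in block units. */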
           int max_count, int update_factor)
unsigned ct = ct0 + ct1, p2, p1;
p2 = ((ct0 << 8) + (ct >> 1)) / ct;
p2 = av_clip(p2, 1, 255);
ct = FFMIN(ct, max_count);
update_factor = FASTDIV(update_factor * ct, max_count);
*p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
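/* Backward adaptation: p2 is the empirical probability of the 0-branch
 * derived from the counts (rounded, clipped to [1,255]), and the stored
 * probability moves from p1 toward p2 by update_factor/256, with the
 * factor scaled down when fewer than max_count samples were seen. E.g.
 * ct0 = 30, ct1 = 10 gives p2 = 192; with p1 = 128 and a full-strength
 * factor of 128 the new probability is 128 + ((64 * 128 + 128) >> 8) = 160. */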
for (i = 0; i < 4; i++)
    for (j = 0; j < 2; j++)
        for (k = 0; k < 2; k++)
            for (l = 0; l < 6; l++)
                for (m = 0; m < 6; m++) {
                    if (l == 0 && m >= 3)
                    adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
for (i = 0; i < 3; i++)
for (i = 0; i < 4; i++)
for (i = 0; i < 5; i++)
for (i = 0; i < 5; i++)
for (i = 0; i < 5; i++) {
    adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
    adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
for (i = 0; i < 4; i++)
    for (j = 0; j < 4; j++) {
        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
for (i = 0; i < 2; i++) {
    adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
for (i = 0; i < 4; i++) {
    adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
for (i = 0; i < 7; i++) {
    adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
    adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
for (i = 0; i < 2; i++) {
    unsigned *c, (*c2)[2], sum;
    sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
    adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
    adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
    adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
    for (j = 0; j < 10; j++)
        adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
    for (j = 0; j < 2; j++) {
        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
for (i = 0; i < 4; i++) {
    sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
for (i = 0; i < 10; i++) {
    sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
int res, tile_row, tile_col, i, ref, row, col;
ptrdiff_t yoff, uvoff, ls_y, ls_uv;
} else if (res == 0) {
for (i = 0; i < 8; i++) {
       "Failed to allocate block buffers\n");
for (i = 0; i < 4; i++) {
    for (j = 0; j < 2; j++)
        for (k = 0; k < 2; k++)
            for (l = 0; l < 6; l++)
                for (m = 0; m < 6; m++)
if (tile_size > size) {
     row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
    ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
    memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
         col < s->tiling.tile_col_end;
         col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
        memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
    memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
    if (row + 8 < s->rows) {
            f->data[0] + yoff + 63 * ls_y,
            f->data[1] + uvoff + 31 * ls_uv,
            f->data[2] + uvoff + 31 * ls_uv,
    lflvl_ptr = s->lflvl;
    for (col = 0; col < s->cols;
         col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
} while (s->pass++ == 1);
for (i = 0; i < 8; i++) {
for (i = 0; i < 2; i++)
for (i = 0; i < 8; i++)
for (i = 0; i < 2; i++) {
for (i = 0; i < 8; i++) {
    (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols ||
     s->rows != ssrc->rows)) {
for (i = 0; i < 2; i++) {
    if (ssrc->frames[i].tf.f->data[0]) {
for (i = 0; i < 8; i++) {
    if (ssrc->next_refs[i].f->data[0]) {
if (ssrc->segmentation.enabled) {