Go to the documentation of this file.
34 const uint8_t *
left,
const uint8_t *_top)
48 const uint8_t *
left,
const uint8_t *_top)
61 for (y = 0; y < 8; y++) {
73 const uint8_t *
left,
const uint8_t *_top)
89 for (y = 0; y < 16; y++) {
104 const uint8_t *
left,
const uint8_t *_top)
126 for (y = 0; y < 32; y++) {
147 const uint8_t *_left,
const uint8_t *top)
160 const uint8_t *_left,
const uint8_t *top)
167 for (y = 0; y < 8; y++) {
177 const uint8_t *_left,
const uint8_t *top)
184 for (y = 0; y < 16; y++) {
196 const uint8_t *_left,
const uint8_t *top)
203 for (y = 0; y < 32; y++) {
221 const uint8_t *_left,
const uint8_t *_top)
229 for (y = 0; y < 4; y++) {
230 int l_m_tl =
left[3 - y] - tl;
241 const uint8_t *_left,
const uint8_t *_top)
249 for (y = 0; y < 8; y++) {
250 int l_m_tl =
left[7 - y] - tl;
265 const uint8_t *_left,
const uint8_t *_top)
273 for (y = 0; y < 16; y++) {
274 int l_m_tl =
left[15 - y] - tl;
297 const uint8_t *_left,
const uint8_t *_top)
305 for (y = 0; y < 32; y++) {
306 int l_m_tl =
left[31 - y] - tl;
347 const uint8_t *_left,
const uint8_t *_top)
353 top[0] + top[1] + top[2] + top[3] + 4) >> 3);
363 const uint8_t *_left,
const uint8_t *_top)
370 left[6] +
left[7] + top[0] + top[1] + top[2] + top[3] +
371 top[4] + top[5] + top[6] + top[7] + 8) >> 4);
375 for (y = 0; y < 8; y++) {
383 const uint8_t *_left,
const uint8_t *_top)
391 left[13] +
left[14] +
left[15] + top[0] + top[1] + top[2] + top[3] +
392 top[4] + top[5] + top[6] + top[7] + top[8] + top[9] + top[10] +
393 top[11] + top[12] + top[13] + top[14] + top[15] + 16) >> 5);
397 for (y = 0; y < 16; y++) {
407 const uint8_t *_left,
const uint8_t *_top)
418 left[31] + top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
419 top[6] + top[7] + top[8] + top[9] + top[10] + top[11] + top[12] +
420 top[13] + top[14] + top[15] + top[16] + top[17] + top[18] + top[19] +
421 top[20] + top[21] + top[22] + top[23] + top[24] + top[25] + top[26] +
422 top[27] + top[28] + top[29] + top[30] + top[31] + 32) >> 6);
426 for (y = 0; y < 32; y++) {
440 const uint8_t *_left,
const uint8_t *top)
454 const uint8_t *_left,
const uint8_t *top)
464 for (y = 0; y < 8; y++) {
472 const uint8_t *_left,
const uint8_t *top)
483 for (y = 0; y < 16; y++) {
493 const uint8_t *_left,
const uint8_t *top)
507 for (y = 0; y < 32; y++) {
521 const uint8_t *
left,
const uint8_t *_top)
535 const uint8_t *
left,
const uint8_t *_top)
540 ((top[0] + top[1] + top[2] + top[3] +
541 top[4] + top[5] + top[6] + top[7] + 4) >> 3);
545 for (y = 0; y < 8; y++) {
553 const uint8_t *
left,
const uint8_t *_top)
558 ((top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
559 top[6] + top[7] + top[8] + top[9] + top[10] + top[11] +
560 top[12] + top[13] + top[14] + top[15] + 8) >> 4);
564 for (y = 0; y < 16; y++) {
574 const uint8_t *
left,
const uint8_t *_top)
579 ((top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
580 top[6] + top[7] + top[8] + top[9] + top[10] + top[11] +
581 top[12] + top[13] + top[14] + top[15] + top[16] + top[17] +
582 top[18] + top[19] + top[20] + top[21] + top[22] + top[23] +
583 top[24] + top[25] + top[26] + top[27] + top[28] + top[29] +
584 top[30] + top[31] + 16) >> 5);
588 for (y = 0; y < 32; y++) {
604 const uint8_t *
left,
const uint8_t *top)
617 const uint8_t *
left,
const uint8_t *top)
624 for (y = 0; y < 8; y++) {
632 const uint8_t *
left,
const uint8_t *top)
639 for (y = 0; y < 16; y++) {
649 const uint8_t *
left,
const uint8_t *top)
656 for (y = 0; y < 32; y++) {
670 const uint8_t *
left,
const uint8_t *top)
682 const uint8_t *
left,
const uint8_t *top)
689 for (y = 0; y < 8; y++) {
697 const uint8_t *
left,
const uint8_t *top)
704 for (y = 0; y < 16; y++) {
714 const uint8_t *
left,
const uint8_t *top)
721 for (y = 0; y < 32; y++) {
735 const uint8_t *
left,
const uint8_t *top)
748 const uint8_t *
left,
const uint8_t *top)
755 for (y = 0; y < 8; y++) {
763 const uint8_t *
left,
const uint8_t *top)
770 for (y = 0; y < 16; y++) {
780 const uint8_t *
left,
const uint8_t *top)
787 for (y = 0; y < 32; y++) {
803 #define memset_bpc memset
807 for (n = 0; n <
len; n++) {
813 #define DST(x, y) dst[(x) + (y) * stride]
816 const uint8_t *
left,
const uint8_t *_top)
820 int a0 = top[0],
a1 = top[1],
a2 = top[2],
a3 = top[3],
821 a4 = top[4], a5 = top[5], a6 = top[6], a7 = top[7];
826 DST(2,0) =
DST(1,1) =
DST(0,2) = (
a2 +
a3 * 2 + a4 + 2) >> 2;
827 DST(3,0) =
DST(2,1) =
DST(1,2) =
DST(0,3) = (
a3 + a4 * 2 + a5 + 2) >> 2;
828 DST(3,1) =
DST(2,2) =
DST(1,3) = (a4 + a5 * 2 + a6 + 2) >> 2;
829 DST(3,2) =
DST(2,3) = (a5 + a6 * 2 + a7 + 2) >> 2;
833 #define def_diag_downleft(size) \
834 static void diag_downleft_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
835 const uint8_t *left, const uint8_t *_top) \
837 pixel *dst = (pixel *) _dst; \
838 const pixel *top = (const pixel *) _top; \
842 stride /= sizeof(pixel); \
843 for (i = 0; i < size - 2; i++) \
844 v[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \
845 v[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \
847 for (j = 0; j < size; j++) { \
848 memcpy(dst + j*stride, v + j, (size - 1 - j) * sizeof(pixel)); \
849 memset_bpc(dst + j*stride + size - 1 - j, top[size - 1], j + 1); \
857 static
void diag_downright_4x4_c(uint8_t *_dst, ptrdiff_t
stride,
858 const uint8_t *_left, const uint8_t *_top)
863 int tl = top[-1],
a0 = top[0],
a1 = top[1],
a2 = top[2],
a3 = top[3],
867 DST(0,3) = (l1 + l2 * 2 + l3 + 2) >> 2;
868 DST(0,2) =
DST(1,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
869 DST(0,1) =
DST(1,2) =
DST(2,3) = (tl + l0 * 2 + l1 + 2) >> 2;
870 DST(0,0) =
DST(1,1) =
DST(2,2) =
DST(3,3) = (l0 + tl * 2 +
a0 + 2) >> 2;
871 DST(1,0) =
DST(2,1) =
DST(3,2) = (tl +
a0 * 2 +
a1 + 2) >> 2;
876 #define def_diag_downright(size) \
877 static void diag_downright_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
878 const uint8_t *_left, const uint8_t *_top) \
880 pixel *dst = (pixel *) _dst; \
881 const pixel *top = (const pixel *) _top; \
882 const pixel *left = (const pixel *) _left; \
884 pixel v[size + size - 1]; \
886 stride /= sizeof(pixel); \
887 for (i = 0; i < size - 2; i++) { \
888 v[i ] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \
889 v[size + 1 + i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \
891 v[size - 2] = (left[size - 2] + left[size - 1] * 2 + top[-1] + 2) >> 2; \
892 v[size - 1] = (left[size - 1] + top[-1] * 2 + top[ 0] + 2) >> 2; \
893 v[size ] = (top[-1] + top[0] * 2 + top[ 1] + 2) >> 2; \
895 for (j = 0; j < size; j++) \
896 memcpy(dst + j*stride, v + size - 1 - j, size * sizeof(pixel)); \
903 static
void vert_right_4x4_c(uint8_t *_dst, ptrdiff_t
stride,
904 const uint8_t *_left, const uint8_t *_top)
909 int tl = top[-1],
a0 = top[0],
a1 = top[1],
a2 = top[2],
a3 = top[3],
913 DST(0,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
914 DST(0,2) = (tl + l0 * 2 + l1 + 2) >> 2;
915 DST(0,0) =
DST(1,2) = (tl +
a0 + 1) >> 1;
916 DST(0,1) =
DST(1,3) = (l0 + tl * 2 +
a0 + 2) >> 2;
918 DST(1,1) =
DST(2,3) = (tl +
a0 * 2 +
a1 + 2) >> 2;
925 #define def_vert_right(size) \
926 static void vert_right_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
927 const uint8_t *_left, const uint8_t *_top) \
929 pixel *dst = (pixel *) _dst; \
930 const pixel *top = (const pixel *) _top; \
931 const pixel *left = (const pixel *) _left; \
933 pixel ve[size + size/2 - 1], vo[size + size/2 - 1]; \
935 stride /= sizeof(pixel); \
936 for (i = 0; i < size/2 - 2; i++) { \
937 vo[i] = (left[i*2 + 3] + left[i*2 + 2] * 2 + left[i*2 + 1] + 2) >> 2; \
938 ve[i] = (left[i*2 + 4] + left[i*2 + 3] * 2 + left[i*2 + 2] + 2) >> 2; \
940 vo[size/2 - 2] = (left[size - 1] + left[size - 2] * 2 + left[size - 3] + 2) >> 2; \
941 ve[size/2 - 2] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \
943 ve[size/2 - 1] = (top[-1] + top[0] + 1) >> 1; \
944 vo[size/2 - 1] = (left[size - 1] + top[-1] * 2 + top[0] + 2) >> 2; \
945 for (i = 0; i < size - 1; i++) { \
946 ve[size/2 + i] = (top[i] + top[i + 1] + 1) >> 1; \
947 vo[size/2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \
950 for (j = 0; j < size / 2; j++) { \
951 memcpy(dst + j*2 *stride, ve + size/2 - 1 - j, size * sizeof(pixel)); \
952 memcpy(dst + (j*2 + 1)*stride, vo + size/2 - 1 - j, size * sizeof(pixel)); \
960 static
void hor_down_4x4_c(uint8_t *_dst, ptrdiff_t
stride,
961 const uint8_t *_left, const uint8_t *_top)
967 tl = top[-1],
a0 = top[0],
a1 = top[1],
a2 = top[2];
970 DST(2,0) = (tl +
a0 * 2 +
a1 + 2) >> 2;
972 DST(0,0) =
DST(2,1) = (tl + l0 + 1) >> 1;
973 DST(1,0) =
DST(3,1) = (
a0 + tl * 2 + l0 + 2) >> 2;
974 DST(0,1) =
DST(2,2) = (l0 + l1 + 1) >> 1;
975 DST(1,1) =
DST(3,2) = (tl + l0 * 2 + l1 + 2) >> 2;
976 DST(0,2) =
DST(2,3) = (l1 + l2 + 1) >> 1;
977 DST(1,2) =
DST(3,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
978 DST(0,3) = (l2 + l3 + 1) >> 1;
979 DST(1,3) = (l1 + l2 * 2 + l3 + 2) >> 2;
982 #define def_hor_down(size) \
983 static void hor_down_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
984 const uint8_t *_left, const uint8_t *_top) \
986 pixel *dst = (pixel *) _dst; \
987 const pixel *top = (const pixel *) _top; \
988 const pixel *left = (const pixel *) _left; \
990 pixel v[size * 3 - 2]; \
992 stride /= sizeof(pixel); \
993 for (i = 0; i < size - 2; i++) { \
994 v[i*2 ] = (left[i + 1] + left[i + 0] + 1) >> 1; \
995 v[i*2 + 1] = (left[i + 2] + left[i + 1] * 2 + left[i + 0] + 2) >> 2; \
996 v[size*2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \
998 v[size*2 - 2] = (top[-1] + left[size - 1] + 1) >> 1; \
999 v[size*2 - 4] = (left[size - 1] + left[size - 2] + 1) >> 1; \
1000 v[size*2 - 1] = (top[0] + top[-1] * 2 + left[size - 1] + 2) >> 2; \
1001 v[size*2 - 3] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \
1003 for (j = 0; j < size; j++) \
1004 memcpy(dst + j*stride, v + size*2 - 2 - j*2, size * sizeof(pixel)); \
1011 static
void vert_left_4x4_c(uint8_t *_dst, ptrdiff_t
stride,
1012 const uint8_t *
left, const uint8_t *_top)
1016 int a0 = top[0],
a1 = top[1],
a2 = top[2],
a3 = top[3],
1017 a4 = top[4], a5 = top[5], a6 = top[6];
1020 DST(0,0) = (
a0 +
a1 + 1) >> 1;
1025 DST(2,1) =
DST(1,3) = (
a2 +
a3 * 2 + a4 + 2) >> 2;
1026 DST(3,0) =
DST(2,2) = (
a3 + a4 + 1) >> 1;
1027 DST(3,1) =
DST(2,3) = (
a3 + a4 * 2 + a5 + 2) >> 2;
1028 DST(3,2) = (a4 + a5 + 1) >> 1;
1029 DST(3,3) = (a4 + a5 * 2 + a6 + 2) >> 2;
1032 #define def_vert_left(size) \
1033 static void vert_left_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
1034 const uint8_t *left, const uint8_t *_top) \
1036 pixel *dst = (pixel *) _dst; \
1037 const pixel *top = (const pixel *) _top; \
1039 pixel ve[size - 1], vo[size - 1]; \
1041 stride /= sizeof(pixel); \
1042 for (i = 0; i < size - 2; i++) { \
1043 ve[i] = (top[i] + top[i + 1] + 1) >> 1; \
1044 vo[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \
1046 ve[size - 2] = (top[size - 2] + top[size - 1] + 1) >> 1; \
1047 vo[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \
1049 for (j = 0; j < size / 2; j++) { \
1050 memcpy(dst + j*2 * stride, ve + j, (size - j - 1) * sizeof(pixel)); \
1051 memset_bpc(dst + j*2 * stride + size - j - 1, top[size - 1], j + 1); \
1052 memcpy(dst + (j*2 + 1) * stride, vo + j, (size - j - 1) * sizeof(pixel)); \
1053 memset_bpc(dst + (j*2 + 1) * stride + size - j - 1, top[size - 1], j + 1); \
1061 static
void hor_up_4x4_c(uint8_t *_dst, ptrdiff_t
stride,
1062 const uint8_t *_left, const uint8_t *top)
1069 DST(0,0) = (l0 + l1 + 1) >> 1;
1070 DST(1,0) = (l0 + l1 * 2 + l2 + 2) >> 2;
1071 DST(0,1) =
DST(2,0) = (l1 + l2 + 1) >> 1;
1072 DST(1,1) =
DST(3,0) = (l1 + l2 * 2 + l3 + 2) >> 2;
1073 DST(0,2) =
DST(2,1) = (l2 + l3 + 1) >> 1;
1074 DST(1,2) =
DST(3,1) = (l2 + l3 * 3 + 2) >> 2;
1078 #define def_hor_up(size) \
1079 static void hor_up_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
1080 const uint8_t *_left, const uint8_t *top) \
1082 pixel *dst = (pixel *) _dst; \
1083 const pixel *left = (const pixel *) _left; \
1085 pixel v[size*2 - 2]; \
1087 stride /= sizeof(pixel); \
1088 for (i = 0; i < size - 2; i++) { \
1089 v[i*2 ] = (left[i] + left[i + 1] + 1) >> 1; \
1090 v[i*2 + 1] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \
1092 v[size*2 - 4] = (left[size - 2] + left[size - 1] + 1) >> 1; \
1093 v[size*2 - 3] = (left[size - 2] + left[size - 1] * 3 + 2) >> 2; \
1095 for (j = 0; j < size / 2; j++) \
1096 memcpy(dst + j*stride, v + j*2, size * sizeof(pixel)); \
1097 for (j = size / 2; j < size; j++) { \
1098 memcpy(dst + j*stride, v + j*2, (size*2 - 2 - j*2) * sizeof(pixel)); \
1099 memset_bpc(dst + j*stride + size*2 - 2 - j*2, left[size - 1], \
1120 #define init_intra_pred_bd_aware(tx, sz) \
1121 dsp->intra_pred[tx][TM_VP8_PRED] = tm_##sz##_c; \
1122 dsp->intra_pred[tx][DC_128_PRED] = dc_128_##sz##_c; \
1123 dsp->intra_pred[tx][DC_127_PRED] = dc_127_##sz##_c; \
1124 dsp->intra_pred[tx][DC_129_PRED] = dc_129_##sz##_c
1127 ff_vp9dsp_intrapred_init_10(dsp);
1128 #define init_intra_pred(tx, sz) \
1129 init_intra_pred_bd_aware(tx, sz)
1131 #define init_intra_pred(tx, sz) \
1132 dsp->intra_pred[tx][VERT_PRED] = vert_##sz##_c; \
1133 dsp->intra_pred[tx][HOR_PRED] = hor_##sz##_c; \
1134 dsp->intra_pred[tx][DC_PRED] = dc_##sz##_c; \
1135 dsp->intra_pred[tx][DIAG_DOWN_LEFT_PRED] = diag_downleft_##sz##_c; \
1136 dsp->intra_pred[tx][DIAG_DOWN_RIGHT_PRED] = diag_downright_##sz##_c; \
1137 dsp->intra_pred[tx][VERT_RIGHT_PRED] = vert_right_##sz##_c; \
1138 dsp->intra_pred[tx][HOR_DOWN_PRED] = hor_down_##sz##_c; \
1139 dsp->intra_pred[tx][VERT_LEFT_PRED] = vert_left_##sz##_c; \
1140 dsp->intra_pred[tx][HOR_UP_PRED] = hor_up_##sz##_c; \
1141 dsp->intra_pred[tx][LEFT_DC_PRED] = dc_left_##sz##_c; \
1142 dsp->intra_pred[tx][TOP_DC_PRED] = dc_top_##sz##_c; \
1143 init_intra_pred_bd_aware(tx, sz)
1151 #undef init_intra_pred
1152 #undef init_intra_pred_bd_aware
1155 #define itxfm_wrapper(type_a, type_b, sz, bits, has_dconly) \
1156 static void type_a##_##type_b##_##sz##x##sz##_add_c(uint8_t *_dst, \
1158 int16_t *_block, int eob) \
1161 pixel *dst = (pixel *) _dst; \
1162 dctcoef *block = (dctcoef *) _block, tmp[sz * sz], out[sz]; \
1164 stride /= sizeof(pixel); \
1165 if (has_dconly && eob == 1) { \
1166 const int t = ((((dctint) block[0] * 11585 + (1 << 13)) >> 14) \
1167 * 11585 + (1 << 13)) >> 14; \
1169 for (i = 0; i < sz; i++) { \
1170 for (j = 0; j < sz; j++) \
1171 dst[j * stride] = av_clip_pixel(dst[j * stride] + \
1173 (int)(t + (1U << (bits - 1))) >> bits : \
1180 for (i = 0; i < sz; i++) \
1181 type_a##sz##_1d(block + i, sz, tmp + i * sz, 0); \
1182 memset(block, 0, sz * sz * sizeof(*block)); \
1183 for (i = 0; i < sz; i++) { \
1184 type_b##sz##_1d(tmp + i, sz, out, 1); \
1185 for (j = 0; j < sz; j++) \
1186 dst[j * stride] = av_clip_pixel(dst[j * stride] + \
1188 (int)(out[j] + (1U << (bits - 1))) >> bits : \
1194 #define itxfm_wrap(sz, bits) \
1195 itxfm_wrapper(idct, idct, sz, bits, 1) \
1196 itxfm_wrapper(iadst, idct, sz, bits, 0) \
1197 itxfm_wrapper(idct, iadst, sz, bits, 0) \
1198 itxfm_wrapper(iadst, iadst, sz, bits, 0)
1200 #define IN(x) ((dctint) in[(x) * stride])
1207 t0 = ((
IN(0) +
IN(2)) * 11585 + (1 << 13)) >> 14;
1208 t1 = ((
IN(0) -
IN(2)) * 11585 + (1 << 13)) >> 14;
1209 t2 = (
IN(1) * 6270 -
IN(3) * 15137 + (1 << 13)) >> 14;
1210 t3 = (
IN(1) * 15137 +
IN(3) * 6270 + (1 << 13)) >> 14;
1223 t0 = 5283 *
IN(0) + 15212 *
IN(2) + 9929 *
IN(3);
1224 t1 = 9929 *
IN(0) - 5283 *
IN(2) - 15212 *
IN(3);
1225 t2 = 13377 * (
IN(0) -
IN(2) +
IN(3));
1228 out[0] = (t0 + t3 + (1 << 13)) >> 14;
1229 out[1] = (t1 + t3 + (1 << 13)) >> 14;
1230 out[2] = (t2 + (1 << 13)) >> 14;
1231 out[3] = (t0 + t1 - t3 + (1 << 13)) >> 14;
1239 dctint t0, t0a, t1, t1a, t2, t2a, t3, t3a, t4, t4a, t5, t5a, t6, t6a, t7, t7a;
1241 t0a = ((
IN(0) +
IN(4)) * 11585 + (1 << 13)) >> 14;
1242 t1a = ((
IN(0) -
IN(4)) * 11585 + (1 << 13)) >> 14;
1243 t2a = (
IN(2) * 6270 -
IN(6) * 15137 + (1 << 13)) >> 14;
1244 t3a = (
IN(2) * 15137 +
IN(6) * 6270 + (1 << 13)) >> 14;
1245 t4a = (
IN(1) * 3196 -
IN(7) * 16069 + (1 << 13)) >> 14;
1246 t5a = (
IN(5) * 13623 -
IN(3) * 9102 + (1 << 13)) >> 14;
1247 t6a = (
IN(5) * 9102 +
IN(3) * 13623 + (1 << 13)) >> 14;
1248 t7a = (
IN(1) * 16069 +
IN(7) * 3196 + (1 << 13)) >> 14;
1259 t5 = ((t6a - t5a) * 11585 + (1 << 13)) >> 14;
1260 t6 = ((t6a + t5a) * 11585 + (1 << 13)) >> 14;
1275 dctint t0, t0a, t1, t1a, t2, t2a, t3, t3a, t4, t4a, t5, t5a, t6, t6a, t7, t7a;
1277 t0a = 16305 *
IN(7) + 1606 *
IN(0);
1278 t1a = 1606 *
IN(7) - 16305 *
IN(0);
1279 t2a = 14449 *
IN(5) + 7723 *
IN(2);
1280 t3a = 7723 *
IN(5) - 14449 *
IN(2);
1281 t4a = 10394 *
IN(3) + 12665 *
IN(4);
1282 t5a = 12665 *
IN(3) - 10394 *
IN(4);
1283 t6a = 4756 *
IN(1) + 15679 *
IN(6);
1284 t7a = 15679 *
IN(1) - 4756 *
IN(6);
1286 t0 = (t0a + t4a + (1 << 13)) >> 14;
1287 t1 = (t1a + t5a + (1 << 13)) >> 14;
1288 t2 = (t2a + t6a + (1 << 13)) >> 14;
1289 t3 = (t3a + t7a + (1 << 13)) >> 14;
1290 t4 = (t0a - t4a + (1 << 13)) >> 14;
1291 t5 = (t1a - t5a + (1 << 13)) >> 14;
1292 t6 = (t2a - t6a + (1 << 13)) >> 14;
1293 t7 = (t3a - t7a + (1 << 13)) >> 14;
1295 t4a = 15137
U * t4 + 6270
U * t5;
1296 t5a = 6270
U * t4 - 15137
U * t5;
1297 t6a = 15137
U * t7 - 6270
U * t6;
1298 t7a = 6270
U * t7 + 15137
U * t6;
1301 out[7] = -(t1 + t3);
1305 out[1] = -((
dctint)((1
U << 13) + t4a + t6a) >> 14);
1306 out[6] = (
dctint)((1
U << 13) + t5a + t7a) >> 14;
1307 t6 = (
dctint)((1
U << 13) + t4a - t6a) >> 14;
1308 t7 = (
dctint)((1
U << 13) + t5a - t7a) >> 14;
1310 out[3] = -((
dctint)((t2 + t3) * 11585
U + (1 << 13)) >> 14);
1311 out[4] = (
dctint)((t2 - t3) * 11585
U + (1 << 13)) >> 14;
1312 out[2] = (
dctint)((t6 + t7) * 11585
U + (1 << 13)) >> 14;
1313 out[5] = -((
dctint)((t6 - t7) * 11585
U + (1 << 13)) >> 14);
1321 dctint t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14,
t15;
1322 dctint t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a;
1323 dctint t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a;
1325 t0a = (
dctint)((
IN(0) +
IN(8)) * 11585
U + (1 << 13)) >> 14;
1326 t1a = (
dctint)((
IN(0) -
IN(8)) * 11585
U + (1 << 13)) >> 14;
1327 t2a = (
dctint)(
IN(4) * 6270
U -
IN(12) * 15137
U + (1 << 13)) >> 14;
1328 t3a = (
dctint)(
IN(4) * 15137
U +
IN(12) * 6270
U + (1 << 13)) >> 14;
1329 t4a = (
dctint)(
IN(2) * 3196
U -
IN(14) * 16069
U + (1 << 13)) >> 14;
1330 t7a = (
dctint)(
IN(2) * 16069
U +
IN(14) * 3196
U + (1 << 13)) >> 14;
1331 t5a = (
dctint)(
IN(10) * 13623
U -
IN(6) * 9102
U + (1 << 13)) >> 14;
1332 t6a = (
dctint)(
IN(10) * 9102
U +
IN(6) * 13623
U + (1 << 13)) >> 14;
1333 t8a = (
dctint)(
IN(1) * 1606
U -
IN(15) * 16305
U + (1 << 13)) >> 14;
1334 t15a = (
dctint)(
IN(1) * 16305
U +
IN(15) * 1606
U + (1 << 13)) >> 14;
1335 t9a = (
dctint)(
IN(9) * 12665
U -
IN(7) * 10394
U + (1 << 13)) >> 14;
1336 t14a = (
dctint)(
IN(9) * 10394
U +
IN(7) * 12665
U + (1 << 13)) >> 14;
1337 t10a = (
dctint)(
IN(5) * 7723
U -
IN(11) * 14449
U + (1 << 13)) >> 14;
1338 t13a = (
dctint)(
IN(5) * 14449
U +
IN(11) * 7723
U + (1 << 13)) >> 14;
1339 t11a = (
dctint)(
IN(13) * 15679
U -
IN(3) * 4756
U + (1 << 13)) >> 14;
1340 t12a = (
dctint)(
IN(13) * 4756
U +
IN(3) * 15679
U + (1 << 13)) >> 14;
1359 t5a = (
dctint)((t6 - t5) * 11585
U + (1 << 13)) >> 14;
1360 t6a = (
dctint)((t6 + t5) * 11585
U + (1 << 13)) >> 14;
1361 t9a = (
dctint)( t14 * 6270
U - t9 * 15137
U + (1 << 13)) >> 14;
1362 t14a = (
dctint)( t14 * 15137
U + t9 * 6270
U + (1 << 13)) >> 14;
1363 t10a = (
dctint)(-(t13 * 15137
U + t10 * 6270
U) + (1 << 13)) >> 14;
1364 t13a = (
dctint)( t13 * 6270
U - t10 * 15137
U + (1 << 13)) >> 14;
1383 t10a = (
dctint)((t13 - t10) * 11585
U + (1 << 13)) >> 14;
1384 t13a = (
dctint)((t13 + t10) * 11585
U + (1 << 13)) >> 14;
1385 t11 = (
dctint)((t12a - t11a) * 11585
U + (1 << 13)) >> 14;
1386 t12 = (
dctint)((t12a + t11a) * 11585
U + (1 << 13)) >> 14;
1388 out[ 0] = t0a + t15a;
1389 out[ 1] = t1a + t14;
1390 out[ 2] = t2a + t13a;
1391 out[ 3] = t3a + t12;
1393 out[ 5] = t5 + t10a;
1398 out[10] = t5 - t10a;
1400 out[12] = t3a - t12;
1401 out[13] = t2a - t13a;
1402 out[14] = t1a - t14;
1403 out[15] = t0a - t15a;
1409 dctint t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14,
t15;
1410 dctint t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a;
1411 dctint t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a;
1413 t0 =
IN(15) * 16364
U +
IN(0) * 804
U;
1414 t1 =
IN(15) * 804
U -
IN(0) * 16364
U;
1415 t2 =
IN(13) * 15893
U +
IN(2) * 3981
U;
1416 t3 =
IN(13) * 3981
U -
IN(2) * 15893
U;
1417 t4 =
IN(11) * 14811
U +
IN(4) * 7005
U;
1418 t5 =
IN(11) * 7005
U -
IN(4) * 14811
U;
1419 t6 =
IN(9) * 13160
U +
IN(6) * 9760
U;
1420 t7 =
IN(9) * 9760
U -
IN(6) * 13160
U;
1421 t8 =
IN(7) * 11003
U +
IN(8) * 12140
U;
1422 t9 =
IN(7) * 12140
U -
IN(8) * 11003
U;
1423 t10 =
IN(5) * 8423
U +
IN(10) * 14053
U;
1424 t11 =
IN(5) * 14053
U -
IN(10) * 8423
U;
1425 t12 =
IN(3) * 5520
U +
IN(12) * 15426
U;
1426 t13 =
IN(3) * 15426
U -
IN(12) * 5520
U;
1427 t14 =
IN(1) * 2404
U +
IN(14) * 16207
U;
1430 t0a = (
dctint)((1
U << 13) + t0 + t8 ) >> 14;
1431 t1a = (
dctint)((1
U << 13) + t1 + t9 ) >> 14;
1432 t2a = (
dctint)((1
U << 13) + t2 + t10) >> 14;
1433 t3a = (
dctint)((1
U << 13) + t3 + t11) >> 14;
1434 t4a = (
dctint)((1
U << 13) + t4 + t12) >> 14;
1435 t5a = (
dctint)((1
U << 13) + t5 + t13) >> 14;
1436 t6a = (
dctint)((1
U << 13) + t6 + t14) >> 14;
1438 t8a = (
dctint)((1
U << 13) + t0 - t8 ) >> 14;
1439 t9a = (
dctint)((1
U << 13) + t1 - t9 ) >> 14;
1440 t10a = (
dctint)((1
U << 13) + t2 - t10) >> 14;
1441 t11a = (
dctint)((1
U << 13) + t3 - t11) >> 14;
1442 t12a = (
dctint)((1
U << 13) + t4 - t12) >> 14;
1443 t13a = (
dctint)((1
U << 13) + t5 - t13) >> 14;
1444 t14a = (
dctint)((1
U << 13) + t6 - t14) >> 14;
1445 t15a = (
dctint)((1
U << 13) + t7 -
t15) >> 14;
1447 t8 = t8a * 16069
U + t9a * 3196
U;
1448 t9 = t8a * 3196
U - t9a * 16069
U;
1449 t10 = t10a * 9102
U + t11a * 13623
U;
1450 t11 = t10a * 13623
U - t11a * 9102
U;
1451 t12 = t13a * 16069
U - t12a * 3196
U;
1452 t13 = t13a * 3196
U + t12a * 16069
U;
1453 t14 = t15a * 9102
U - t14a * 13623
U;
1454 t15 = t15a * 13623
U + t14a * 9102
U;
1464 t8a = (
dctint)((1
U << 13) + t8 + t12) >> 14;
1465 t9a = (
dctint)((1
U << 13) + t9 + t13) >> 14;
1466 t10a = (
dctint)((1
U << 13) + t10 + t14) >> 14;
1467 t11a = (
dctint)((1
U << 13) + t11 +
t15) >> 14;
1468 t12a = (
dctint)((1
U << 13) + t8 - t12) >> 14;
1469 t13a = (
dctint)((1
U << 13) + t9 - t13) >> 14;
1470 t14a = (
dctint)((1
U << 13) + t10 - t14) >> 14;
1471 t15a = (
dctint)((1
U << 13) + t11 -
t15) >> 14;
1473 t4a = t4 * 15137
U + t5 * 6270
U;
1474 t5a = t4 * 6270
U - t5 * 15137
U;
1475 t6a = t7 * 15137
U - t6 * 6270
U;
1476 t7a = t7 * 6270
U + t6 * 15137
U;
1477 t12 = t12a * 15137
U + t13a * 6270
U;
1478 t13 = t12a * 6270
U - t13a * 15137
U;
1479 t14 = t15a * 15137
U - t14a * 6270
U;
1480 t15 = t15a * 6270
U + t14a * 15137
U;
1483 out[15] = -(t1 + t3);
1486 out[ 3] = -((
dctint)((1
U << 13) + t4a + t6a) >> 14);
1487 out[12] = (
dctint)((1
U << 13) + t5a + t7a) >> 14;
1488 t6 = (
dctint)((1
U << 13) + t4a - t6a) >> 14;
1489 t7 = (
dctint)((1
U << 13) + t5a - t7a) >> 14;
1490 out[ 1] = -(t8a + t10a);
1491 out[14] = t9a + t11a;
1494 out[ 2] = (
dctint)((1
U << 13) + t12 + t14) >> 14;
1496 t14a = (
dctint)((1
U << 13) + t12 - t14) >> 14;
1497 t15a = (
dctint)((1
U << 13) + t13 -
t15) >> 14;
1499 out[ 7] = (
dctint)(-(t2a + t3a) * 11585
U + (1 << 13)) >> 14;
1500 out[ 8] = (
dctint)( (t2a - t3a) * 11585
U + (1 << 13)) >> 14;
1501 out[ 4] = (
dctint)( (t7 + t6) * 11585
U + (1 << 13)) >> 14;
1502 out[11] = (
dctint)( (t7 - t6) * 11585
U + (1 << 13)) >> 14;
1503 out[ 6] = (
dctint)( (t11 + t10) * 11585
U + (1 << 13)) >> 14;
1504 out[ 9] = (
dctint)( (t11 - t10) * 11585
U + (1 << 13)) >> 14;
1505 out[ 5] = (
dctint)(-(t14a + t15a) * 11585
U + (1 << 13)) >> 14;
1506 out[10] = (
dctint)( (t14a - t15a) * 11585
U + (1 << 13)) >> 14;
1557 dctint t10 = t11a - t10a;
1558 dctint t11 = t11a + t10a;
1559 dctint t12 = t12a + t13a;
1560 dctint t13 = t12a - t13a;
1561 dctint t14 = t15a - t14a;
1563 dctint t16 = t16a + t17a;
1564 dctint t17 = t16a - t17a;
1565 dctint t18 = t19a - t18a;
1566 dctint t19 = t19a + t18a;
1567 dctint t20 = t20a + t21a;
1568 dctint t21 = t20a - t21a;
1569 dctint t22 = t23a - t22a;
1570 dctint t23 = t23a + t22a;
1571 dctint t24 = t24a + t25a;
1572 dctint t25 = t24a - t25a;
1573 dctint t26 = t27a - t26a;
1575 dctint t28 = t28a + t29a;
1576 dctint t29 = t28a - t29a;
1577 dctint t30 = t31a - t30a;
1578 dctint t31 = t31a + t30a;
1580 t5a = (
dctint)((t6 - t5) * 11585
U + (1 << 13)) >> 14;
1581 t6a = (
dctint)((t6 + t5) * 11585
U + (1 << 13)) >> 14;
1582 t9a = (
dctint)( t14 * 6270
U - t9 * 15137
U + (1 << 13)) >> 14;
1583 t14a = (
dctint)( t14 * 15137
U + t9 * 6270
U + (1 << 13)) >> 14;
1584 t10a = (
dctint)(-(t13 * 15137
U + t10 * 6270
U) + (1 << 13)) >> 14;
1585 t13a = (
dctint)( t13 * 6270
U - t10 * 15137
U + (1 << 13)) >> 14;
1586 t17a = (
dctint)( t30 * 3196
U - t17 * 16069
U + (1 << 13)) >> 14;
1587 t30a = (
dctint)( t30 * 16069
U + t17 * 3196
U + (1 << 13)) >> 14;
1588 t18a = (
dctint)(-(t29 * 16069
U + t18 * 3196
U) + (1 << 13)) >> 14;
1589 t29a = (
dctint)( t29 * 3196
U - t18 * 16069
U + (1 << 13)) >> 14;
1590 t21a = (
dctint)( t26 * 13623
U - t21 * 9102
U + (1 << 13)) >> 14;
1591 t26a = (
dctint)( t26 * 9102
U + t21 * 13623
U + (1 << 13)) >> 14;
1592 t22a = (
dctint)(-(t25 * 9102
U + t22 * 13623
U) + (1 << 13)) >> 14;
1593 t25a = (
dctint)( t25 * 13623
U - t22 * 9102
U + (1 << 13)) >> 14;
1628 t10a = (
dctint)((t13 - t10) * 11585
U + (1 << 13)) >> 14;
1629 t13a = (
dctint)((t13 + t10) * 11585
U + (1 << 13)) >> 14;
1630 t11 = (
dctint)((t12a - t11a) * 11585
U + (1 << 13)) >> 14;
1631 t12 = (
dctint)((t12a + t11a) * 11585
U + (1 << 13)) >> 14;
1632 t18a = (
dctint)( t29 * 6270
U - t18 * 15137
U + (1 << 13)) >> 14;
1633 t29a = (
dctint)( t29 * 15137
U + t18 * 6270
U + (1 << 13)) >> 14;
1634 t19 = (
dctint)( t28a * 6270
U - t19a * 15137
U + (1 << 13)) >> 14;
1635 t28 = (
dctint)( t28a * 15137
U + t19a * 6270
U + (1 << 13)) >> 14;
1636 t20 = (
dctint)(-(t27a * 15137
U + t20a * 6270
U) + (1 << 13)) >> 14;
1637 t27 = (
dctint)( t27a * 6270
U - t20a * 15137
U + (1 << 13)) >> 14;
1638 t21a = (
dctint)(-(t26 * 15137
U + t21 * 6270
U) + (1 << 13)) >> 14;
1639 t26a = (
dctint)( t26 * 6270
U - t21 * 15137
U + (1 << 13)) >> 14;
1674 t20 = (
dctint)((t27a - t20a) * 11585
U + (1 << 13)) >> 14;
1675 t27 = (
dctint)((t27a + t20a) * 11585
U + (1 << 13)) >> 14;
1676 t21a = (
dctint)((t26 - t21 ) * 11585
U + (1 << 13)) >> 14;
1677 t26a = (
dctint)((t26 + t21 ) * 11585
U + (1 << 13)) >> 14;
1678 t22 = (
dctint)((t25a - t22a) * 11585
U + (1 << 13)) >> 14;
1679 t25 = (
dctint)((t25a + t22a) * 11585
U + (1 << 13)) >> 14;
1680 t23a = (
dctint)((t24 - t23 ) * 11585
U + (1 << 13)) >> 14;
1681 t24a = (
dctint)((t24 + t23 ) * 11585
U + (1 << 13)) >> 14;
1684 out[ 1] = t1 + t30a;
1686 out[ 3] = t3 + t28a;
1688 out[ 5] = t5a + t26a;
1689 out[ 6] = t6a + t25;
1690 out[ 7] = t7 + t24a;
1691 out[ 8] = t8 + t23a;
1692 out[ 9] = t9a + t22;
1693 out[10] = t10 + t21a;
1694 out[11] = t11a + t20;
1695 out[12] = t12a + t19a;
1696 out[13] = t13 + t18;
1697 out[14] = t14a + t17a;
1700 out[17] = t14a - t17a;
1701 out[18] = t13 - t18;
1702 out[19] = t12a - t19a;
1703 out[20] = t11a - t20;
1704 out[21] = t10 - t21a;
1705 out[22] = t9a - t22;
1706 out[23] = t8 - t23a;
1707 out[24] = t7 - t24a;
1708 out[25] = t6a - t25;
1709 out[26] = t5a - t26a;
1711 out[28] = t3 - t28a;
1713 out[30] = t1 - t30a;
1722 int t0, t1, t2, t3, t4;
1738 t4 = (t0 - t3) >> 1;
1753 #undef itxfm_wrapper
1758 #define init_itxfm(tx, sz) \
1759 dsp->itxfm_add[tx][DCT_DCT] = idct_idct_##sz##_add_c; \
1760 dsp->itxfm_add[tx][DCT_ADST] = iadst_idct_##sz##_add_c; \
1761 dsp->itxfm_add[tx][ADST_DCT] = idct_iadst_##sz##_add_c; \
1762 dsp->itxfm_add[tx][ADST_ADST] = iadst_iadst_##sz##_add_c
1764 #define init_idct(tx, nm) \
1765 dsp->itxfm_add[tx][DCT_DCT] = \
1766 dsp->itxfm_add[tx][ADST_DCT] = \
1767 dsp->itxfm_add[tx][DCT_ADST] = \
1768 dsp->itxfm_add[tx][ADST_ADST] = nm##_add_c
1781 ptrdiff_t stridea, ptrdiff_t strideb,
1789 for (
i = 0;
i < 8;
i++, dst += stridea) {
1791 int p3 = dst[strideb * -4], p2 = dst[strideb * -3];
1792 int p1 = dst[strideb * -2], p0 = dst[strideb * -1];
1793 int q0 = dst[strideb * +0],
q1 = dst[strideb * +1];
1794 int q2 = dst[strideb * +2], q3 = dst[strideb * +3];
1796 int fm =
FFABS(p3 - p2) <= I &&
FFABS(p2 - p1) <= I &&
1800 int flat8out, flat8in;
1806 p7 = dst[strideb * -8];
1807 p6 = dst[strideb * -7];
1808 p5 = dst[strideb * -6];
1809 p4 = dst[strideb * -5];
1810 q4 = dst[strideb * +4];
1811 q5 = dst[strideb * +5];
1812 q6 = dst[strideb * +6];
1813 q7 = dst[strideb * +7];
1815 flat8out =
FFABS(p7 - p0) <=
F &&
FFABS(p6 - p0) <=
F &&
1822 flat8in =
FFABS(p3 - p0) <=
F &&
FFABS(p2 - p0) <=
F &&
1826 if (wd >= 16 && flat8out && flat8in) {
1827 dst[strideb * -7] = (p7 + p7 + p7 + p7 + p7 + p7 + p7 + p6 * 2 +
1828 p5 + p4 + p3 + p2 + p1 + p0 +
q0 + 8) >> 4;
1829 dst[strideb * -6] = (p7 + p7 + p7 + p7 + p7 + p7 + p6 + p5 * 2 +
1830 p4 + p3 + p2 + p1 + p0 +
q0 +
q1 + 8) >> 4;
1831 dst[strideb * -5] = (p7 + p7 + p7 + p7 + p7 + p6 + p5 + p4 * 2 +
1832 p3 + p2 + p1 + p0 +
q0 +
q1 + q2 + 8) >> 4;
1833 dst[strideb * -4] = (p7 + p7 + p7 + p7 + p6 + p5 + p4 + p3 * 2 +
1834 p2 + p1 + p0 +
q0 +
q1 + q2 + q3 + 8) >> 4;
1835 dst[strideb * -3] = (p7 + p7 + p7 + p6 + p5 + p4 + p3 + p2 * 2 +
1836 p1 + p0 +
q0 +
q1 + q2 + q3 + q4 + 8) >> 4;
1837 dst[strideb * -2] = (p7 + p7 + p6 + p5 + p4 + p3 + p2 + p1 * 2 +
1838 p0 +
q0 +
q1 + q2 + q3 + q4 + q5 + 8) >> 4;
1839 dst[strideb * -1] = (p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 +
1840 q0 +
q1 + q2 + q3 + q4 + q5 + q6 + 8) >> 4;
1841 dst[strideb * +0] = (p6 + p5 + p4 + p3 + p2 + p1 + p0 +
q0 * 2 +
1842 q1 + q2 + q3 + q4 + q5 + q6 + q7 + 8) >> 4;
1843 dst[strideb * +1] = (p5 + p4 + p3 + p2 + p1 + p0 +
q0 +
q1 * 2 +
1844 q2 + q3 + q4 + q5 + q6 + q7 + q7 + 8) >> 4;
1845 dst[strideb * +2] = (p4 + p3 + p2 + p1 + p0 +
q0 +
q1 + q2 * 2 +
1846 q3 + q4 + q5 + q6 + q7 + q7 + q7 + 8) >> 4;
1847 dst[strideb * +3] = (p3 + p2 + p1 + p0 +
q0 +
q1 + q2 + q3 * 2 +
1848 q4 + q5 + q6 + q7 + q7 + q7 + q7 + 8) >> 4;
1849 dst[strideb * +4] = (p2 + p1 + p0 +
q0 +
q1 + q2 + q3 + q4 * 2 +
1850 q5 + q6 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
1851 dst[strideb * +5] = (p1 + p0 +
q0 +
q1 + q2 + q3 + q4 + q5 * 2 +
1852 q6 + q7 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
1853 dst[strideb * +6] = (p0 +
q0 +
q1 + q2 + q3 + q4 + q5 + q6 * 2 +
1854 q7 + q7 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
1855 }
else if (wd >= 8 && flat8in) {
1856 dst[strideb * -3] = (p3 + p3 + p3 + 2 * p2 + p1 + p0 +
q0 + 4) >> 3;
1857 dst[strideb * -2] = (p3 + p3 + p2 + 2 * p1 + p0 +
q0 +
q1 + 4) >> 3;
1858 dst[strideb * -1] = (p3 + p2 + p1 + 2 * p0 +
q0 +
q1 + q2 + 4) >> 3;
1859 dst[strideb * +0] = (p2 + p1 + p0 + 2 *
q0 +
q1 + q2 + q3 + 4) >> 3;
1860 dst[strideb * +1] = (p1 + p0 +
q0 + 2 *
q1 + q2 + q3 + q3 + 4) >> 3;
1861 dst[strideb * +2] = (p0 +
q0 +
q1 + 2 * q2 + q3 + q3 + q3 + 4) >> 3;
1891 #define lf_8_fn(dir, wd, stridea, strideb) \
1892 static void loop_filter_##dir##_##wd##_8_c(uint8_t *_dst, \
1894 int E, int I, int H) \
1896 pixel *dst = (pixel *) _dst; \
1897 stride /= sizeof(pixel); \
1898 loop_filter(dst, E, I, H, stridea, strideb, wd); \
1901 #define lf_8_fns(wd) \
1902 lf_8_fn(h, wd, stride, 1) \
1903 lf_8_fn(v, wd, 1, stride)
1912 #define lf_16_fn(dir, stridea) \
1913 static void loop_filter_##dir##_16_16_c(uint8_t *dst, \
1915 int E, int I, int H) \
1917 loop_filter_##dir##_16_8_c(dst, stride, E, I, H); \
1918 loop_filter_##dir##_16_8_c(dst + 8 * stridea, stride, E, I, H); \
1926 #define lf_mix_fn(dir, wd1, wd2, stridea) \
1927 static void loop_filter_##dir##_##wd1##wd2##_16_c(uint8_t *dst, \
1929 int E, int I, int H) \
1931 loop_filter_##dir##_##wd1##_8_c(dst, stride, E & 0xff, I & 0xff, H & 0xff); \
1932 loop_filter_##dir##_##wd2##_8_c(dst + 8 * stridea, stride, E >> 8, I >> 8, H >> 8); \
1935 #define lf_mix_fns(wd1, wd2) \
1936 lf_mix_fn(h, wd1, wd2, stride) \
1937 lf_mix_fn(v, wd1, wd2, sizeof(pixel))
1972 const uint8_t *restrict
src,
1973 ptrdiff_t src_stride,
int w,
int h)
1984 const uint8_t *restrict _src,
1985 ptrdiff_t src_stride,
int w,
int h)
1990 dst_stride /=
sizeof(
pixel);
1991 src_stride /=
sizeof(
pixel);
1995 for (x = 0; x <
w; x += 4)
2003 #define fpel_fn(type, sz) \
2004 static void type##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
2005 const uint8_t *src, ptrdiff_t src_stride, \
2006 int h, int mx, int my) \
2008 type##_c(dst, dst_stride, src, src_stride, sz, h); \
2011 #define copy_avg_fn(sz) \
/*
 * Evaluate an 8-tap filter at position x of src.
 * Coefficients F[0]..F[7] multiply the samples at x - 3 * stride through
 * x + 4 * stride; the sum is rounded (+64), shifted right by 7 and the
 * result is passed through av_clip_pixel().
 * The arguments are expanded without parentheses and src, x and stride are
 * evaluated once per tap, so only pass simple, side-effect-free expressions.
 */
2026 #define FILTER_8TAP(src, x, F, stride) \
2027 av_clip_pixel((F[0] * src[x + -3 * stride] + \
2028 F[1] * src[x + -2 * stride] + \
2029 F[2] * src[x + -1 * stride] + \
2030 F[3] * src[x + +0 * stride] + \
2031 F[4] * src[x + +1 * stride] + \
2032 F[5] * src[x + +2 * stride] + \
2033 F[6] * src[x + +3 * stride] + \
2034 F[7] * src[x + +4 * stride] + 64) >> 7)
2036 static av_always_inline void do_8tap_1d_c(uint8_t *_dst, ptrdiff_t dst_stride,
2037 const uint8_t *_src, ptrdiff_t src_stride,
2038 int w,
int h, ptrdiff_t ds,
2044 dst_stride /=
sizeof(
pixel);
2045 src_stride /=
sizeof(
pixel);
2049 for (x = 0; x <
w; x++)
2061 #define filter_8tap_1d_fn(opn, opa, dir, ds) \
2062 static av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
2063 const uint8_t *src, ptrdiff_t src_stride, \
2064 int w, int h, const int16_t *filter) \
2066 do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \
2074 #undef filter_8tap_1d_fn
2077 const uint8_t *_src, ptrdiff_t src_stride,
2078 int w,
int h,
const int16_t *filterx,
2079 const int16_t *filtery,
int avg)
2086 dst_stride /=
sizeof(
pixel);
2087 src_stride /=
sizeof(
pixel);
2088 src -= src_stride * 3;
2092 for (x = 0; x <
w; x++)
2099 tmp_ptr =
tmp + 64 * 3;
2103 for (x = 0; x <
w; x++)
2105 dst[x] = (dst[x] +
FILTER_8TAP(tmp_ptr, x, filtery, 64) + 1) >> 1;
2115 #define filter_8tap_2d_fn(opn, opa) \
2116 static av_noinline void opn##_8tap_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
2117 const uint8_t *src, ptrdiff_t src_stride, \
2118 int w, int h, const int16_t *filterx, \
2119 const int16_t *filtery) \
2121 do_8tap_2d_c(dst, dst_stride, src, src_stride, w, h, filterx, filtery, opa); \
2127 #undef filter_8tap_2d_fn
2129 #define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \
2130 static void avg##_8tap_##type##_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
2131 const uint8_t *src, ptrdiff_t src_stride, \
2132 int h, int mx, int my) \
2134 avg##_8tap_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, \
2135 ff_vp9_subpel_filters[type_idx][dir_m]); \
2138 #define filter_fn_2d(sz, type, type_idx, avg) \
2139 static void avg##_8tap_##type##_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
2140 const uint8_t *src, ptrdiff_t src_stride, \
2141 int h, int mx, int my) \
2143 avg##_8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \
2144 ff_vp9_subpel_filters[type_idx][mx], \
2145 ff_vp9_subpel_filters[type_idx][my]); \
/*
 * Two-tap interpolation between src[x] and src[x + stride].
 * The difference of the two samples is scaled by mxy, rounded (+8), shifted
 * right by 4 and added to src[x].
 * The arguments are expanded without parentheses and src/x are evaluated
 * more than once, so only pass simple, side-effect-free expressions.
 */
2150 #define FILTER_BILIN(src, x, mxy, stride) \
2151 (src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4))
2153 static av_always_inline void do_bilin_1d_c(uint8_t *_dst, ptrdiff_t dst_stride,
2154 const uint8_t *_src, ptrdiff_t src_stride,
2155 int w,
int h, ptrdiff_t ds,
int mxy,
int avg)
2160 dst_stride /=
sizeof(
pixel);
2161 src_stride /=
sizeof(
pixel);
2165 for (x = 0; x <
w; x++)
2177 #define bilin_1d_fn(opn, opa, dir, ds) \
2178 static av_noinline void opn##_bilin_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
2179 const uint8_t *src, ptrdiff_t src_stride, \
2180 int w, int h, int mxy) \
2182 do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \
2193 const uint8_t *_src, ptrdiff_t src_stride,
2194 int w,
int h,
int mx,
int my,
int avg)
2201 dst_stride /=
sizeof(
pixel);
2202 src_stride /=
sizeof(
pixel);
2206 for (x = 0; x <
w; x++)
2217 for (x = 0; x <
w; x++)
2219 dst[x] = (dst[x] +
FILTER_BILIN(tmp_ptr, x, my, 64) + 1) >> 1;
2229 #define bilin_2d_fn(opn, opa) \
2230 static av_noinline void opn##_bilin_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
2231 const uint8_t *src, ptrdiff_t src_stride, \
2232 int w, int h, int mx, int my) \
2234 do_bilin_2d_c(dst, dst_stride, src, src_stride, w, h, mx, my, opa); \
2242 #define bilinf_fn_1d(sz, dir, dir_m, avg) \
2243 static void avg##_bilin_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
2244 const uint8_t *src, ptrdiff_t src_stride, \
2245 int h, int mx, int my) \
2247 avg##_bilin_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, dir_m); \
2250 #define bilinf_fn_2d(sz, avg) \
2251 static void avg##_bilin_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
2252 const uint8_t *src, ptrdiff_t src_stride, \
2253 int h, int mx, int my) \
2255 avg##_bilin_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, mx, my); \
2260 #define bilinf_fn_1d(a, b, c, d)
2261 #define bilinf_fn_2d(a, b)
/*
 * Instantiate every sub-pel wrapper for one block width sz and one
 * operation prefix avg:
 *   - h, v and hv variants for the regular, smooth and sharp 8-tap filters
 *     (filter_fn_1d / filter_fn_2d);
 *   - h, v and hv variants for the bilinear filter
 *     (bilinf_fn_1d / bilinf_fn_2d).
 * The h variants pass mx and the v variants pass my as the dir_m argument.
 * NOTE(review): this listing also shows empty definitions of bilinf_fn_1d
 * and bilinf_fn_2d, presumably selected by a preprocessor conditional that
 * is not visible here - confirm.
 */
2265 #define filter_fn(sz, avg) \
2266 filter_fn_1d(sz, h, mx, regular, FILTER_8TAP_REGULAR, avg) \
2267 filter_fn_1d(sz, v, my, regular, FILTER_8TAP_REGULAR, avg) \
2268 filter_fn_2d(sz, regular, FILTER_8TAP_REGULAR, avg) \
2269 filter_fn_1d(sz, h, mx, smooth, FILTER_8TAP_SMOOTH, avg) \
2270 filter_fn_1d(sz, v, my, smooth, FILTER_8TAP_SMOOTH, avg) \
2271 filter_fn_2d(sz, smooth, FILTER_8TAP_SMOOTH, avg) \
2272 filter_fn_1d(sz, h, mx, sharp, FILTER_8TAP_SHARP, avg) \
2273 filter_fn_1d(sz, v, my, sharp, FILTER_8TAP_SHARP, avg) \
2274 filter_fn_2d(sz, sharp, FILTER_8TAP_SHARP, avg) \
2275 bilinf_fn_1d(sz, h, mx, avg) \
2276 bilinf_fn_1d(sz, v, my, avg) \
2277 bilinf_fn_2d(sz, avg)
2279 #define filter_fn_set(avg) \
2280 filter_fn(64, avg) \
2281 filter_fn(32, avg) \
2282 filter_fn(16, avg) \
2290 #undef filter_fn_set
2305 ff_vp9dsp_mc_init_10(dsp);
/*
 * Store the function type##sz##_c in the [0][0] slot of
 * dsp->mc[idx1][filter][idx2] for all four filter types (smooth, regular,
 * sharp, bilinear).
 * The expansion has no trailing semicolon; the caller supplies it.
 */
2308 #define init_fpel(idx1, idx2, sz, type) \
2309 dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = type##sz##_c; \
2310 dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = type##sz##_c; \
2311 dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][0][0] = type##sz##_c; \
2312 dsp->mc[idx1][FILTER_BILINEAR ][idx2][0][0] = type##sz##_c
/*
 * Register both functions of size sz at first index idx through init_fpel:
 * copy goes to idx2 = 0 and avg to idx2 = 1.
 */
2314 #define init_copy_avg(idx, sz) \
2315 init_fpel(idx, 0, sz, copy); \
2316 init_fpel(idx, 1, sz, avg)
2324 #undef init_copy_avg
/*
 * Store the three 8-tap functions (smooth, regular, sharp) for one size and
 * direction into dsp->mc[idx1][filter][idx2][idxh][idxv].
 * The function names are pasted together as
 * type##_8tap_<filter>_##sz##dir##_c.
 * The expansion has no trailing semicolon; the caller supplies it.
 */
2329 #define init_subpel1_bd_aware(idx1, idx2, idxh, idxv, sz, dir, type) \
2330 dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_c; \
2331 dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_c; \
2332 dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][idxh][idxv] = type##_8tap_sharp_##sz##dir##_c
2335 #define init_subpel1 init_subpel1_bd_aware
/*
 * Register the three 8-tap entries via init_subpel1_bd_aware and then the
 * bilinear entry type##_bilin_##sz##dir##_c in the same
 * dsp->mc[idx1][...][idx2][idxh][idxv] slot.
 * NOTE(review): the listing also defines init_subpel1 as a plain alias of
 * init_subpel1_bd_aware just before this; the two are presumably
 * alternatives under a preprocessor conditional not visible here - confirm.
 */
2337 #define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type) \
2338 init_subpel1_bd_aware(idx1, idx2, idxh, idxv, sz, dir, type); \
2339 dsp->mc[idx1][FILTER_BILINEAR ][idx2][idxh][idxv] = type##_bilin_##sz##dir##_c
/*
 * Run init_subpel1 for the five block sizes 64, 32, 16, 8 and 4, which map
 * to first table index 0 through 4 respectively; idx is passed as idx2.
 */
2342 #define init_subpel2(idx, idxh, idxv, dir, type) \
2343 init_subpel1(0, idx, idxh, idxv, 64, dir, type); \
2344 init_subpel1(1, idx, idxh, idxv, 32, dir, type); \
2345 init_subpel1(2, idx, idxh, idxv, 16, dir, type); \
2346 init_subpel1(3, idx, idxh, idxv, 8, dir, type); \
2347 init_subpel1(4, idx, idxh, idxv, 4, dir, type)
/*
 * Run init_subpel2 for the three sub-pel directions:
 *   hv -> (idxh, idxv) = (1, 1)
 *   v  -> (idxh, idxv) = (0, 1)
 *   h  -> (idxh, idxv) = (1, 0)
 */
2349 #define init_subpel3(idx, type) \
2350 init_subpel2(idx, 1, 1, hv, type); \
2351 init_subpel2(idx, 0, 1, v, type); \
2352 init_subpel2(idx, 1, 0, h, type)
2360 #undef init_subpel1_bd_aware
2364 const uint8_t *_src, ptrdiff_t src_stride,
2365 int w,
int h,
int mx,
int my,
2366 int dx,
int dy,
int avg,
2369 int tmp_h = (((
h - 1) * dy + my) >> 4) + 8;
2374 dst_stride /=
sizeof(
pixel);
2375 src_stride /=
sizeof(
pixel);
2376 src -= src_stride * 3;
2379 int imx = mx, ioff = 0;
2381 for (x = 0; x <
w; x++) {
2392 tmp_ptr =
tmp + 64 * 3;
2397 for (x = 0; x <
w; x++)
2405 tmp_ptr += (my >> 4) * 64;
2411 #define scaled_filter_8tap_fn(opn, opa) \
2412 static av_noinline void opn##_scaled_8tap_c(uint8_t *dst, ptrdiff_t dst_stride, \
2413 const uint8_t *src, ptrdiff_t src_stride, \
2414 int w, int h, int mx, int my, int dx, int dy, \
2415 const int16_t (*filters)[8]) \
2417 do_scaled_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \
2424 #undef scaled_filter_8tap_fn
2428 #define scaled_filter_fn(sz, type, type_idx, avg) \
2429 static void avg##_scaled_##type##_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
2430 const uint8_t *src, ptrdiff_t src_stride, \
2431 int h, int mx, int my, int dx, int dy) \
2433 avg##_scaled_8tap_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy, \
2434 ff_vp9_subpel_filters[type_idx]); \
2439 static av_always_inline void do_scaled_bilin_c(uint8_t *_dst, ptrdiff_t dst_stride,
2440 const uint8_t *_src, ptrdiff_t src_stride,
2441 int w,
int h,
int mx,
int my,
2442 int dx,
int dy,
int avg)
2445 int tmp_h = (((
h - 1) * dy + my) >> 4) + 2;
2449 dst_stride /=
sizeof(
pixel);
2450 src_stride /=
sizeof(
pixel);
2453 int imx = mx, ioff = 0;
2455 for (x = 0; x <
w; x++) {
2470 for (x = 0; x <
w; x++)
2472 dst[x] = (dst[x] +
FILTER_BILIN(tmp_ptr, x, my, 64) + 1) >> 1;
2478 tmp_ptr += (my >> 4) * 64;
2484 #define scaled_bilin_fn(opn, opa) \
2485 static av_noinline void opn##_scaled_bilin_c(uint8_t *dst, ptrdiff_t dst_stride, \
2486 const uint8_t *src, ptrdiff_t src_stride, \
2487 int w, int h, int mx, int my, int dx, int dy) \
2489 do_scaled_bilin_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, opa); \
2495 #undef scaled_bilin_fn
2499 #define scaled_bilinf_fn(sz, avg) \
2500 static void avg##_scaled_bilin_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
2501 const uint8_t *src, ptrdiff_t src_stride, \
2502 int h, int mx, int my, int dx, int dy) \
2504 avg##_scaled_bilin_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy); \
2509 #define scaled_bilinf_fn(a, b)
/*
 * Instantiate the scaled wrappers for one block width sz and operation
 * prefix avg: scaled_filter_fn for the regular, smooth and sharp 8-tap
 * filters, followed by scaled_bilinf_fn.
 * NOTE(review): this listing also shows an empty definition of
 * scaled_bilinf_fn, presumably selected by a preprocessor conditional that
 * is not visible here - confirm.
 */
2513 #define scaled_filter_fns(sz, avg) \
2514 scaled_filter_fn(sz, regular, FILTER_8TAP_REGULAR, avg) \
2515 scaled_filter_fn(sz, smooth, FILTER_8TAP_SMOOTH, avg) \
2516 scaled_filter_fn(sz, sharp, FILTER_8TAP_SHARP, avg) \
2517 scaled_bilinf_fn(sz, avg)
/*
 * Expand scaled_filter_fns for the five block widths 64, 32, 16, 8 and 4
 * with the given operation prefix avg.
 */
2519 #define scaled_filter_fn_set(avg) \
2520 scaled_filter_fns(64, avg) \
2521 scaled_filter_fns(32, avg) \
2522 scaled_filter_fns(16, avg) \
2523 scaled_filter_fns(8, avg) \
2524 scaled_filter_fns(4, avg)
2529 #undef scaled_filter_fns
2530 #undef scaled_filter_fn_set
2531 #undef scaled_filter_fn
2532 #undef scaled_bilinf_fn
/*
 * Store the three scaled 8-tap functions (smooth, regular, sharp) of size sz
 * into dsp->smc[idx1][filter][idx2].
 * The function names are pasted together as type##_scaled_<filter>_##sz##_c.
 * The expansion has no trailing semicolon; the caller supplies it.
 */
2542 #define init_scaled_bd_aware(idx1, idx2, sz, type) \
2543 dsp->smc[idx1][FILTER_8TAP_SMOOTH ][idx2] = type##_scaled_smooth_##sz##_c; \
2544 dsp->smc[idx1][FILTER_8TAP_REGULAR][idx2] = type##_scaled_regular_##sz##_c; \
2545 dsp->smc[idx1][FILTER_8TAP_SHARP ][idx2] = type##_scaled_sharp_##sz##_c
2548 ff_vp9dsp_scaled_mc_init_10(dsp);
2549 #define init_scaled(a,b,c,d) init_scaled_bd_aware(a,b,c,d)
/*
 * Register the three scaled 8-tap entries via init_scaled_bd_aware and then
 * the bilinear entry type##_scaled_bilin_##sz##_c in dsp->smc[idx1][...][idx2].
 * NOTE(review): the listing also defines init_scaled(a,b,c,d) as a plain
 * forward to init_scaled_bd_aware just before this; the two are presumably
 * alternatives under a preprocessor conditional not visible here - confirm.
 */
2551 #define init_scaled(idx1, idx2, sz, type) \
2552 init_scaled_bd_aware(idx1, idx2, sz, type); \
2553 dsp->smc[idx1][FILTER_BILINEAR ][idx2] = type##_scaled_bilin_##sz##_c
/*
 * Register both scaled function families of size sz at first index idx
 * through init_scaled: put goes to idx2 = 0 and avg to idx2 = 1.
 */
2556 #define init_scaled_put_avg(idx, sz) \
2557 init_scaled(idx, 0, sz, put); \
2558 init_scaled(idx, 1, sz, avg)
2566 #undef init_scaled_put_avg
2568 #undef init_scaled_bd_aware
2573 FUNC(ff_vp9dsp_intrapred_init)(dsp);
2574 vp9dsp_itxfm_init(dsp);
2575 vp9dsp_loopfilter_init(dsp);
2576 FUNC(ff_vp9dsp_mc_init)(dsp);
2577 FUNC(ff_vp9dsp_scaled_mc_init)(dsp);
static const uint8_t q1[256]
#define FILTER_8TAP(src, x, F, stride)
#define init_intra_pred(tx, sz)
static void dc_top_32x32_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *_top)
static av_always_inline void idct4_1d(const dctcoef *in, ptrdiff_t stride, dctcoef *out, int pass)
#define init_idct(tx, nm)
#define lf_16_fn(dir, stridea)
#define init_copy_avg(idx, sz)
void(* loop_filter_8[3][2])(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr)
static av_always_inline void copy_c(uint8_t *restrict dst, ptrdiff_t dst_stride, const uint8_t *restrict src, ptrdiff_t src_stride, int w, int h)
#define lf_mix_fns(wd1, wd2)
void(* filter)(uint8_t *src, int stride, int qscale)
static void hor_8x8_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *top)
static void tm_32x32_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *_top)
#define init_itxfm(tx, sz)
#define def_diag_downleft(size)
static void vert_32x32_c(uint8_t *restrict _dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *_top)
static void memset_bpc(uint16_t *dst, int val, int len)
static void diag_downleft_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *_top)
void(* loop_filter_mix2[2][2][2])(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr)
static av_always_inline void loop_filter(pixel *dst, int E, int I, int H, ptrdiff_t stridea, ptrdiff_t strideb, int wd)
static void dc_left_32x32_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *top)
static double val(void *priv, double ch)
static av_always_inline void iadst8_1d(const dctcoef *in, ptrdiff_t stride, dctcoef *out, int pass)
static double a2(void *priv, double x, double y)
static av_always_inline void do_bilin_2d_c(uint8_t *_dst, ptrdiff_t dst_stride, const uint8_t *_src, ptrdiff_t src_stride, int w, int h, int mx, int my, int avg)
static void idct(int16_t block[64])
#define itxfm_wrapper(type_a, type_b, sz, bits, has_dconly)
static void tm_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *_top)
static int t15(InterplayACMContext *s, unsigned ind, unsigned col)
static void dc_129_16x16_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
static void dc_127_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
#define scaled_bilin_fn(opn, opa)
#define filters(fmt, type, inverse, clp, inverset, clip, one, clip_fn, packed)
static void dc_128_32x32_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
#define PIXEL_SPLAT_X4(x)
static void tm_16x16_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *_top)
static const uint8_t q0[256]
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior, as they are not representable ...
static av_always_inline void do_scaled_8tap_c(uint8_t *_dst, ptrdiff_t dst_stride, const uint8_t *_src, ptrdiff_t src_stride, int w, int h, int mx, int my, int dx, int dy, int avg, const int16_t(*filters)[8])
static void tm_8x8_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *_top)
#define FILTER_BILIN(src, x, mxy, stride)
static double a3(void *priv, double x, double y)
static void dc_127_32x32_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
static void dc_16x16_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *_top)
static void hor_32x32_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *top)
static void dc_127_8x8_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
static void dc_left_8x8_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *top)
static void dc_128_8x8_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
static void dc_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *_top)
#define filter_8tap_1d_fn(opn, opa, dir, ds)
#define init_scaled_put_avg(idx, sz)
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
static void vert_8x8_c(uint8_t *restrict _dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *_top)
#define def_hor_down(size)
static void dc_8x8_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *_top)
static int t27(InterplayACMContext *s, unsigned ind, unsigned col)
static void dc_129_8x8_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
static void av_always_inline idct16_1d(float *dst, const float *src, int dst_stridea, int dst_strideb, int src_stridea, int src_strideb, int add)
av_cold void FUNC() ff_vp9dsp_init(VP9DSPContext *dsp)
static double a0(void *priv, double x, double y)
static av_always_inline void iadst16_1d(const dctcoef *in, ptrdiff_t stride, dctcoef *out, int pass)
static void dc_top_16x16_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *_top)
static void dc_128_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
static void dc_top_8x8_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *_top)
static void vert_4x4_c(uint8_t *restrict _dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *_top)
#define i(width, name, range_min, range_max)
static av_always_inline void avg_c(uint8_t *restrict _dst, ptrdiff_t dst_stride, const uint8_t *restrict _src, ptrdiff_t src_stride, int w, int h)
static av_always_inline void iadst4_1d(const dctcoef *in, ptrdiff_t stride, dctcoef *out, int pass)
#define scaled_filter_fn_set(avg)
static void dc_129_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
#define def_diag_downright(size)
static void hor_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *top)
static void dc_left_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *top)
static void dc_128_16x16_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled left
static void dc_left_16x16_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *top)
static void av_always_inline idct8_1d(float *dst, const float *src, int dst_stridea, int dst_strideb, int src_stridea, int src_strideb, int add)
static void vert_16x16_c(uint8_t *restrict _dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *_top)
#define bilin_1d_fn(opn, opa, dir, ds)
#define filter_8tap_2d_fn(opn, opa)
#define filter_fn_set(avg)
#define itxfm_wrap(sz, bits)
static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
static void dc_top_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *_top)
#define scaled_filter_8tap_fn(opn, opa)
static void dc_127_16x16_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
#define init_subpel3(idx, type)
#define def_vert_left(size)
static double a1(void *priv, double x, double y)
void(* loop_filter_16[2])(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr)
static void dc_32x32_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *_top)
#define def_vert_right(size)
static void hor_16x16_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *top)
static av_always_inline void do_8tap_2d_c(uint8_t *_dst, ptrdiff_t dst_stride, const uint8_t *_src, ptrdiff_t src_stride, int w, int h, const int16_t *filterx, const int16_t *filtery, int avg)
#define bilin_2d_fn(opn, opa)
static void dc_129_32x32_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)