FFmpeg
vp9dsp.h
Go to the documentation of this file.
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #ifndef AVCODEC_VP9DSP_H
25 #define AVCODEC_VP9DSP_H
26 
27 #include <stddef.h>
28 #include <stdint.h>
29 
30 #include "libavcodec/vp9.h"
31 
32 typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
33  const uint8_t *ref, ptrdiff_t ref_stride,
34  int h, int mx, int my); /* signature of the VP9DSPContext::mc entries; h is presumably the block height and mx/my the subpel x/y position -- TODO confirm */
35 typedef void (*vp9_scaled_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
36  const uint8_t *ref, ptrdiff_t ref_stride,
37  int h, int mx, int my, int dx, int dy); /* signature of the VP9DSPContext::smc entries: vp9_mc_func plus dx/dy (presumably the per-axis scaling step -- TODO confirm) */
38 
39 typedef struct VP9DSPContext { /* function-pointer tables for the VP9 DSP routines: intra prediction, inverse transform, loop filter, motion compensation */
40  /*
41  * dimension 1: 0=4x4, 1=8x8, 2=16x16, 3=32x32
42  * dimension 2: intra prediction modes
43  *
44  * dst/left/top is aligned by transform-size (i.e. 4, 8, 16 or 32 pixels)
45  * stride is aligned by 16 pixels
46  * top[-1] is top/left; top[4,7] is top-right for 4x4
47  */
48  // FIXME(rbultje) maybe replace left/top pointers with HAVE_TOP/
49  // HAVE_LEFT/HAVE_TOPRIGHT flags instead, and then handle it in-place?
50  // also needs to fit in with what H.264/VP8/etc do
51  void (*intra_pred[N_TXFM_SIZES][N_INTRA_PRED_MODES])(uint8_t *dst,
52  ptrdiff_t stride,
53  const uint8_t *left,
54  const uint8_t *top);
55 
56  /*
57  * dimension 1: 0=4x4, 1=8x8, 2=16x16, 3=32x32, 4=lossless (3-4=dct only)
58  * dimension 2: 0=dct/dct, 1=dct/adst, 2=adst/dct, 3=adst/adst
59  *
60  * dst is aligned by transform-size (i.e. 4, 8, 16 or 32 pixels)
61  * stride is aligned by 16 pixels
62  * block is 16-byte aligned
63  * eob indicates the position (+1) of the last non-zero coefficient,
64  * in scan-order. This can be used to write faster versions, e.g. a
65  * dc-only 4x4/8x8/16x16/32x32, or a 4x4-only (eob<10) 8x8/16x16/32x32,
66  * etc.
67  */
68  // FIXME also write idct_add_block() versions for whole (inter) pred
69  // blocks, so we can do 2 4x4s at once
70  void (*itxfm_add[N_TXFM_SIZES + 1][N_TXFM_TYPES])(uint8_t *dst,
71  ptrdiff_t stride,
72  int16_t *block, int eob);
73 
74  /*
75  * dimension 1: width of filter (0=4, 1=8, 2=16)
76  * dimension 2: 0=col-edge filter (h), 1=row-edge filter (v)
77  *
78  * dst/stride are aligned by 8
79  */
80  void (*loop_filter_8[3][2])(uint8_t *dst, ptrdiff_t stride,
81  int mb_lim, int lim, int hev_thr);
82 
83  /*
84  * dimension 1: 0=col-edge filter (h), 1=row-edge filter (v)
85  *
86  * The width of filter is assumed to be 16; dst/stride are aligned by 16
87  */
88  void (*loop_filter_16[2])(uint8_t *dst, ptrdiff_t stride,
89  int mb_lim, int lim, int hev_thr);
90 
91  /*
92  * dimension 1/2: width of filter (0=4, 1=8) for each filter half
93  * dimension 3: 0=col-edge filter (h), 1=row-edge filter (v)
94  *
95  * dst/stride are aligned by operation size
96  * this basically calls loop_filter[d1][d3][0](), followed by
97  * loop_filter[d2][d3][0]() on the next 8 pixels
98  * mb_lim/lim/hev_thr contain two values in the lowest two bytes of the
99  * integer.
100  */
101  // FIXME perhaps a mix4 that operates on 32px (for AVX2)
102  void (*loop_filter_mix2[2][2][2])(uint8_t *dst, ptrdiff_t stride,
103  int mb_lim, int lim, int hev_thr);
104 
105  /*
106  * dimension 1: hsize (0: 64, 1: 32, 2: 16, 3: 8, 4: 4)
107  * dimension 2: filter type (0: smooth, 1: regular, 2: sharp, 3: bilin)
108  * dimension 3: averaging type (0: put, 1: avg)
109  * dimension 4: x subpel interpolation (0: none, 1: 8tap/bilin)
110  * dimension 5: y subpel interpolation (0: none, 1: 8tap/bilin)
111  *
112  * dst/stride are aligned by hsize
113  */
114  vp9_mc_func mc[5][N_FILTERS][2][2][2];
115 
116  /*
117  * for scalable MC, first 3 dimensions identical to above, the other two
118  * don't exist since it changes per stepsize.
119  */
120  vp9_scaled_mc_func smc[5][N_FILTERS][2];
121 } VP9DSPContext;
122 
123 extern const int16_t ff_vp9_subpel_filters[3][16][8]; /* table defined in vp9dsp.c; dimensions are presumably [filter type][subpel position][tap] -- TODO confirm */
124 
125 void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact); /* generic entry point; bpp is presumably the bit depth (8/10/12) -- TODO confirm */
126 
127 void ff_vp9dsp_init_8(VP9DSPContext *dsp); /* per-bit-depth variants (8, 10, 12) */
128 void ff_vp9dsp_init_10(VP9DSPContext *dsp);
129 void ff_vp9dsp_init_12(VP9DSPContext *dsp);
130 
131 void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp); /* architecture-specific variants */
132 void ff_vp9dsp_init_arm(VP9DSPContext *dsp, int bpp);
133 void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact);
134 void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp);
135 
136 #endif /* AVCODEC_VP9DSP_H */
stride
int stride
Definition: mace.c:144
N_TXFM_TYPES
@ N_TXFM_TYPES
Definition: vp9.h:42
ff_vp9dsp_init
void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact)
Definition: vp9dsp.c:86
VP9DSPContext::loop_filter_8
void(* loop_filter_8[3][2])(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr)
Definition: vp9dsp.h:80
VP9DSPContext
Definition: vp9dsp.h:39
ff_vp9dsp_init_arm
void ff_vp9dsp_init_arm(VP9DSPContext *dsp, int bpp)
Definition: vp9dsp_init_arm.c:244
ff_vp9dsp_init_10
void ff_vp9dsp_init_10(VP9DSPContext *dsp)
N_TXFM_SIZES
@ N_TXFM_SIZES
Definition: vp9.h:32
VP9DSPContext::loop_filter_mix2
void(* loop_filter_mix2[2][2][2])(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr)
Definition: vp9dsp.h:102
vp9_scaled_mc_func
void(* vp9_scaled_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, int h, int mx, int my, int dx, int dy)
Definition: vp9dsp.h:35
VP9DSPContext::smc
vp9_scaled_mc_func smc[5][N_FILTERS][2]
Definition: vp9dsp.h:120
ff_vp9dsp_init_aarch64
void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp)
Definition: vp9dsp_init_aarch64.c:244
ff_vp9dsp_init_mips
void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp)
Definition: vp9dsp_init_mips.c:211
VP9DSPContext::itxfm_add
void(* itxfm_add[N_TXFM_SIZES+1][N_TXFM_TYPES])(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob)
Definition: vp9dsp.h:70
VP9DSPContext::intra_pred
void(* intra_pred[N_TXFM_SIZES][N_INTRA_PRED_MODES])(uint8_t *dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
Definition: vp9dsp.h:51
vp9.h
ff_vp9dsp_init_8
void ff_vp9dsp_init_8(VP9DSPContext *dsp)
N_INTRA_PRED_MODES
@ N_INTRA_PRED_MODES
Definition: vp9.h:61
VP9DSPContext::mc
vp9_mc_func mc[5][N_FILTERS][2][2][2]
Definition: vp9dsp.h:114
N_FILTERS
@ N_FILTERS
Definition: vp9.h:69
ff_vp9_subpel_filters
const int16_t ff_vp9_subpel_filters[3][16][8]
Definition: vp9dsp.c:30
uint8_t
uint8_t
Definition: audio_convert.c:194
ff_vp9dsp_init_12
void ff_vp9dsp_init_12(VP9DSPContext *dsp)
left
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled left
Definition: snow.txt:386
vp9_mc_func
void(* vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, int h, int mx, int my)
Definition: vp9dsp.h:32
ref
static int ref[MAX_W *MAX_W]
Definition: jpeg2000dwt.c:107
ff_vp9dsp_init_x86
void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
Definition: vp9dsp_init.c:217
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
h
h
Definition: vp9dsp_template.c:2038
VP9DSPContext::loop_filter_16
void(* loop_filter_16[2])(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr)
Definition: vp9dsp.h:88