[FFmpeg-soc] RV40 review

Tue Aug 21 05:04:03 CEST 2007

Hi

another review, (again note files are a day or 2 old ...)
yes these reviewes take more time than one thinks ...

h264pred.h:
[...]
> /**
>  * @file h264pred.h
>  * H.264 / AVC / MPEG4 prediction functions.
>  * @author Michael Niedermayer <michaelni at gmx.at>
>  */
> 
> #ifndef H264PRED_H
> #define H264PRED_H
> 
> #include "common.h"
> 
> /**
>  * Prediction types
>  */
> //@{
> #define VERT_PRED             0
> #define HOR_PRED              1
> #define DC_PRED               2
> #define DIAG_DOWN_LEFT_PRED   3
> #define DIAG_DOWN_RIGHT_PRED  4
> #define VERT_RIGHT_PRED       5
> #define HOR_DOWN_PRED         6
> #define VERT_LEFT_PRED        7
> #define HOR_UP_PRED           8
> 
> #define LEFT_DC_PRED          9
> #define TOP_DC_PRED           10
> #define DC_128_PRED           11
> 
> #define DIAG_DOWN_LEFT_PRED_RV40_NODOWN   12
> #define HOR_UP_PRED_RV40_NODOWN           13
> 
> #define DC_PRED8x8            0
> #define HOR_PRED8x8           1
> #define VERT_PRED8x8          2
> #define PLANE_PRED8x8         3
> 
> #define LEFT_DC_PRED8x8       4
> #define TOP_DC_PRED8x8        5
> #define DC_128_PRED8x8        6

this looks familiar, i assume you plan to remove these from
h264data.h in svn because otherwise they would be duplicated
(same with h264pred.c)

[...]

rv40data.h:
[...]
> /**
>  * Maximum number of macroblocks for each of the possible slice offset sizes
>  */
> static const uint16_t rv40_mb_max_sizes[6] = { 0x2F, 0x68, 0x18B, 0x62F, 0x18BF, 0x23FF };
> /**
>  * Bits needed to code slice offset for the given size
>  */
> static const uint8_t rv40_mb_bits_sizes[6] = { 6, 7, 9, 11, 13, 14 };

these 2 tables might be identical to ff_mba_max/length()
the code which uses them might be identical too (ive not checked)
maybe something can be shared here, but maybe its too tricky file
dependancy wise and not worth for 18 byte ...
though a note could be added pointing at the similarity

[...]

rv40.c:
[...]
> /**
>  * Real Video 4.0 inverse transform
>  * Code is almost the same as in SVQ3, only scaling is different
>  */
> static void rv40_intra_inv_transform(DCTELEM *block, const int offset){

yes and the scaling can be done before the transform IMHO
so the actual transform is the same and could be shared

[...]
>         const int z0= 13*(temp[4*0+i] +    temp[4*2+i]);
>         const int z1= 13*(temp[4*0+i] -    temp[4*2+i]);
>         const int z2=  7* temp[4*1+i] - 17*temp[4*3+i];
>         const int z3= 17* temp[4*1+i] +  7*temp[4*3+i];
> 
>         block[offset+i*8+0]= ((z0 + z3) + 0x200)>>10;
>         block[offset+i*8+1]= ((z1 + z2) + 0x200)>>10;
>         block[offset+i*8+2]= ((z1 - z2) + 0x200)>>10;
>         block[offset+i*8+3]= ((z0 - z3) + 0x200)>>10;

the  +0x200 can be added to z0/z1 avoiding 2 +

[...]
> /**
>  * Decode coded block pattern
>  */
> static int rv40_decode_cbp(GetBitContext *gb, RV40VLC *vlc, int table)
> {
>     int pattern, code, cbp=0;
>     int table2;
>     static const int cbp_masks[4] = {0x000000, 0x100000, 0x010000, 0x110000};

cbp_masks[0] is unused

[...]
> static inline void rv40_decode_block(DCTELEM *dst, GetBitContext *gb, RV40VLC *rvlc, int fc, int sc)
> {
>     int code, pattern;
>     int coeffs[4];
> 
>     code = get_vlc2(gb, rvlc->first_pattern[fc].table, 9, 2);
> 
>     pattern = code & 0x7;
> 
>     code >>= 3;
>     coeffs[0] = modulo_three_table[code][0];
>     coeffs[1] = modulo_three_table[code][1];
>     coeffs[2] = modulo_three_table[code][2];
>     coeffs[3] = modulo_three_table[code][3];
>     decode_subblock(dst, coeffs, gb, &rvlc->coefficient);
> 
>     if(pattern & 4){
>         code = get_vlc2(gb, rvlc->second_pattern[sc].table, 9, 2);
>         coeffs[0] = modulo_three_table[code][0];
>         coeffs[1] = modulo_three_table[code][1];
>         coeffs[2] = modulo_three_table[code][2];
>         coeffs[3] = modulo_three_table[code][3];
>         decode_subblock(dst + 2, coeffs, gb, &rvlc->coefficient);
>     }
>     if(pattern & 2){ // Looks like coefficients 1 and 2 are swapped for this block
>         code = get_vlc2(gb, rvlc->second_pattern[sc].table, 9, 2);
>         coeffs[0] = modulo_three_table[code][0];
>         coeffs[1] = modulo_three_table[code][1];
>         coeffs[2] = modulo_three_table[code][2];
>         coeffs[3] = modulo_three_table[code][3];
>         decode_subblock2(dst + 8*2, coeffs, gb, &rvlc->coefficient);
>     }
>     if(pattern & 1){
>         code = get_vlc2(gb, rvlc->third_pattern[sc].table, 9, 2);
>         coeffs[0] = modulo_three_table[code][0];
>         coeffs[1] = modulo_three_table[code][1];
>         coeffs[2] = modulo_three_table[code][2];
>         coeffs[3] = modulo_three_table[code][3];

code quadruplification
the coeffs[]= could be moved into a function maybe

>         decode_subblock(dst + 8*2+2, coeffs, gb, &rvlc->coefficient);
>     }
> }
> 

> /**
>  * Dequantize ordinary 4x4 block
>  * @todo optimize
>  */
> static inline void rv40_dequant4x4(DCTELEM *block, int offset, int Qdc, int Q)
> {
>     int i, j;
> 
>     block += offset;
>     block[0] = (block[0] * Qdc + 8) >> 4;
>     for(i = 0; i < 4; i++)
>         for(j = 0; j < 4; j++)
>             if(i || j)
>                 block[j + i*8] = (block[j + i*8] * Q + 8) >> 4;
> }
> 
> /**
>  * Dequantize 4x4 block of DC values for 16x16 macroblock
>  * @todo optimize
>  */
> static inline void rv40_dequant4x4_16x16(DCTELEM *block, int offset, int Qdc, int Q)
> {
>     int i;
> 
>     block += offset;
>     for(i = 0; i < 3; i++)
>          block[rv40_dezigzag[i]] = (block[rv40_dezigzag[i]] * Qdc + 8) >> 4;
>     for(; i < 16; i++)
>          block[rv40_dezigzag[i]] = (block[rv40_dezigzag[i]] * Q + 8) >> 4;
> }

if you do dequant during decode then you dont have to dequant the
common 0 coeffs

[...]
> /**
>  * Get stored dimension from bitstream
>  *
>  * If the width/height is the standard one then it's coded as 3-bit index.
>  * Otherwise it is coded as escaped 8-bit portions.
>  */
> static inline int get_dimension(GetBitContext *gb, const int *dim1, const int *dim2)

i dont think this should be inline

[...]
> /**
>  * Get encoded picture size - usually this is called from rv40_parse_slice_header
>  */
> static void rv40_parse_picture_size(GetBitContext *gb, int *w, int *h)
> {
>     *w = get_dimension(gb, rv40_standard_widths, NULL);
>     *h = get_dimension(gb, rv40_standard_heights, rv40_standard_heights2);

avcodec_check_dimensions() should be used here to ensure that the dimensions
wont produce overflows

[...]
> /**
>  * Decode variable-length code constructed from variable-length codes
>  * similar to Even-Rodeh and Elias Omega codes
>  *
>  * Code is constructed from bit chunks of even length (odd length means end of code)
>  * and chunks are coded with variable-length codes too
>  */
> static inline int get_omega(GetBitContext *gb)
> {
>     int bits = 0, code = 1, t, tb;
> 
>     for(;;){
>         t = get_vlc2(gb, mbinfo_vlc.table, MBINFO_BITS, 1);
>         tb = t >> 5;
>         code = (code << tb) | (t & 0xF);
>         bits += tb;
>         if(t & 0x10) break;
>     }
>     return code;
> }

bits is unused

[...]
>     if(s->pict_type == P_TYPE){
>         if(!s->first_slice_line){
>             blocks[r->mb_type[mb_pos - s->mb_stride]]++;
>             if(s->mb_x && !(s->mb_x == s->resync_mb_x && (s->mb_y-1) == s->resync_mb_y))
>                 blocks[r->mb_type[mb_pos - s->mb_stride - 1]]++;
>             if(s->mb_x+1 < s->mb_width)
>                 blocks[r->mb_type[mb_pos - s->mb_stride + 1]]++;
>         }
>         if(s->mb_x && !(s->first_slice_line && s->mb_x == s->resync_mb_x))
>             blocks[r->mb_type[mb_pos - 1]]++;
>         for(i = 0; i < RV40_MB_TYPES; i++){
>             if(blocks[i] > count){
>                 count = blocks[i];
>                 prev_type = i;
>             }
>         }
>         if(prev_type == RV40_MB_SKIP) prev_type = RV40_MB_P_16x16;
>         prev_type = block_num_to_ptype_vlc_num[prev_type];
>         q = get_vlc2(gb, ptype_vlc[prev_type].table, PTYPE_VLC_BITS, 1);
>         if(q < PBTYPE_ESCAPE)
>             return q;
>         q = get_vlc2(gb, ptype_vlc[prev_type].table, PTYPE_VLC_BITS, 1);
>         av_log(NULL,0,"Dquant for P-frame\n");
>     }else{

>         if(!s->first_slice_line){
>             blocks[r->mb_type[mb_pos - s->mb_stride]]++;
>             if(s->mb_x && !(s->mb_x == s->resync_mb_x && (s->mb_y-1) == s->resync_mb_y))
>                 blocks[r->mb_type[mb_pos - s->mb_stride - 1]]++;
>             if(s->mb_x+1 < s->mb_width)
>                 blocks[r->mb_type[mb_pos - s->mb_stride + 1]]++;
>         }
>         if(s->mb_x && !(s->first_slice_line && s->mb_x == s->resync_mb_x))
>             blocks[r->mb_type[mb_pos - 1]]++;
>         for(i = 0; i < RV40_MB_TYPES; i++){
>             if(blocks[i] > count){
>                 count = blocks[i];
>                 prev_type = i;
>             }
>         }

this code looks duplicated

[...]
> /** Macroblock partition width in 8x8 blocks */
> const int part_sizes_w[RV40_MB_TYPES] = { 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2 };
> 
> /** Macroblock partition height in 8x8 blocks */
> const int part_sizes_h[RV40_MB_TYPES] = { 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2 };

static const uint8_t ?

> 
> /**
>  * Motion vectors prediction
>  *
>  * Motion prediction performed for the block by using median prediction of
>  * motion vector from the left, top and right top blocks but in corener cases
>  * some other vectors may be used instead
>  */
> static void rv40_pred_mv(RV40DecContext *r, int block_type, int subblock_no)
> {
>     MpegEncContext *s = &r->s;
>     int mv_pos = s->mb_x * 2 + s->mb_y * 2 * s->b8_stride;
>     int A[2], B[2], C[2];
>     int no_A = 1, no_B = 1, no_C = 1;
>     int i, j;
>     int mx, my;
> 
>     memset(A, 0, sizeof(A));
>     memset(B, 0, sizeof(B));
>     memset(C, 0, sizeof(C));

>     no_A = s->mb_x < 1 || (s->first_slice_line && s->mb_x == s->resync_mb_x);
>     no_B = s->first_slice_line;
>     no_C = s->first_slice_line || (s->mb_x + 1) == s->mb_width;

i think there are a few special cases, which arent considered here
like maybe: (numbers to distingish slices)
0011
1.     (here C is available though B is not so no first_slice_line check alone
will work unless rv40 does somthing simpler than other video codecs)

[...]
>         if(no_B){
>             C[0] = A[0];
>             C[1] = A[1];
>         }else{
>             if(!no_A){
>                 C[0] = s->current_picture_ptr->motion_val[0][mv_pos-s->b8_stride-1][0];
>                 C[1] = s->current_picture_ptr->motion_val[0][mv_pos-s->b8_stride-1][1];
>             }else{
>                 C[0] = A[0];
>                 C[1] = A[1];
>             }
>         }

if(no_B || no_A){
    C[0] = A[0];
    C[1] = A[1];
}else{
    C[0] = s->current_picture_ptr->motion_val[0][mv_pos-s->b8_stride-1][0];
    C[1] = s->current_picture_ptr->motion_val[0][mv_pos-s->b8_stride-1][1];
}

also if A/B/C where pointers then 
C=A;
else
C= s->current_picture_ptr->motion_val[0][mv_pos-s->b8_stride-1]

would be enough ...

[...]
>         for(j = 0; j < 2; j++){
>             for(i = 0; i < 2; i++){
>                 s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride + i + j*s->b8_stride][0] = 0;
>                 s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride + i + j*s->b8_stride][1] = 0;
>             }
>         }

fill_rectangle() (yes it should be moved into some header ...)

[...]
>     if(no_up && no_left)
>         itype = DC_128_PRED;
>     else if(no_up){
>         if(itype == VERT_PRED) itype = HOR_PRED;
>         if(itype == DC_PRED)   itype = LEFT_DC_PRED;
>     }else if(no_left){
>         if(itype == HOR_PRED)  itype = VERT_PRED;
>         if(itype == DC_PRED)   itype = TOP_DC_PRED;
>         if(itype == DIAG_DOWN_LEFT_PRED) itype = DIAG_DOWN_LEFT_PRED_RV40_NODOWN;
>     }

2 tables like in check_intra4x4_pred_mode() might be simpler

[...]

>             for(YY = Y, i = 0; i < 4; i++, cbp >>= 1, no_left = 0, YY += 4){

thiscodeputstoomuchonthesamelineicanreaditbutitneedsmoretimethanitshould

>                 no_topright = no_up || (i==3 && j) || (i==3 && !j && (s->mb_x-1) == s->mb_width);

maybe some solution similar to h264.c *_cache would be a good idea, that is
that there simply is a small array of fixed size which contains the
availabilty/type of surrounding blocks, that way you just initalize that
at one point per macroblock with code like:
if(first_slice_line)
    mark all blocks above as unavailable
if(!s->mb_x || (s->mb_x == s->resync_mb_x && s->first_slice_line))
    mark all blcoks to the left as unavailable

and then just do blah_cache[current_block-1] for the left blocks avaiabilty
and blah_cache[current_block-8] for the top blocks avaiabilty
and blah_cache[current_block-8+block_size] for the top right

or maybe something like top_samples_available that is a bit per
each of the 16 4x4 blocks which indicates if the top block is
available could be used ...

[...]
>         no_up = s->first_slice_line;
>         for(j = 0; j < 2; j++){
>             no_left = !s->mb_x || (s->mb_x == s->resync_mb_x && s->first_slice_line);
>             for(i = 0; i < 2; i++, cbp >>= 1, no_left = 0){
>                 no_topright = no_up || (i && j) || (i && !j && (s->mb_x-1) == s->mb_width);
>                 rv40_pred_4x4_block(r, U + i*4 + j*4*s->uvlinesize, s->uvlinesize, ittrans[intra_types[i*2+j*2*r->intra_types_stride]], no_up, no_left, i || j, no_topright);
>                 if(!(cbp & 1)) continue;
>                 rv40_add_4x4_block(U + i*4 + j*4*s->uvlinesize, s->uvlinesize, s->block[4], i*4+j*32);
>             }
>             no_up = 0;
>         }
>         no_up = s->first_slice_line;
>         for(j = 0; j < 2; j++){
>             no_left = !s->mb_x || (s->mb_x == s->resync_mb_x && s->first_slice_line);
>             for(i = 0; i < 2; i++, cbp >>= 1, no_left = 0){
>                 no_topright = no_up || (i && j) || (i && !j && (s->mb_x-1) == s->mb_width);
>                 rv40_pred_4x4_block(r, V + i*4 + j*4*s->uvlinesize, s->uvlinesize, ittrans[intra_types[i*2+j*2*r->intra_types_stride]], no_up, no_left, i || j, no_topright);
>                 if(!(cbp & 1)) continue;
>                 rv40_add_4x4_block(V + i*4 + j*4*s->uvlinesize, s->uvlinesize, s->block[5], i*4+j*32);
>             }
>             no_up = 0;
>         }

code duplication

[...]
> static int rv40_decode_frame(AVCodecContext *avctx,
>                             void *data, int *data_size,
>                             uint8_t *buf, int buf_size)
> {
>     RV40DecContext *r = avctx->priv_data;
>     MpegEncContext *s = &r->s;
>     AVFrame *pict = data;
>     SliceInfo si;
>     int i;
> 
>     /* no supplementary picture */
>     if (buf_size == 0) {
>         return 0;
>     }

this is wrong as rv40 supports b frames, the not yet output frame should
be returned here

[...]
>         ff_er_frame_end(s);
>         MPV_frame_end(s);
>         if (s->pict_type == B_TYPE || s->low_delay) {
>             *pict= *(AVFrame*)s->current_picture_ptr;
>         } else if (s->last_picture_ptr != NULL) {
>             *pict= *(AVFrame*)s->last_picture_ptr;
>         }
> 
>         if(s->last_picture_ptr || s->low_delay){
>             *data_size = sizeof(AVFrame);
>             ff_print_debug_info(s, pict);
>         }
>         s->current_picture_ptr= NULL; //so we can detect if frame_end wasnt called (find some nicer solution...)
>         s->mb_x = s->mb_y = 0;
>         return buf_size;
>     }
> 
>     init_get_bits(&s->gb, buf, buf_size*8);
>     rv40_parse_slice_header(r, &r->s.gb, &si);
>     si.size = buf_size * 8;
>     si.end = s->mb_width * s->mb_height;
> 
>     if(si.start > r->prev_si.start && si.type == r->prev_si.type) r->prev_si.end = si.start;
>     if(r->has_slice){
>         //XXX: Take it directly from slice info
>         r->cur_vlcs = choose_vlc_set(r->prev_si.quant, r->prev_si.vlc_set, r->prev_si.type);
>         r->quant = r->prev_si.quant;
>         r->bits = r->prev_si.size;
>         r->block_start = r->prev_si.start;
>         s->mb_num_left = r->prev_si.end - r->prev_si.start;
>         s->pict_type = r->prev_si.type ? r->prev_si.type : I_TYPE;
>         rv40_decode_slice(r);
>         s->mb_num_left = r->prev_si.end - r->prev_si.start;
>         //rv40_postprocess(r);
>     }
> 
>     if(r->has_slice && (si.start < r->prev_si.start || si.type != r->prev_si.type)){ // output complete frame
>         ff_er_frame_end(s);
>         MPV_frame_end(s);
>         if (s->pict_type == B_TYPE || s->low_delay) {
>             *pict= *(AVFrame*)s->current_picture_ptr;
>         } else if (s->last_picture_ptr != NULL) {
>             *pict= *(AVFrame*)s->last_picture_ptr;
>         }
> 
>         if(s->last_picture_ptr || s->low_delay){
>             *data_size = sizeof(AVFrame);
>             ff_print_debug_info(s, pict);
>         }
>         s->current_picture_ptr= NULL; //so we can detect if frame_end wasnt called (find some nicer solution...)
>         s->mb_x = s->mb_y = 0;

duplicated

>     }
>     //save slice for future decoding
>     r->slice_data = av_realloc(r->slice_data, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
>     memcpy(r->slice_data, buf, buf_size);
>     r->prev_si = si;
>     r->has_slice = si.type != -1;

please explain what this has_slice slice_data and stuff is good for
it looks like you workaround demuxer bugs or the lack of a parser in the
decoder if so thats unacceptable for ffmpeg svn ...

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Frequently ignored awnser#1 FFmpeg bugs should be sent to our bugtracker, user
questions for the command line tools ffmpeg, ffplay, ... as well as questions
about how to use libav* should be sent to the ffmpeg-user mailinglist.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-soc/attachments/20070821/274fa064/attachment.pgp>