[FFmpeg-devel] [PATCH] RV40 Loop Filter

Michael Niedermayer michaelni
Wed Oct 22 10:53:23 CEST 2008


On Tue, Oct 21, 2008 at 09:23:21AM +0300, Kostya wrote:
> $subj
> I've tested it and looks like filter is invoked properly now,
> filtering workflow should be also correct, but there is some
> non-bitexactness elsewhere, so result picture is still a bit
> wrong.

[...]
> +    if(!edge)
> +        flag0 = flag1 = 0;
> +    else{
> +        flag0 = (llim0 == 3);
> +        flag1 = (llim1 == 3);
> +    }
> +    if(flag0 && FFABS(s2) >= thr1)
> +        flag0 = 0;
> +    if(flag1 && FFABS(s3) >= thr1)
> +        flag1 = 0;

if(!edge)
    flag0 = flag1 = 0;
else{
    flag0 = llim0 == 3 && FFABS(s2) < thr1;
    flag1 = llim1 == 3 && FFABS(s3) < thr1;
}

also the naming of variables like lim0 llim0 lim1 llim1 thr0 thr1 and so on
is very poor.
Where they correspond to positions numbers are fine but where they do not
this kind of naming makes the code undeciperable and unreviewable.


> +
> +    lims = (lim0 + lim1 + llim0 + llim1) >> 1;
> +    if(flag0 && flag1){ /* strong filtering */
> +        for(i = 0; i < 4; i++, src += stride){
> +            t = src[0*step] - src[-1*step];
> +            if(!t) continue;
> +            sflag = (mult * FFABS(t)) >> 7;
> +            if(sflag > 1) continue;
> +
> +            p0 = (RV40_STRONG_FILTER(src, step, -3, 1, -3) + rv40_dither_l[dmode + i]) >> 7;
> +            p1 = (RV40_STRONG_FILTER(src, step, -2, 2, -2) + rv40_dither_r[dmode + i]) >> 7;
> +            diff[0] = src[-1*step];
> +            diff[1] = src[ 0*step];

the variable diff can be moved into the for loop, this may also apply to
others


[...]
> +static int rv40_set_deblock_coef(RV34DecContext *r)
> +{
> +    MpegEncContext *s = &r->s;
> +    int mvmask = 0, i, j, dx, dy;
> +    int midx = s->mb_x * 2 + s->mb_y * 2 * s->b8_stride;

> +    if(s->pict_type == FF_I_TYPE)
> +        return 0;

why is this even called for i frames?


> +    for(j = 0; j < 2; j++){
> +        for(i = 0; i < 2; i++){
> +            if(i || s->mb_x){
> +                dx = FFABS(s->current_picture_ptr->motion_val[0][midx + i][0] - s->current_picture_ptr->motion_val[0][midx + i - 1][0]);
> +                dy = FFABS(s->current_picture_ptr->motion_val[0][midx + i][1] - s->current_picture_ptr->motion_val[0][midx + i - 1][1]);
> +                if(dx > 3 || dy > 3){
> +                    mvmask |= 0x11 << (i*2 + j*8);
> +                }
> +            }
> +            if(j || !s->first_slice_line){
> +                dx = FFABS(s->current_picture_ptr->motion_val[0][midx + i][0] - s->current_picture_ptr->motion_val[0][midx + i - s->b8_stride][0]);
> +                dy = FFABS(s->current_picture_ptr->motion_val[0][midx + i][1] - s->current_picture_ptr->motion_val[0][midx + i - s->b8_stride][1]);

s->current_picture_ptr->motion_val[0][midx + i] is duplicated all over the 151 char long lines


> +                if(dx > 3 || dy > 3){
> +                    mvmask |= 0x03 << (i*2 + j*8);
> +                }
> +            }
> +        }
> +        midx += s->b8_stride;
> +    }

i think the if() can be moved out of the loop like
if(first_slice_line)
    mvmask &= 123;


> +    return mvmask;
> +}
> +
> +static void rv40_loop_filter(RV34DecContext *r)
> +{
> +    MpegEncContext *s = &r->s;
> +    int mb_pos;
> +    int i, j;
> +    uint8_t *Y, *C;
> +    int alpha, beta, betaY, betaC;
> +    int q;
> +    // 0 - cur block, 1 - top, 2 - left, 3 - bottom
> +    int btype[4], clip[4], mvmasks[4], cbps[4], uvcbps[4][2];
> +

> +    if(s->pict_type == FF_B_TYPE)
> +        return;

why is this even called for b frames?


> +
> +    for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++){
> +        mb_pos = s->mb_y * s->mb_stride;
> +        for(s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++, mb_pos++){
> +            int btype = s->current_picture_ptr->mb_type[mb_pos];

> +            if(IS_INTRA(btype) || IS_SEPARATE_DC(btype)){
> +                r->cbp_luma  [mb_pos] = 0xFFFF;
> +            }
> +            if(IS_INTRA(btype))
> +                r->cbp_chroma[mb_pos] = 0xFF;

inconsistant {}


[...]
> +            mvmasks[0] = r->deblock_coefs[mb_pos];
> +            btype  [0] = s->current_picture_ptr->mb_type[mb_pos];
> +            cbps   [0] = r->cbp_luma[mb_pos];
> +            uvcbps[0][0] = r->cbp_chroma[mb_pos] & 0xF;
> +            uvcbps[0][1] = r->cbp_chroma[mb_pos] >> 4;
> +            if(s->mb_y){
> +                mvmasks[1] = r->deblock_coefs[mb_pos - s->mb_stride] & 0xF000;
> +                btype  [1] = s->current_picture_ptr->mb_type[mb_pos - s->mb_stride];
> +                cbps   [1] = r->cbp_luma[mb_pos - s->mb_stride] & 0xF000;
> +                uvcbps[1][0] =  r->cbp_chroma[mb_pos - s->mb_stride]       & 0xC;
> +                uvcbps[1][1] = (r->cbp_chroma[mb_pos - s->mb_stride] >> 4) & 0xC;
> +            }else{
> +                mvmasks[1] = 0;
> +                btype  [1] = btype[0];
> +                cbps   [1] = 0;
> +                uvcbps[1][0] = uvcbps[1][1] = 0;
> +            }
> +            if(s->mb_x){
> +                mvmasks[2] = r->deblock_coefs[mb_pos - 1] & 0x8888;
> +                btype  [2] = s->current_picture_ptr->mb_type[mb_pos - 1];
> +                cbps   [2] = r->cbp_luma[mb_pos - 1] & 0x8888;
> +                uvcbps[2][0] =  r->cbp_chroma[mb_pos - 1]       & 0xA;
> +                uvcbps[2][1] = (r->cbp_chroma[mb_pos - 1] >> 4) & 0xA;
> +            }else{
> +                mvmasks[2] = 0;
> +                btype  [2] = btype[0];
> +                cbps   [2] = 0;
> +                uvcbps[2][0] = uvcbps[2][1] = 0;
> +            }
> +            if(s->mb_y < s->mb_height - 1){
> +                mvmasks[3] = r->deblock_coefs[mb_pos + s->mb_stride] & 0x000F;
> +                btype  [3] = s->current_picture_ptr->mb_type[mb_pos + s->mb_stride];
> +                cbps   [3] = r->cbp_luma[mb_pos + s->mb_stride] & 0x000F;
> +                uvcbps[3][0] =  r->cbp_chroma[mb_pos + s->mb_stride]       & 0x3;
> +                uvcbps[3][1] = (r->cbp_chroma[mb_pos + s->mb_stride] >> 4) & 0x3;
> +            }else{
> +                mvmasks[3] = 0;
> +                btype  [3] = btype[0];
> +                cbps   [3] = 0;
> +                uvcbps[3][0] = uvcbps[3][1] = 0;
> +            }

lots of duplicated code
btype holds the macro block type thus the 'b' seems wrong
also the plural 's' in the names seems wrong, we have a mask, a pattern
not masks and patterns


> +            for(i = 0; i < 4; i++){
> +                btype[i] = (IS_INTRA(btype[i]) || IS_SEPARATE_DC(btype[i])) ? 2 : 1;
> +                clip[i] = rv40_filter_clip_tbl[btype[i]][q];
> +            }
> +            y_h_deblock = cbps[0] | ((cbps[0] << 4) & ~0x000F) | (cbps[1] >> 12)
> +                        | ((cbps[3] << 20) & ~0x000F) | (cbps[3] << 16)
> +                        | mvmasks[0] | (mvmasks[3] << 16);
> +            y_v_deblock = ((cbps[0] << 1) & ~0x1111) | (cbps[2] >> 3)
> +                        | cbps[0] | (cbps[3] << 16)
> +                        | mvmasks[0] | (mvmasks[3] << 16);
> +            if(!s->mb_x){
> +                y_v_deblock &= ~0x1111;
> +            }
> +            if(!s->mb_y){
> +                y_h_deblock &= ~0x000F;
> +            }
> +            if(s->mb_y == s->mb_height - 1 || (btype[0] == 2 || btype[3] == 2)){
> +                y_h_deblock &= ~0xF0000;
> +            }
> +            y_coded = cbps[0] | (cbps[3] << 16)
> +                    | mvmasks[0] | (mvmasks[3] << 16);
> +            y_left_coded = cbps[2] | mvmasks[2];
> +            y_top_coded = cbps[1] | mvmasks[1];
> +            y_top_edge = (btype[0] == 2 || btype[1] == 2) ? 0x000F : 0;
> +            y_left_edge = (btype[0] == 2 || btype[2] == 2) ? 0x1111 : 0;
> +            for(i = 0; i < 2; i++){
> +                c_left_edge[i] = (btype[0] == 2 || btype[2] == 2) ? 0x5 : 0;
> +                c_top_edge[i] = (btype[0] == 2 || btype[1] == 2) ? 0x3 : 0;
> +                c_v_deblock[i] = ((uvcbps[0][i] << 1) & ~0x5) | (uvcbps[2][i] >> 1)
> +                               | (uvcbps[3][i] << 4) | uvcbps[0][i];
> +                c_h_deblock[i] = (uvcbps[3][i] << 4) | uvcbps[0][i] | (uvcbps[1][i] >> 2)
> +                               | (uvcbps[3][i] << 6) | (uvcbps[0][i] << 2);
> +                c_top_coded[i] = uvcbps[1][i];
> +                c_left_coded[i] = uvcbps[2][i];
> +                c_coded[i] = (uvcbps[3][i] << 4) | uvcbps[0][i];
> +                if(!s->mb_x){
> +                    c_v_deblock[i] &= ~0x5;
> +                }
> +                if(!s->mb_y){
> +                    c_h_deblock[i] &= ~0x3;
> +                }
> +                if(s->mb_y == s->mb_height - 1){
> +                    c_h_deblock[i] &= ~0x30;
> +                }
> +                if(btype[0] == 2 || btype[3] == 2){
> +                    c_h_deblock[i] &= ~0x30;
> +                }
> +            }
> +
> +            Y = s->dest[0];
> +            if((y_h_deblock & 0x0010) && !(y_top_edge & 0x0010)){
> +                rv40_h_loop_filter(Y+4*s->linesize, s->linesize, 0,
> +                                   y_coded & 0x0010 ? clip[0] : 0,
> +                                   y_coded & 0x0001 ? clip[0] : 0,
> +                                   alpha, beta, betaY, 0, 0);
> +            }
> +            if((y_v_deblock & 0x0001) && !(y_left_edge & 0x0001)){
> +                rv40_v_loop_filter(Y, s->linesize, 0,
> +                                   y_coded & 0x0001 ? clip[0] : 0,
> +                                   y_left_coded & 0x0008 ? clip[2] : 0,
> +                                   alpha, beta, betaY, 0, 0);
> +            }
> +            if((y_h_deblock & 0x0001) &&  (y_top_edge & 0x0001)){
> +                rv40_h_loop_filter(Y, s->linesize, 0,
> +                                   y_coded & 0x0001 ? clip[0] : 0,
> +                                   y_top_coded & 0x1000 ? clip[1] : 0,
> +                                   alpha, beta, betaY, 0, 1);
> +            }
> +            if((y_v_deblock & 0x0001) &&  (y_left_edge & 0x0001)){
> +                rv40_v_loop_filter(Y, s->linesize, 0,
> +                                   y_coded & 0x0001 ? clip[0] : 0,
> +                                   y_left_coded & 0x0008 ? clip[2] : 0,
> +                                   alpha, beta, betaY, 0, 1);
> +            }
> +            for(i = 1; i < 4; i++){
> +                Y += 4;
> +                if((y_h_deblock & (0x0010<<i)) && !(y_top_edge & (0x0010<<i))){
> +                    rv40_h_loop_filter(Y+4*s->linesize, s->linesize, i*4,
> +                                       y_coded & (0x0010<<i) ? clip[0] : 0,
> +                                       y_coded & (0x0001<<i) ? clip[0] : 0,
> +                                       alpha, beta, betaY, 0, 0);
> +                }
> +                if((y_v_deblock & (0x0001<<i))){
> +                    rv40_v_loop_filter(Y, s->linesize, i*4,
> +                                       y_coded & (0x0001<<  i)   ? clip[0] : 0,
> +                                       y_coded & (0x0001<<(i-1)) ? clip[0] : 0,
> +                                       alpha, beta, betaY, 0, 0);
> +                }
> +                if((y_h_deblock & (0x0001<<i)) &&  (y_top_edge & (0x0001<<i))){
> +                    rv40_h_loop_filter(Y, s->linesize, i*4,
> +                                       y_coded     & (0x0001<<i) ? clip[0] : 0,
> +                                       y_top_coded & (0x1000<<i) ? clip[1] : 0,
> +                                       alpha, beta, betaY, 0, 1);
> +                }                
> +            }
> +            for(j = 4; j < 16; j += 4){
> +                Y = s->dest[0] + j*s->linesize;
> +                if((y_h_deblock & (0x0010<<j))){
> +                    rv40_h_loop_filter(Y+4*s->linesize, s->linesize, j,
> +                                       y_coded & (0x0010<<j) ? clip[0] : 0,
> +                                       y_coded & (0x0001<<j) ? clip[0] : 0,
> +                                       alpha, beta, betaY, 0, 0);
> +                }
> +                if((y_v_deblock & (0x0001<<j)) && !(y_left_edge & (0x0001<<j))){
> +                    rv40_v_loop_filter(Y, s->linesize, j,
> +                                       y_coded      & (0x0001<<j) ? clip[0] : 0,
> +                                       y_left_coded & (0x0008<<j) ? clip[2] : 0,
> +                                       alpha, beta, betaY, 0, 0);
> +                }
> +                if((y_v_deblock & (0x0001<<j)) &&  (y_left_edge & (0x0001<<j))){
> +                    rv40_v_loop_filter(Y, s->linesize, j,
> +                                       y_coded      & (0x0001<<j) ? clip[0] : 0,
> +                                       y_left_coded & (0x0008<<j) ? clip[2] : 0,
> +                                       alpha, beta, betaY, 0, 1);
> +                }                
> +                for(i = 1; i < 4; i++){
> +                    int ij = i+j;
> +                    Y += 4;
> +                    if((y_h_deblock & (0x0010<<ij))){
> +                        rv40_h_loop_filter(Y+4*s->linesize, s->linesize, ij,
> +                                           y_coded & (0x0010<<ij) ? clip[0] : 0,
> +                                           y_coded & (0x0001<<ij) ? clip[0] : 0,
> +                                           alpha, beta, betaY, 0, 0);
> +                    }
> +                    if((y_v_deblock & (0x0001<<ij))){
> +                        rv40_v_loop_filter(Y, s->linesize, ij,
> +                                           y_coded & (0x0001<< ij)    ? clip[0] : 0,
> +                                           y_coded & (0x0001<<(ij-1)) ? clip[0] : 0,
> +                                           alpha, beta, betaY, 0, 0);
> +                    }
> +                }
> +            }
> +            for(i = 0; i < 2; i++){
> +                C = s->dest[i+1];
> +                if((c_h_deblock[i] & 0x4) && !(c_top_edge[i] & 0x4)){
> +                    rv40_h_loop_filter(C+4*s->uvlinesize, s->uvlinesize, 0,
> +                                       c_coded[i] & 0x4 ? clip[0] : 0,
> +                                       c_coded[i] & 0x1 ? clip[0] : 0,
> +                                       alpha, beta, betaC, 1, 0);
> +                }
> +                if((c_v_deblock[i] & 0x1) && !(c_left_edge[i] & 0x1)){
> +                    rv40_v_loop_filter(C, s->uvlinesize, 0,
> +                                       c_coded[i]      & 0x1 ? clip[0] : 0,
> +                                       c_left_coded[i] & 0x2 ? clip[2] : 0,
> +                                       alpha, beta, betaC, 1, 0);
> +                }
> +                if((c_h_deblock[i] & 0x1) &&  (c_top_edge[i] & 0x1)){
> +                    rv40_h_loop_filter(C, s->uvlinesize, 0,
> +                                       c_coded[i]     & 0x1 ? clip[0] : 0,
> +                                       c_top_coded[i] & 0x4 ? clip[1] : 0,
> +                                       alpha, beta, betaC, 1, 1);
> +                }
> +                if((c_v_deblock[i] & 0x1) &&  (c_left_edge[i] & 0x1)){
> +                    rv40_v_loop_filter(C, s->uvlinesize, 0,
> +                                       c_coded[i]      & 0x1 ? clip[0] : 0,
> +                                       c_left_coded[i] & 0x2 ? clip[2] : 0,
> +                                       alpha, beta, betaC, 1, 1);
> +                }
> +            }
> +            for(i = 0; i < 2; i++){
> +                C = s->dest[i+1] + 4;
> +                if((c_h_deblock[i] & 0x8) && !(c_top_edge[i] & 0x8)){
> +                    rv40_h_loop_filter(C+4*s->uvlinesize, s->uvlinesize, 8,
> +                                       c_coded[i] & 0x8 ? clip[0] : 0,
> +                                       c_coded[i] & 0x2 ? clip[0] : 0,
> +                                       alpha, beta, betaC, 1, 0);
> +                }
> +                if((c_v_deblock[i] & 0x2)){
> +                    rv40_v_loop_filter(C, s->uvlinesize, 0,
> +                                       c_coded[i] & 0x2 ? clip[0] : 0,
> +                                       c_coded[i] & 0x1 ? clip[0] : 0,
> +                                       alpha, beta, betaC, 1, 0);
> +                }
> +                if((c_h_deblock[i] & 0x2) &&  (c_top_edge[i] & 0x1)){
> +                    rv40_h_loop_filter(C, s->uvlinesize, 8,
> +                                       c_coded[i]     & 0x2 ? clip[0] : 0,
> +                                       c_top_coded[i] & 0x8 ? clip[1] : 0,
> +                                       alpha, beta, betaC, 1, 1);
> +                }
> +            }
> +            for(i = 0; i < 2; i++){
> +                C = s->dest[i+1] + 4*s->uvlinesize;
> +                if((c_h_deblock[i] & 0x10)){
> +                    rv40_h_loop_filter(C+4*s->uvlinesize, s->uvlinesize, 0,
> +                                       c_coded[i] & 0x10? clip[0] : 0,
> +                                       c_coded[i] & 0x04 ? clip[0] : 0,
> +                                       alpha, beta, betaC, 1, 0);
> +                }
> +                if((c_v_deblock[i] & 0x4) && !(c_left_edge[i] & 0x4)){
> +                    rv40_v_loop_filter(C, s->uvlinesize, 8,
> +                                       c_coded[i]      & 0x4 ? clip[0] : 0,
> +                                       c_left_coded[i] & 0x8 ? clip[2] : 0,
> +                                       alpha, beta, betaC, 1, 0);
> +                }
> +                if((c_v_deblock[i] & 0x4) &&  (c_left_edge[i] & 0x4)){
> +                    rv40_v_loop_filter(C, s->uvlinesize, 8,
> +                                       c_coded[i]      & 0x4 ? clip[0] : 0,
> +                                       c_left_coded[i] & 0x8 ? clip[2] : 0,
> +                                       alpha, beta, betaC, 1, 1);
> +                }
> +            }
> +            for(i = 0; i < 2; i++){
> +                C = s->dest[i+1] + 4*s->uvlinesize + 4;
> +                if((c_h_deblock[i] & 0x20)){
> +                    rv40_h_loop_filter(C+4*s->uvlinesize, s->uvlinesize, 8,
> +                                       c_coded[i] & 0x20? clip[3] : 0,
> +                                       c_coded[i] & 0x8 ? clip[0] : 0,
> +                                       alpha, beta, betaC, 1, 0);
> +                }
> +                if((c_v_deblock[i] & 0x8)){
> +                    rv40_v_loop_filter(C, s->uvlinesize, 8,
> +                                       c_coded[i] & 0x8 ? clip[0] : 0,
> +                                       c_coded[i] & 0x4 ? clip[0] : 0,
> +                                       alpha, beta, betaC, 1, 0);
> +                }
> +            }
> +        }

the word mess is probably the best way to describe this
as far as i can tell you are packing all the bits related to deblocking
and then later duplicate code each with hardcoded masks to extract them
again.

this likely could be reduced in size by 3/4 and made much more readable


[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Everything should be made as simple as possible, but not simpler.
-- Albert Einstein
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20081022/0d7ac976/attachment.pgp>



More information about the ffmpeg-devel mailing list