[FFmpeg-devel] [PATCH] H.264 predictive lossless: again

Sat Nov 29 12:31:44 CET 2008

On Fri, Nov 28, 2008 at 04:04:44PM -0800, Jason Garrett-Glaser wrote:
> I'm getting rather annoyed at having to tell people to install payware
> (CoreAVC) or recompile their ffmpeg with an unsupported patch in order
> to play back output from x264.  So, here's the patch again, with a few
> changes to make it a bit more palatable.  In particular,
> transform_bypass is now counted as a "complex" block in order to
> eliminate the speed impact of the changes on ordinary decoding of
> non-lossless material.

putting poorly implemented code behind a if(!simple) while surely better than
not is still not making the code less poorly implemented.

review below

[...]
> Index: libavcodec/h264.c
> ===================================================================
> --- libavcodec/h264.c	(revision 15948)
> +++ libavcodec/h264.c	(working copy)
> @@ -1580,6 +1580,64 @@
>  }
>  #endif
>  
> +static void inline zero(uint8_t *src, int stride, int width, int height){
> +    int i, j;
> +    for(i=0; i<height; i++){
> +        memset( src, 0, width );
> +        src+= stride;
> +    }
> +}
> +
> +static void inline pred4x4_predictive_vertical_c(uint8_t *src, int stride){
> +    int i;
> +    for(i=0; i<4; i++){
> +        src[0] += src[0-stride];
> +        src[1] += src[1-stride];
> +        src[2] += src[2-stride];
> +        src[3] += src[3-stride];
> +        src+= stride;
> +    }
> +}
> +
> +static void inline pred4x4_predictive_horizontal_c(uint8_t *src, int stride){
> +    int i;
> +    for(i=0; i<4; i++){
> +        src[stride*0] += src[stride*0-1];
> +        src[stride*1] += src[stride*1-1];
> +        src[stride*2] += src[stride*2-1];
> +        src[stride*3] += src[stride*3-1];
> +        src++;
> +    }
> +}
> +
> +static void inline pred8x8_predictive_vertical_c(uint8_t *src, int stride){
> +    pred4x4_predictive_vertical_c(src, stride);
> +    pred4x4_predictive_vertical_c(src+4, stride);
> +    pred4x4_predictive_vertical_c(src+stride*4, stride);
> +    pred4x4_predictive_vertical_c(src+stride*4+4, stride);
> +}
> +
> +static void inline pred8x8_predictive_horizontal_c(uint8_t *src, int stride){
> +    pred4x4_predictive_horizontal_c(src, stride);
> +    pred4x4_predictive_horizontal_c(src+stride*4, stride);
> +    pred4x4_predictive_horizontal_c(src+4, stride);
> +    pred4x4_predictive_horizontal_c(src+stride*4+4, stride);
> +}
> +
> +static void pred16x16_predictive_vertical_c(uint8_t *src, int stride){
> +    pred8x8_predictive_vertical_c(src, stride);
> +    pred8x8_predictive_vertical_c(src+8, stride);
> +    pred8x8_predictive_vertical_c(src+stride*8, stride);
> +    pred8x8_predictive_vertical_c(src+stride*8+8, stride);
> +}
> +
> +static void pred16x16_predictive_horizontal_c(uint8_t *src, int stride){
> +    pred8x8_predictive_horizontal_c(src, stride);
> +    pred8x8_predictive_horizontal_c(src+stride*8, stride);
> +    pred8x8_predictive_horizontal_c(src+8, stride);
> +    pred8x8_predictive_horizontal_c(src+stride*8+8, stride);
> +}
> +
>  /**
>   * gets the chroma qp.
>   */

these belong to h264pred.c/h not here, h264.c is enough of a mess already,
no need to make it worse

> @@ -2366,7 +2424,7 @@
>      int linesize, uvlinesize /*dct_offset*/;
>      int i;
>      int *block_offset = &h->block_offset[0];
> -    const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
> +    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);

this is getting a little long ...
now considering this is initializing 2 variables it really should be 2 lines anyway

>      void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
>      void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
>  
> @@ -2434,8 +2492,16 @@
>                  xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
>  
>              if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
> -                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
> -                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
> +                if(transform_bypass && h->sps.profile_idc == 244 && (h->chroma_pred_mode == VERT_PRED8x8 || h->chroma_pred_mode == HOR_PRED8x8))
> +                {
> +                    zero(dest_cb, uvlinesize, 8, 8);
> +                    zero(dest_cr, uvlinesize, 8, 8);
> +                }
> +                else
> +                {
> +                    h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
> +                    h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
> +                }
>              }
>  
>              if(IS_INTRA4x4(mb_type)){

i cannot see any sense in this change, nor in the similar ones for other
block sizes. It should be well possible to leave the prediction alone
add the residual and modify the result in the special cases

this also reindents 2 lines, and {} placement is inconsistant

> @@ -2445,14 +2511,25 @@
>                              uint8_t * const ptr= dest_y + block_offset[i];
>                              const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
>                              const int nnz = h->non_zero_count_cache[ scan8[i] ];
> -                            h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
> -                                                   (h->topright_samples_available<<i)&0x4000, linesize);
> -                            if(nnz){
> -                                if(nnz == 1 && h->mb[i*16])
> -                                    idct_dc_add(ptr, h->mb + i*16, linesize);
> -                                else
> -                                    idct_add(ptr, h->mb + i*16, linesize);
> +                            if(transform_bypass && h->sps.profile_idc == 244 && (dir == VERT_PRED || dir == HOR_PRED)){
> +                                zero(ptr, linesize, 8, 8);
> +                                idct_add(ptr, h->mb + i*16, linesize);
> +                                if( dir == VERT_PRED )
> +                                    pred8x8_predictive_vertical_c(ptr, linesize);
> +                                else //if( dir == HOR_PRED )
> +                                    pred8x8_predictive_horizontal_c(ptr, linesize);
>                              }
> +                            else
> +                            {
> +                                h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
> +                                                       (h->topright_samples_available<<i)&0x4000, linesize);
> +                                if(nnz){
> +                                    if(nnz == 1 && h->mb[i*16])
> +                                        idct_dc_add(ptr, h->mb + i*16, linesize);
> +                                    else
> +                                        idct_add(ptr, h->mb + i*16, linesize);
> +                                }
> +                            }
>                          }

you are mixing reindentions with functional changes and
{} placement is inconsistant

>                      }else
>                      for(i=0; i<16; i++){
> @@ -2472,21 +2549,35 @@
>                          }else
>                              topright= NULL;
>  
> -                        h->hpc.pred4x4[ dir ](ptr, topright, linesize);
> -                        nnz = h->non_zero_count_cache[ scan8[i] ];
> -                        if(nnz){
> -                            if(is_h264){
> -                                if(nnz == 1 && h->mb[i*16])
> -                                    idct_dc_add(ptr, h->mb + i*16, linesize);
> -                                else
> -                                    idct_add(ptr, h->mb + i*16, linesize);
> -                            }else
> -                                svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);

> +                        if(transform_bypass && h->sps.profile_idc == 244 && (dir == VERT_PRED || dir == HOR_PRED)){

transform_bypass && h->sps.profile_idc == 244 are not changing within a MB
thus repeating this double check 16 times is not ideal

> +                            zero(ptr, linesize, 4, 4);
> +                            idct_add(ptr, h->mb + i*16, linesize);
> +                            if( dir == VERT_PRED )
> +                                pred4x4_predictive_vertical_c(ptr, linesize);
> +                            else// if( dir == HOR_PRED )
> +                                pred4x4_predictive_horizontal_c(ptr, linesize);
>                          }
> +                        else
> +                        {
> +                            h->hpc.pred4x4[ dir ](ptr, topright, linesize);
> +                            nnz = h->non_zero_count_cache[ scan8[i] ];
> +                            if(nnz){
> +                                if(is_h264){
> +                                    if(nnz == 1 && h->mb[i*16])
> +                                        idct_dc_add(ptr, h->mb + i*16, linesize);
> +                                    else
> +                                        idct_add(ptr, h->mb + i*16, linesize);
> +                                }else
> +                                    svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
> +                            }
> +                        }
>                      }
>                  }

more reindentions ...

>              }else{
> -                h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
> +                if(transform_bypass && h->sps.profile_idc == 244 && (h->intra16x16_pred_mode == VERT_PRED8x8 || h->intra16x16_pred_mode == HOR_PRED8x8))
> +                    zero(dest_y, linesize, 16, 16);
> +                else
> +                    h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
>                  if(is_h264){
>                      if(!transform_bypass)
>                          h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);

unneeded

[...]

> @@ -2551,6 +2648,18 @@
>                      else if(h->mb[i*16])
>                          idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
>                  }
> +                if(transform_bypass && IS_INTRA(mb_type) && h->sps.profile_idc == 244 && (h->chroma_pred_mode == VERT_PRED8x8 || h->chroma_pred_mode == HOR_PRED8x8)){
> +                    if( h->chroma_pred_mode == VERT_PRED8x8 )
> +                    {
> +                        pred8x8_predictive_vertical_c(dest[0], uvlinesize);
> +                        pred8x8_predictive_vertical_c(dest[1], uvlinesize);
> +                    }

> +                    else //if( h->chroma_pred_mode == HOR_PRED8x8 )

should be assert()

> +                    {
> +                        pred8x8_predictive_horizontal_c(dest[0], uvlinesize);
> +                        pred8x8_predictive_horizontal_c(dest[1], uvlinesize);
> +                    }
> +                }
>              }else{
>                  for(i=16; i<16+8; i++){
>                      if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){

inconsistant {}

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

The worst form of inequality is to try to make unequal things equal.
-- Aristotle
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20081129/26cd2b66/attachment.pgp>