[FFmpeg-devel] Parallelized h264 proof-of-concept

Michael Niedermayer michaelni
Wed Jun 6 22:10:46 CEST 2007


Hi

On Wed, Jun 06, 2007 at 12:03:58PM +0200, Andreas Öman wrote:
[...]

> * "Fix" av_realloc to correctly align (by using free + memalign +
> memcpy)

Don't try; you will fail (like everyone else). Also, we don't really need that;
it could be considered bloat ...


> * Any other ideas?

av_free() + av_malloc() or pass an argument to MPV_common_init()


> 
> >
> >also look at how slice level multithreading is implemented for
> >mpeg2/mpeg4 ...
> 
> >>Anyway,
> >>If this is something that ffmpeg is willing to integrate
> >>I'd like to get a few pointers, hints and answers on the
> >>topics above before I continue with the stuff that's left.
> >
> >I am not against slice-level threading support, though the
> >implementation must be clean and simple, and there must be no
> >speed loss for the single-threaded case (>1% is completely
> >unacceptable)
> >
> 
> This version is much cleaner, there are some "unrelated"
> changes (border backup + copy stuff) that might be beneficial
> to commit anyway (but the deblocking-type-2 conditional in xchg must
> be there in order for deblocking to work correctly when run in parallel)

"unrelated" changes must be in separate patches


[...]
> Index: libavcodec/h264.c
> ===================================================================
> --- libavcodec/h264.c	(revision 9211)
> +++ libavcodec/h264.c	(working copy)
> @@ -52,6 +52,7 @@
>  static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
>  static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
>  static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
> +static void execute_decode_slices(H264Context *h, int reset);

Can't you order the new functions so as to avoid that forward declaration?


[...]
> @@ -2789,6 +2791,13 @@
>  
>      for(i = 0; i < MAX_PPS_COUNT; i++)
>          av_freep(h->pps_buffers + i);
> +
> +    for(i = 1; i < h->s.avctx->thread_count; i++) {
> +	hx = (H264Context *)h->s.thread_context[i];
> +	av_freep(&hx->top_borders[0]);
> +	av_freep(&hx->top_borders[1]);
> +	av_freep(&hx->s.obmc_scratchpad);
> +    }

tabs are forbidden in svn


[...]
>      if(!s->obmc_scratchpad)
>          s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
>  
> +    for(i = 1; i < s->avctx->thread_count; i++)
> +	if(!s->thread_context[i]->obmc_scratchpad)
> +	    s->thread_context[i]->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);

Hmm, can't the obmc_scratchpad allocation be done in just one loop, instead of
doing it once for the first thread and then again for threads 2 and up?
IIRC mpegvideo also manages to do it that way.


[...]

> @@ -4154,16 +4212,94 @@
>  }
>  
>  /**
> + * Init scan tables
> + */
> +static void init_scan_tables(H264Context *h)
> +{
> +    MpegEncContext * const s = &h->s;
> +
> +    if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
> +	memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
> +	memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
> +    }else{
> +	int i;
> +	for(i=0; i<16; i++){
> +#define T(x) (x>>2) | ((x<<2) & 0xF)
> +	    h->zigzag_scan[i] = T(zigzag_scan[i]);
> +	    h-> field_scan[i] = T( field_scan[i]);
> +#undef T
> +	}
> +    }
> +    if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
> +	memcpy(h->zigzag_scan8x8,       zigzag_scan8x8,       64*sizeof(uint8_t));
> +	memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
> +	memcpy(h->field_scan8x8,        field_scan8x8,        64*sizeof(uint8_t));
> +	memcpy(h->field_scan8x8_cavlc,  field_scan8x8_cavlc,  64*sizeof(uint8_t));
> +    }else{
> +	int i;
> +	for(i=0; i<64; i++){
> +#define T(x) (x>>3) | ((x&7)<<3)
> +	    h->zigzag_scan8x8[i]       = T(zigzag_scan8x8[i]);
> +	    h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
> +	    h->field_scan8x8[i]        = T(field_scan8x8[i]);
> +	    h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
> +#undef T
> +	}
> +    }
> +    if(h->sps.transform_bypass){ //FIXME same ugly
> +	h->zigzag_scan_q0          = zigzag_scan;
> +	h->zigzag_scan8x8_q0       = zigzag_scan8x8;
> +	h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
> +	h->field_scan_q0           = field_scan;
> +	h->field_scan8x8_q0        = field_scan8x8;
> +	h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
> +    }else{
> +	h->zigzag_scan_q0          = h->zigzag_scan;
> +	h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
> +	h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
> +	h->field_scan_q0           = h->field_scan;
> +	h->field_scan8x8_q0        = h->field_scan8x8;
> +	h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
> +    }
> +}

Moving this into its own function could be a separate patch too.


[...]
> @@ -6667,9 +6792,9 @@
>  
>      mb_xy = mb_x + mb_y*s->mb_stride;
>  
> -    if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength ||
> -       (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
> -                                      h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
> +    if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength || 

Trailing whitespace is also forbidden in svn, as are cosmetic
(whitespace-only) changes mixed with functional changes.


[...]
> @@ -6870,7 +7007,7 @@
>              first_vertical_edge_done = 0;
>          }
>  
> -        if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
> +        if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_num)
>              start = 1;

Aren't these h->slice_table[mb_xy] -> h->slice_num changes independent of the
threading? If so, they should be in a separate patch.


[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Observe your enemies, for they first find out your faults. -- Antisthenes
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20070606/8dafbed5/attachment.pgp>



More information about the ffmpeg-devel mailing list