[FFmpeg-devel] [PATCH 09/10] diracdec: run the final decoding stage/idwt for every plane in parallel

Rostislav Pehlivanov atomnuker at gmail.com
Thu Jun 30 16:19:10 CEST 2016


On 23 June 2016 at 18:07, Rostislav Pehlivanov <rpehlivanov at ob-encoder.com>
wrote:

> 27% performance increase for a 12-bit 4K file.
>
> Signed-off-by: Rostislav Pehlivanov <rpehlivanov at obe.tv>
> ---
>  libavcodec/diracdec.c | 152
> ++++++++++++++++++++++++++------------------------
>  1 file changed, 80 insertions(+), 72 deletions(-)
>
> diff --git a/libavcodec/diracdec.c b/libavcodec/diracdec.c
> index 63eb4d1..ec45132 100644
> --- a/libavcodec/diracdec.c
> +++ b/libavcodec/diracdec.c
> @@ -1804,99 +1804,107 @@ static int interpolate_refplane(DiracContext *s,
> DiracFrame *ref, int plane, int
>      return 0;
>  }
>
> -/**
> - * Dirac Specification ->
> - * 13.0 Transform data syntax. transform_data()
> - */
> -static int dirac_decode_frame_internal(DiracContext *s)
> +static int decode_plane(AVCodecContext *avctx, void *arg, int jobnr, int
> thread)
>  {
>      DWTContext d;
> -    int y, i, comp, dsty;
> -    int ret;
> +    int i, y, ret, dsty;
> +    DiracContext *s = avctx->priv_data;
> +    Plane *p        = &s->plane[jobnr];
> +    uint8_t *frame  = s->current_picture->avframe->data[jobnr];
>
> -    if (s->low_delay) {
> -        /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
> -        for (comp = 0; comp < 3; comp++) {
> -            Plane *p = &s->plane[comp];
> -            memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height);
> -        }
> -        if (!s->zero_res) {
> -            if ((ret = decode_lowdelay(s)) < 0)
> -                return ret;
> -        }
> +    /* FIXME: small resolutions */
> +    for (i = 0; i < 4; i++)
> +        s->edge_emu_buffer[i] = s->edge_emu_buffer_base +
> i*FFALIGN(p->width, 16);
> +
> +    if (!s->zero_res && !s->low_delay)
> +    {
> +        memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height);
> +        decode_component(s, jobnr); /* [DIRAC_STD] 13.4.1
> core_transform_data() */
>      }
> +    ret = ff_spatial_idwt_init(&d, &p->idwt, s->wavelet_idx+2,
> +                               s->wavelet_depth, s->bit_depth);
> +    if (ret < 0)
> +        return ret;
>
> -    for (comp = 0; comp < 3; comp++) {
> -        Plane *p       = &s->plane[comp];
> -        uint8_t *frame = s->current_picture->avframe->data[comp];
> +    if (!s->num_refs) { /* intra */
> +        for (y = 0; y < p->height; y += 16) {
> +            int idx = (s->bit_depth - 8) >> 1;
> +            ff_spatial_idwt_slice2(&d, y+16); /* decode */
> +            s->diracdsp.put_signed_rect_clamped[idx](frame + y*p->stride,
> +                                                     p->stride,
> +                                                     p->idwt.buf +
> y*p->idwt.stride,
> +                                                     p->idwt.stride,
> p->width, 16);
> +        }
> +    } else { /* inter */
> +        int rowheight = p->ybsep*p->stride;
>
> -        /* FIXME: small resolutions */
> -        for (i = 0; i < 4; i++)
> -            s->edge_emu_buffer[i] = s->edge_emu_buffer_base +
> i*FFALIGN(p->width, 16);
> +        select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
>
> -        if (!s->zero_res && !s->low_delay)
> -        {
> -            memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height);
> -            decode_component(s, comp); /* [DIRAC_STD] 13.4.1
> core_transform_data() */
> +        for (i = 0; i < s->num_refs; i++) {
> +            int ret = interpolate_refplane(s, s->ref_pics[i], jobnr,
> p->width, p->height);
> +            if (ret < 0)
> +                return ret;
>          }
> -        ret = ff_spatial_idwt_init(&d, &p->idwt, s->wavelet_idx+2,
> -                                   s->wavelet_depth, s->bit_depth);
> -        if (ret < 0)
> -            return ret;
>
> -        if (!s->num_refs) { /* intra */
> -            for (y = 0; y < p->height; y += 16) {
> -                int idx = (s->bit_depth - 8) >> 1;
> -                ff_spatial_idwt_slice2(&d, y+16); /* decode */
> -                s->diracdsp.put_signed_rect_clamped[idx](frame +
> y*p->stride,
> -                                                         p->stride,
> -                                                         p->idwt.buf +
> y*p->idwt.stride,
> -                                                         p->idwt.stride,
> p->width, 16);
> -            }
> -        } else { /* inter */
> -            int rowheight = p->ybsep*p->stride;
> +        memset(s->mctmp, 0, 4*p->yoffset*p->stride);
>
> -            select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
> +        dsty = -p->yoffset;
> +        for (y = 0; y < s->blheight; y++) {
> +            int h     = 0,
> +                start = FFMAX(dsty, 0);
> +            uint16_t *mctmp    = s->mctmp + y*rowheight;
> +            DiracBlock *blocks = s->blmotion + y*s->blwidth;
>
> -            for (i = 0; i < s->num_refs; i++) {
> -                int ret = interpolate_refplane(s, s->ref_pics[i], comp,
> p->width, p->height);
> -                if (ret < 0)
> -                    return ret;
> -            }
> +            init_obmc_weights(s, p, y);
>
> -            memset(s->mctmp, 0, 4*p->yoffset*p->stride);
> +            if (y == s->blheight-1 || start+p->ybsep > p->height)
> +                h = p->height - start;
> +            else
> +                h = p->ybsep - (start - dsty);
> +            if (h < 0)
> +                break;
>
> -            dsty = -p->yoffset;
> -            for (y = 0; y < s->blheight; y++) {
> -                int h     = 0,
> -                    start = FFMAX(dsty, 0);
> -                uint16_t *mctmp    = s->mctmp + y*rowheight;
> -                DiracBlock *blocks = s->blmotion + y*s->blwidth;
> +            memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
> +            mc_row(s, blocks, mctmp, jobnr, dsty);
>
> -                init_obmc_weights(s, p, y);
> +            mctmp += (start - dsty)*p->stride + p->xoffset;
> +            ff_spatial_idwt_slice2(&d, start + h); /* decode */
> +            /* NOTE: add_rect_clamped hasn't been templated hence the
> shifts.
> +             * idwt.stride is passed as pixels, not in bytes as in the
> rest of the decoder */
> +            s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp,
> p->stride,
> +                                         (int16_t*)(p->idwt.buf) +
> start*(p->idwt.stride >> 1), (p->idwt.stride >> 1), p->width, h);
>
> -                if (y == s->blheight-1 || start+p->ybsep > p->height)
> -                    h = p->height - start;
> -                else
> -                    h = p->ybsep - (start - dsty);
> -                if (h < 0)
> -                    break;
> +            dsty += p->ybsep;
> +        }
> +    }
>
> -                memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
> -                mc_row(s, blocks, mctmp, comp, dsty);
> +    return 0;
> +}
>
> -                mctmp += (start - dsty)*p->stride + p->xoffset;
> -                ff_spatial_idwt_slice2(&d, start + h); /* decode */
> -                /* NOTE: add_rect_clamped hasn't been templated hence the
> shifts.
> -                 * idwt.stride is passed as pixels, not in bytes as in
> the rest of the decoder */
> -                s->diracdsp.add_rect_clamped(frame + start*p->stride,
> mctmp, p->stride,
> -                                             (int16_t*)(p->idwt.buf) +
> start*(p->idwt.stride >> 1), (p->idwt.stride >> 1), p->width, h);
> +/**
> + * Dirac Specification ->
> + * 13.0 Transform data syntax. transform_data()
> + */
> +static int dirac_decode_frame_internal(DiracContext *s)
> +{
> +    int ret, comp, res[3];
>
> -                dsty += p->ybsep;
> -            }
> +    if (s->low_delay) {
> +        /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
> +        for (comp = 0; comp < 3; comp++) {
> +            Plane *p = &s->plane[comp];
> +            memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height);
> +        }
> +        if (!s->zero_res) {
> +            if ((ret = decode_lowdelay(s)) < 0)
> +                return ret;
>          }
>      }
>
> +    s->avctx->execute2(s->avctx, decode_plane, NULL, res, 3);
> +    for (comp = 0; comp < 3; comp++)
> +        if (res[comp])
> +            return res[comp];
>
>      return 0;
>  }
> --
> 2.8.1.369.geae769a
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>

Disregard this patch; it breaks regular Dirac files.


More information about the ffmpeg-devel mailing list