[FFmpeg-devel] [PATCH v2 3/6] lavc/qsvdec: Replace current parser with MFXVideoDECODE_DecodeHeader()

Rogozhkin, Dmitry V dmitry.v.rogozhkin at intel.com
Wed Feb 20 21:14:07 EET 2019


On Wed, 2019-02-20 at 10:58 +0800, Zhong Li wrote:
> Using the MSDK parser can improve the qsv decoder pass rate in some
> cases (e.g. when the SPS declares a wrong level_idc, smaller than it
> should be).
> It is also necessary for adding new qsv decoders such as MJPEG and
> VP9, since the current parser can't provide enough information.
> Using MFXVideoDECODE_DecodeHeader() was actually discussed at
> https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html and
> merged as commit 1acb19d, but was overwritten when the libav patches
> were merged (commit 1f26a23) without any explanation.
> 
> v2: split the header decoding out of decode_init, and call it for
> every frame to detect format/resolution changes. This fixes some
> regression issues such as hevc 10bit decoding.
> 
> Signed-off-by: Zhong Li <zhong.li at intel.com>
> ---
>  libavcodec/qsvdec.c | 172 ++++++++++++++++++++++----------------------
>  libavcodec/qsvdec.h |   2 +
>  2 files changed, 90 insertions(+), 84 deletions(-)
> 
> diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c
> index 4a0be811fb..efe054f5c5 100644
> --- a/libavcodec/qsvdec.c
> +++ b/libavcodec/qsvdec.c
> @@ -120,19 +120,18 @@ static inline unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
>      return av_fifo_size(fifo) / qsv_fifo_item_size();
>  }
>  
> -static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)
> +static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext *q, enum AVPixelFormat *pix_fmts, mfxVideoParam *param)
>  {
> -    const AVPixFmtDescriptor *desc;
>      mfxSession session = NULL;
>      int iopattern = 0;
> -    mfxVideoParam param = { 0 };
> -    int frame_width  = avctx->coded_width;
> -    int frame_height = avctx->coded_height;
>      int ret;
>  
> -    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
> -    if (!desc)
> -        return AVERROR_BUG;
> +    ret = ff_get_format(avctx, pix_fmts);
> +    if (ret < 0) {
> +        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;
> +        return ret;
> +    } else
> +        q->orig_pix_fmt = pix_fmts[1];

You rely here on how pix_fmts is declared in some other place. That
makes the code hard for newcomers to understand and hence harder to
maintain. Can we make explicit what is actually going on? For example,
declare pix_fmts something like:
enum qsv_formats {
  QSV,
  QSV_NV12,
  QSV_MAX
};

enum AVPixelFormat pix_fmts[QSV_MAX + 1] = {
  [QSV]      = AV_PIX_FMT_QSV,
  [QSV_NV12] = AV_PIX_FMT_NV12,
  [QSV_MAX]  = AV_PIX_FMT_NONE
};

After that we could address the entries like:
        q->orig_pix_fmt = pix_fmts[QSV_NV12];
which both highlights what we actually want to achieve and leaves fewer
chances for mistakes when maintaining the list of formats, should we
ever reorder it.
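
To make this concrete, here is a rough sketch (illustration only, not a
drop-in change) of how the assignment in ff_qsv_process_data() could
then read, reusing the names from your patch:

    enum AVPixelFormat pix_fmts[QSV_MAX + 1] = {
        [QSV]      = AV_PIX_FMT_QSV,
        [QSV_NV12] = AV_PIX_FMT_NV12,
        [QSV_MAX]  = AV_PIX_FMT_NONE
    };

    /* slot QSV_NV12 is the one overwritten with the real SW format */
    avctx->pix_fmt = pix_fmts[QSV_NV12] = ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC);
    ret = qsv_decode_preinit(avctx, q, pix_fmts, &param);

That way the reader immediately sees which slot is being replaced.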

>  
>      if (!q->async_fifo) {
>          q->async_fifo = av_fifo_alloc(q->async_depth * qsv_fifo_item_size());
> @@ -170,48 +169,72 @@ static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)
>          return ret;
>      }
>  
> -    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);
> -    if (ret < 0)
> -        return ret;
> +    param->IOPattern   = q->iopattern;
> +    param->AsyncDepth  = q->async_depth;
> +    param->ExtParam    = q->ext_buffers;
> +    param->NumExtParam = q->nb_ext_buffers;
>  
> -    param.mfx.CodecId      = ret;
> -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx->codec_id, avctx->profile);
> -    param.mfx.CodecLevel   = avctx->level == FF_LEVEL_UNKNOWN ? MFX_LEVEL_UNKNOWN : avctx->level;
> -
> -    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;
> -    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;
> -    param.mfx.FrameInfo.Shift          = desc->comp[0].depth > 8;
> -    param.mfx.FrameInfo.FourCC         = q->fourcc;
> -    param.mfx.FrameInfo.Width          = frame_width;
> -    param.mfx.FrameInfo.Height         = frame_height;
> -    param.mfx.FrameInfo.ChromaFormat   = MFX_CHROMAFORMAT_YUV420;
> -
> -    switch (avctx->field_order) {
> -    case AV_FIELD_PROGRESSIVE:
> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;
> -        break;
> -    case AV_FIELD_TT:
> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_TFF;
> -        break;
> -    case AV_FIELD_BB:
> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_BFF;
> -        break;
> -    default:
> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_UNKNOWN;
> -        break;
> -    }
> +    return 0;
> + }
> +
> +static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q, mfxVideoParam *param)
> +{
> +    int ret;
>  
> -    param.IOPattern   = q->iopattern;
> -    param.AsyncDepth  = q->async_depth;
> -    param.ExtParam    = q->ext_buffers;
> -    param.NumExtParam = q->nb_ext_buffers;
> +    avctx->width        = param->mfx.FrameInfo.CropW;
> +    avctx->height       = param->mfx.FrameInfo.CropH;
> +    avctx->coded_width  = param->mfx.FrameInfo.Width;
> +    avctx->coded_height = param->mfx.FrameInfo.Height;
> +    avctx->level        = param->mfx.CodecLevel;
> +    avctx->profile      = param->mfx.CodecProfile;
> +    avctx->field_order  = ff_qsv_map_picstruct(param->mfx.FrameInfo.PicStruct);
> +    avctx->pix_fmt      = ff_qsv_map_fourcc(param->mfx.FrameInfo.FourCC);
>  
> -    ret = MFXVideoDECODE_Init(q->session, &param);
> +    ret = MFXVideoDECODE_Init(q->session, param);
>      if (ret < 0)
>          return ff_qsv_print_error(avctx, ret,
>                                    "Error initializing the MFX video decoder");
>  
> -    q->frame_info = param.mfx.FrameInfo;
> +    q->frame_info = param->mfx.FrameInfo;
> +
> +    return 0;
> +}
> +
> +static int qsv_decode_header(AVCodecContext *avctx, QSVContext *q, AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam *param)
> +{
> +    int ret;
> +
> +    mfxBitstream bs = { { { 0 } } };
> +
> +    if (avpkt->size) {
> +        bs.Data       = avpkt->data;
> +        bs.DataLength = avpkt->size;
> +        bs.MaxLength  = bs.DataLength;
> +        bs.TimeStamp  = avpkt->pts;
> +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)
> +            bs.DataFlag   |= MFX_BITSTREAM_COMPLETE_FRAME;
> +    } else
> +        return AVERROR_INVALIDDATA;
> +
> +
> +    if(!q->session) {
> +        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);
> +    if (ret < 0)
> +        return ret;
> +
> +    param->mfx.CodecId = ret;
> +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, param);
> +    if (MFX_ERR_MORE_DATA == ret) {
> +       return AVERROR(EAGAIN);
> +    }
> +    if (ret < 0)
> +        return ff_qsv_print_error(avctx, ret,
> +                "Error decoding stream header");
>  
>      return 0;
>  }
> @@ -494,7 +517,11 @@ int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
>      uint8_t *dummy_data;
>      int dummy_size;
>      int ret;
> -    const AVPixFmtDescriptor *desc;
> +    mfxVideoParam param = { 0 };
> +    static enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,
> +                                              AV_PIX_FMT_NV12,
> +                                              AV_PIX_FMT_NONE };
> +

I am surprised to see 'static'. Why?

>  
>      if (!q->avctx_internal) {
>          q->avctx_internal = avcodec_alloc_context3(NULL);
> @@ -521,15 +548,13 @@ int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
>                       pkt->data, pkt->size, pkt->pts, pkt->dts,
>                       pkt->pos);
>  
> -    avctx->field_order  = q->parser->field_order;
>      /* TODO: flush delayed frames on reinit */
> -    if (q->parser->format       != q->orig_pix_fmt    ||
> -        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx->coded_width, 16) ||
> -        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx->coded_height, 16)) {
> -        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,
> -                                           AV_PIX_FMT_NONE,
> -                                           AV_PIX_FMT_NONE };
> -        enum AVPixelFormat qsv_format;
> +
> +    ret = qsv_decode_header(avctx, q, pkt, pix_fmts, &param);
> +
> +    if (ret >= 0 && (q->orig_pix_fmt != ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC) ||
> +        avctx->coded_width  != param.mfx.FrameInfo.Width ||
> +        avctx->coded_height != param.mfx.FrameInfo.Height)) {
>          AVPacket zero_pkt = {0};
>  
>          if (q->buffered_count) {
> @@ -538,45 +563,23 @@ int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
>              q->buffered_count--;
>              return qsv_decode(avctx, q, frame, got_frame, &zero_pkt);
>          }
> -
>          q->reinit_flag = 0;
>  
> -        qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q->fourcc);
> -        if (qsv_format < 0) {
> -            av_log(avctx, AV_LOG_ERROR,
> -                   "Decoding pixel format '%s' is not supported\n",
> -                   av_get_pix_fmt_name(q->parser->format));
> -            ret = AVERROR(ENOSYS);
> -            goto reinit_fail;
> -        }
> -
> -        q->orig_pix_fmt     = q->parser->format;
> -        avctx->pix_fmt      = pix_fmts[1] = qsv_format;
> -        avctx->width        = q->parser->width;
> -        avctx->height       = q->parser->height;
> -        avctx->coded_width  = FFALIGN(q->parser->coded_width, 16);
> -        avctx->coded_height = FFALIGN(q->parser->coded_height, 16);
> -        avctx->level        = q->avctx_internal->level;
> -        avctx->profile      = q->avctx_internal->profile;
> -
> -        ret = ff_get_format(avctx, pix_fmts);
> +        avctx->pix_fmt = pix_fmts[1]  = ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC);

Here you modify pix_fmts, which is declared static. If someone uses the
QSV decoder several times in parallel within the same application, they
might hit a race condition, no?
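
A minimal sketch of the declaration I would expect instead (the rest of
ff_qsv_process_data() from your patch staying as it is): just drop the
'static', so each call owns its own copy and concurrent decoder
instances cannot stomp on each other's pix_fmts[1]:

    /* non-static: local to this call of ff_qsv_process_data() */
    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,
                                       AV_PIX_FMT_NV12,
                                       AV_PIX_FMT_NONE };

Initializing three entries per call costs nothing, so I don't see what
the 'static' buys here.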

> +        avctx->coded_width  = param.mfx.FrameInfo.Width;
> +        avctx->coded_height = param.mfx.FrameInfo.Height;
> +        
> +        ret = qsv_decode_preinit(avctx, q, pix_fmts, &param);
>          if (ret < 0)
>              goto reinit_fail;
> +        q->initialized = 0;
> +    }
>  
> -        avctx->pix_fmt = ret;
> -
> -        desc = av_pix_fmt_desc_get(avctx->pix_fmt);
> -        if (!desc)
> -            goto reinit_fail;
> -
> -         if (desc->comp[0].depth > 8) {
> -            avctx->coded_width =  FFALIGN(q->parser->coded_width, 32);
> -            avctx->coded_height = FFALIGN(q->parser->coded_height, 32);
> -        }
> -
> -        ret = qsv_decode_init(avctx, q);
> +    if (!q->initialized) {
> +        ret = qsv_decode_init(avctx, q, &param);
>          if (ret < 0)
>              goto reinit_fail;
> +        q->initialized = 1;
>      }
>  
>      return qsv_decode(avctx, q, frame, got_frame, pkt);
> @@ -589,4 +592,5 @@ reinit_fail:
>  void ff_qsv_decode_flush(AVCodecContext *avctx, QSVContext *q)
>  {
>      q->orig_pix_fmt = AV_PIX_FMT_NONE;
> +    q->initialized = 0;
>  }
> diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h
> index 111536caba..4812fb2a6b 100644
> --- a/libavcodec/qsvdec.h
> +++ b/libavcodec/qsvdec.h
> @@ -63,6 +63,8 @@ typedef struct QSVContext {
>      uint32_t fourcc;
>      mfxFrameInfo frame_info;
>  
> +    int initialized;
> +
>      // options set by the caller
>      int async_depth;
>      int iopattern;

