[FFmpeg-devel] [PATCH] lavc/vaapi_encode_h26[45]: respect "slices" option in h26[45] vaapi encoder.

Jun Zhao mypopydev at gmail.com
Wed Jun 14 04:34:48 EEST 2017



On 2017/6/14 7:41, Mark Thompson wrote:
> On 07/06/17 01:53, Jun Zhao wrote:
>> From 5c88956e36e7318cf1d1b7c41a9d4108fcf9d0a5 Mon Sep 17 00:00:00 2001
>> From: Jun Zhao <jun.zhao at intel.com>
>> Date: Fri, 12 May 2017 08:30:43 +0800
>> Subject: [PATCH] lavc/vaapi_encode_h26[45]: respect "slices" in h26[45] vaapi
>>  encoder.
>>
>> Enable multi-slice support in AVC/HEVC vaapi encoder.
>>
>> Signed-off-by: Wang, Yi A <yi.a.wang at intel.com>
>> Signed-off-by: Jun Zhao <jun.zhao at intel.com>
>> ---
> 
> I think this should be three patches:
> 
>>  libavcodec/vaapi_encode.c      | 36 ++++++++++++++++++++++++++++++++----
>>  libavcodec/vaapi_encode.h      |  9 +++++++--
> 
> (1) Change the slice/parameter buffers to be allocated dynamically.
> 
>>  libavcodec/vaapi_encode_h264.c | 24 ++++++++++++++++++------
> 
> (2) Respect slices option in H.264 encoder.
> 
>>  libavcodec/vaapi_encode_h265.c | 28 ++++++++++++++++++++++------
> 
> (3) Respect slices option in H.265 encoder.
> 
> 
> I'm not entirely sure that the first one is worth it - do you have a use-case for very large numbers of slices?  Given that VAAPI lacks the ability to limit slices by size (for H.323 / RTP packet limits), the other use I can think of is for parallelism or related conformance requirements, both of which tend to want relatively small numbers.
> 
For the first one, I don't have a user case for large number of slices, but I think dynamic allocate is a general solution if the driver/GPU HW change the max slices number support in GEN10, GEN11, ...

And I will split the patch as the comments.

> 
>>  4 files changed, 79 insertions(+), 18 deletions(-)
>>
>> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
>> index 7e9c00f51d..14a3fba7b1 100644
>> --- a/libavcodec/vaapi_encode.c
>> +++ b/libavcodec/vaapi_encode.c
>> @@ -36,13 +36,18 @@ static int vaapi_encode_make_packed_header(AVCodecContext *avctx,
>>      VAAPIEncodeContext *ctx = avctx->priv_data;
>>      VAStatus vas;
>>      VABufferID param_buffer, data_buffer;
>> +    VABufferID *tmp;
>>      VAEncPackedHeaderParameterBuffer params = {
>>          .type = type,
>>          .bit_length = bit_len,
>>          .has_emulation_bytes = 1,
>>      };
>>  
>> -    av_assert0(pic->nb_param_buffers + 2 <= MAX_PARAM_BUFFERS);
>> +    tmp = av_realloc_array(pic->param_buffers, sizeof(*tmp), (pic->nb_param_buffers + 2));
>> +    if (!tmp) {
>> +        return AVERROR(ENOMEM);
> 
> This failure case leaks the already-allocated buffers.  Unfortunately you can't use av_realloc_array().
> 
>> +    }
>> +    pic->param_buffers = tmp;
>>  
>>      vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
>>                           VAEncPackedHeaderParameterBufferType,
>> @@ -77,9 +82,14 @@ static int vaapi_encode_make_param_buffer(AVCodecContext *avctx,
>>  {
>>      VAAPIEncodeContext *ctx = avctx->priv_data;
>>      VAStatus vas;
>> +    VABufferID *tmp;
>>      VABufferID buffer;
>>  
>> -    av_assert0(pic->nb_param_buffers + 1 <= MAX_PARAM_BUFFERS);
>> +    tmp = av_realloc_array(pic->param_buffers, sizeof(*tmp), (pic->nb_param_buffers + 1));
>> +    if (!tmp) {
>> +        return AVERROR(ENOMEM);
> 
> Same.
> 
>> +    }
>> +    pic->param_buffers = tmp;
>>  
>>      vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
>>                           type, len, 1, data, &buffer);
>> @@ -122,6 +132,8 @@ static int vaapi_encode_wait(AVCodecContext *avctx,
>>      // Input is definitely finished with now.
>>      av_frame_free(&pic->input_image);
>>  
>> +    av_freep(&pic->param_buffers);
>> +
>>      pic->encode_complete = 1;
>>      return 0;
>>  }
>> @@ -313,7 +325,10 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
>>          }
>>      }
>>  
>> -    av_assert0(pic->nb_slices <= MAX_PICTURE_SLICES);
>> +    pic->slices = (VAAPIEncodeSlice **)av_malloc(sizeof(VAAPIEncodeSlice *) * pic->nb_slices);
>> +    if (pic->slices == NULL)
>> +        goto fail;
>> +
>>      for (i = 0; i < pic->nb_slices; i++) {
>>          slice = av_mallocz(sizeof(*slice));
>>          if (!slice) {
>> @@ -322,7 +337,6 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
>>          }
>>          slice->index = i;
>>          pic->slices[i] = slice;
>> -
> 
> Stray change?

Will clean

> 
>>          if (ctx->codec->slice_params_size > 0) {
>>              slice->codec_slice_params = av_mallocz(ctx->codec->slice_params_size);
>>              if (!slice->codec_slice_params) {
>> @@ -427,6 +441,8 @@ fail:
>>          vaDestroyBuffer(ctx->hwctx->display, pic->param_buffers[i]);
>>  fail_at_end:
>>      av_freep(&pic->codec_picture_params);
>> +    av_freep(&pic->param_buffers);
>> +    av_freep(&pic->slices);
>>      av_frame_free(&pic->recon_image);
>>      return err;
>>  }
>> @@ -542,6 +558,8 @@ static int vaapi_encode_free(AVCodecContext *avctx,
>>      av_frame_free(&pic->input_image);
>>      av_frame_free(&pic->recon_image);
>>  
>> +    av_freep(&pic->param_buffers);
>> +    av_freep(&pic->slices);
>>      // Output buffer should already be destroyed.
>>      av_assert0(pic->output_buffer == VA_INVALID_ID);
>>  
>> @@ -949,6 +967,7 @@ static av_cold int vaapi_encode_config_attributes(AVCodecContext *avctx)
>>          { VAConfigAttribRTFormat         },
>>          { VAConfigAttribRateControl      },
>>          { VAConfigAttribEncMaxRefFrames  },
>> +        { VAConfigAttribEncMaxSlices     },
>>          { VAConfigAttribEncPackedHeaders },
>>      };
>>  
>> @@ -1079,6 +1098,15 @@ static av_cold int vaapi_encode_config_attributes(AVCodecContext *avctx)
>>              }
>>          }
>>          break;
>> +        case VAConfigAttribEncMaxSlices:
>> +            if (avctx->slices > attr[i].value) {
>> +                av_log(avctx, AV_LOG_ERROR, "Slices per frame more than %#x "
>> +                       "is not supported.\n", attr[i].value);
>> +                err = AVERROR(EINVAL);
>> +                goto fail;
>> +            }
>> +            ctx->multi_slices_available = 1;
>> +            break;
> 
> This can also be VA_ATTRIB_NOT_SUPPORTED, which presumably implies one slice per frame only?  Unfortunately it is always that with the i965 driver for MPEG-2 despite the need for slices, so I'm not sure what you actually want to do there.
> 
>>          case VAConfigAttribEncPackedHeaders:
>>              if (ctx->va_packed_headers & ~attr[i].value) {
>>                  // This isn't fatal, but packed headers are always
>> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
>> index 0edf27e4cb..4afe4fa103 100644
>> --- a/libavcodec/vaapi_encode.h
>> +++ b/libavcodec/vaapi_encode.h
>> @@ -73,7 +73,7 @@ typedef struct VAAPIEncodePicture {
>>      VASurfaceID     recon_surface;
>>  
>>      int          nb_param_buffers;
>> -    VABufferID      param_buffers[MAX_PARAM_BUFFERS];
>> +    VABufferID      *param_buffers;
>>  
>>      AVBufferRef    *output_buffer_ref;
>>      VABufferID      output_buffer;
>> @@ -85,7 +85,10 @@ typedef struct VAAPIEncodePicture {
>>      struct VAAPIEncodePicture *refs[MAX_PICTURE_REFERENCES];
>>  
>>      int          nb_slices;
>> -    VAAPIEncodeSlice *slices[MAX_PICTURE_SLICES];
>> +    VAAPIEncodeSlice **slices;
>> +    int          slice_of_mbs;
>> +    int          slice_mod_mbs;
>> +    int          last_mb_index;
> 
> Macroblocks are not a generic concept - H.265 does not have them.  I think I'd prefer this calculated stuff to go in the private data of the individual codes rather than in the generic header (the generic code never touches it, after all).

Will use a private struct for this

> 
>>  } VAAPIEncodePicture;
>>  
>>  typedef struct VAAPIEncodeContext {
>> @@ -105,6 +108,8 @@ typedef struct VAAPIEncodeContext {
>>      // Supported packed headers (initially the desired set, modified
>>      // later to what is actually supported).
>>      unsigned int    va_packed_headers;
>> +    // Supported multi-slices per frame
>> +    int          multi_slices_available;
>>  
>>      // The required size of surfaces.  This is probably the input
>>      // size (AVCodecContext.width|height) aligned up to whatever
>> diff --git a/libavcodec/vaapi_encode_h264.c b/libavcodec/vaapi_encode_h264.c
>> index 92e29554ed..f325346433 100644
>> --- a/libavcodec/vaapi_encode_h264.c
>> +++ b/libavcodec/vaapi_encode_h264.c
>> @@ -1002,7 +1002,16 @@ static int vaapi_encode_h264_init_picture_params(AVCodecContext *avctx,
>>      vpic->pic_fields.bits.idr_pic_flag = (pic->type == PICTURE_TYPE_IDR);
>>      vpic->pic_fields.bits.reference_pic_flag = (pic->type != PICTURE_TYPE_B);
>>  
>> -    pic->nb_slices = 1;
>> +    if (ctx->multi_slices_available)
>> +        avctx->slices = av_clip(avctx->slices, 1, priv->mb_height);
>> +    else
>> +        avctx->slices = 1;
> 
> If the user requests slices they probably have some clear reason for doing so (e.g. to work with some old device which only accepts 1080p in four slices).  Do we really want to silently ignore that if we can't actually do it?

Will give a warning message when driver can't support multi-slices but user setting the "slices" > 1.

> 
>> +
>> +    pic->nb_slices = avctx->slices;
>> +
>> +    pic->slice_of_mbs = (priv->mb_width * priv->mb_height) / pic->nb_slices;
>> +    pic->slice_mod_mbs = (priv->mb_width * priv->mb_height) % pic->nb_slices;
>> +    pic->last_mb_index = 0;
>>  
>>      return 0;
>>  }
>> @@ -1052,15 +1061,18 @@ static int vaapi_encode_h264_init_slice_params(AVCodecContext *avctx,
>>          av_assert0(0 && "invalid picture type");
>>      }
>>  
>> -    // Only one slice per frame.
>> -    vslice->macroblock_address = 0;
>> -    vslice->num_macroblocks = priv->mb_width * priv->mb_height;
>> +    vslice->macroblock_address = pic->last_mb_index;
>> +    vslice->num_macroblocks = pic->slice_of_mbs + (pic->slice_mod_mbs > 0 ? 1 : 0);
> 
> I think it would be cuter if rounded properly rather than top-loading the rounding error.
> 
> That is, for N in [0, slices) use:
>     macroblock_address = N * total_macroblocks / slices;
>     num_macroblocks = (N + 1) * total_macroblocks / slices - macroblock_address;
> 
> That makes it cut in half nicely on, for example, four slices in a 7x6 image, rather than having a stray macroblock over the centre line.
> 
> (That is: 1111111  vs.  1111111
>           1111222       1112222
>           2222222       2222222
>           2333333       3333333
>           3333444       3334444
>           4444444       4444444 ).
> 

Will double-check this part

>> +    if (pic->slice_mod_mbs > 0)
>> +        pic->slice_mod_mbs --;
>> +    pic->last_mb_index += vslice->num_macroblocks;
>>  
>>      vslice->macroblock_info = VA_INVALID_ID;
>>  
>>      vslice->pic_parameter_set_id = vpic->pic_parameter_set_id;
>> -    vslice->idr_pic_id = priv->idr_pic_count++;
>> -
>> +    vslice->idr_pic_id = priv->idr_pic_count;
>> +    if (pic->last_mb_index == priv->mb_width * priv->mb_height)
>> +        priv->idr_pic_count ++;
>>      vslice->pic_order_cnt_lsb = (pic->display_order - priv->last_idr_frame) &
>>          ((1 << (4 + vseq->seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4)) - 1);
>>  
>> diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec/vaapi_encode_h265.c
>> index 6e008b7b9c..e930026184 100644
>> --- a/libavcodec/vaapi_encode_h265.c
>> +++ b/libavcodec/vaapi_encode_h265.c
>> @@ -1025,7 +1025,15 @@ static int vaapi_encode_h265_init_picture_params(AVCodecContext *avctx,
>>          av_assert0(0 && "invalid picture type");
>>      }
>>  
>> -    pic->nb_slices = 1;
>> +    if (ctx->multi_slices_available)
>> +        avctx->slices = av_clip(avctx->slices, 1, priv->ctu_height);
>> +    else
>> +        avctx->slices = 1;
>> +
>> +    pic->nb_slices = avctx->slices;
>> +    pic->slice_of_mbs = (priv->ctu_width * priv->ctu_height) / pic->nb_slices;
>> +    pic->slice_mod_mbs = (priv->ctu_width * priv->ctu_height) % pic->nb_slices;
>> +    pic->last_mb_index = 0;
> 
> They are CTUs, don't call them macroblocks.
> 
In split patch, will change this.

>>  
>>      return 0;
>>  }
>> @@ -1048,9 +1056,13 @@ static int vaapi_encode_h265_init_slice_params(AVCodecContext *avctx,
>>      pslice = slice->priv_data;
>>      mslice = &pslice->misc_slice_params;
>>  
>> -    // Currently we only support one slice per frame.
>> -    vslice->slice_segment_address = 0;
>> -    vslice->num_ctu_in_slice = priv->ctu_width * priv->ctu_height;
>> +    vslice->slice_segment_address = pic->last_mb_index;
>> +    mslice->slice_segment_address = pic->last_mb_index;
> 
> Duplication, oops.  Just remove the one in VAAPIEncodeH265MiscSliceParams, I think?

Yes, I think we need to remove the slice_segment_address in VAAPIEncodeH265MiscSliceParams

> 
>> +    vslice->num_ctu_in_slice = pic->slice_of_mbs + (pic->slice_mod_mbs > 0 ? 1 : 0);
> 
> Same point on rounding as above.
> 
>> +
>> +    if (pic->slice_mod_mbs > 0)
>> +        pic->slice_mod_mbs --;
>> +    pic->last_mb_index += vslice->num_ctu_in_slice;
>>  
>>      switch (pic->type) {
>>      case PICTURE_TYPE_IDR:
>> @@ -1104,9 +1116,13 @@ static int vaapi_encode_h265_init_slice_params(AVCodecContext *avctx,
>>      else
>>          vslice->slice_qp_delta = priv->fixed_qp_idr - vpic->pic_init_qp;
>>  
>> -    vslice->slice_fields.bits.last_slice_of_pic_flag = 1;
>> +    if (pic->last_mb_index == priv->ctu_width * priv->ctu_height)
>> +        vslice->slice_fields.bits.last_slice_of_pic_flag = 1;
>>  
>> -    mslice->first_slice_segment_in_pic_flag = 1;
>> +    if (vslice->slice_segment_address == 0)
>> +        mslice->first_slice_segment_in_pic_flag = 1;
>> +    else
>> +        mslice->first_slice_segment_in_pic_flag = 0;
>>  
>>      if (pic->type == PICTURE_TYPE_IDR) {
>>          // No reference pictures.
>> -- 
>> 2.11.0
>>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 


More information about the ffmpeg-devel mailing list