[FFmpeg-devel] [PATCH v5 2/9] avcodec/vaapi_encode: introduce a base layer for vaapi encode

Thu Feb 22 11:53:35 EET 2024

Hi Mark. Thanks for your careful review.

>On 18/02/2024 08:45, tong1.wu-at-intel.com at ffmpeg.org wrote:
>> From: Tong Wu <tong1.wu at intel.com>
>>
>> Since VAAPI and future D3D12VA implementation may share the same dpb
>> logic and some other functions. A base layer encode context is introduced
>> as vaapi context's base and extract the related funtions to a common
>> file such as receive_packet, init, close, etc.
>>
>> Signed-off-by: Tong Wu <tong1.wu at intel.com>
>> ---
>>   libavcodec/Makefile             |   2 +-
>>   libavcodec/hw_base_encode.c     | 637 ++++++++++++++++++++++
>>   libavcodec/hw_base_encode.h     | 277 ++++++++++
>>   libavcodec/vaapi_encode.c       | 924 +++++++-------------------------
>>   libavcodec/vaapi_encode.h       | 229 +-------
>>   libavcodec/vaapi_encode_av1.c   |  73 +--
>>   libavcodec/vaapi_encode_h264.c  | 201 +++----
>>   libavcodec/vaapi_encode_h265.c  | 163 +++---
>>   libavcodec/vaapi_encode_mjpeg.c |  22 +-
>>   libavcodec/vaapi_encode_mpeg2.c |  53 +-
>>   libavcodec/vaapi_encode_vp8.c   |  28 +-
>>   libavcodec/vaapi_encode_vp9.c   |  70 +--
>>   12 files changed, 1427 insertions(+), 1252 deletions(-)
>>   create mode 100644 libavcodec/hw_base_encode.c
>>   create mode 100644 libavcodec/hw_base_encode.h
>>
>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>> index 470d7cb9b1..23946f6ea3 100644
>> --- a/libavcodec/Makefile
>> +++ b/libavcodec/Makefile
>> @@ -164,7 +164,7 @@ OBJS-$(CONFIG_STARTCODE)               += startcode.o
>>   OBJS-$(CONFIG_TEXTUREDSP)              += texturedsp.o
>>   OBJS-$(CONFIG_TEXTUREDSPENC)           += texturedspenc.o
>>   OBJS-$(CONFIG_TPELDSP)                 += tpeldsp.o
>> -OBJS-$(CONFIG_VAAPI_ENCODE)            += vaapi_encode.o
>> +OBJS-$(CONFIG_VAAPI_ENCODE)            += vaapi_encode.o
>hw_base_encode.o
>>   OBJS-$(CONFIG_AV1_AMF_ENCODER)         += amfenc_av1.o
>>   OBJS-$(CONFIG_VC1DSP)                  += vc1dsp.o
>>   OBJS-$(CONFIG_VIDEODSP)                += videodsp.o
>> diff --git a/libavcodec/hw_base_encode.c b/libavcodec/hw_base_encode.c
>> ...
>
>I'm going to trust that the contents of this file are only moved around with no
>functional changes.  If that isn't the case please do say.

You are absolutely right.

>
>> diff --git a/libavcodec/hw_base_encode.h b/libavcodec/hw_base_encode.h
>> new file mode 100644
>> index 0000000000..70982c97b2
>> --- /dev/null
>> +++ b/libavcodec/hw_base_encode.h
>> @@ -0,0 +1,277 @@
>> +/*
>> + * Copyright (c) 2024 Intel Corporation
>
>I would avoid adding this.  Most of these files have content mostly copied from
>other files which are not exclusively copyright by Intel Corporation.

Will delete.

>
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
>USA
>> + */
>> +
>> +#ifndef AVCODEC_HW_BASE_ENCODE_H
>> +#define AVCODEC_HW_BASE_ENCODE_H
>> +
>> +#include "libavutil/hwcontext.h"
>> +#include "libavutil/fifo.h"
>> +
>> +#include "avcodec.h"
>> +#include "hwconfig.h"
>
>This file doesn't do anything with hwconfig stuff?
>

Will delete.

>> +
>> +enum {
>> +    MAX_DPB_SIZE           = 16,
>> +    MAX_PICTURE_REFERENCES = 2,
>> +    MAX_REORDER_DELAY      = 16,
>> +    MAX_ASYNC_DEPTH        = 64,
>> +    MAX_REFERENCE_LIST_NUM = 2,
>> +};
>> +
>> +enum {
>> +    PICTURE_TYPE_IDR = 0,
>> +    PICTURE_TYPE_I   = 1,
>> +    PICTURE_TYPE_P   = 2,
>> +    PICTURE_TYPE_B   = 3,
>> +};
>> +
>> +enum {
>> +    // Codec supports controlling the subdivision of pictures into slices.
>> +    FLAG_SLICE_CONTROL         = 1 << 0,
>> +    // Codec only supports constant quality (no rate control).
>> +    FLAG_CONSTANT_QUALITY_ONLY = 1 << 1,
>> +    // Codec is intra-only.
>> +    FLAG_INTRA_ONLY            = 1 << 2,
>> +    // Codec supports B-pictures.
>> +    FLAG_B_PICTURES            = 1 << 3,
>> +    // Codec supports referencing B-pictures.
>> +    FLAG_B_PICTURE_REFERENCES  = 1 << 4,
>> +    // Codec supports non-IDR key pictures (that is, key pictures do
>> +    // not necessarily empty the DPB).
>> +    FLAG_NON_IDR_KEY_PICTURES  = 1 << 5,
>> +    // Codec output packet without timestamp delay, which means the
>> +    // output packet has same PTS and DTS.
>> +    FLAG_TIMESTAMP_NO_DELAY    = 1 << 6,
>> +};
>> +
>> +enum {
>> +    RC_MODE_AUTO,
>> +    RC_MODE_CQP,
>> +    RC_MODE_CBR,
>> +    RC_MODE_VBR,
>> +    RC_MODE_ICQ,
>
>I thought ICQ was an Intel name which leaked into libva?  This probably
>shouldn't be in a generic API, maybe something about constant-quality.
>
>> +    RC_MODE_QVBR,
>> +    RC_MODE_AVBR,
>
>More generally, I think you want to document what these modes are intended
>to be.  When they mapped directly to VAAPI then it was clear that the
>responsibility was "whatever libva gives you", but when there are multiple
>possibilities which do not use exactly the same options it is less clear.

I am going to remove this enumeration from the common header and remove 
ICQ and AVBR totally from D3D12 side as they are not supported yet.

>
>> +    RC_MODE_MAX = RC_MODE_AVBR,
>> +};
>> +
>> +typedef struct HWBaseEncodePicture {
>> +    struct HWBaseEncodePicture *next;
>> +
>> +    int64_t         display_order;
>> +    int64_t         encode_order;
>> +    int64_t         pts;
>> +    int64_t         duration;
>> +    int             force_idr;
>> +
>> +    void           *opaque;
>> +    AVBufferRef    *opaque_ref;
>> +
>> +    int             type;
>> +    int             b_depth;
>> +    int             encode_issued;
>> +    int             encode_complete;
>> +
>> +    AVFrame        *input_image;
>> +    AVFrame        *recon_image;
>> +
>> +    void           *priv_data;
>> +
>> +    // Whether this picture is a reference picture.
>> +    int             is_reference;
>> +
>> +    // The contents of the DPB after this picture has been decoded.
>> +    // This will contain the picture itself if it is a reference picture,
>> +    // but not if it isn't.
>> +    int                     nb_dpb_pics;
>> +    struct HWBaseEncodePicture *dpb[MAX_DPB_SIZE];
>> +    // The reference pictures used in decoding this picture. If they are
>> +    // used by later pictures they will also appear in the DPB. ref[0][] for
>> +    // previous reference frames. ref[1][] for future reference frames.
>> +    int                     nb_refs[MAX_REFERENCE_LIST_NUM];
>> +    struct HWBaseEncodePicture
>*refs[MAX_REFERENCE_LIST_NUM][MAX_PICTURE_REFERENCES];
>> +    // The previous reference picture in encode order.  Must be in at least
>> +    // one of the reference list and DPB list.
>> +    struct HWBaseEncodePicture *prev;
>> +    // Reference count for other pictures referring to this one through
>> +    // the above pointers, directly from incomplete pictures and indirectly
>> +    // through completed pictures.
>> +    int             ref_count[2];
>> +    int             ref_removed[2];
>> +} HWBaseEncodePicture;
>> +
>> +typedef struct HWEncodeType {
>> +    HWBaseEncodePicture * (*alloc)(AVCodecContext *avctx, AVFrame
>*frame);
>> +
>> +    int (*issue)(AVCodecContext *avctx, HWBaseEncodePicture *base_pic);
>
>The semantics of this are a bit blurred across the boundary of this call.  I think
>ideally you want the picture structure to be const here to make it clear?
>
>(Move the recon_image allocation and issued-flag setting out of the callee, in
>particular.)
>
>> +
>> +    int (*output)(AVCodecContext *avctx, HWBaseEncodePicture *base_pic,
>AVPacket *pkt);
>
>Feels like the picture structure should again be const for the same reason.
>
>> +
>> +    int (*free)(AVCodecContext *avctx, HWBaseEncodePicture *base_pic);
>
>The VAAPI implementation of this is definitely doing lots of things that should
>be in the base layer.
>
>> +}  HWEncodeType;
>
>I think the naming of this wants to be clear that they are operations on a single
>picture (either the type name or the fields).
>
>Some documentation of the precise semantics of the callbacks would definitely
>help as well to be sure that the split is correct.

Will do as suggested.

>
>> +
>> +typedef struct HWBaseEncodeContext {
>> +    const AVClass *class;
>> +
>> +    // Hardware-specific hooks.
>> +    const struct HWEncodeType *hw;
>> +
>> +    // Global options.
>> +
>> +    // Number of I frames between IDR frames.
>> +    int             idr_interval;
>> +
>> +    // Desired B frame reference depth.
>> +    int             desired_b_depth;
>> +
>> +    // Explicitly set RC mode (otherwise attempt to pick from
>> +    // available modes).
>> +    int             explicit_rc_mode;
>> +
>> +    // Explicitly-set QP, for use with the "qp" options.
>> +    // (Forces CQP mode when set, overriding everything else.)
>> +    int             explicit_qp;
>> +
>> +    // The required size of surfaces.  This is probably the input
>> +    // size (AVCodecContext.width|height) aligned up to whatever
>> +    // block size is required by the codec.
>> +    int             surface_width;
>> +    int             surface_height;
>> +
>> +    // The block size for slice calculations.
>> +    int             slice_block_width;
>> +    int             slice_block_height;
>> +
>> +    // RC quality level - meaning depends on codec and RC mode.
>> +    // In CQP mode this sets the fixed quantiser value.
>> +    int             rc_quality;
>> +
>> +    AVBufferRef    *device_ref;
>> +    AVHWDeviceContext *device;
>> +
>> +    // The hardware frame context containing the input frames.
>> +    AVBufferRef    *input_frames_ref;
>> +    AVHWFramesContext *input_frames;
>> +
>> +    // The hardware frame context containing the reconstructed frames.
>> +    AVBufferRef    *recon_frames_ref;
>> +    AVHWFramesContext *recon_frames;
>> +
>> +    // Current encoding window, in display (input) order.
>> +    HWBaseEncodePicture *pic_start, *pic_end;
>> +    // The next picture to use as the previous reference picture in
>> +    // encoding order. Order from small to large in encoding order.
>> +    HWBaseEncodePicture *next_prev[MAX_PICTURE_REFERENCES];
>> +    int                  nb_next_prev;
>> +
>> +    // Next input order index (display order).
>> +    int64_t         input_order;
>> +    // Number of frames that output is behind input.
>> +    int64_t         output_delay;
>> +    // Next encode order index.
>> +    int64_t         encode_order;
>> +    // Number of frames decode output will need to be delayed.
>> +    int64_t         decode_delay;
>> +    // Next output order index (in encode order).
>> +    int64_t         output_order;
>> +
>> +    // Timestamp handling.
>> +    int64_t         first_pts;
>> +    int64_t         dts_pts_diff;
>> +    int64_t         ts_ring[MAX_REORDER_DELAY * 3 +
>> +                            MAX_ASYNC_DEPTH];
>> +
>> +    // Frame type decision.
>> +    int gop_size;
>> +    int closed_gop;
>> +    int gop_per_idr;
>> +    int p_per_i;
>> +    int max_b_depth;
>> +    int b_per_p;
>> +    int force_idr;
>> +    int idr_counter;
>> +    int gop_counter;
>> +    int end_of_stream;
>> +    int p_to_gpb;
>> +
>> +    // Whether the driver supports ROI at all.
>> +    int             roi_allowed;
>> +
>> +    // The encoder does not support cropping information, so warn about
>> +    // it the first time we encounter any nonzero crop fields.
>> +    int             crop_warned;
>> +    // If the driver does not support ROI then warn the first time we
>> +    // encounter a frame with ROI side data.
>> +    int             roi_warned;
>> +
>> +    AVFrame         *frame;
>> +
>> +    // Whether the HW supports sync buffer function.
>> +    // If supported, encode_fifo/async_depth will be used together.
>> +    // Used for output buffer synchronization.
>> +    int             async_encode;
>> +
>> +    // Store buffered pic
>> +    AVFifo          *encode_fifo;
>> +    // Max number of frame buffered in encoder.
>> +    int             async_depth;
>> +
>> +    /** Tail data of a pic, now only used for av1 repeat frame header. */
>> +    AVPacket        *tail_pkt;
>> +} HWBaseEncodeContext;
>> +
>> +int ff_hw_base_encode_receive_packet(AVCodecContext *avctx, AVPacket
>*pkt);
>> +
>> +int ff_hw_base_encode_init(AVCodecContext *avctx);
>> +
>> +int ff_hw_base_encode_close(AVCodecContext *avctx);
>> +
>> +#define HW_BASE_ENCODE_COMMON_OPTIONS \
>> +    { "idr_interval", \
>> +      "Distance (in I-frames) between IDR frames", \
>> +      OFFSET(common.base.idr_interval), AV_OPT_TYPE_INT, \
>> +      { .i64 = 0 }, 0, INT_MAX, FLAGS }, \
>> +    { "b_depth", \
>> +      "Maximum B-frame reference depth", \
>> +      OFFSET(common.base.desired_b_depth), AV_OPT_TYPE_INT, \
>> +      { .i64 = 1 }, 1, INT_MAX, FLAGS }, \
>> +    { "async_depth", "Maximum processing parallelism. " \
>> +      "Increase this to improve single channel performance.", \
>> +      OFFSET(common.base.async_depth), AV_OPT_TYPE_INT, \
>> +      { .i64 = 2 }, 1, MAX_ASYNC_DEPTH, FLAGS }
>> +
>> +#define HW_BASE_ENCODE_RC_MODE(name, desc) \
>> +    { #name, desc, 0, AV_OPT_TYPE_CONST, { .i64 = RC_MODE_ ## name }, \
>> +      0, 0, FLAGS, "rc_mode" }
>> +#define HW_BASE_ENCODE_RC_OPTIONS \
>> +    { "rc_mode",\
>> +      "Set rate control mode", \
>> +      OFFSET(common.base.explicit_rc_mode), AV_OPT_TYPE_INT, \
>> +      { .i64 = RC_MODE_AUTO }, RC_MODE_AUTO, RC_MODE_MAX,
>FLAGS, .unit = "rc_mode" }, \
>> +    { "auto", "Choose mode automatically based on other parameters", \
>> +      0, AV_OPT_TYPE_CONST, { .i64 = RC_MODE_AUTO }, 0, 0, FLAGS, .unit =
>"rc_mode" }, \
>> +    HW_BASE_ENCODE_RC_MODE(CQP,  "Constant-quality"), \
>> +    HW_BASE_ENCODE_RC_MODE(CBR,  "Constant-bitrate"), \
>> +    HW_BASE_ENCODE_RC_MODE(VBR,  "Variable-bitrate"), \
>> +    HW_BASE_ENCODE_RC_MODE(ICQ,  "Intelligent constant-quality"), \
>> +    HW_BASE_ENCODE_RC_MODE(QVBR, "Quality-defined variable-bitrate"),
>\
>> +    HW_BASE_ENCODE_RC_MODE(AVBR, "Average variable-bitrate")
>
>This set of options is rather nasty because the set which makes sense depends
>on the API (see how your later D3D12 patch can't map some of them but has
>to deal with them anyway).  Maybe avoid extracting this to the common layer?
>

Sure I'll move it back to where it was.

Thanks,
Tong

>> +
>> +#endif /* AVCODEC_HW_BASE_ENCODE_H */
>> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
>> ...
>
>Thanks,
>
>- Mark
>_______________________________________________
>ffmpeg-devel mailing list
>ffmpeg-devel at ffmpeg.org
>https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
>To unsubscribe, visit link above, or email
>ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".