[FFmpeg-devel] [PATCH v5 2/9] avcodec/vaapi_encode: introduce a base layer for vaapi encode
Wu, Tong1
tong1.wu at intel.com
Thu Feb 22 11:53:35 EET 2024
Hi Mark. Thanks for your careful review.
>On 18/02/2024 08:45, tong1.wu-at-intel.com at ffmpeg.org wrote:
>> From: Tong Wu <tong1.wu at intel.com>
>>
>> Since VAAPI and future D3D12VA implementation may share the same dpb
>> logic and some other functions. A base layer encode context is introduced
>> as vaapi context's base and extract the related funtions to a common
>> file such as receive_packet, init, close, etc.
>>
>> Signed-off-by: Tong Wu <tong1.wu at intel.com>
>> ---
>> libavcodec/Makefile | 2 +-
>> libavcodec/hw_base_encode.c | 637 ++++++++++++++++++++++
>> libavcodec/hw_base_encode.h | 277 ++++++++++
>> libavcodec/vaapi_encode.c | 924 +++++++-------------------------
>> libavcodec/vaapi_encode.h | 229 +-------
>> libavcodec/vaapi_encode_av1.c | 73 +--
>> libavcodec/vaapi_encode_h264.c | 201 +++----
>> libavcodec/vaapi_encode_h265.c | 163 +++---
>> libavcodec/vaapi_encode_mjpeg.c | 22 +-
>> libavcodec/vaapi_encode_mpeg2.c | 53 +-
>> libavcodec/vaapi_encode_vp8.c | 28 +-
>> libavcodec/vaapi_encode_vp9.c | 70 +--
>> 12 files changed, 1427 insertions(+), 1252 deletions(-)
>> create mode 100644 libavcodec/hw_base_encode.c
>> create mode 100644 libavcodec/hw_base_encode.h
>>
>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>> index 470d7cb9b1..23946f6ea3 100644
>> --- a/libavcodec/Makefile
>> +++ b/libavcodec/Makefile
>> @@ -164,7 +164,7 @@ OBJS-$(CONFIG_STARTCODE) += startcode.o
>> OBJS-$(CONFIG_TEXTUREDSP) += texturedsp.o
>> OBJS-$(CONFIG_TEXTUREDSPENC) += texturedspenc.o
>> OBJS-$(CONFIG_TPELDSP) += tpeldsp.o
>> -OBJS-$(CONFIG_VAAPI_ENCODE) += vaapi_encode.o
>> +OBJS-$(CONFIG_VAAPI_ENCODE) += vaapi_encode.o
>hw_base_encode.o
>> OBJS-$(CONFIG_AV1_AMF_ENCODER) += amfenc_av1.o
>> OBJS-$(CONFIG_VC1DSP) += vc1dsp.o
>> OBJS-$(CONFIG_VIDEODSP) += videodsp.o
>> diff --git a/libavcodec/hw_base_encode.c b/libavcodec/hw_base_encode.c
>> ...
>
>I'm going to trust that the contents of this file are only moved around with no
>functional changes. If that isn't the case please do say.
You are absolutely right.
>
>> diff --git a/libavcodec/hw_base_encode.h b/libavcodec/hw_base_encode.h
>> new file mode 100644
>> index 0000000000..70982c97b2
>> --- /dev/null
>> +++ b/libavcodec/hw_base_encode.h
>> @@ -0,0 +1,277 @@
>> +/*
>> + * Copyright (c) 2024 Intel Corporation
>
>I would avoid adding this. Most of these files have content mostly copied from
>other files which are not exclusively copyright by Intel Corporation.
Will delete.
>
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
>USA
>> + */
>> +
>> +#ifndef AVCODEC_HW_BASE_ENCODE_H
>> +#define AVCODEC_HW_BASE_ENCODE_H
>> +
>> +#include "libavutil/hwcontext.h"
>> +#include "libavutil/fifo.h"
>> +
>> +#include "avcodec.h"
>> +#include "hwconfig.h"
>
>This file doesn't do anything with hwconfig stuff?
>
Will delete.
>> +
>> +enum {
>> + MAX_DPB_SIZE = 16,
>> + MAX_PICTURE_REFERENCES = 2,
>> + MAX_REORDER_DELAY = 16,
>> + MAX_ASYNC_DEPTH = 64,
>> + MAX_REFERENCE_LIST_NUM = 2,
>> +};
>> +
>> +enum {
>> + PICTURE_TYPE_IDR = 0,
>> + PICTURE_TYPE_I = 1,
>> + PICTURE_TYPE_P = 2,
>> + PICTURE_TYPE_B = 3,
>> +};
>> +
>> +enum {
>> + // Codec supports controlling the subdivision of pictures into slices.
>> + FLAG_SLICE_CONTROL = 1 << 0,
>> + // Codec only supports constant quality (no rate control).
>> + FLAG_CONSTANT_QUALITY_ONLY = 1 << 1,
>> + // Codec is intra-only.
>> + FLAG_INTRA_ONLY = 1 << 2,
>> + // Codec supports B-pictures.
>> + FLAG_B_PICTURES = 1 << 3,
>> + // Codec supports referencing B-pictures.
>> + FLAG_B_PICTURE_REFERENCES = 1 << 4,
>> + // Codec supports non-IDR key pictures (that is, key pictures do
>> + // not necessarily empty the DPB).
>> + FLAG_NON_IDR_KEY_PICTURES = 1 << 5,
>> + // Codec output packet without timestamp delay, which means the
>> + // output packet has same PTS and DTS.
>> + FLAG_TIMESTAMP_NO_DELAY = 1 << 6,
>> +};
>> +
>> +enum {
>> + RC_MODE_AUTO,
>> + RC_MODE_CQP,
>> + RC_MODE_CBR,
>> + RC_MODE_VBR,
>> + RC_MODE_ICQ,
>
>I thought ICQ was an Intel name which leaked into libva? This probably
>shouldn't be in a generic API, maybe something about constant-quality.
>
>> + RC_MODE_QVBR,
>> + RC_MODE_AVBR,
>
>More generally, I think you want to document what these modes are intended
>to be. When they mapped directly to VAAPI then it was clear that the
>responsibility was "whatever libva gives you", but when there are multiple
>possibilities which do not use exactly the same options it is less clear.
I am going to remove this enumeration from the common header and remove
ICQ and AVBR totally from D3D12 side as they are not supported yet.
>
>> + RC_MODE_MAX = RC_MODE_AVBR,
>> +};
>> +
>> +typedef struct HWBaseEncodePicture {
>> + struct HWBaseEncodePicture *next;
>> +
>> + int64_t display_order;
>> + int64_t encode_order;
>> + int64_t pts;
>> + int64_t duration;
>> + int force_idr;
>> +
>> + void *opaque;
>> + AVBufferRef *opaque_ref;
>> +
>> + int type;
>> + int b_depth;
>> + int encode_issued;
>> + int encode_complete;
>> +
>> + AVFrame *input_image;
>> + AVFrame *recon_image;
>> +
>> + void *priv_data;
>> +
>> + // Whether this picture is a reference picture.
>> + int is_reference;
>> +
>> + // The contents of the DPB after this picture has been decoded.
>> + // This will contain the picture itself if it is a reference picture,
>> + // but not if it isn't.
>> + int nb_dpb_pics;
>> + struct HWBaseEncodePicture *dpb[MAX_DPB_SIZE];
>> + // The reference pictures used in decoding this picture. If they are
>> + // used by later pictures they will also appear in the DPB. ref[0][] for
>> + // previous reference frames. ref[1][] for future reference frames.
>> + int nb_refs[MAX_REFERENCE_LIST_NUM];
>> + struct HWBaseEncodePicture
>*refs[MAX_REFERENCE_LIST_NUM][MAX_PICTURE_REFERENCES];
>> + // The previous reference picture in encode order. Must be in at least
>> + // one of the reference list and DPB list.
>> + struct HWBaseEncodePicture *prev;
>> + // Reference count for other pictures referring to this one through
>> + // the above pointers, directly from incomplete pictures and indirectly
>> + // through completed pictures.
>> + int ref_count[2];
>> + int ref_removed[2];
>> +} HWBaseEncodePicture;
>> +
>> +typedef struct HWEncodeType {
>> + HWBaseEncodePicture * (*alloc)(AVCodecContext *avctx, AVFrame
>*frame);
>> +
>> + int (*issue)(AVCodecContext *avctx, HWBaseEncodePicture *base_pic);
>
>The semantics of this are a bit blurred across the boundary of this call. I think
>ideally you want the picture structure to be const here to make it clear?
>
>(Move the recon_image allocation and issued-flag setting out of the callee, in
>particular.)
>
>> +
>> + int (*output)(AVCodecContext *avctx, HWBaseEncodePicture *base_pic,
>AVPacket *pkt);
>
>Feels like the picture structure should again be const for the same reason.
>
>> +
>> + int (*free)(AVCodecContext *avctx, HWBaseEncodePicture *base_pic);
>
>The VAAPI implementation of this is definitely doing lots of things that should
>be in the base layer.
>
>> +} HWEncodeType;
>
>I think the naming of this wants to be clear that they are operations on a single
>picture (either the type name or the fields).
>
>Some documentation of the precise semantics of the callbacks would definitely
>help as well to be sure that the split is correct.
Will do as suggested.
>
>> +
>> +typedef struct HWBaseEncodeContext {
>> + const AVClass *class;
>> +
>> + // Hardware-specific hooks.
>> + const struct HWEncodeType *hw;
>> +
>> + // Global options.
>> +
>> + // Number of I frames between IDR frames.
>> + int idr_interval;
>> +
>> + // Desired B frame reference depth.
>> + int desired_b_depth;
>> +
>> + // Explicitly set RC mode (otherwise attempt to pick from
>> + // available modes).
>> + int explicit_rc_mode;
>> +
>> + // Explicitly-set QP, for use with the "qp" options.
>> + // (Forces CQP mode when set, overriding everything else.)
>> + int explicit_qp;
>> +
>> + // The required size of surfaces. This is probably the input
>> + // size (AVCodecContext.width|height) aligned up to whatever
>> + // block size is required by the codec.
>> + int surface_width;
>> + int surface_height;
>> +
>> + // The block size for slice calculations.
>> + int slice_block_width;
>> + int slice_block_height;
>> +
>> + // RC quality level - meaning depends on codec and RC mode.
>> + // In CQP mode this sets the fixed quantiser value.
>> + int rc_quality;
>> +
>> + AVBufferRef *device_ref;
>> + AVHWDeviceContext *device;
>> +
>> + // The hardware frame context containing the input frames.
>> + AVBufferRef *input_frames_ref;
>> + AVHWFramesContext *input_frames;
>> +
>> + // The hardware frame context containing the reconstructed frames.
>> + AVBufferRef *recon_frames_ref;
>> + AVHWFramesContext *recon_frames;
>> +
>> + // Current encoding window, in display (input) order.
>> + HWBaseEncodePicture *pic_start, *pic_end;
>> + // The next picture to use as the previous reference picture in
>> + // encoding order. Order from small to large in encoding order.
>> + HWBaseEncodePicture *next_prev[MAX_PICTURE_REFERENCES];
>> + int nb_next_prev;
>> +
>> + // Next input order index (display order).
>> + int64_t input_order;
>> + // Number of frames that output is behind input.
>> + int64_t output_delay;
>> + // Next encode order index.
>> + int64_t encode_order;
>> + // Number of frames decode output will need to be delayed.
>> + int64_t decode_delay;
>> + // Next output order index (in encode order).
>> + int64_t output_order;
>> +
>> + // Timestamp handling.
>> + int64_t first_pts;
>> + int64_t dts_pts_diff;
>> + int64_t ts_ring[MAX_REORDER_DELAY * 3 +
>> + MAX_ASYNC_DEPTH];
>> +
>> + // Frame type decision.
>> + int gop_size;
>> + int closed_gop;
>> + int gop_per_idr;
>> + int p_per_i;
>> + int max_b_depth;
>> + int b_per_p;
>> + int force_idr;
>> + int idr_counter;
>> + int gop_counter;
>> + int end_of_stream;
>> + int p_to_gpb;
>> +
>> + // Whether the driver supports ROI at all.
>> + int roi_allowed;
>> +
>> + // The encoder does not support cropping information, so warn about
>> + // it the first time we encounter any nonzero crop fields.
>> + int crop_warned;
>> + // If the driver does not support ROI then warn the first time we
>> + // encounter a frame with ROI side data.
>> + int roi_warned;
>> +
>> + AVFrame *frame;
>> +
>> + // Whether the HW supports sync buffer function.
>> + // If supported, encode_fifo/async_depth will be used together.
>> + // Used for output buffer synchronization.
>> + int async_encode;
>> +
>> + // Store buffered pic
>> + AVFifo *encode_fifo;
>> + // Max number of frame buffered in encoder.
>> + int async_depth;
>> +
>> + /** Tail data of a pic, now only used for av1 repeat frame header. */
>> + AVPacket *tail_pkt;
>> +} HWBaseEncodeContext;
>> +
>> +int ff_hw_base_encode_receive_packet(AVCodecContext *avctx, AVPacket
>*pkt);
>> +
>> +int ff_hw_base_encode_init(AVCodecContext *avctx);
>> +
>> +int ff_hw_base_encode_close(AVCodecContext *avctx);
>> +
>> +#define HW_BASE_ENCODE_COMMON_OPTIONS \
>> + { "idr_interval", \
>> + "Distance (in I-frames) between IDR frames", \
>> + OFFSET(common.base.idr_interval), AV_OPT_TYPE_INT, \
>> + { .i64 = 0 }, 0, INT_MAX, FLAGS }, \
>> + { "b_depth", \
>> + "Maximum B-frame reference depth", \
>> + OFFSET(common.base.desired_b_depth), AV_OPT_TYPE_INT, \
>> + { .i64 = 1 }, 1, INT_MAX, FLAGS }, \
>> + { "async_depth", "Maximum processing parallelism. " \
>> + "Increase this to improve single channel performance.", \
>> + OFFSET(common.base.async_depth), AV_OPT_TYPE_INT, \
>> + { .i64 = 2 }, 1, MAX_ASYNC_DEPTH, FLAGS }
>> +
>> +#define HW_BASE_ENCODE_RC_MODE(name, desc) \
>> + { #name, desc, 0, AV_OPT_TYPE_CONST, { .i64 = RC_MODE_ ## name }, \
>> + 0, 0, FLAGS, "rc_mode" }
>> +#define HW_BASE_ENCODE_RC_OPTIONS \
>> + { "rc_mode",\
>> + "Set rate control mode", \
>> + OFFSET(common.base.explicit_rc_mode), AV_OPT_TYPE_INT, \
>> + { .i64 = RC_MODE_AUTO }, RC_MODE_AUTO, RC_MODE_MAX,
>FLAGS, .unit = "rc_mode" }, \
>> + { "auto", "Choose mode automatically based on other parameters", \
>> + 0, AV_OPT_TYPE_CONST, { .i64 = RC_MODE_AUTO }, 0, 0, FLAGS, .unit =
>"rc_mode" }, \
>> + HW_BASE_ENCODE_RC_MODE(CQP, "Constant-quality"), \
>> + HW_BASE_ENCODE_RC_MODE(CBR, "Constant-bitrate"), \
>> + HW_BASE_ENCODE_RC_MODE(VBR, "Variable-bitrate"), \
>> + HW_BASE_ENCODE_RC_MODE(ICQ, "Intelligent constant-quality"), \
>> + HW_BASE_ENCODE_RC_MODE(QVBR, "Quality-defined variable-bitrate"),
>\
>> + HW_BASE_ENCODE_RC_MODE(AVBR, "Average variable-bitrate")
>
>This set of options is rather nasty because the set which makes sense depends
>on the API (see how your later D3D12 patch can't map some of them but has
>to deal with them anyway). Maybe avoid extracting this to the common layer?
>
Sure I'll move it back to where it was.
Thanks,
Tong
>> +
>> +#endif /* AVCODEC_HW_BASE_ENCODE_H */
>> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
>> ...
>
>Thanks,
>
>- Mark
>_______________________________________________
>ffmpeg-devel mailing list
>ffmpeg-devel at ffmpeg.org
>https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
>To unsubscribe, visit link above, or email
>ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
More information about the ffmpeg-devel
mailing list