[FFmpeg-devel] [PATCH] avcodec/nvdec: Add support for decoding HEVC 4:4:4 content
Timo Rothenpieler
timo at rothenpieler.org
Sun Oct 7 11:55:29 EEST 2018
On 07.10.2018 04:19, Philip Langdale wrote:
> The latest generation video decoder on the Turing chips supports
> decoding HEVC 4:4:4. Supporting this is relatively straight-forward;
> we need to account for the different chroma format and pick the
> right output and sw formats at the right times.
>
> There was one bug which was the hard-coded assumption that the
> first chroma plane would be half-height; I fixed this to use the
> actual shift value on the plane.
>
> The output formats ('2' and '3') are currently undocumented but
> appear to be YUV444P and YUV444P16 based on how they behave.
> ---
> libavcodec/hevcdec.c | 2 ++
> libavcodec/nvdec.c | 43 +++++++++++++++++++++++++++++++++++--------
> 2 files changed, 37 insertions(+), 8 deletions(-)
>
> diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
> index a3b5c8cb71..508e093ea3 100644
> --- a/libavcodec/hevcdec.c
> +++ b/libavcodec/hevcdec.c
> @@ -409,6 +409,8 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
> #endif
> break;
> case AV_PIX_FMT_YUV420P12:
> + case AV_PIX_FMT_YUV444P10:
> + case AV_PIX_FMT_YUV444P12:
> #if CONFIG_HEVC_NVDEC_HWACCEL
> *fmt++ = AV_PIX_FMT_CUDA;
> #endif
> diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
> index e779be3a45..7e5c1791ea 100644
> --- a/libavcodec/nvdec.c
> +++ b/libavcodec/nvdec.c
> @@ -34,6 +34,9 @@
> #include "nvdec.h"
> #include "internal.h"
>
> +#define cudaVideoSurfaceFormat_YUV444P 2
> +#define cudaVideoSurfaceFormat_YUV444P16 3
These defines will probably collide once the official headers add those
values. I'm not sure how to handle that properly, but the defines should
at least use a different naming scheme.
> typedef struct NVDECDecoder {
> CUvideodecoder decoder;
>
> @@ -273,7 +276,8 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>
> CUVIDDECODECREATEINFO params = { 0 };
>
> - int cuvid_codec_type, cuvid_chroma_format;
> + cudaVideoSurfaceFormat output_format;
> + int cuvid_codec_type, cuvid_chroma_format, chroma_444;
> int ret = 0;
>
> sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
> @@ -291,6 +295,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
> av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
> return AVERROR(ENOSYS);
> }
> + chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
>
> if (!avctx->hw_frames_ctx) {
> ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA);
> @@ -298,6 +303,21 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
> return ret;
> }
>
> + switch (sw_desc->comp[0].depth) {
> + case 8:
> + output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444P :
> + cudaVideoSurfaceFormat_NV12;
> + break;
> + case 10:
> + case 12:
> + output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444P16 :
> + cudaVideoSurfaceFormat_P016;
> + break;
> + default:
> + av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
> + return AVERROR(ENOSYS);
> + }
> +
> frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
>
> params.ulWidth = avctx->coded_width;
> @@ -305,8 +325,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
> params.ulTargetWidth = avctx->coded_width;
> params.ulTargetHeight = avctx->coded_height;
> params.bitDepthMinus8 = sw_desc->comp[0].depth - 8;
> - params.OutputFormat = params.bitDepthMinus8 ?
> - cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
> + params.OutputFormat = output_format;
> params.CodecType = cuvid_codec_type;
> params.ChromaFormat = cuvid_chroma_format;
> params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
> @@ -388,6 +407,8 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
> NVDECFrame *cf = (NVDECFrame*)fdd->hwaccel_priv;
> NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
>
> + AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;
> +
> CUVIDPROCPARAMS vpp = { 0 };
> NVDECFrame *unmap_data = NULL;
>
> @@ -397,6 +418,7 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
>
> unsigned int pitch, i;
> unsigned int offset = 0;
> + int shift_h = 0, shift_v = 0;
> int ret = 0;
>
> vpp.progressive_frame = 1;
> @@ -433,10 +455,11 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
> unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
> unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
>
> + av_pix_fmt_get_chroma_sub_sample(hwctx->sw_format, &shift_h, &shift_v);
> for (i = 0; frame->linesize[i]; i++) {
> frame->data[i] = (uint8_t*)(devptr + offset);
> frame->linesize[i] = pitch;
> - offset += pitch * (frame->height >> (i ? 1 : 0));
> + offset += pitch * (frame->height >> (i ? shift_v : 0));
> }
>
> goto finish;
> @@ -576,7 +599,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
> {
> AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
> const AVPixFmtDescriptor *sw_desc;
> - int cuvid_codec_type, cuvid_chroma_format;
> + int cuvid_codec_type, cuvid_chroma_format, chroma_444;
>
> sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
> if (!sw_desc)
> @@ -593,6 +616,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
> av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
> return AVERROR(EINVAL);
> }
> + chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
>
> frames_ctx->format = AV_PIX_FMT_CUDA;
> frames_ctx->width = (avctx->coded_width + 1) & ~1;
> @@ -605,15 +629,18 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
> if (!frames_ctx->pool)
> return AVERROR(ENOMEM);
>
> + // It is semantically incorrect to use AV_PIX_FMT_YUV444P16 for either the 10
> + // or 12 bit case, but ffmpeg and nvidia disagree on which end the padding
> + // bits go at. P16 is unambiguous and matches.
> switch (sw_desc->comp[0].depth) {
> case 8:
> - frames_ctx->sw_format = AV_PIX_FMT_NV12;
> + frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
> break;
> case 10:
> - frames_ctx->sw_format = AV_PIX_FMT_P010;
> + frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
> break;
> case 12:
> - frames_ctx->sw_format = AV_PIX_FMT_P016;
> + frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
> break;
> default:
> return AVERROR(EINVAL);
>
rest LGTM
-------------- next part --------------
A non-text attachment was scrubbed...
Name: smime.p7s
Type: application/pkcs7-signature
Size: 4538 bytes
Desc: S/MIME Cryptographic Signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20181007/68b3d4b9/attachment.bin>
More information about the ffmpeg-devel
mailing list