[FFmpeg-devel] [PATCH] avcodec/nvdec: Add support for decoding HEVC 4:4:4 content

Timo Rothenpieler timo at rothenpieler.org
Sun Oct 7 11:55:29 EEST 2018


On 07.10.2018 04:19, Philip Langdale wrote:
> The latest generation video decoder on the Turing chips supports
> decoding HEVC 4:4:4. Supporting this is relatively straight-forward;
> we need to account for the different chroma format and pick the
> right output and sw formats at the right times.
> 
> There was one bug which was the hard-coded assumption that the
> first chroma plane would be half-height; I fixed this to use the
> actual shift value on the plane.
> 
> The output formats ('2' and '3') are currently undocumented but
> appear to be YUV444P and YUV444P16 based on how they behave.
> ---
>   libavcodec/hevcdec.c |  2 ++
>   libavcodec/nvdec.c   | 43 +++++++++++++++++++++++++++++++++++--------
>   2 files changed, 37 insertions(+), 8 deletions(-)
> 
> diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
> index a3b5c8cb71..508e093ea3 100644
> --- a/libavcodec/hevcdec.c
> +++ b/libavcodec/hevcdec.c
> @@ -409,6 +409,8 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
>   #endif
>           break;
>       case AV_PIX_FMT_YUV420P12:
> +    case AV_PIX_FMT_YUV444P10:
> +    case AV_PIX_FMT_YUV444P12:
>   #if CONFIG_HEVC_NVDEC_HWACCEL
>           *fmt++ = AV_PIX_FMT_CUDA;
>   #endif
> diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
> index e779be3a45..7e5c1791ea 100644
> --- a/libavcodec/nvdec.c
> +++ b/libavcodec/nvdec.c
> @@ -34,6 +34,9 @@
>   #include "nvdec.h"
>   #include "internal.h"
>   
> +#define cudaVideoSurfaceFormat_YUV444P 2
> +#define cudaVideoSurfaceFormat_YUV444P16 3

This will probably collide once the headers add those values. I'm not
sure how to handle that properly, but they should at least use a
different naming scheme.

>   typedef struct NVDECDecoder {
>       CUvideodecoder decoder;
>   
> @@ -273,7 +276,8 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>   
>       CUVIDDECODECREATEINFO params = { 0 };
>   
> -    int cuvid_codec_type, cuvid_chroma_format;
> +    cudaVideoSurfaceFormat output_format;
> +    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
>       int ret = 0;
>   
>       sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
> @@ -291,6 +295,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>           av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
>           return AVERROR(ENOSYS);
>       }
> +    chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
>   
>       if (!avctx->hw_frames_ctx) {
>           ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA);
> @@ -298,6 +303,21 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>               return ret;
>       }
>   
> +    switch (sw_desc->comp[0].depth) {
> +    case 8:
> +        output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444P :
> +                                     cudaVideoSurfaceFormat_NV12;
> +        break;
> +    case 10:
> +    case 12:
> +        output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444P16 :
> +                                     cudaVideoSurfaceFormat_P016;
> +        break;
> +    default:
> +        av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
> +        return AVERROR(ENOSYS);
> +    }
> +
>       frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
>   
>       params.ulWidth             = avctx->coded_width;
> @@ -305,8 +325,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>       params.ulTargetWidth       = avctx->coded_width;
>       params.ulTargetHeight      = avctx->coded_height;
>       params.bitDepthMinus8      = sw_desc->comp[0].depth - 8;
> -    params.OutputFormat        = params.bitDepthMinus8 ?
> -                                 cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
> +    params.OutputFormat        = output_format;
>       params.CodecType           = cuvid_codec_type;
>       params.ChromaFormat        = cuvid_chroma_format;
>       params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
> @@ -388,6 +407,8 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
>       NVDECFrame        *cf = (NVDECFrame*)fdd->hwaccel_priv;
>       NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
>   
> +    AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;
> +
>       CUVIDPROCPARAMS vpp = { 0 };
>       NVDECFrame *unmap_data = NULL;
>   
> @@ -397,6 +418,7 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
>   
>       unsigned int pitch, i;
>       unsigned int offset = 0;
> +    int shift_h = 0, shift_v = 0;
>       int ret = 0;
>   
>       vpp.progressive_frame = 1;
> @@ -433,10 +455,11 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
>       unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
>       unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
>   
> +    av_pix_fmt_get_chroma_sub_sample(hwctx->sw_format, &shift_h, &shift_v);
>       for (i = 0; frame->linesize[i]; i++) {
>           frame->data[i] = (uint8_t*)(devptr + offset);
>           frame->linesize[i] = pitch;
> -        offset += pitch * (frame->height >> (i ? 1 : 0));
> +        offset += pitch * (frame->height >> (i ? shift_v : 0));
>       }
>   
>       goto finish;
> @@ -576,7 +599,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
>   {
>       AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
>       const AVPixFmtDescriptor *sw_desc;
> -    int cuvid_codec_type, cuvid_chroma_format;
> +    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
>   
>       sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
>       if (!sw_desc)
> @@ -593,6 +616,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
>           av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
>           return AVERROR(EINVAL);
>       }
> +    chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
>   
>       frames_ctx->format            = AV_PIX_FMT_CUDA;
>       frames_ctx->width             = (avctx->coded_width + 1) & ~1;
> @@ -605,15 +629,18 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
>       if (!frames_ctx->pool)
>           return AVERROR(ENOMEM);
>   
> +    // It is semantically incorrect to use AV_PIX_FMT_YUV444P16 for either the 10
> +    // or 12 bit case, but ffmpeg and nvidia disagree on which end the padding
> +    // bits go at. P16 is unambiguous and matches.
>       switch (sw_desc->comp[0].depth) {
>       case 8:
> -        frames_ctx->sw_format = AV_PIX_FMT_NV12;
> +        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
>           break;
>       case 10:
> -        frames_ctx->sw_format = AV_PIX_FMT_P010;
> +        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
>           break;
>       case 12:
> -        frames_ctx->sw_format = AV_PIX_FMT_P016;
> +        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
>           break;
>       default:
>           return AVERROR(EINVAL);
> 

rest LGTM

-------------- next part --------------
A non-text attachment was scrubbed...
Name: smime.p7s
Type: application/pkcs7-signature
Size: 4538 bytes
Desc: S/MIME Cryptographic Signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20181007/68b3d4b9/attachment.bin>


More information about the ffmpeg-devel mailing list