[FFmpeg-devel] [PATCH v2 2/5] ffmpeg: hwaccel helper for VAAPI decode

Sun Jan 17 23:46:59 CET 2016

From 845be894e8e0e2a966b6e51229cad1aadca36704 Mon Sep 17 00:00:00 2001
From: Mark Thompson <mrt at jkqxz.net>
Date: Sun, 17 Jan 2016 22:13:45 +0000
Subject: [PATCH 2/5] ffmpeg: hwaccel helper for VAAPI decode

---
  Makefile       |   1 +
  ffmpeg.h       |   2 +
  ffmpeg_opt.c   |   3 +
  ffmpeg_vaapi.c | 538 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  4 files changed, 544 insertions(+)
  create mode 100644 ffmpeg_vaapi.c

diff --git a/Makefile b/Makefile
index 7836a20..be1d2ca 100644
--- a/Makefile
+++ b/Makefile
@@ -36,6 +36,7 @@ OBJS-ffmpeg-$(CONFIG_VDA)     += ffmpeg_videotoolbox.o
  endif
  OBJS-ffmpeg-$(CONFIG_VIDEOTOOLBOX) += ffmpeg_videotoolbox.o
  OBJS-ffmpeg-$(CONFIG_LIBMFX)  += ffmpeg_qsv.o
+OBJS-ffmpeg-$(CONFIG_VAAPI)   += ffmpeg_vaapi.o
  OBJS-ffserver                 += ffserver_config.o

  TESTTOOLS   = audiogen videogen rotozoom tiny_psnr tiny_ssim base64
diff --git a/ffmpeg.h b/ffmpeg.h
index 20322b0..d7313c3 100644
--- a/ffmpeg.h
+++ b/ffmpeg.h
@@ -65,6 +65,7 @@ enum HWAccelID {
      HWACCEL_VDA,
      HWACCEL_VIDEOTOOLBOX,
      HWACCEL_QSV,
+    HWACCEL_VAAPI,
  };

  typedef struct HWAccel {
@@ -577,5 +578,6 @@ int vda_init(AVCodecContext *s);
  int videotoolbox_init(AVCodecContext *s);
  int qsv_init(AVCodecContext *s);
  int qsv_transcode_init(OutputStream *ost);
+int vaapi_decode_init(AVCodecContext *s);

  #endif /* FFMPEG_H */
diff --git a/ffmpeg_opt.c b/ffmpeg_opt.c
index 9b341cf..394f2cb 100644
--- a/ffmpeg_opt.c
+++ b/ffmpeg_opt.c
@@ -82,6 +82,9 @@ const HWAccel hwaccels[] = {
  #if CONFIG_LIBMFX
      { "qsv",   qsv_init,   HWACCEL_QSV,   AV_PIX_FMT_QSV },
  #endif
+#if CONFIG_VAAPI
+    { "vaapi", vaapi_decode_init, HWACCEL_VAAPI, AV_PIX_FMT_VAAPI },
+#endif
      { 0 },
  };

diff --git a/ffmpeg_vaapi.c b/ffmpeg_vaapi.c
new file mode 100644
index 0000000..279f531
--- /dev/null
+++ b/ffmpeg_vaapi.c
@@ -0,0 +1,538 @@
+/*
+ * VAAPI helper for hardware-accelerated decoding.
+ *
+ * Copyright (C) 2016 Mark Thompson <mrt at jkqxz.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <va/va.h>
+
+#include "ffmpeg.h"
+
+#include "libavutil/avassert.h"
+#include "libavutil/avconfig.h"
+#include "libavutil/buffer.h"
+#include "libavutil/frame.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/pixfmt.h"
+#include "libavutil/vaapi.h"
+
+#include "libavcodec/vaapi.h"
+
+
+static const AVClass vaapi_class = {
+    .class_name = "VAAPI",
+    .item_name  = av_default_item_name,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+
+#define DEFAULT_SURFACES 20
+
+typedef struct VAAPIDecoderContext {
+    const AVClass *class;
+
+    AVVAAPIInstance va_instance;
+    AVVAAPIPipelineConfig config;
+    AVVAAPIPipelineContext codec;
+    AVVAAPISurfaceConfig output;
+
+    int codec_initialised;
+
+    AVFrame output_frame;
+
+    struct vaapi_context hwaccel_context;
+} VAAPIDecoderContext;
+
+
+static int vaapi_get_buffer(AVCodecContext *s, AVFrame *frame, int flags)
+{
+    InputStream *ist = s->opaque;
+    VAAPIDecoderContext *ctx = ist->hwaccel_ctx;
+    int err;
+
+    av_assert0(frame->format == AV_PIX_FMT_VAAPI);
+
+    av_vaapi_instance_lock(&ctx->va_instance);
+
+    err = av_vaapi_get_output_surface(&ctx->codec, frame);
+
+    av_vaapi_instance_unlock(&ctx->va_instance);
+
+    return err;
+}
+
+static int vaapi_retrieve_data(AVCodecContext *avctx, AVFrame *input_frame)
+{
+    InputStream *ist = avctx->opaque;
+    VAAPIDecoderContext *ctx = ist->hwaccel_ctx;
+    AVVAAPISurfaceConfig *output = &ctx->output;
+    AVVAAPISurface *surface;
+    AVFrame *output_frame;
+    int err, copying;
+
+    surface = (AVVAAPISurface*)input_frame->buf[0]->data;
+    av_log(ctx, AV_LOG_DEBUG, "Retrieve data from surface %#x (format %#x).\n",
+           surface->id, output->av_format);
+
+    av_vaapi_instance_lock(&ctx->va_instance);
+
+    if(output->av_format == AV_PIX_FMT_VAAPI) {
+        copying = 0;
+        av_log(ctx, AV_LOG_VERBOSE, "Surface %#x retrieved without copy.\n",
+               surface->id);
+
+    } else {
+        err = av_vaapi_map_surface(surface, 1);
+        if(err) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to map surface %#x.",
+                   surface->id);
+            goto fail;
+        }
+
+        copying = 1;
+        av_log(ctx, AV_LOG_VERBOSE, "Surface %#x mapped: image %#x data %p.\n",
+               surface->id, surface->image.image_id, surface->mapped_address);
+    }
+
+    // The actual frame need not fill the surface.
+    av_assert0(input_frame->width  <= output->width);
+    av_assert0(input_frame->height <= output->height);
+
+    output_frame = &ctx->output_frame;
+    output_frame->width  = input_frame->width;
+    output_frame->height = input_frame->height;
+    output_frame->format = output->av_format;
+
+    if(copying) {
+        err = av_frame_get_buffer(output_frame, 32);
+        if(err < 0) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to get output buffer: %d (%s).\n",
+                   err, av_err2str(err));
+            goto fail_unmap;
+        }
+
+        err = av_vaapi_copy_from_surface(output_frame, surface);
+        if(err < 0) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to copy frame data: %d (%s).\n",
+                   err, av_err2str(err));
+            goto fail_unmap;
+        }
+
+    } else {
+        // Just copy the hidden ID field.
+        output_frame->data[3] = input_frame->data[3];
+    }
+
+    err = av_frame_copy_props(output_frame, input_frame);
+    if(err < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to copy frame props: %d (%s).\n",
+               err, av_err2str(err));
+        goto fail_unmap;
+    }
+
+    av_frame_unref(input_frame);
+    av_frame_move_ref(input_frame, output_frame);
+
+  fail_unmap:
+    if(copying)
+        av_vaapi_unmap_surface(surface, 0);
+  fail:
+    av_vaapi_instance_unlock(&ctx->va_instance);
+    return err;
+}
+
+static const struct {
+    VAProfile from;
+    VAProfile to;
+} vaapi_profile_compatibility[] = {
+#define FT(f, t) { VAProfile ## f, VAProfile ## t }
+    FT(MPEG2Simple,             MPEG2Main   ),
+    FT(H263Baseline,     MPEG4AdvancedSimple),
+    FT(MPEG4Simple,      MPEG4AdvancedSimple),
+    FT(MPEG4AdvancedSimple,     MPEG4Main   ),
+    FT(H264ConstrainedBaseline, H264Baseline),
+    FT(H264Baseline,            H264Main    ), // (Not quite true.)
+    FT(H264Main,                H264High    ),
+    FT(VC1Simple,               VC1Main     ),
+    FT(VC1Main,                 VC1Advanced ),
+#undef FT
+};
+
+static int vaapi_find_next_compatible(VAProfile profile)
+{
+    int i;
+    for(i = 0; i < FF_ARRAY_ELEMS(vaapi_profile_compatibility); i++) {
+        if(vaapi_profile_compatibility[i].from == profile)
+            return vaapi_profile_compatibility[i].to;
+    }
+    return VAProfileNone;
+}
+
+static const struct {
+    enum AVCodecID codec_id;
+    int codec_profile;
+    VAProfile va_profile;
+} vaapi_profile_map[] = {
+#define MAP(c, p, v) { AV_CODEC_ID_ ## c, FF_PROFILE_ ## p, VAProfile ## v }
+    MAP(MPEG2VIDEO,  MPEG2_SIMPLE,    MPEG2Simple ),
+    MAP(MPEG2VIDEO,  MPEG2_MAIN,      MPEG2Main   ),
+    MAP(H263,        UNKNOWN,         H263Baseline),
+    MAP(MPEG4,       MPEG4_SIMPLE,    MPEG4Simple ),
+    MAP(MPEG4,       MPEG4_ADVANCED_SIMPLE,
+                               MPEG4AdvancedSimple),
+    MAP(MPEG4,       MPEG4_MAIN,      MPEG4Main   ),
+    MAP(H264,        H264_CONSTRAINED_BASELINE,
+                           H264ConstrainedBaseline),
+    MAP(H264,        H264_BASELINE,   H264Baseline),
+    MAP(H264,        H264_MAIN,       H264Main    ),
+    MAP(H264,        H264_HIGH,       H264High    ),
+    MAP(HEVC,        HEVC_MAIN,       HEVCMain    ),
+    MAP(WMV3,        VC1_SIMPLE,      VC1Simple   ),
+    MAP(WMV3,        VC1_MAIN,        VC1Main     ),
+    MAP(WMV3,        VC1_COMPLEX,     VC1Advanced ),
+    MAP(WMV3,        VC1_ADVANCED,    VC1Advanced ),
+    MAP(VC1,         VC1_SIMPLE,      VC1Simple   ),
+    MAP(VC1,         VC1_MAIN,        VC1Main     ),
+    MAP(VC1,         VC1_COMPLEX,     VC1Advanced ),
+    MAP(VC1,         VC1_ADVANCED,    VC1Advanced ),
+    MAP(MJPEG,       UNKNOWN,         JPEGBaseline),
+    MAP(VP8,         UNKNOWN,        VP8Version0_3),
+    MAP(VP9,         VP9_0,           VP9Profile0 ),
+#undef MAP
+};
+
+static VAProfile vaapi_find_profile(const AVCodecContext *avctx)
+{
+    VAProfile result = VAProfileNone;
+    int i;
+    for(i = 0; i < FF_ARRAY_ELEMS(vaapi_profile_map); i++) {
+        if(avctx->codec_id != vaapi_profile_map[i].codec_id)
+            continue;
+        result = vaapi_profile_map[i].va_profile;
+        if(avctx->profile == vaapi_profile_map[i].codec_profile)
+            break;
+        // If there isn't an exact match, we will choose the last (highest)
+        // profile in the mapping table.
+    }
+    return result;
+}
+
+static const struct {
+    enum AVPixelFormat pix_fmt;
+    unsigned int fourcc;
+} vaapi_image_formats[] = {
+    { AV_PIX_FMT_NV12,    VA_FOURCC_NV12 },
+    { AV_PIX_FMT_YUV420P, VA_FOURCC_YV12 },
+};
+
+static int vaapi_get_pix_fmt(unsigned int fourcc)
+{
+    int i;
+    for(i = 0; i < FF_ARRAY_ELEMS(vaapi_image_formats); i++)
+        if(vaapi_image_formats[i].fourcc == fourcc)
+            return vaapi_image_formats[i].pix_fmt;
+    return 0;
+}
+
+static int vaapi_build_decoder_config(VAAPIDecoderContext *ctx,
+                                      AVVAAPIPipelineConfig *config,
+                                      AVVAAPISurfaceConfig *output,
+                                      AVCodecContext *avctx)
+{
+    VAStatus vas;
+    int i;
+
+    memset(config, 0, sizeof(*config));
+
+    // Pick codec profile to use.
+    {
+        VAProfile best_profile, profile;
+        int profile_count;
+        VAProfile *profile_list;
+
+        best_profile = vaapi_find_profile(avctx);
+        if(best_profile == VAProfileNone) {
+            av_log(ctx, AV_LOG_ERROR, "VAAPI does not support codec %s.\n",
+                   avcodec_get_name(avctx->codec_id));
+            return AVERROR(EINVAL);
+        }
+
+        profile_count = vaMaxNumProfiles(ctx->va_instance.display);
+        profile_list = av_calloc(profile_count, sizeof(VAProfile));
+
+        vas = vaQueryConfigProfiles(ctx->va_instance.display,
+                                    profile_list, &profile_count);
+        if(vas != VA_STATUS_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to query profiles: %d (%s).\n",
+                   vas, vaErrorStr(vas));
+            av_free(profile_list);
+            return AVERROR(EINVAL);
+        }
+
+        profile = best_profile;
+        while(profile != VAProfileNone) {
+            for(i = 0; i < profile_count; i++) {
+                if(profile_list[i] == profile)
+                    break;
+            }
+            if(i < profile_count)
+                break;
+
+            av_log(ctx, AV_LOG_DEBUG, "Hardware does not support codec "
+                   "profile: %s / %d -> VAProfile %d.\n",
+                   avcodec_get_name(avctx->codec_id), avctx->profile,
+                   profile);
+            profile = vaapi_find_next_compatible(profile);
+        }
+
+        av_free(profile_list);
+
+        if(profile == VAProfileNone) {
+            av_log(ctx, AV_LOG_ERROR, "Hardware does not support codec: "
+                   "%s / %d.\n", avcodec_get_name(avctx->codec_id),
+                   avctx->profile);
+            return AVERROR(EINVAL);
+        } else if(profile == best_profile) {
+            av_log(ctx, AV_LOG_INFO, "Hardware supports exact codec: "
+                   "%s / %d -> VAProfile %d.\n",
+                   avcodec_get_name(avctx->codec_id), avctx->profile,
+                   profile);
+        } else {
+            av_log(ctx, AV_LOG_INFO, "Hardware supports compatible codec: "
+                   "%s / %d -> VAProfile %d.\n",
+                   avcodec_get_name(avctx->codec_id), avctx->profile,
+                   profile);
+        }
+
+        config->profile = profile;
+        config->entrypoint = VAEntrypointVLD;
+    }
+
+    // Decide on the internal chroma format.
+    {
+        VAConfigAttrib attr;
+
+        // Currently the software only supports YUV420, so just make sure
+        // that the hardware we have does too.
+
+        memset(&attr, 0, sizeof(attr));
+        attr.type = VAConfigAttribRTFormat;
+        vas = vaGetConfigAttributes(ctx->va_instance.display, config->profile,
+                                    VAEntrypointVLD, &attr, 1);
+        if(vas != VA_STATUS_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to fetch config attributes: "
+                   "%d (%s).\n", vas, vaErrorStr(vas));
+            return AVERROR(EINVAL);
+        }
+        if(!(attr.value & VA_RT_FORMAT_YUV420)) {
+            av_log(ctx, AV_LOG_ERROR, "Hardware does not support required "
+                   "chroma format (%#x).\n", attr.value);
+            return AVERROR(EINVAL);
+        }
+
+        output->rt_format = VA_RT_FORMAT_YUV420;
+    }
+
+    // Decide on the image format.
+    if(avctx->pix_fmt == AV_PIX_FMT_VAAPI) {
+        // We are going to be passing through a VAAPI surface directly:
+        // they will stay as whatever opaque internal format for that time,
+        // and we never need to make VAImages from them.
+
+        av_log(ctx, AV_LOG_INFO, "Using VAAPI opaque output format.\n");
+
+        output->av_format = AV_PIX_FMT_VAAPI;
+        memset(&output->image_format, 0, sizeof(output->image_format));
+
+    } else {
+        int image_format_count;
+        VAImageFormat *image_format_list;
+        int pix_fmt;
+
+        // We might want to force a change to the output format here
+        // if we are intending to use VADeriveImage?
+
+        image_format_count = vaMaxNumImageFormats(ctx->va_instance.display);
+        image_format_list = av_calloc(image_format_count,
+                                      sizeof(VAImageFormat));
+
+        vas = vaQueryImageFormats(ctx->va_instance.display, image_format_list,
+                                  &image_format_count);
+        if(vas != VA_STATUS_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to query image formats: "
+                   "%d (%s).\n", vas, vaErrorStr(vas));
+            return AVERROR(EINVAL);
+        }
+
+        for(i = 0; i < image_format_count; i++) {
+            pix_fmt = vaapi_get_pix_fmt(image_format_list[i].fourcc);
+            if(pix_fmt == AV_PIX_FMT_NONE)
+                continue;
+            if(pix_fmt == avctx->pix_fmt)
+                break;
+        }
+        if(i < image_format_count) {
+            av_log(ctx, AV_LOG_INFO, "Using desired output format %s "
+                   "(%#x).\n", av_get_pix_fmt_name(pix_fmt),
+                   image_format_list[i].fourcc);
+        } else {
+            for(i = 0; i < image_format_count; i++) {
+                pix_fmt = vaapi_get_pix_fmt(image_format_list[i].fourcc);
+                if(pix_fmt != AV_PIX_FMT_NONE)
+                    break;
+            }
+            if(i >= image_format_count) {
+                av_log(ctx, AV_LOG_ERROR, "No supported output format found.\n");
+                av_free(image_format_list);
+                return AVERROR(EINVAL);
+            }
+            av_log(ctx, AV_LOG_INFO, "Using alternate output format %s "
+                   "(%#x).\n", av_get_pix_fmt_name(pix_fmt),
+                   image_format_list[i].fourcc);
+        }
+
+        output->av_format = pix_fmt;
+        memcpy(&output->image_format, &image_format_list[i],
+               sizeof(VAImageFormat));
+
+        av_free(image_format_list);
+    }
+
+    // Decide how many reference frames we need.
+    {
+        // We should be able to do this in a more sensible way by looking
+        // at how many reference frames the input stream requires.
+        output->count = DEFAULT_SURFACES;
+    }
+
+    // Test whether the width and height are within allowable limits.
+    {
+        // Unfortunately, we need an active codec pipeline to do this properly
+        // using vaQuerySurfaceAttributes().  For now, just assume the values
+        // we got passed are ok.
+        output->width  = avctx->coded_width;
+        output->height = avctx->coded_height;
+    }
+
+    return 0;
+}
+
+static int vaapi_alloc_decoder_context(VAAPIDecoderContext **ctx_ptr, const char *device)
+{
+    VAAPIDecoderContext *ctx;
+    int err;
+
+    ctx = av_mallocz(sizeof(*ctx));
+    if(!ctx)
+        return AVERROR(ENOMEM);
+
+    ctx->class = &vaapi_class;
+
+    err = av_vaapi_instance_init(&ctx->va_instance, device);
+    if(err)
+        return err;
+
+    *ctx_ptr = ctx;
+    return 0;
+}
+
+static void vaapi_decode_uninit(AVCodecContext *avctx)
+{
+    InputStream  *ist = avctx->opaque;
+    VAAPIDecoderContext *ctx = ist->hwaccel_ctx;
+
+    if(ctx->codec_initialised) {
+        av_vaapi_pipeline_uninit(&ctx->codec);
+        ctx->codec_initialised = 0;
+    }
+
+    av_free(ctx);
+
+    ist->hwaccel_ctx           = 0;
+    ist->hwaccel_uninit        = 0;
+    ist->hwaccel_get_buffer    = 0;
+    ist->hwaccel_retrieve_data = 0;
+
+    av_vaapi_instance_uninit(&ctx->va_instance);
+}
+
+int vaapi_decode_init(AVCodecContext *avctx)
+{
+    InputStream *ist = avctx->opaque;
+    VAAPIDecoderContext *ctx;
+    int err;
+
+    if(ist->hwaccel_id != HWACCEL_VAAPI)
+        return AVERROR(EINVAL);
+
+    avctx->hwaccel_context = 0;
+
+    if(ist->hwaccel_ctx) {
+        ctx = ist->hwaccel_ctx;
+        err = av_vaapi_pipeline_uninit(&ctx->codec);
+        if(err) {
+            av_log(ctx, AV_LOG_ERROR, "Unable to reinit; failed to uninit "
+                   "old codec context: %d (%s).\n", err, av_err2str(err));
+            return err;
+        }
+
+    } else {
+        err = vaapi_alloc_decoder_context(&ctx, ist->hwaccel_device);
+        if(err)
+            return err;
+    }
+
+    av_vaapi_instance_lock(&ctx->va_instance);
+
+    err = vaapi_build_decoder_config(ctx, &ctx->config, &ctx->output, avctx);
+    if(err) {
+        av_log(ctx, AV_LOG_ERROR, "No supported configuration for this codec.");
+        goto fail;
+    }
+
+    err = av_vaapi_pipeline_init(&ctx->codec, &ctx->va_instance,
+                                 &ctx->config, 0, &ctx->output);
+    if(err) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to initialise codec context: "
+               "%d (%s).\n", err, av_err2str(err));
+        goto fail;
+    }
+    ctx->codec_initialised = 1;
+
+    av_vaapi_instance_unlock(&ctx->va_instance);
+
+    av_log(ctx, AV_LOG_DEBUG, "VAAPI decoder (re)init complete.\n");
+
+    ist->hwaccel_ctx           = ctx;
+    ist->hwaccel_uninit        = vaapi_decode_uninit;
+    ist->hwaccel_get_buffer    = vaapi_get_buffer;
+    ist->hwaccel_retrieve_data = vaapi_retrieve_data;
+
+    ctx->hwaccel_context.display    = ctx->va_instance.display;
+    ctx->hwaccel_context.config_id  = ctx->codec.config_id;
+    ctx->hwaccel_context.context_id = ctx->codec.context_id;
+    avctx->hwaccel_context = &ctx->hwaccel_context;
+
+    return 0;
+
+ fail:
+    av_vaapi_instance_unlock(&ctx->va_instance);
+    vaapi_decode_uninit(avctx);
+    return err;
+}
-- 
2.6.4