[FFmpeg-devel] [PATCH 1/2] add support for ROI-based encoding

Guo, Yejun yejun.guo at intel.com
Thu Dec 20 08:58:38 EET 2018


Asking for review, thanks.

> -----Original Message-----
> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces at ffmpeg.org] On Behalf
> Of Guo, Yejun
> Sent: Thursday, December 13, 2018 12:26 AM
> To: ffmpeg-devel at ffmpeg.org
> Subject: [FFmpeg-devel] [PATCH 1/2] add support for ROI-based encoding
> 
> This patchset contains two patches.
> - the first patch (this patch) contains the finished code and asks for upstreaming.
> - the second patch is just a quick example on how to generate ROI info.
> 
> Encoders such as libx264 support different QP offsets for different MBs,
> which makes ROI-based encoding possible. It makes sense to add support
> within ffmpeg to generate/accept ROI info and pass it to the encoders.
> 
> Typical usage: after an AVFrame is decoded, an ffmpeg filter or user code
> generates ROI info for that frame, and the encoder then performs the
> ROI-based encoding. I therefore chose to keep the ROI info (AVFrameROI)
> within the AVFrame struct.
> 
> Since the ROI info generator is likely to focus more on domain knowledge of
> the regions of interest than on encoding details, AVFrameROI is
> designed to be friendlier to ffmpeg users.
> 
> This patch only enables the path from ffmpeg to libx264; more encoders
> can be added later.
> 
> Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
> ---
>  libavcodec/libx264.c | 56
> ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  libavutil/frame.c    |  8 ++++++++
>  libavutil/frame.h    | 24 ++++++++++++++++++++++
>  3 files changed, 88 insertions(+)
> 
> diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c index
> a68d0a7..e4e593f 100644
> --- a/libavcodec/libx264.c
> +++ b/libavcodec/libx264.c
> @@ -273,6 +273,29 @@ static void reconfig_encoder(AVCodecContext *ctx,
> const AVFrame *frame)
>      }
>  }
> 
> +static float get_roi_qoffset(AVCodecContext *ctx, enum AVRoiQuality q)
> +{
> +    // the returned value can be refined with more consideration.
> +    float qoffset = 0.0f;
> +    switch (q)
> +    {
> +    case AV_RQ_NONE:
> +        qoffset = 0.0f;
> +        break;
> +    case AV_RQ_BETTER:
> +        qoffset = -8.0f;
> +        break;
> +    case AV_RQ_BEST:
> +        qoffset = -16.0f;
> +        break;
> +    default:
> +        av_log(ctx, AV_LOG_ERROR, "unknown value of AVRoiQuality.\n");
> +        break;
> +    }
> +
> +    return qoffset;
> +}
> +
>  static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame
> *frame,
>                        int *got_packet)
>  {
> @@ -345,6 +368,39 @@ static int X264_frame(AVCodecContext *ctx,
> AVPacket *pkt, const AVFrame *frame,
>                  }
>              }
>          }
> +
> +        if (frame->rois_buf != NULL) {
> +            if (x4->params.rc.i_aq_mode == X264_AQ_NONE) {
> +                av_log(ctx, AV_LOG_ERROR, "Adaptive quantization must be
> enabled to use ROI encoding, skipping ROI.\n");
> +            } else {
> +                if (frame->interlaced_frame == 0) {
> +                    const static int MBSIZE = 16;
> +                    size_t mbx = (frame->width + MBSIZE - 1) / MBSIZE;
> +                    size_t mby = (frame->height + MBSIZE - 1) / MBSIZE;
> +                    float* qoffsets = (float*)av_malloc(sizeof(float) * mbx * mby);
> +                    memset(qoffsets, 0, sizeof(float) * mbx * mby);
> +
> +                    size_t nb_rois = frame->rois_buf->size / sizeof(AVFrameROI);
> +                    AVFrameROI* rois = (AVFrameROI*)frame->rois_buf->data;
> +                    for (size_t roi = 0; roi < nb_rois; ++roi) {
> +                        int starty = FFMIN(mby, rois[roi].top / MBSIZE);
> +                        int endy = FFMIN(mby, (rois[roi].bottom + MBSIZE - 1)/ MBSIZE);
> +                        int startx = FFMIN(mbx, rois[roi].left / MBSIZE);
> +                        int endx = FFMIN(mbx, (rois[roi].right + MBSIZE - 1)/ MBSIZE);
> +                        for (int y = starty; y < endy; ++y) {
> +                            for (int x = startx; x < endx; ++x) {
> +                                qoffsets[x + y*mbx] = get_roi_qoffset(ctx, rois[roi].quality);
> +                            }
> +                        }
> +                    }
> +
> +                    x4->pic.prop.quant_offsets = qoffsets;
> +                    x4->pic.prop.quant_offsets_free = av_free;
> +                } else {
> +                    av_log(ctx, AV_LOG_ERROR, "interlaced_frame not supported for
> ROI encoding yet, skipping ROI.\n");
> +                }
> +            }
> +        }
>      }
> 
>      do {
> diff --git a/libavutil/frame.c b/libavutil/frame.c index 9b3fb13..dbc4b0a
> 100644
> --- a/libavutil/frame.c
> +++ b/libavutil/frame.c
> @@ -425,6 +425,13 @@ FF_DISABLE_DEPRECATION_WARNINGS
> FF_ENABLE_DEPRECATION_WARNINGS  #endif
> 
> +    av_buffer_unref(&dst->rois_buf);
> +    if (src->rois_buf) {
> +        dst->rois_buf = av_buffer_ref(src->rois_buf);
> +        if (!dst->rois_buf)
> +            return AVERROR(ENOMEM);
> +    }
> +
>      av_buffer_unref(&dst->opaque_ref);
>      av_buffer_unref(&dst->private_ref);
>      if (src->opaque_ref) {
> @@ -571,6 +578,7 @@ FF_DISABLE_DEPRECATION_WARNINGS
> FF_ENABLE_DEPRECATION_WARNINGS  #endif
> 
> +    av_buffer_unref(&frame->rois_buf);
>      av_buffer_unref(&frame->hw_frames_ctx);
> 
>      av_buffer_unref(&frame->opaque_ref);
> diff --git a/libavutil/frame.h b/libavutil/frame.h index 66f27f4..00d509d
> 100644
> --- a/libavutil/frame.h
> +++ b/libavutil/frame.h
> @@ -193,6 +193,23 @@ typedef struct AVFrameSideData {
>      AVBufferRef *buf;
>  } AVFrameSideData;
> 
> +enum AVRoiQuality {
> +    AV_RQ_NONE = 0,
> +    AV_RQ_BETTER = 1,
> +    AV_RQ_BEST = 2,
> +};
> +
> +typedef struct AVFrameROI {
> +    /* coordinates at frame pixel level.
> +     * it will be extended internally if the codec requires an alignment
> +     */
> +    size_t top;
> +    size_t bottom;
> +    size_t left;
> +    size_t right;
> +    enum AVRoiQuality quality;
> +} AVFrameROI;
> +
>  /**
>   * This structure describes decoded (raw) audio or video data.
>   *
> @@ -556,6 +573,13 @@ typedef struct AVFrame {
>      attribute_deprecated
>      AVBufferRef *qp_table_buf;
>  #endif
> +
> +    /**
> +     * For ROI-based encoding, the number of ROI areas is implied
> +     * by the size of the buffer.
> +     */
> +    AVBufferRef *rois_buf;
> +
>      /**
>       * For hwaccel-format frames, this should be a reference to the
>       * AVHWFramesContext describing the frame.
> --
> 2.7.4
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


More information about the ffmpeg-devel mailing list