[FFmpeg-devel] [PATCH 1/2] add support for ROI-based encoding

Guo, Yejun yejun.guo at intel.com
Wed Dec 12 18:26:14 EET 2018


This patchset contains two patches.
- The first patch (this one) contains the finished code and is submitted for upstream inclusion.
- The second patch is just a quick example of how to generate ROI info.

Encoders such as libx264 support a different QP offset for each macroblock (MB),
which makes ROI-based encoding possible. It therefore makes sense to add support
within ffmpeg for generating/accepting ROI info and passing it to the encoders.

Typical usage: after an AVFrame is decoded, an ffmpeg filter or the user's code
generates ROI info for that frame, and the encoder finally performs the
ROI-based encoding. I have therefore chosen to keep the ROI info (AVFrameROI)
within the AVFrame struct.

Since the generator of the ROI info is likely to focus on domain knowledge about
the regions of interest rather than on encoding details, AVFrameROI is
designed to be friendly to ffmpeg users.

This patch only enables the path from ffmpeg to libx264; more encoders
can be added later.

Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
---
 libavcodec/libx264.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 libavutil/frame.c    |  8 ++++++++
 libavutil/frame.h    | 24 ++++++++++++++++++++++
 3 files changed, 88 insertions(+)

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index a68d0a7..e4e593f 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -273,6 +273,29 @@ static void reconfig_encoder(AVCodecContext *ctx, const AVFrame *frame)
     }
 }
 
+/**
+ * Translate an ROI quality level into the QP offset used for its macroblocks.
+ * The values are provisional; higher quality levels map to more negative offsets.
+ * An unrecognized level is logged and yields a neutral offset of 0.0f.
+ */
+static float get_roi_qoffset(AVCodecContext *ctx, enum AVRoiQuality q)
+{
+    switch (q) {
+    case AV_RQ_NONE:
+        return 0.0f;
+    case AV_RQ_BETTER:
+        return -8.0f;
+    case AV_RQ_BEST:
+        return -16.0f;
+    default:
+        break;
+    }
+
+    /* Not a known enumerator: report it and fall back to "no offset". */
+    av_log(ctx, AV_LOG_ERROR, "unknown value of AVRoiQuality.\n");
+    return 0.0f;
+}
+
 static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
                       int *got_packet)
 {
@@ -345,6 +368,39 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
                 }
             }
         }
+
+        /* Turn the frame's ROI list, if present, into per-macroblock QP offsets for x264. */
+        if (frame->rois_buf != NULL) {
+            if (x4->params.rc.i_aq_mode == X264_AQ_NONE) {
+                av_log(ctx, AV_LOG_ERROR, "Adaptive quantization must be enabled to use ROI encoding, skipping ROI.\n");
+            } else {
+                if (frame->interlaced_frame == 0) {
+                    static const int MBSIZE = 16;
+                    size_t mbx = (frame->width + MBSIZE - 1) / MBSIZE;
+                    size_t mby = (frame->height + MBSIZE - 1) / MBSIZE;
+                    size_t nb_rois = frame->rois_buf->size / sizeof(AVFrameROI);
+                    AVFrameROI *rois = (AVFrameROI *)frame->rois_buf->data;
+                    /* zero-filled (offset 0 outside every ROI); size product is overflow-checked */
+                    float *qoffsets = av_calloc(mbx * mby, sizeof(*qoffsets));
+                    if (!qoffsets)
+                        return AVERROR(ENOMEM);
+                    for (size_t roi = 0; roi < nb_rois; ++roi) {
+                        float qoffset = get_roi_qoffset(ctx, rois[roi].quality); /* once per ROI */
+                        size_t starty = FFMIN(mby, rois[roi].top / MBSIZE);
+                        size_t endy = FFMIN(mby, (rois[roi].bottom + MBSIZE - 1)/ MBSIZE);
+                        size_t startx = FFMIN(mbx, rois[roi].left / MBSIZE);
+                        size_t endx = FFMIN(mbx, (rois[roi].right + MBSIZE - 1)/ MBSIZE);
+                        for (size_t y = starty; y < endy; ++y)
+                            for (size_t x = startx; x < endx; ++x)
+                                qoffsets[x + y*mbx] = qoffset;
+                    }
+                    x4->pic.prop.quant_offsets = qoffsets;
+                    x4->pic.prop.quant_offsets_free = av_free;
+                } else {
+                    av_log(ctx, AV_LOG_ERROR, "interlaced_frame not supported for ROI encoding yet, skipping ROI.\n");
+                }
+            }
+        }
     }
 
     do {
diff --git a/libavutil/frame.c b/libavutil/frame.c
index 9b3fb13..dbc4b0a 100644
--- a/libavutil/frame.c
+++ b/libavutil/frame.c
@@ -425,6 +425,13 @@ FF_DISABLE_DEPRECATION_WARNINGS
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
+    av_buffer_unref(&dst->rois_buf);
+    if (src->rois_buf) {
+        dst->rois_buf = av_buffer_ref(src->rois_buf);
+        if (!dst->rois_buf)
+            return AVERROR(ENOMEM);
+    }
+
     av_buffer_unref(&dst->opaque_ref);
     av_buffer_unref(&dst->private_ref);
     if (src->opaque_ref) {
@@ -571,6 +578,7 @@ FF_DISABLE_DEPRECATION_WARNINGS
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
+    av_buffer_unref(&frame->rois_buf);
     av_buffer_unref(&frame->hw_frames_ctx);
 
     av_buffer_unref(&frame->opaque_ref);
diff --git a/libavutil/frame.h b/libavutil/frame.h
index 66f27f4..00d509d 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -193,6 +193,23 @@ typedef struct AVFrameSideData {
     AVBufferRef *buf;
 } AVFrameSideData;
 
+enum AVRoiQuality {   /* quality level requested for one AVFrameROI */
+    AV_RQ_NONE = 0,   /* libx264 wrapper applies a QP offset of 0 */
+    AV_RQ_BETTER = 1, /* libx264 wrapper applies a QP offset of -8 */
+    AV_RQ_BEST = 2,   /* libx264 wrapper applies a QP offset of -16 */
+};
+
+typedef struct AVFrameROI {
+    /* One region of interest; coordinates are at frame pixel level.
+     * It will be extended internally if the codec requires an alignment.
+     */
+    size_t top;
+    size_t bottom;
+    size_t left;
+    size_t right;
+    enum AVRoiQuality quality;
+} AVFrameROI;
+
 /**
  * This structure describes decoded (raw) audio or video data.
  *
@@ -556,6 +573,13 @@ typedef struct AVFrame {
     attribute_deprecated
     AVBufferRef *qp_table_buf;
 #endif
+
+    /**
+     * For ROI-based encoding: holds an array of AVFrameROI. The number of
+     * ROIs is implied by the buffer size (size / sizeof(AVFrameROI)).
+     */
+    AVBufferRef *rois_buf;
+
     /**
      * For hwaccel-format frames, this should be a reference to the
      * AVHWFramesContext describing the frame.
-- 
2.7.4



More information about the ffmpeg-devel mailing list