[FFmpeg-devel] [PATCH V2 1/2] add support for ROI-based encoding

Michael Niedermayer michael at niedermayer.cc
Wed Dec 26 18:43:47 EET 2018


On Wed, Dec 26, 2018 at 04:11:35AM +0800, Guo, Yejun wrote:
> This patchset contains two patches.
> - the first patch (this patch) finished the code and ask for upstream.
> - the second patch is just a quick example on how to generate ROI info.
> 
> The encoders such as libx264 support different QPs offset for different MBs,
> it makes possible for ROI-based encoding. It makes sense to add support
> within ffmpeg to generate/accept ROI infos and pass into encoders.
> 
> Typical usage: After AVFrame is decoded, a ffmpeg filter or user's code
> generates ROI info for that frame, and the encoder finally does the
> ROI-based encoding.
> 
> This patch just enabled the path from ffmpeg to libx264, the more encoders
> can be added later.
> 
> Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
> ---
>  libavcodec/libx264.c | 40 ++++++++++++++++++++++++++++++++++++++++
>  libavutil/frame.c    |  1 +
>  libavutil/frame.h    | 19 +++++++++++++++++++
>  3 files changed, 60 insertions(+)

The commit message talks about 2 patches but this commit is one patch


> 
> diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
> index a68d0a7..a4f8677 100644
> --- a/libavcodec/libx264.c
> +++ b/libavcodec/libx264.c
> @@ -40,6 +40,10 @@
>  #include <stdlib.h>
>  #include <string.h>
>  
> +// from x264.h, for quant_offsets, Macroblocks are 16x16
> +// blocks of pixels (with respect to the luma plane)
> +#define MB_SIZE 16
> +
>  typedef struct X264Context {
>      AVClass        *class;
>      x264_param_t    params;
> @@ -345,6 +349,42 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
>                  }
>              }
>          }
> +
> +        AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_ROIS);
> +        if (sd != NULL) {
> +            if (x4->params.rc.i_aq_mode == X264_AQ_NONE) {
> +                av_log(ctx, AV_LOG_WARNING, "Adaptive quantization must be enabled to use ROI encoding, skipping ROI.\n");
> +            } else {
> +                if (frame->interlaced_frame == 0) {
> +                    size_t mbx = (frame->width + MB_SIZE - 1) / MB_SIZE;
> +                    size_t mby = (frame->height + MB_SIZE - 1) / MB_SIZE;
> +                    float* qoffsets;
> +                    qoffsets = (float*)av_malloc(sizeof(*qoffsets) * mbx * mby);
> +                    if (qoffsets == NULL)
> +                        return AVERROR(ENOMEM);
> +                    memset(qoffsets, 0, sizeof(*qoffsets) * mbx * mby);

av_mallocz_array()



> +
> +                    size_t nb_rois = sd->size / sizeof(AVROI);
> +                    AVROI* rois = (AVROI*)sd->data;
> +                    for (size_t roi = 0; roi < nb_rois; roi++) {
> +                        int starty = FFMIN(mby, rois[roi].top / MB_SIZE);
> +                        int endy = FFMIN(mby, (rois[roi].bottom + MB_SIZE - 1)/ MB_SIZE);
> +                        int startx = FFMIN(mbx, rois[roi].left / MB_SIZE);
> +                        int endx = FFMIN(mbx, (rois[roi].right + MB_SIZE - 1)/ MB_SIZE);
> +                        for (int y = starty; y < endy; y++) {
> +                            for (int x = startx; x < endx; x++) {
> +                                qoffsets[x + y*mbx] = rois[roi].qoffset;
> +                            }
> +                        }
> +                    }
> +
> +                    x4->pic.prop.quant_offsets = qoffsets;
> +                    x4->pic.prop.quant_offsets_free = av_free;
> +                } else {
> +                    av_log(ctx, AV_LOG_WARNING, "interlaced_frame not supported for ROI encoding yet, skipping ROI.\n");
> +                }
> +            }
> +        }
>      }
>  
>      do {
> diff --git a/libavutil/frame.c b/libavutil/frame.c
> index 34a6210..bebc50e 100644
> --- a/libavutil/frame.c
> +++ b/libavutil/frame.c
> @@ -841,6 +841,7 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type)
>      case AV_FRAME_DATA_QP_TABLE_DATA:               return "QP table data";
>  #endif
>      case AV_FRAME_DATA_DYNAMIC_HDR_PLUS: return "HDR Dynamic Metadata SMPTE2094-40 (HDR10+)";
> +    case AV_FRAME_DATA_ROIS: return "Regions Of Interest";
>      }
>      return NULL;
>  }
> diff --git a/libavutil/frame.h b/libavutil/frame.h
> index 582ac47..d18d235 100644
> --- a/libavutil/frame.h
> +++ b/libavutil/frame.h
> @@ -173,6 +173,12 @@ enum AVFrameSideDataType {
>       * volume transform - application 4 of SMPTE 2094-40:2016 standard.
>       */
>      AV_FRAME_DATA_DYNAMIC_HDR_PLUS,
> +
> +    /**
> +     * Regions Of Interest, the number of ROI area is implied
> +     * in the size of buf.
> +     */
> +    AV_FRAME_DATA_ROIS,
>  };

the API addition to libavutil should be in a seperate patch from the
use of the API, it also must bump the version

The text describing AV_FRAME_DATA_ROIS also is inadequate to use it
it doesnt really describe what this side data looks like exactly

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

If you fake or manipulate statistics in a paper in physics you will never
get a job again.
If you fake or manipulate statistics in a paper in medicin you will get
a job for life at the pharma industry.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20181226/07651bc2/attachment.sig>


More information about the ffmpeg-devel mailing list