[FFmpeg-devel] [PATCH] [RFC] avcodec: export MB information in frame side data

Clément Bœsch u at pkh.me
Wed Jul 16 17:06:27 CEST 2014


Also add an example exporting the MB information as a CSV stream.

---

A bunch of open questions remain:

- Ideally the "source" for a given macroblock should be a specific
  reference to a future or past frame (something like -4 or +2).
  Currently it's just ±1 depending on the direction. I don't see how I
  can extract that information.

- How should the MB "type" be exported? That is, which "generic" types
  do we need to expose to users?

- Who is motivated to port -vismv & the various other vis_* debug flags
  to a video filter? (The hard part will probably be writing its
  documentation...)

(TODO: avcodec version bump & APIChanges entry at least)
---
 .gitignore                    |   1 +
 configure                     |   2 +
 doc/Makefile                  |   1 +
 doc/examples/Makefile         |   1 +
 doc/examples/extract_mbinfo.c | 185 ++++++++++++++++++++++++++++++++++++++++++
 libavcodec/avcodec.h          |   1 +
 libavcodec/mpegvideo.c        | 109 ++++++++++++++++++++++++-
 libavcodec/options_table.h    |   1 +
 libavutil/frame.h             |   5 ++
 libavutil/mbinfo.h            |  32 ++++++++
 10 files changed, 337 insertions(+), 1 deletion(-)
 create mode 100644 doc/examples/extract_mbinfo.c
 create mode 100644 libavutil/mbinfo.h

diff --git a/.gitignore b/.gitignore
index 065fab1..8c38ed2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,6 +39,7 @@
 /doc/examples/avio_reading
 /doc/examples/decoding_encoding
 /doc/examples/demuxing_decoding
+/doc/examples/extract_mbinfo
 /doc/examples/filter_audio
 /doc/examples/filtering_audio
 /doc/examples/filtering_video
diff --git a/configure b/configure
index 51efc39..81eb69e 100755
--- a/configure
+++ b/configure
@@ -1299,6 +1299,7 @@ EXAMPLE_LIST="
     avio_reading_example
     decoding_encoding_example
     demuxing_decoding_example
+    extract_mbinfo_example
     filter_audio_example
     filtering_audio_example
     filtering_video_example
@@ -2569,6 +2570,7 @@ zoompan_filter_deps="swscale"
 avio_reading="avformat avcodec avutil"
 avcodec_example_deps="avcodec avutil"
 demuxing_decoding_example_deps="avcodec avformat avutil"
+extract_mbinfo_example_deps="avcodec avformat avutil"
 filter_audio_example_deps="avfilter avutil"
 filtering_audio_example_deps="avfilter avcodec avformat avutil"
 filtering_video_example_deps="avfilter avcodec avformat avutil"
diff --git a/doc/Makefile b/doc/Makefile
index 99f588a..6b6f7c1 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -39,6 +39,7 @@ DOCS = $(DOCS-yes)
 DOC_EXAMPLES-$(CONFIG_AVIO_READING_EXAMPLE)      += avio_reading
 DOC_EXAMPLES-$(CONFIG_AVCODEC_EXAMPLE)           += avcodec
 DOC_EXAMPLES-$(CONFIG_DEMUXING_DECODING_EXAMPLE) += demuxing_decoding
+DOC_EXAMPLES-$(CONFIG_EXTRACT_MBINFO_EXAMPLE)    += extract_mbinfo
 DOC_EXAMPLES-$(CONFIG_FILTER_AUDIO_EXAMPLE)      += filter_audio
 DOC_EXAMPLES-$(CONFIG_FILTERING_AUDIO_EXAMPLE)   += filtering_audio
 DOC_EXAMPLES-$(CONFIG_FILTERING_VIDEO_EXAMPLE)   += filtering_video
diff --git a/doc/examples/Makefile b/doc/examples/Makefile
index 03c7021..2b394c3 100644
--- a/doc/examples/Makefile
+++ b/doc/examples/Makefile
@@ -14,6 +14,7 @@ LDLIBS := $(shell pkg-config --libs $(FFMPEG_LIBS)) $(LDLIBS)
 EXAMPLES=       avio_reading                       \
                 decoding_encoding                  \
                 demuxing_decoding                  \
+                extract_mbinfo                     \
                 filtering_video                    \
                 filtering_audio                    \
                 metadata                           \
diff --git a/doc/examples/extract_mbinfo.c b/doc/examples/extract_mbinfo.c
new file mode 100644
index 0000000..e437a21
--- /dev/null
+++ b/doc/examples/extract_mbinfo.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2012 Stefano Sabatini
+ * Copyright (c) 2014 Clément Bœsch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <libavutil/mbinfo.h>
+#include <libavformat/avformat.h>
+
+static AVFormatContext *fmt_ctx = NULL;
+static AVCodecContext *video_dec_ctx = NULL;
+static AVStream *video_stream = NULL;
+static const char *src_filename = NULL;
+
+static int video_stream_idx = -1;
+static AVFrame *frame = NULL;
+static AVPacket pkt;
+static int video_frame_count = 0;
+
+static int decode_packet(int *got_frame, int cached)
+{
+    int decoded = pkt.size;
+
+    *got_frame = 0;
+
+    if (pkt.stream_index == video_stream_idx) {
+        int ret = avcodec_decode_video2(video_dec_ctx, frame, got_frame, &pkt);
+        if (ret < 0) {
+            fprintf(stderr, "Error decoding video frame (%s)\n", av_err2str(ret));
+            return ret;
+        }
+
+        if (*got_frame) {
+            int i;
+            AVFrameSideData *sd;
+
+            video_frame_count++;
+            sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MB_INFO);
+            if (sd) {
+                const AVMBInfo_MB *mbs = (const AVMBInfo_MB *)sd->data;
+                for (i = 0; i < sd->size / sizeof(*mbs); i++) {
+                    const AVMBInfo_MB *mb = &mbs[i];
+                    printf("%d,%2d,0x%08x,%2d,%2d,%4d,%4d,%4d,%4d\n",
+                           video_frame_count, mb->source, mb->type,
+                           mb->w, mb->h, mb->src_x, mb->src_y,
+                           mb->dst_x, mb->dst_y);
+                }
+            }
+        }
+    }
+
+    return decoded;
+}
+
+static int open_codec_context(int *stream_idx,
+                              AVFormatContext *fmt_ctx, enum AVMediaType type)
+{
+    int ret;
+    AVStream *st;
+    AVCodecContext *dec_ctx = NULL;
+    AVCodec *dec = NULL;
+    AVDictionary *opts = NULL;
+
+    ret = av_find_best_stream(fmt_ctx, type, -1, -1, NULL, 0);
+    if (ret < 0) {
+        fprintf(stderr, "Could not find %s stream in input file '%s'\n",
+                av_get_media_type_string(type), src_filename);
+        return ret;
+    } else {
+        *stream_idx = ret;
+        st = fmt_ctx->streams[*stream_idx];
+
+        /* find decoder for the stream */
+        dec_ctx = st->codec;
+        dec = avcodec_find_decoder(dec_ctx->codec_id);
+        if (!dec) {
+            fprintf(stderr, "Failed to find %s codec\n",
+                    av_get_media_type_string(type));
+            return AVERROR(EINVAL);
+        }
+
+        /* Init the video decoder */
+        av_dict_set(&opts, "flags2", "+sd_mb_info", 0);
+        if ((ret = avcodec_open2(dec_ctx, dec, &opts)) < 0) {
+            fprintf(stderr, "Failed to open %s codec\n",
+                    av_get_media_type_string(type));
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+int main(int argc, char **argv)
+{
+    int ret = 0, got_frame;
+
+    if (argc != 2) {
+        fprintf(stderr, "Usage: %s <video>\n", argv[0]);
+        exit(1);
+    }
+    src_filename = argv[1];
+
+    av_register_all();
+
+    if (avformat_open_input(&fmt_ctx, src_filename, NULL, NULL) < 0) {
+        fprintf(stderr, "Could not open source file %s\n", src_filename);
+        exit(1);
+    }
+
+    if (avformat_find_stream_info(fmt_ctx, NULL) < 0) {
+        fprintf(stderr, "Could not find stream information\n");
+        exit(1);
+    }
+
+    if (open_codec_context(&video_stream_idx, fmt_ctx, AVMEDIA_TYPE_VIDEO) >= 0) {
+        video_stream = fmt_ctx->streams[video_stream_idx];
+        video_dec_ctx = video_stream->codec;
+    }
+
+    av_dump_format(fmt_ctx, 0, src_filename, 0);
+
+    if (!video_stream) {
+        fprintf(stderr, "Could not find video stream in the input, aborting\n");
+        ret = 1;
+        goto end;
+    }
+
+    frame = av_frame_alloc();
+    if (!frame) {
+        fprintf(stderr, "Could not allocate frame\n");
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    printf("framenum,source,type,blockw,blockh,srcx,srcy,dstx,dsty\n");
+
+    /* initialize packet, set data to NULL, let the demuxer fill it */
+    av_init_packet(&pkt);
+    pkt.data = NULL;
+    pkt.size = 0;
+
+    /* read frames from the file */
+    while (av_read_frame(fmt_ctx, &pkt) >= 0) {
+        AVPacket orig_pkt = pkt;
+        do {
+            ret = decode_packet(&got_frame, 0);
+            if (ret < 0)
+                break;
+            pkt.data += ret;
+            pkt.size -= ret;
+        } while (pkt.size > 0);
+        av_free_packet(&orig_pkt);
+    }
+
+    /* flush cached frames */
+    pkt.data = NULL;
+    pkt.size = 0;
+    do {
+        decode_packet(&got_frame, 1);
+    } while (got_frame);
+
+end:
+    avcodec_close(video_dec_ctx);
+    avformat_close_input(&fmt_ctx);
+    av_frame_free(&frame);
+    return ret < 0;
+}
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 178349a..ea1a202 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -757,6 +757,7 @@ typedef struct RcOverride{
 
 #define CODEC_FLAG2_CHUNKS        0x00008000 ///< Input bitstream might be truncated at a packet boundaries instead of only at frame boundaries.
 #define CODEC_FLAG2_SHOW_ALL      0x00400000 ///< Show all frames before the first keyframe
+#define CODEC_FLAG2_SD_MB_INFO    0x10000000 ///< Export MB information through frame side data
 
 /* Unsupported options :
  *              Syntax Arithmetic coding (SAC)
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index a9024a9..e4e0729 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -31,6 +31,7 @@
 #include "libavutil/avassert.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/internal.h"
+#include "libavutil/mbinfo.h"
 #include "libavutil/timer.h"
 #include "avcodec.h"
 #include "blockdsp.h"
@@ -593,7 +594,8 @@ static int alloc_picture_tables(MpegEncContext *s, Picture *pic)
             return AVERROR(ENOMEM);
     }
 
-    if (s->out_format == FMT_H263 || s->encoding || s->avctx->debug_mv) {
+    if (s->out_format == FMT_H263 || s->encoding || s->avctx->debug_mv ||
+        (s->avctx->flags2 & CODEC_FLAG2_SD_MB_INFO)) {
         int mv_size        = 2 * (b8_array_size + 4) * sizeof(int16_t);
         int ref_index_size = 4 * mb_array_size;
 
@@ -2101,6 +2103,24 @@ static void draw_arrow(uint8_t *buf, int sx, int sy, int ex,
     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
 }
 
+static int add_mb(AVMBInfo_MB *mb, uint32_t mb_type,
+                  int dst_x, int dst_y,
+                  int src_x, int src_y,
+                  int direction)
+{
+    if (dst_x == src_x && dst_y == src_y)
+        return 0;
+    mb->type = mb_type;
+    mb->w = IS_8X8(mb_type) || IS_8X16(mb_type) ? 8 : 16;
+    mb->h = IS_8X8(mb_type) || IS_16X8(mb_type) ? 8 : 16;
+    mb->src_x = src_x;
+    mb->src_y = src_y;
+    mb->dst_x = dst_x;
+    mb->dst_y = dst_y;
+    mb->source = direction ? -1 : 1;
+    return 1;
+}
+
 /**
  * Print debugging info for the given picture.
  */
@@ -2109,6 +2129,93 @@ void ff_print_debug_info2(AVCodecContext *avctx, AVFrame *pict, uint8_t *mbskip_
                          int *low_delay,
                          int mb_width, int mb_height, int mb_stride, int quarter_sample)
 {
+    if ((avctx->flags2 & CODEC_FLAG2_SD_MB_INFO) && mbtype_table && motion_val[0]) {
+        const int shift = 1 + quarter_sample;
+        const int mv_sample_log2 = avctx->codec_id == AV_CODEC_ID_H264 || avctx->codec_id == AV_CODEC_ID_SVQ3 ? 2 : 1;
+        const int mv_stride      = (mb_width << mv_sample_log2) +
+                                   (avctx->codec->id == AV_CODEC_ID_H264 ? 0 : 1);
+        int mb_x, mb_y, mbcount = 0;
+
+        /* width * height * directions * 4MB (4MB for IS_8x8) */
+        AVMBInfo_MB *mbs = av_malloc_array(mb_width * mb_height, 2 * 4 * sizeof(AVMBInfo_MB));
+        if (!mbs)
+            return;
+
+        // TODO: refactor with the following code
+        for (mb_y = 0; mb_y < mb_height; mb_y++) {
+            for (mb_x = 0; mb_x < mb_width; mb_x++) {
+                int i, direction, mb_type = mbtype_table[mb_x + mb_y * mb_stride];
+                for (direction = 0; direction < 2; direction++) {
+                    if (direction == 0 &&
+                        pict->pict_type != AV_PICTURE_TYPE_B &&
+                        pict->pict_type != AV_PICTURE_TYPE_P)
+                        continue;
+                    if (direction == 1 &&
+                        pict->pict_type != AV_PICTURE_TYPE_B)
+                        continue;
+                    if (!USES_LIST(mb_type, direction))
+                        continue;
+                    if (IS_8X8(mb_type)) {
+                        for (i = 0; i < 4; i++) {
+                            int sx = mb_x * 16 + 4 + 8 * (i & 1);
+                            int sy = mb_y * 16 + 4 + 8 * (i >> 1);
+                            int xy = (mb_x * 2 + (i & 1) +
+                                      (mb_y * 2 + (i >> 1)) * mv_stride) << (mv_sample_log2 - 1);
+                            int mx = (motion_val[direction][xy][0] >> shift) + sx;
+                            int my = (motion_val[direction][xy][1] >> shift) + sy;
+                            mbcount += add_mb(mbs + mbcount, mb_type, sx, sy, mx, my, direction);
+                        }
+                    } else if (IS_16X8(mb_type)) {
+                        for (i = 0; i < 2; i++) {
+                            int sx = mb_x * 16 + 8;
+                            int sy = mb_y * 16 + 4 + 8 * i;
+                            int xy = (mb_x * 2 + (mb_y * 2 + i) * mv_stride) << (mv_sample_log2 - 1);
+                            int mx = (motion_val[direction][xy][0] >> shift);
+                            int my = (motion_val[direction][xy][1] >> shift);
+
+                            if (IS_INTERLACED(mb_type))
+                                my *= 2;
+
+                            mbcount += add_mb(mbs + mbcount, mb_type, sx, sy, mx + sx, my + sy, direction);
+                        }
+                    } else if (IS_8X16(mb_type)) {
+                        for (i = 0; i < 2; i++) {
+                            int sx = mb_x * 16 + 4 + 8 * i;
+                            int sy = mb_y * 16 + 8;
+                            int xy = (mb_x * 2 + i + mb_y * 2 * mv_stride) << (mv_sample_log2 - 1);
+                            int mx = motion_val[direction][xy][0] >> shift;
+                            int my = motion_val[direction][xy][1] >> shift;
+
+                            if (IS_INTERLACED(mb_type))
+                                my *= 2;
+
+                            mbcount += add_mb(mbs + mbcount, mb_type, sx, sy, mx + sx, my + sy, direction);
+                        }
+                    } else {
+                          int sx = mb_x * 16 + 8;
+                          int sy = mb_y * 16 + 8;
+                          int xy = (mb_x + mb_y * mv_stride) << mv_sample_log2;
+                          int mx = (motion_val[direction][xy][0]>>shift) + sx;
+                          int my = (motion_val[direction][xy][1]>>shift) + sy;
+                          mbcount += add_mb(mbs + mbcount, mb_type, sx, sy, mx, my, direction);
+                    }
+                }
+            }
+        }
+
+        if (mbcount) {
+            AVFrameSideData *sd;
+
+            av_log(avctx, AV_LOG_DEBUG, "Adding %d MB info to frame %d\n", mbcount, avctx->frame_number);
+            sd = av_frame_new_side_data(pict, AV_FRAME_DATA_MB_INFO, mbcount * sizeof(AVMBInfo_MB));
+            if (!sd)
+                return;
+            memcpy(sd->data, mbs, mbcount * sizeof(AVMBInfo_MB));
+        }
+
+        av_freep(&mbs);
+    }
+
     if (avctx->hwaccel || !mbtype_table
         || (avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU))
         return;
diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
index fa0bdf8..45e9105 100644
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h
@@ -86,6 +86,7 @@ static const AVOption avcodec_options[] = {
 {"local_header", "place global headers at every keyframe instead of in extradata", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_LOCAL_HEADER }, INT_MIN, INT_MAX, V|E, "flags2"},
 {"chunks", "Frame data might be split into multiple chunks", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_CHUNKS }, INT_MIN, INT_MAX, V|D, "flags2"},
 {"showall", "Show all frames before the first keyframe", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_SHOW_ALL }, INT_MIN, INT_MAX, V|D, "flags2"},
+{"sd_mb_info", "export MB information through frame side data", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_SD_MB_INFO}, INT_MIN, INT_MAX, V|D, "flags2"},
 {"me_method", "set motion estimation method", OFFSET(me_method), AV_OPT_TYPE_INT, {.i64 = ME_EPZS }, INT_MIN, INT_MAX, V|E, "me_method"},
 {"zero", "zero motion estimation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_ZERO }, INT_MIN, INT_MAX, V|E, "me_method" },
 {"full", "full motion estimation (slowest)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_FULL }, INT_MIN, INT_MAX, V|E, "me_method" },
diff --git a/libavutil/frame.h b/libavutil/frame.h
index a39c8d0..53095a6 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -82,6 +82,11 @@ enum AVFrameSideDataType {
      * See libavutil/display.h for a detailed description of the data.
      */
     AV_FRAME_DATA_DISPLAYMATRIX,
+    /**
+     * Macroblock information exported by some codecs.
+     * The data is the AVMBInfo struct defined in libavutil/mbinfo.h
+     */
+    AV_FRAME_DATA_MB_INFO,
 };
 
 typedef struct AVFrameSideData {
diff --git a/libavutil/mbinfo.h b/libavutil/mbinfo.h
new file mode 100644
index 0000000..89538ba
--- /dev/null
+++ b/libavutil/mbinfo.h
@@ -0,0 +1,32 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_MBINFO_H
+#define AVUTIL_MBINFO_H
+
+#include <stdint.h>
+
+typedef struct AVMBInfo_MB {
+    int8_t source; /* -1/+1 XXX: set exact relative ref frame instead of "direction" */
+    uint32_t type; /* how much codec specific can this be? */
+    uint8_t w, h;
+    uint16_t src_x, src_y;
+    uint16_t dst_x, dst_y;
+} AVMBInfo_MB;
+
+#endif /* AVUTIL_MBINFO_H */
-- 
2.0.1



More information about the ffmpeg-devel mailing list