[FFmpeg-devel] [PATCH v2 8/8] libavformat/dashenc: add support for assigning streams to AdaptationSets

Sat Jan 21 23:05:35 EET 2017

This patch is based on the stream assignment code in webmdashenc.

Additional changes:
 * Default to one AdaptationSet per stream

   Previously all mapped streams of a media type (video, audio) where assigned
   to a single AdaptationSet. Using the DASH live profile it is mandatory, that
   the segments of all representations are aligned, which is currently not
   enforced. This leads to problems when using video streams with different
   key frame intervals. So to play safe, default to one AdaptationSet per stream,
   unless overwritten by explicit assignment
 * Make sure all streams are assigned to exactly one AdaptationSet
 * Copy "language" and "role" metadata from streams assigned to the set
 * Stream assignment in "adaptation_sets" option supports stream identifier
   (e.g. v:0)
 * Since the "bandwidth" attribute on a Representation is mandatory, calculate
   bandwith based on the size and duration of the first segment

Signed-off-by: Peter Große <pegro at friiks.de>
---

Changes in v2:
 * removed changes related to min_seg_duration, since they are unrelated

---
 libavformat/dashenc.c | 286 +++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 227 insertions(+), 59 deletions(-)

diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 522a0eb..a5c4970 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -24,8 +24,10 @@
 #include <unistd.h>
 #endif
 
+#include "libavutil/avutil.h"
 #include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
+#include "libavutil/eval.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/opt.h"
@@ -59,9 +61,17 @@ typedef struct Segment {
     int n;
 } Segment;
 
+typedef struct AdaptationSet {
+    char id[10];
+    enum AVMediaType media_type;
+    AVRational min_frame_rate, max_frame_rate;
+    int ambiguous_frame_rate;
+    AVDictionary *metadata;
+} AdaptationSet;
+
 typedef struct OutputStream {
     AVFormatContext *ctx;
-    int ctx_inited;
+    int ctx_inited, as_idx;
     AVIOContext *out;
     int packets_written;
     char initfile[1024];
@@ -72,13 +82,14 @@ typedef struct OutputStream {
     int64_t first_pts, start_pts, max_pts;
     int64_t last_dts;
     int bit_rate;
-    char bandwidth_str[64];
-
     char codec_str[100];
 } OutputStream;
 
 typedef struct DASHContext {
     const AVClass *class;  /* Class for private options. */
+    char *adaptation_sets;
+    AdaptationSet *as;
+    int nb_as;
     int window_size;
     int extra_window_size;
     int min_seg_duration;
@@ -87,7 +98,7 @@ typedef struct DASHContext {
     int use_timeline;
     int single_file;
     OutputStream *streams;
-    int has_video, has_audio;
+    int has_video;
     int64_t last_duration;
     int64_t total_duration;
     char availability_start_time[100];
@@ -96,8 +107,6 @@ typedef struct DASHContext {
     const char *init_seg_name;
     const char *media_seg_name;
     const char *utc_timing_url;
-    AVRational min_frame_rate, max_frame_rate;
-    int ambiguous_frame_rate;
 } DASHContext;
 
 // RFC 6381
@@ -171,7 +180,7 @@ static int flush_dynbuf(OutputStream *os, int *range_length)
     uint8_t *buffer;
     int ret;
 
-    if(!os->ctx->pb) {
+    if (!os->ctx->pb) {
         return AVERROR(EINVAL);
     }
 
@@ -193,6 +202,16 @@ static void dash_free(AVFormatContext *s)
 {
     DASHContext *c = s->priv_data;
     int i, j;
+
+    if (c->as) {
+        for (i = 0; i < c->nb_as; i++) {
+            if (&c->as[i].metadata)
+                av_dict_free(&c->as[i].metadata);
+        }
+        av_freep(&c->as);
+        c->nb_as = 0;
+    }
+
     if (!c->streams)
         return;
     for (i = 0; i < s->nb_streams; i++) {
@@ -453,12 +472,165 @@ static void format_date_now(char *buf, int size)
     }
 }
 
+static int write_adaptation_set(AVFormatContext *s, AVIOContext *out, int as_index)
+{
+    DASHContext *c = s->priv_data;
+    AdaptationSet *as = &c->as[as_index];
+    AVDictionaryEntry *lang, *role;
+    int i;
+
+    avio_printf(out, "\t\t<AdaptationSet id=\"%s\" contentType=\"%s\" segmentAlignment=\"true\" bitstreamSwitching=\"true\"",
+                as->id, as->media_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio");
+
+    lang = av_dict_get(as->metadata, "language", NULL, 0);
+    if (lang)
+        avio_printf(out, " lang=\"%s\"", lang->value);
+
+    if (as->max_frame_rate.num && !as->ambiguous_frame_rate)
+        avio_printf(out, " %s=\"%d/%d\"", (av_cmp_q(as->min_frame_rate, as->max_frame_rate) < 0) ? "maxFrameRate" : "frameRate", as->max_frame_rate.num, as->max_frame_rate.den);
+    avio_printf(out, ">\n");
+
+    role = av_dict_get(as->metadata, "role", NULL, 0);
+    if (role)
+        avio_printf(out, "\t\t\t<Role schemeIdUri=\"urn:mpeg:dash:role:2011\" value=\"%s\"/>\n", role->value);
+
+    for (i = 0; i < s->nb_streams; i++) {
+        OutputStream *os = &c->streams[i];
+
+        if (os->as_idx != as_index)
+            continue;
+
+        if (as->media_type == AVMEDIA_TYPE_VIDEO) {
+            avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"video/mp4\" codecs=\"%s\" bandwidth=\"%d\" width=\"%d\" height=\"%d\"",
+                i, os->codec_str, os->bit_rate, s->streams[i]->codecpar->width, s->streams[i]->codecpar->height);
+            if (s->streams[i]->avg_frame_rate.num)
+                avio_printf(out, " frameRate=\"%d/%d\"", s->streams[i]->avg_frame_rate.num, s->streams[i]->avg_frame_rate.den);
+            avio_printf(out, ">\n");
+        } else {
+            avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"audio/mp4\" codecs=\"%s\" bandwidth=\"%d\" audioSamplingRate=\"%d\">\n",
+                i, os->codec_str, os->bit_rate, s->streams[i]->codecpar->sample_rate);
+            avio_printf(out, "\t\t\t\t<AudioChannelConfiguration schemeIdUri=\"urn:mpeg:dash:23003:3:audio_channel_configuration:2011\" value=\"%d\" />\n",
+                s->streams[i]->codecpar->channels);
+        }
+        output_segment_list(os, out, c);
+        avio_printf(out, "\t\t\t</Representation>\n");
+    }
+    avio_printf(out, "\t\t</AdaptationSet>\n");
+
+    return 0;
+}
+
+static int parse_adaptation_sets(AVFormatContext *s)
+{
+    DASHContext *c = s->priv_data;
+    char *p = c->adaptation_sets;
+    char *q;
+    enum { new_set, parse_id, parsing_streams } state;
+    int i;
+
+    // default: one AdaptationSet for each stream
+    if (!p) {
+        void *mem = av_mallocz(sizeof(*c->as) * s->nb_streams);
+        if (mem == NULL)
+            return AVERROR(ENOMEM);
+        c->as = mem;
+        c->nb_as = s->nb_streams;
+
+        for (i = 0; i < s->nb_streams; i++) {
+            AdaptationSet *as = &c->as[i];
+            OutputStream *os = &c->streams[i];
+            snprintf(as->id, sizeof(as->id), "%d", i);
+            as->metadata = NULL;
+            as->media_type = s->streams[i]->codecpar->codec_type;
+            os->as_idx = i + 1;
+        }
+        return 0;
+    }
+
+    // syntax id=0,streams=0,1,2 id=1,streams=3,4 and so on
+    state = new_set;
+    while (p < c->adaptation_sets + strlen(c->adaptation_sets)) {
+        if (*p == ' ')
+            continue;
+        else if (state == new_set && !strncmp(p, "id=", 3)) {
+            AdaptationSet *as;
+            void *mem = av_realloc(c->as, sizeof(*c->as) * (c->nb_as + 1));
+            if (mem == NULL)
+                return AVERROR(ENOMEM);
+            c->as = mem;
+            ++c->nb_as;
+
+            as = &c->as[c->nb_as - 1];
+            as->metadata = NULL;
+            as->media_type = AVMEDIA_TYPE_UNKNOWN;
+
+            p += 3; // consume "id="
+            q = as->id;
+            while (*p != ',') *q++ = *p++;
+            *q = 0;
+            p++;
+            state = parse_id;
+        } else if (state == parse_id && !strncmp(p, "streams=", 8)) {
+            p += 8; // consume "streams="
+            state = parsing_streams;
+        } else if (state == parsing_streams) {
+            struct AdaptationSet *as = &c->as[c->nb_as - 1];
+            int ret;
+            char *stream_identifier;
+
+            q = p;
+            while (*q != '\0' && *q != ',' && *q != ' ') q++;
+
+            stream_identifier = av_strndup(p, q - p);
+            for (i = 0; i < s->nb_streams; i++) {
+                ret = avformat_match_stream_specifier(s, s->streams[i], stream_identifier);
+                if (ret > 0) {
+                    OutputStream *os = &c->streams[i];
+                    if (as->media_type == AVMEDIA_TYPE_UNKNOWN) {
+                        as->media_type = s->streams[i]->codecpar->codec_type;
+                    } else if (as->media_type != s->streams[i]->codecpar->codec_type) {
+                        av_log(s, AV_LOG_ERROR, "Mixing codec types within an AdaptationSet is not allowed\n");
+                        return -1;
+                    } else if (os->as_idx) {
+                        av_log(s, AV_LOG_ERROR, "Assigning a stream to more than one AdaptationSet is not allowed\n");
+                        return -1;
+                    }
+                    os->as_idx = c->nb_as;
+                    break;
+                }
+            }
+
+            if (ret < 0) {
+                av_log(s, AV_LOG_ERROR, "Selected stream \"%s\" not found!\n", stream_identifier);
+                return -1;
+            }
+            av_free(stream_identifier);
+
+            if (*q == '\0') break;
+            if (*q == ' ') state = new_set;
+            p = ++q;
+        } else {
+            return -1;
+        }
+    }
+
+    // check for unassigned streams
+    for (i = 0; i < s->nb_streams; i++) {
+        OutputStream *os = &c->streams[i];
+        if (!os->as_idx) {
+            av_log(s, AV_LOG_ERROR, "Stream %d is not mapped to an AdaptationSet\n", i);
+            return -1;
+        }
+    }
+    return 0;
+}
+
 static int write_manifest(AVFormatContext *s, int final)
 {
     DASHContext *c = s->priv_data;
     AVIOContext *out;
     char temp_filename[1024];
-    int ret, i, as_id = 0;
+    int ret, i;
     AVDictionaryEntry *title = av_dict_get(s->metadata, "title", NULL, 0);
 
     snprintf(temp_filename, sizeof(temp_filename), "%s.tmp", s->filename);
@@ -509,7 +681,7 @@ static int write_manifest(AVFormatContext *s, int final)
         av_free(escaped);
     }
     avio_printf(out, "\t</ProgramInformation>\n");
-    if(c->utc_timing_url)
+    if (c->utc_timing_url)
         avio_printf(out, "\t<UTCTiming schemeIdUri=\"urn:mpeg:dash:utc:http-xsdate:2014\" value=\"%s\"/>\n", c->utc_timing_url);
     if (c->window_size && s->nb_streams > 0 && c->streams[0].nb_segments > 0 && !c->use_template) {
         OutputStream *os = &c->streams[0];
@@ -522,45 +694,13 @@ static int write_manifest(AVFormatContext *s, int final)
         avio_printf(out, "\t<Period id=\"0\" start=\"PT0.0S\">\n");
     }
 
-    if (c->has_video) {
-        avio_printf(out, "\t\t<AdaptationSet id=\"%d\" contentType=\"video\" segmentAlignment=\"true\" bitstreamSwitching=\"true\"", as_id++);
-        if (c->max_frame_rate.num && !c->ambiguous_frame_rate)
-            avio_printf(out, " %s=\"%d/%d\"", (av_cmp_q(c->min_frame_rate, c->max_frame_rate) < 0) ? "maxFrameRate" : "frameRate", c->max_frame_rate.num, c->max_frame_rate.den);
-        avio_printf(out, ">\n");
-
-        for (i = 0; i < s->nb_streams; i++) {
-            AVStream *st = s->streams[i];
-            OutputStream *os = &c->streams[i];
-
-            if (st->codecpar->codec_type != AVMEDIA_TYPE_VIDEO)
-                continue;
-
-            avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"video/mp4\" codecs=\"%s\"%s width=\"%d\" height=\"%d\"", i, os->codec_str, os->bandwidth_str, st->codecpar->width, st->codecpar->height);
-            if (st->avg_frame_rate.num)
-                avio_printf(out, " frameRate=\"%d/%d\"", st->avg_frame_rate.num, st->avg_frame_rate.den);
-            avio_printf(out, ">\n");
-
-            output_segment_list(&c->streams[i], out, c);
-            avio_printf(out, "\t\t\t</Representation>\n");
+    for (i = 0; i < c->nb_as; i++) {
+        ret = write_adaptation_set(s, out, i);
+        if (ret < 0) {
+            return ret;
         }
-        avio_printf(out, "\t\t</AdaptationSet>\n");
     }
-    if (c->has_audio) {
-        avio_printf(out, "\t\t<AdaptationSet id=\"%d\" contentType=\"audio\" segmentAlignment=\"true\" bitstreamSwitching=\"true\">\n", as_id++);
-        for (i = 0; i < s->nb_streams; i++) {
-            AVStream *st = s->streams[i];
-            OutputStream *os = &c->streams[i];
-
-            if (st->codecpar->codec_type != AVMEDIA_TYPE_AUDIO)
-                continue;
 
-            avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"audio/mp4\" codecs=\"%s\"%s audioSamplingRate=\"%d\">\n", i, os->codec_str, os->bandwidth_str, st->codecpar->sample_rate);
-            avio_printf(out, "\t\t\t\t<AudioChannelConfiguration schemeIdUri=\"urn:mpeg:dash:23003:3:audio_channel_configuration:2011\" value=\"%d\" />\n", st->codecpar->channels);
-            output_segment_list(&c->streams[i], out, c);
-            avio_printf(out, "\t\t\t</Representation>\n");
-        }
-        avio_printf(out, "\t\t</AdaptationSet>\n");
-    }
     avio_printf(out, "\t</Period>\n");
     avio_printf(out, "</MPD>\n");
     avio_flush(out);
@@ -568,6 +708,23 @@ static int write_manifest(AVFormatContext *s, int final)
     return avpriv_io_move(temp_filename, s->filename);
 }
 
+static int dict_copy_entry(AVDictionary **dst, const AVDictionary *src, const char *key)
+{
+    AVDictionaryEntry *entry;
+
+    // do not overwrite
+    if (dst) {
+        entry = av_dict_get(*dst, key, NULL, 0);
+        if (entry)
+            return 0;
+    }
+
+    entry = av_dict_get(src, key, NULL, 0);
+    if (entry)
+        av_dict_set(dst, key, entry->value, 0);
+    return 0;
+}
+
 static int dash_init(AVFormatContext *s)
 {
     DASHContext *c = s->priv_data;
@@ -579,7 +736,6 @@ static int dash_init(AVFormatContext *s)
         c->single_file = 1;
     if (c->single_file)
         c->use_template = 0;
-    c->ambiguous_frame_rate = 0;
 
     av_strlcpy(c->dirname, s->filename, sizeof(c->dirname));
     ptr = strrchr(c->dirname, '/');
@@ -599,8 +755,13 @@ static int dash_init(AVFormatContext *s)
     if (!c->streams)
         return AVERROR(ENOMEM);
 
+    ret = parse_adaptation_sets(s);
+    if (ret < 0)
+        return ret;
+
     for (i = 0; i < s->nb_streams; i++) {
         OutputStream *os = &c->streams[i];
+        AdaptationSet *as = &c->as[os->as_idx - 1];
         AVFormatContext *ctx;
         AVStream *st;
         AVDictionary *opts = NULL;
@@ -608,20 +769,17 @@ static int dash_init(AVFormatContext *s)
 
         os->bit_rate = s->streams[i]->codecpar->bit_rate;
         // if no bit rate detected, try whether bitrates are provided via metadata
-        if(!os->bit_rate) {
+        if (!os->bit_rate) {
             AVDictionaryEntry *bitrate;
             bitrate = av_dict_get(s->streams[i]->metadata, "bitrate", NULL, 0);
-            if(bitrate) {
+            if (bitrate) {
                 char *tail;
                 os->bit_rate = av_strtod(bitrate->value, &tail);
                 if (*tail)
                     os->bit_rate = 0;
             }
         }
-        if (os->bit_rate) {
-            snprintf(os->bandwidth_str, sizeof(os->bandwidth_str),
-                     " bandwidth=\"%d\"", os->bit_rate);
-        } else {
+        if (!os->bit_rate) {
             int level = s->strict_std_compliance >= FF_COMPLIANCE_STRICT ?
                         AV_LOG_ERROR : AV_LOG_WARNING;
             av_log(s, level, "No bit rate set for stream %d\n", i);
@@ -629,6 +787,10 @@ static int dash_init(AVFormatContext *s)
                 return AVERROR(EINVAL);
         }
 
+        // copy AdaptationSet language and role from stream metadata
+        dict_copy_entry(&as->metadata, s->streams[i]->metadata, "language");
+        dict_copy_entry(&as->metadata, s->streams[i]->metadata, "role");
+
         ret = avformat_alloc_output_context2(&ctx, NULL, "mp4", NULL);
         if (ret < 0)
             return AVERROR(ENOMEM);
@@ -684,16 +846,14 @@ static int dash_init(AVFormatContext *s)
         if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
             AVRational avg_frame_rate = s->streams[i]->avg_frame_rate;
             if (avg_frame_rate.num > 0) {
-                if (av_cmp_q(avg_frame_rate, c->min_frame_rate) < 0)
-                    c->min_frame_rate = avg_frame_rate;
-                if (av_cmp_q(c->max_frame_rate, avg_frame_rate) < 0)
-                    c->max_frame_rate = avg_frame_rate;
+                if (av_cmp_q(avg_frame_rate, as->min_frame_rate) < 0)
+                    as->min_frame_rate = avg_frame_rate;
+                if (av_cmp_q(as->max_frame_rate, avg_frame_rate) < 0)
+                    as->max_frame_rate = avg_frame_rate;
             } else {
-                c->ambiguous_frame_rate = 1;
+                as->ambiguous_frame_rate = 1;
             }
             c->has_video = 1;
-        } else if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
-            c->has_audio = 1;
         }
 
         set_codec_str(s, st->codecpar, os->codec_str, sizeof(os->codec_str));
@@ -879,6 +1039,13 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
             if (ret < 0)
                 break;
         }
+
+        if (!os->bit_rate) {
+            double bitrate = (int)( (double) range_length * 8.0 * AV_TIME_BASE / (double)(os->max_pts - os->start_pts) );
+            if (bitrate >= 0 && bitrate <= INT64_MAX)
+                os->bit_rate = bitrate;
+        }
+
         add_segment(os, filename, os->start_pts, os->max_pts - os->start_pts, os->pos, range_length, index_length);
         av_log(s, AV_LOG_VERBOSE, "Representation %d media segment %d written to: %s\n", i, os->segment_index, full_path);
 
@@ -1049,6 +1216,7 @@ static int dash_check_bitstream(struct AVFormatContext *s, const AVPacket *avpkt
 #define OFFSET(x) offsetof(DASHContext, x)
 #define E AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
+    { "adaptation_sets", "Adaptation sets. Syntax: id=0,streams=0,1,2 id=1,streams=3,4 and so on", OFFSET(adaptation_sets), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
     { "window_size", "number of segments kept in the manifest", OFFSET(window_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, E },
     { "extra_window_size", "number of segments kept outside of the manifest before removing from disk", OFFSET(extra_window_size), AV_OPT_TYPE_INT, { .i64 = 5 }, 0, INT_MAX, E },
     { "min_seg_duration", "minimum segment duration (in microseconds)", OFFSET(min_seg_duration), AV_OPT_TYPE_INT64, { .i64 = 5000000 }, 0, INT_MAX, E },
-- 
2.10.2