[FFmpeg-devel] [PATCH 2/2] lavf: Add WebM DASH Manifest Muxer

Vignesh Venkatasubramanian vigneshv at google.com
Thu Jul 10 22:12:20 CEST 2014


On Thu, Jul 10, 2014 at 12:58 PM, Michael Niedermayer <michaelni at gmx.at> wrote:
> On Mon, Jul 07, 2014 at 01:41:34PM -0700, Vignesh Venkatasubramanian wrote:
>> This patch adds the ability to generate WebM DASH manifest XML using
>> ffmpeg. A sample command line would be as follows:
>>
>> ffmpeg \
>>   -f webm_dash_manifest -i video1.webm \
>>   -f webm_dash_manifest -i video2.webm \
>>   -f webm_dash_manifest -i audio1.webm \
>>   -f webm_dash_manifest -i audio2.webm \
>>   -map 0 -map 1 -map 2 -map 3 \
>>   -c copy \
>>   -f webm_dash_manifest \
>>   -adaptation_sets "id=0,streams=0,1 id=1,streams=2,3" \
>>   manifest.xml
>>
>> It works by exporting necessary fields as metadata tags in matroskadec
>> and using those values to write the appropriate XML fields as per the WebM
>> DASH Specification [1]. Some ideas are adopted from the webm-tools project
>> [2].
>>
>> [1]
>> https://sites.google.com/a/webmproject.org/wiki/adaptive-streaming/webm-dash-specification
>> [2]
>> https://chromium.googlesource.com/webm/webm-tools/+/master/webm_dash_manifest/
>>
>> Signed-off-by: Vignesh Venkatasubramanian <vigneshv at google.com>
>
> What if the input is not WebM but, let's say, MPEG-TS?

I am not sure what you mean here; the muxer is only for generating a DASH
manifest for WebM files.

>
> Also, a FATE test is needed. Maintaining code without any way to test
> it is probably more work for you. Though it's your time, so I don't
> really mind.
>

I am working on a FATE test. I will follow this up with patches for the FATE test.

Thanks for the review. I will address all the other comments and
update the patch.

>
> [...]
>
>> +static double get_duration(AVFormatContext *s)
>> +{
>> +    int i = 0;
>> +    double max = 0.0;
>> +    for (i = 0; i < s->nb_streams; i++) {
>> +        AVDictionaryEntry *duration = av_dict_get(s->streams[i]->metadata,
>> +                                                  DURATION, NULL, 0);
>> +        if (atof(duration->value) > max) max = atof(duration->value);
>
> This lacks a check that the metadata exists, and it lacks checks that it
> is valid.
>
> also floating point may cause problems with regression tests
>
>
>> +    }
>> +    return max / 1000;
>> +}
>> +
>> +static void write_header(AVFormatContext *s)
>> +{
>> +    double min_buffer_time = 1.0;
>> +    avio_printf(s->pb, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
>> +    avio_printf(s->pb, "<MPD\n");
>> +    avio_printf(s->pb, "  xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n");
>> +    avio_printf(s->pb, "  xmlns=\"urn:mpeg:DASH:schema:MPD:2011\"\n");
>> +    avio_printf(s->pb, "  xsi:schemaLocation=\"urn:mpeg:DASH:schema:MPD:2011\"\n");
>> +    avio_printf(s->pb, "  type=\"static\"\n");
>> +    avio_printf(s->pb, "  mediaPresentationDuration=\"PT%gS\"\n", get_duration(s));
>> +    avio_printf(s->pb, "  minBufferTime=\"PT%gS\"\n", min_buffer_time);
>> +    avio_printf(s->pb, "  profiles=\"urn:webm:dash:profile:webm-on-demand:2012\"");
>> +    avio_printf(s->pb, ">\n");
>
> I don't care much about formatting, but this is a mess.
> Please separate the format strings from the arguments somehow.
> It can be \n or vertical alignment or anything else you like, but as is
> it's pretty hard to read. This also applies to other parts of the patch.
>
>
>> +}
>> +
>> +static void write_footer(AVFormatContext *s)
>> +{
>> +    avio_printf(s->pb, "</MPD>");
>> +}
>> +
>
>> +static int subsegment_alignment(AVFormatContext *s, AdaptationSet *as) {
>> +    int i;
>> +    AVDictionaryEntry *gold = av_dict_get(s->streams[as->streams[0]]->metadata,
>> +                                          CUE_TIMESTAMPS, NULL, 0);
>> +    for (i = 1; i < as->nb_streams; i++) {
>> +        AVDictionaryEntry *ts = av_dict_get(s->streams[as->streams[i]]->metadata,
>> +                                            CUE_TIMESTAMPS, NULL, 0);
>> +        if (strncmp(gold->value, ts->value, strlen(gold->value))) return 0;
>
> lacking checks, similar to the duration case
>
>
>
>> +    }
>> +    return 1;
>> +}
>> +
>> +static int bitstream_switching(AVFormatContext *s, AdaptationSet *as) {
>> +    int i;
>> +    AVDictionaryEntry *gold_track_num = av_dict_get(s->streams[as->streams[0]]->metadata,
>> +                                                    TRACK_NUMBER, NULL, 0);
>> +    AVCodecContext *gold_codec = s->streams[as->streams[0]]->codec;
>> +    for (i = 1; i < as->nb_streams; i++) {
>> +        AVDictionaryEntry *track_num = av_dict_get(s->streams[as->streams[i]]->metadata,
>> +                                                   TRACK_NUMBER, NULL, 0);
>> +        AVCodecContext *codec = s->streams[as->streams[i]]->codec;
>> +        if (strncmp(gold_track_num->value, track_num->value, strlen(gold_track_num->value)) ||
>> +            gold_codec->codec_id != codec->codec_id ||
>> +            gold_codec->extradata_size != codec->extradata_size ||
>> +            memcmp(gold_codec->extradata, codec->extradata, codec->extradata_size)) {
>> +            return 0;
>> +        }
>> +    }
>> +    return 1;
>> +}
>> +
>> +static void write_adaptation_set(AVFormatContext *s, int as_index)
>> +{
>> +    WebMDashMuxContext *w = s->priv_data;
>> +    AdaptationSet *as = &w->as[as_index];
>> +    AVCodecContext *codec = s->streams[as->streams[0]]->codec;
>> +    int i;
>
>> +    char boolean[2][6] = { "false", "true" };
>
> static const char
>
>
>> +    int subsegmentStartsWithSAP = 1;
>> +    AVDictionaryEntry *lang;
>> +    avio_printf(s->pb, "<AdaptationSet id=\"%s\"", as->id);
>> +    avio_printf(s->pb, " mimeType=\"%s/webm\"",
>> +                codec->codec_type == AVMEDIA_TYPE_VIDEO ? "video" : "audio");
>> +    avio_printf(s->pb, " codecs=\"%s\"", get_codec_name(codec->codec_id));
>> +
>> +    lang = av_dict_get(s->streams[as->streams[0]]->metadata, "language", NULL, 0);
>> +    if (lang != NULL) avio_printf(s->pb, " lang=\"%s\"", lang->value);
>> +
>> +    if (codec->codec_type == AVMEDIA_TYPE_VIDEO) {
>> +        avio_printf(s->pb, " width=\"%d\"", codec->width);
>> +        avio_printf(s->pb, " height=\"%d\"", codec->height);
>> +    } else {
>> +        avio_printf(s->pb, " audioSamplingRate=\"%d\"", codec->sample_rate);
>> +    }
>> +
>> +    avio_printf(s->pb, " bitstreamSwitching=\"%s\"", boolean[bitstream_switching(s, as)]);
>> +    avio_printf(s->pb, " subsegmentAlignment=\"%s\"", boolean[subsegment_alignment(s, as)]);
>> +
>> +    for (i = 0; i < as->nb_streams; i++) {
>> +        AVDictionaryEntry *kf = av_dict_get(s->streams[as->streams[i]]->metadata,
>> +                                            CLUSTER_KEYFRAME, NULL, 0);
>> +        if (!strncmp(kf->value, "0", 1)) subsegmentStartsWithSAP = 0;
>> +    }
>> +    avio_printf(s->pb, " subsegmentStartsWithSAP=\"%d\"", subsegmentStartsWithSAP);
>> +    avio_printf(s->pb, ">\n");
>> +
>> +    for (i = 0; i < as->nb_streams; i++) {
>> +        AVStream *stream = s->streams[as->streams[i]];
>> +        AVDictionaryEntry *irange = av_dict_get(stream->metadata, INITIALIZATION_RANGE, NULL, 0);
>> +        AVDictionaryEntry *cues_start = av_dict_get(stream->metadata, CUES_START, NULL, 0);
>> +        AVDictionaryEntry *cues_end = av_dict_get(stream->metadata, CUES_END, NULL, 0);
>> +        AVDictionaryEntry *filename = av_dict_get(stream->metadata, FILENAME, NULL, 0);
>> +        AVDictionaryEntry *bandwidth = av_dict_get(stream->metadata, BANDWIDTH, NULL, 0);
>> +        avio_printf(s->pb, "<Representation id=\"%d\"", i);
>> +        avio_printf(s->pb, " bandwidth=\"%s\"", bandwidth->value);
>> +        avio_printf(s->pb, ">\n");
>> +        avio_printf(s->pb, "<BaseURL>%s</BaseURL>\n", filename->value);
>> +        avio_printf(s->pb, "<SegmentBase\n");
>> +        avio_printf(s->pb, "  indexRange=\"%s-%s\">\n", cues_start->value, cues_end->value);
>> +        avio_printf(s->pb, "<Initialization\n");
>> +        avio_printf(s->pb, "  range=\"0-%s\" />\n", irange->value);
>> +        avio_printf(s->pb, "</SegmentBase>\n");
>> +        avio_printf(s->pb, "</Representation>\n");
>> +    }
>> +    avio_printf(s->pb, "</AdaptationSet>\n");
>> +}
>> +
>> +static int to_integer(char *p, int len)
>> +{
>> +    int ret;
>> +    char *q = (char*)av_malloc(sizeof(char) * len);
>
> useless cast
>
>
>> +    strncpy(q, p, len);
>
> missing malloc failure check
>
>
>> +    ret = atoi(q);
>> +    av_free(q);
>> +    return ret;
>> +}
>> +
>> +static int parse_adaptation_sets(AVFormatContext *s)
>> +{
>> +    WebMDashMuxContext *w = s->priv_data;
>> +    char *p = w->adaptation_sets;
>> +    char *q;
>> +    enum { new_set, parsed_id, parsing_streams } state;
>> +    // syntax id=0,streams=0,1,2 id=1,streams=3,4 and so on
>> +    state = new_set;
>> +    while (p < w->adaptation_sets + strlen(w->adaptation_sets)) {
>> +        if (*p == ' ')
>> +            continue;
>> +        else if (state == new_set && !strncmp(p, "id=", 3)) {
>> +            w->as = av_realloc(w->as, sizeof(*w->as) * ++w->nb_as);
>> +            w->as[w->nb_as - 1].nb_streams = 0;
>
> missing realloc failure check
>
>
>> +            w->as[w->nb_as - 1].streams = NULL;
>> +            p += 3; // consume "id="
>> +            q = w->as[w->nb_as - 1].id;
>> +            while (*p != ',') *q++ = *p++;
>> +            *q = 0;
>> +            p++;
>> +            state = parsed_id;
>> +        } else if (state == parsed_id && !strncmp(p, "streams=", 8)) {
>> +            p += 8; // consume "streams="
>> +            state = parsing_streams;
>> +        } else if (state == parsing_streams) {
>> +            struct AdaptationSet *as = &w->as[w->nb_as - 1];
>> +            q = p;
>> +            while (*q != '\0' && *q != ',' && *q != ' ') q++;
>> +            as->streams = av_realloc(as->streams, sizeof(*as->streams) * ++as->nb_streams);
>> +            as->streams[as->nb_streams - 1] = to_integer(p, q - p);
>> +            if (*q == '\0') break;
>> +            if (*q == ' ') state = new_set;
>> +            p = ++q;
>> +        } else {
>> +            return -1;
>> +        }
>> +    }
>> +    return 0;
>> +}
>> +
>> +static int webm_dash_manifest_write_header(AVFormatContext *s)
>> +{
>> +    int i;
>> +    double start = 0.0;
>> +    WebMDashMuxContext *w = s->priv_data;
>> +    parse_adaptation_sets(s);
>> +    write_header(s);
>> +    avio_printf(s->pb, "<Period id=\"0\"");
>> +    avio_printf(s->pb, " start=\"PT%gS\"", start);
>> +    avio_printf(s->pb, " duration=\"PT%gS\"", get_duration(s));
>> +    avio_printf(s->pb, " >\n");
>> +
>> +    for (i = 0; i < w->nb_as; i++) {
>> +        write_adaptation_set(s, i);
>> +    }
>> +
>> +    avio_printf(s->pb, "</Period>\n");
>> +    write_footer(s);
>> +    return 0;
>> +}
>> +
>
>> +static int webm_dash_manifest_write_packet(AVFormatContext *s, AVPacket *pkt)
>> +{
>> +    return 0;
>
> This is surely not correct.
> Either, if no packets are expected, it should fail, or, if there are
> packets, they should be considered or cross-checked against the metadata.
>
>
>> +}
>> +
>
>> +static int webm_dash_manifest_write_trailer(AVFormatContext *s)
>> +{
>> +    WebMDashMuxContext *w = s->priv_data;
>> +    int i;
>> +    for (i = 0; i < w->nb_as; i++) {
>> +        av_free(w->as[i].streams);
>
> av_freep()
>
>
>> +    }
>> +    av_free(w->as);
>
> av_freep()
>
> [...]
> --
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> Good people do not need laws to tell them to act responsibly, while bad
> people will find a way around the laws. -- Plato
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>



-- 
Vignesh


More information about the ffmpeg-devel mailing list