[FFmpeg-devel] [PATCH] libavformat/matroskaenc: Add WebM DASH support

Tue May 13 08:25:08 CEST 2014

On Thu, May 8, 2014 at 10:11 AM, Vignesh Venkatasubramanian
<vigneshv at google.com> wrote:
> On Wed, May 7, 2014 at 3:37 PM, Michael Niedermayer <michaelni at gmx.at> wrote:
>> On Wed, May 07, 2014 at 02:32:26PM -0700, Vignesh Venkatasubramanian wrote:
>>> On Wed, May 7, 2014 at 1:40 PM, Michael Niedermayer <michaelni at gmx.at> wrote:
>>> > On Wed, May 07, 2014 at 09:46:40AM -0700, Vignesh Venkatasubramanian wrote:
>>> >> WebM DASH specification [1] requires the Clusters and Cues to be output in a
>>> >> specific way. Adding a flag to matroskaenc that will enable support for
>>> >> creating WebM/Mkv files conforming to the WebM DASH specification.
>>> >>
>>> >> [1] http://wiki.webmproject.org/adaptive-streaming/webm-dash-specification
>>> >>
>>> >> Signed-off-by: Vignesh Venkatasubramanian <vigneshv at google.com>
>>> >> ---
>>> >>  libavformat/matroskaenc.c | 68 ++++++++++++++++++++++++++++++++++-------------
>>> >>  1 file changed, 50 insertions(+), 18 deletions(-)
>>> >>
>>> >> diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
>>> >> index e5e6116..3cf6e73 100644
>>> >> --- a/libavformat/matroskaenc.c
>>> >> +++ b/libavformat/matroskaenc.c
>>> >> @@ -70,6 +70,7 @@ typedef struct mkv_seekhead {
>>> >>
>>> >>  typedef struct {
>>> >>      uint64_t        pts;
>>> >> +    int             stream_idx;
>>> >>      int             tracknum;
>>> >>      int64_t         cluster_pos;        ///< file offset of the cluster containing the block
>>> >>      int64_t         relative_pos;       ///< relative offset from the position of the cluster containing the block
>>> >> @@ -114,6 +115,8 @@ typedef struct MatroskaMuxContext {
>>> >>      int cluster_size_limit;
>>> >>      int64_t cues_pos;
>>> >>      int64_t cluster_time_limit;
>>> >> +    int is_dash;
>>> >> +    int dash_track_number;
>>> >>
>>> >>      uint32_t chapter_id_offset;
>>> >>      int wrote_chapters;
>>> >> @@ -399,8 +402,8 @@ static mkv_cues * mkv_start_cues(int64_t segment_offset)
>>> >>      return cues;
>>> >>  }
>>> >>
>>> >> -static int mkv_add_cuepoint(mkv_cues *cues, int stream, int64_t ts, int64_t cluster_pos, int64_t relative_pos,
>>> >> -                            int64_t duration)
>>> >> +static int mkv_add_cuepoint(mkv_cues *cues, int stream, int tracknum, int64_t ts,
>>> >> +                            int64_t cluster_pos, int64_t relative_pos, int64_t duration)
>>> >>  {
>>> >>      mkv_cuepoint *entries = cues->entries;
>>> >>
>>> >> @@ -413,7 +416,8 @@ static int mkv_add_cuepoint(mkv_cues *cues, int stream, int64_t ts, int64_t clus
>>> >>      cues->entries = entries;
>>> >>
>>> >>      cues->entries[cues->num_entries].pts           = ts;
>>> >> -    cues->entries[cues->num_entries].tracknum      = stream + 1;
>>> >> +    cues->entries[cues->num_entries].stream_idx    = stream;
>>> >> +    cues->entries[cues->num_entries].tracknum      = tracknum;
>>> >>      cues->entries[cues->num_entries].cluster_pos   = cluster_pos - cues->segment_offset;
>>> >>      cues->entries[cues->num_entries].relative_pos  = relative_pos;
>>> >>      cues->entries[cues->num_entries++].duration    = duration;
>>> >> @@ -443,7 +447,7 @@ static int64_t mkv_write_cues(AVIOContext *pb, mkv_cues *cues, mkv_track *tracks
>>> >>          for (j = 0; j < num_tracks; j++)
>>> >>              tracks[j].has_cue = 0;
>>> >>          for (j = 0; j < cues->num_entries - i && entry[j].pts == pts; j++) {
>>> >> -            int tracknum = entry[j].tracknum - 1;
>>> >> +            int tracknum = entry[j].stream_idx;
>>> >>              av_assert0(tracknum>=0 && tracknum<num_tracks);
>>> >>              if (tracks[tracknum].has_cue)
>>> >>                  continue;
>>> >> @@ -646,8 +650,10 @@ static int mkv_write_tracks(AVFormatContext *s)
>>> >>              get_aac_sample_rates(s, codec, &sample_rate, &output_sample_rate);
>>> >>
>>> >>          track = start_ebml_master(pb, MATROSKA_ID_TRACKENTRY, 0);
>>> >> -        put_ebml_uint (pb, MATROSKA_ID_TRACKNUMBER     , i + 1);
>>> >> -        put_ebml_uint (pb, MATROSKA_ID_TRACKUID        , i + 1);
>>> >> +        put_ebml_uint (pb, MATROSKA_ID_TRACKNUMBER,
>>> >> +                       mkv->is_dash ? mkv->dash_track_number : i + 1);
>>> >> +        put_ebml_uint (pb, MATROSKA_ID_TRACKUID,
>>> >> +                       mkv->is_dash ? mkv->dash_track_number : i + 1);
>>> >>          put_ebml_uint (pb, MATROSKA_ID_TRACKFLAGLACING , 0);    // no lacing (yet)
>>> >>
>>> >>          if ((tag = av_dict_get(st->metadata, "title", NULL, 0)))
>>> >> @@ -1425,7 +1431,8 @@ static void mkv_write_block(AVFormatContext *s, AVIOContext *pb,
>>> >>
>>> >>      put_ebml_id(pb, blockid);
>>> >>      put_ebml_num(pb, size+4, 0);
>>> >> -    avio_w8(pb, 0x80 | (pkt->stream_index + 1));     // this assumes stream_index is less than 126
>>> >> +    // this assumes stream_index is less than 126
>>> >> +    avio_w8(pb, 0x80 | (mkv->is_dash ? mkv->dash_track_number : (pkt->stream_index + 1)));
>>> >>      avio_wb16(pb, ts - mkv->cluster_pts);
>>> >>      avio_w8(pb, flags);
>>> >>      avio_write(pb, data + offset, size);
>>> >> @@ -1539,7 +1546,7 @@ static void mkv_flush_dynbuf(AVFormatContext *s)
>>> >>      mkv->dyn_bc = NULL;
>>> >>  }
>>> >>
>>> >> -static int mkv_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
>>> >> +static int mkv_write_packet_internal(AVFormatContext *s, AVPacket *pkt, int add_cue)
>>> >>  {
>>> >>      MatroskaMuxContext *mkv = s->priv_data;
>>> >>      AVIOContext *pb = s->pb;
>>> >> @@ -1596,8 +1603,13 @@ static int mkv_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
>>> >>          end_ebml_master(pb, blockgroup);
>>> >>      }
>>> >>
>>> >> -    if ((codec->codec_type == AVMEDIA_TYPE_VIDEO && keyframe) || codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
>>> >> -        ret = mkv_add_cuepoint(mkv->cues, pkt->stream_index, ts, mkv->cluster_pos, relative_packet_pos,
>>> >> +    if ((codec->codec_type == AVMEDIA_TYPE_VIDEO && keyframe) ||
>>> >> +        codec->codec_type == AVMEDIA_TYPE_SUBTITLE ||
>>> >> +        add_cue) {
>>> >> +        ret = mkv_add_cuepoint(mkv->cues,
>>> >> +                               pkt->stream_index,
>>> >> +                               mkv->is_dash ? mkv->dash_track_number : pkt->stream_index + 1,
>>> >> +                               ts, mkv->cluster_pos, relative_packet_pos,
>>> >>                                 codec->codec_type == AVMEDIA_TYPE_SUBTITLE ? duration : -1);
>>> >>          if (ret < 0) return ret;
>>> >>      }
>>> >> @@ -1615,6 +1627,7 @@ static int mkv_write_packet(AVFormatContext *s, AVPacket *pkt)
>>> >>      int64_t cluster_time;
>>> >>      AVIOContext *pb;
>>> >>      int ret;
>>> >> +    int start_new_cluster;
>>> >>
>>> >>      if (mkv->tracks[pkt->stream_index].write_dts)
>>> >>          cluster_time = pkt->dts - mkv->cluster_pts;
>>> >> @@ -1632,11 +1645,26 @@ static int mkv_write_packet(AVFormatContext *s, AVPacket *pkt)
>>> >>          cluster_size = avio_tell(pb);
>>> >>      }
>>> >>
>>> >> -    if (mkv->cluster_pos != -1 &&
>>> >> -        (cluster_size > mkv->cluster_size_limit ||
>>> >> -         cluster_time > mkv->cluster_time_limit ||
>>> >> -         (codec_type == AVMEDIA_TYPE_VIDEO && keyframe &&
>>> >> -          cluster_size > 4 * 1024))) {
>>> >> +    if (mkv->is_dash && codec_type == AVMEDIA_TYPE_VIDEO) {
>>> >> +        // WebM DASH specification states that the first block of every cluster
>>> >> +        // has to be a key frame. So for DASH video, we only create a cluster
>>> >> +        // on seeing key frames.
>>> >
>>> >> +        start_new_cluster = keyframe;
>>> >
>>> > is it intended to have 1 frame per cluster for intra only videos ?
>>>
>>> yes. but this only done when -dash parameter is explicitly set. so
>>> it's very unlikely that someone would want intra-only video for dash.
>>
>> Someone might already have a intra only video and want to transmit
>> it without reencoding or
>> a encoder might choose to encode multiple frames in a row to be
>> encoded as intra frames (for example in a very high motion scene)
>>
>>
>
> that looks like a valid use case. i will rather limit it by
> cluster_time_limit and force a keyframe with the encoder whenever a
> cluster is to be created. will update the patch soon.
>

on second thought, using cluster_time_limit as a sole factor will
produce incorrect DASH files in case of remuxing without encoding (as
there might not be a key frame when the cluster_time_limit is
reached).

so there are two options here. one is to use cluster_time_limit as the
sole factor (and force keyframes with the encode whenever the cluster
time limit is reached). this will create incorrect files in case of
remux. the second option is to use the existing patch. this will
always produce correct DASH files (although it will be inefficient in
the case of intra-only files).

thoughts?

>>>
>>> >
>>> > also, is there an easy way by which this patch can be tested ?
>>>
>>> i'm not really sure. i have tested it manually. should i see if i can
>>> come up with a fate test?
>>
>> a fate test would certainly be a good idea
>
> will add a fate test too in the follow up patch.
>
>>
>> [...]
>> --
>> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>>
>> Awnsering whenever a program halts or runs forever is
>> On a turing machine, in general impossible (turings halting problem).
>> On any real computer, always possible as a real computer has a finite number
>> of states N, and will either halt in less than N cycles or never halt.
>>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel at ffmpeg.org
>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>
>
>
> --
> Vignesh

-- 
Vignesh