[FFmpeg-devel] [PATCH] mov.c: read fragment start dts from fragmented mp4

Mika Raento mikie at iki.fi
Thu Oct 9 07:52:29 CEST 2014


On 8 October 2014 16:03, Michael Niedermayer <michaelni at gmx.at> wrote:
> Hi
>
> On Wed, Oct 08, 2014 at 03:05:07PM +0300, Mika Raento wrote:
>> If present, an MFRA box and its TFRAs are read for fragment start times.
>>
>> Without this change, timestamps for discontinuous fragmented mp4 are
>> wrong, and cause audio/video desync and are not usable for generating
>> HLS.
>> ---
>>  libavformat/isom.h |  14 +++++++
>>  libavformat/mov.c  | 116 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  2 files changed, 130 insertions(+)
>>
>> diff --git a/libavformat/isom.h b/libavformat/isom.h
>> index 979e967..2b49b55 100644
>> --- a/libavformat/isom.h
>> +++ b/libavformat/isom.h
>> @@ -78,6 +78,7 @@ typedef struct MOVFragment {
>>      unsigned duration;
>>      unsigned size;
>>      unsigned flags;
>> +    int64_t time;
>>  } MOVFragment;
>>
>>  typedef struct MOVTrackExt {
>> @@ -93,6 +94,17 @@ typedef struct MOVSbgp {
>>      unsigned int index;
>>  } MOVSbgp;
>>
>> +typedef struct MOVFragmentIndexItem {
>> +    int64_t time;
>> +} MOVFragmentIndexItem;
>> +
>> +typedef struct MOVFragmentIndex {
>> +    unsigned track_id;
>> +    unsigned current_item_index;
>> +    unsigned item_count;
>> +    MOVFragmentIndexItem *items;
>> +} MOVFragmentIndex;
>> +
>>  typedef struct MOVStreamContext {
>>      AVIOContext *pb;
>>      int pb_is_copied;
>> @@ -171,6 +183,8 @@ typedef struct MOVContext {
>>      int *bitrates;          ///< bitrates read before streams creation
>>      int bitrates_count;
>>      int moov_retry;
>> +    MOVFragmentIndex** fragment_index_data;
>> +    unsigned fragment_index_count;
>>  } MOVContext;
>>
>>  int ff_mp4_read_descr_len(AVIOContext *pb);
>
>> diff --git a/libavformat/mov.c b/libavformat/mov.c
>> index fdd0671..bf92e60 100644
>> --- a/libavformat/mov.c
>> +++ b/libavformat/mov.c
>> @@ -2756,6 +2756,16 @@ static int mov_read_tfhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
>>          av_log(c->fc, AV_LOG_ERROR, "could not find corresponding trex\n");
>>          return AVERROR_INVALIDDATA;
>>      }
>> +    MOVFragmentIndex* index = NULL;
>
> mixing declaration and statement causes problems for some comilers

Done.

>
>
>> +    for (i = 0; i < c->fragment_index_count; i++) {
>> +        MOVFragmentIndex* candidate = c->fragment_index_data[i];
>> +        if (candidate->track_id == frag->track_id) {
>> +            av_log(c->fc, AV_LOG_VERBOSE,
>> +                   "found fragment index for track %u\n", frag->track_id);
>> +            index = candidate;
>> +            break;
>> +        }
>> +    }
>>
>>      frag->base_data_offset = flags & MOV_TFHD_BASE_DATA_OFFSET ?
>>                               avio_rb64(pb) : flags & MOV_TFHD_DEFAULT_BASE_IS_MOOF ?
>> @@ -2768,6 +2778,20 @@ static int mov_read_tfhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
>>                       avio_rb32(pb) : trex->size;
>>      frag->flags    = flags & MOV_TFHD_DEFAULT_FLAGS ?
>>                       avio_rb32(pb) : trex->flags;
>> +    frag->time     = AV_NOPTS_VALUE;
>> +    if (index) {
>> +        // TODO: should check moof index from mfhd, rather than just
>> +        // relying on this code seeing the moofs in the same order as they
>> +        // are in the mfra, and only once each.
>> +        if (index->current_item_index == index->item_count) {
>> +            av_log(c->fc, AV_LOG_WARNING, "track %u has a fragment index "
>> +                   "but it doesn't have entries for all moofs, at moof "
>> +                   "%u\n", frag->track_id, index->current_item_index);
>> +        } else if (index->current_item_index < index->item_count) {
>> +            frag->time = index->items[index->current_item_index].time;
>> +        }
>> +        index->current_item_index++;
>> +    }
>>      av_dlog(c->fc, "frag flags 0x%x\n", frag->flags);
>>      return 0;
>>  }
>> @@ -2860,6 +2884,10 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
>>      if (flags & MOV_TRUN_DATA_OFFSET)        data_offset        = avio_rb32(pb);
>>      if (flags & MOV_TRUN_FIRST_SAMPLE_FLAGS) first_sample_flags = avio_rb32(pb);
>>      dts    = sc->track_end - sc->time_offset;
>> +    if (frag->time != AV_NOPTS_VALUE) {
>> +        av_log(c->fc, AV_LOG_DEBUG, "found frag time %"PRId64"\n", frag->time);
>> +        dts = frag->time;
>> +    }
>>      offset = frag->base_data_offset + data_offset;
>>      distance = 0;
>>      av_dlog(c->fc, "first sample flags 0x%x\n", first_sample_flags);
>> @@ -3513,6 +3541,13 @@ static int mov_read_close(AVFormatContext *s)
>>      av_freep(&mov->trex_data);
>>      av_freep(&mov->bitrates);
>>
>> +    for (i = 0; i < mov->fragment_index_count; i++) {
>> +        MOVFragmentIndex* index = mov->fragment_index_data[i];
>> +        av_freep(&index->items);
>> +        av_freep(&mov->fragment_index_data[i]);
>> +    }
>> +    av_freep(&mov->fragment_index_data);
>> +
>>      return 0;
>>  }
>>
>> @@ -3550,6 +3585,84 @@ static void export_orphan_timecode(AVFormatContext *s)
>>      }
>>  }
>>
>> +static int read_tfra(AVFormatContext *s)
>> +{
>> +    MOVContext *mov = s->priv_data;
>> +    AVIOContext *f = s->pb;
>> +    MOVFragmentIndex* index = NULL;
>> +    int version, fieldlength, i, j, err;
>> +    int64_t pos = avio_tell(f);
>> +    uint32_t size = avio_rb32(f);
>> +    if (avio_rb32(f) != MKBETAG('t', 'f', 'r', 'a')) {
>> +        return -1;
>> +    }
>> +    av_log(s, AV_LOG_VERBOSE, "found tfra\n");
>> +    index = av_mallocz(sizeof(MOVFragmentIndex));
>> +    if (!index)
>> +        return AVERROR(ENOMEM);
>> +    mov->fragment_index_count++;
>> +    if ((err = av_reallocp(&mov->fragment_index_data,
>> +                           mov->fragment_index_count *
>> +                           sizeof(MOVFragmentIndex*))) < 0) {
>> +        av_freep(&index);
>> +        return err;
>> +    }
>> +    mov->fragment_index_data[mov->fragment_index_count - 1] =
>> +        index;
>> +
>> +    version = avio_r8(f);
>> +    avio_rb24(f);
>> +    index->track_id = avio_rb32(f);
>> +    fieldlength = avio_rb32(f);
>> +    index->item_count = avio_rb32(f);
>> +    index->items = av_mallocz(
>> +            index->item_count * sizeof(MOVFragmentIndexItem));
>> +    if (!index->items)
>> +        return AVERROR(ENOMEM);
>> +    for (i = 0; i < index->item_count; i++) {
>> +        int64_t time, offset;
>> +        if (version == 1) {
>> +            time   = avio_rb64(f);
>> +            offset = avio_rb64(f);
>> +        } else {
>> +            time   = avio_rb32(f);
>> +            offset = avio_rb32(f);
>> +        }
>
> the offset variable is never read

Done.

>
>
>> +        index->items[i].time = time;
>> +        for (j = 0; j < ((fieldlength >> 4) & 3) + 1; j++)
>> +            avio_r8(f);
>> +        for (j = 0; j < ((fieldlength >> 2) & 3) + 1; j++)
>> +            avio_r8(f);
>> +        for (j = 0; j < ((fieldlength >> 0) & 3) + 1; j++)
>> +            avio_r8(f);
>> +    }
>> +
>> +    avio_seek(f, pos + size, SEEK_SET);
>> +    return 0;
>> +}
>> +
>
>> +static int mov_read_mfra(AVFormatContext *s)
>> +{
>> +    AVIOContext *f = s->pb;
>> +    int32_t mfra_size;
>
>> +    avio_seek(f, avio_size(f) - 4, SEEK_SET);
>
> missing return check for avio_size() and avio_seek()
>
>
>> +    mfra_size = avio_rb32(f);
>> +    avio_seek(f, -mfra_size, SEEK_CUR);
>
> missing sanity check (like > 0 and <avio_size(f)) on mfra_size before
> seek

Done.

>
>
>> +    if (avio_rb32(f) != mfra_size) {
>> +        av_log(s, AV_LOG_DEBUG, "doesn't look like mfra (size)\n");
>> +        return -1;
>> +    }
>> +    if (avio_rb32(f) != MKBETAG('m', 'f', 'r', 'a')) {
>> +        av_log(s, AV_LOG_DEBUG, "doesn't look like mfra (tag)\n");
>> +        return -1;
>> +    }
>> +    av_log(s, AV_LOG_VERBOSE, "stream has mfra\n");
>> +    while (!read_tfra(s)) {
>> +        /* Empty */
>> +    }
>> +    return 0;
>> +}
>> +
>>  static int mov_read_header(AVFormatContext *s)
>>  {
>>      MOVContext *mov = s->priv_data;
>
>> @@ -3565,6 +3678,9 @@ static int mov_read_header(AVFormatContext *s)
>>      else
>>          atom.size = INT64_MAX;
>>
>> +    if (pb->seekable) {
>> +        mov_read_mfra(s);
>> +    }
>
> mov_read_mfra() requires multiple seeks, these are quite expensive if
> the file is not local but accessed over the network.
> Is it possible to detect (most) files which do not have mfra atoms
> reliably without having to try to read the mfra atom so as to avoid
> doing these potentially expensive seeks ?

That's an excellent question. Here are some possibilities:
- we could only do it for filenames ending in .ismv (nothing forces
fragmented mp4s to be called .ismv)
- we could have a flag to look or not look for the mfra, and maybe
default it to true if the filename ends in .ismv (+ for performance, -
for usability)
- we could trigger the code only once we see a moof (I'd need to
restructure the code, reading mfra as the first thing looked like the
minimal change)

I did not make any changes towards this yet.

>
> [...]

Resubmitting now. Will run fate next.

>
> --
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> it is not once nor twice but times without number that the same ideas make
> their appearance in the world. -- Aristotle
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>


More information about the ffmpeg-devel mailing list