[FFmpeg-devel] [PATCH 07/19 v2] avformat/hls: parse ID3 timestamps for elementary audio streams

Sat Apr 5 14:20:19 CEST 2014

HLS provides MPEG TS timestamps via ID3 tags in the beginning of each
segment of elementary audio streams.

v2: fix issues with streams that have multiple ID3 tags

Signed-off-by: Anssi Hannula <anssi.hannula at iki.fi>
---

Hi all,

and sorry for the delay in getting this patchset in.

I plan to apply the patchset (minus subtitle parts) later today or
tomorrow, unless any issues are found (by me or anyone here).

The only major change is that I've dropped the separate hls_id3_audio
subdemuxer again due to the various timestamp issues and the ID3
timestamp mangling is now done in the hls demuxer itself. So this is
similar to what the first version of the patchset had, except that the
issues with it have been fixed.

The patch in this message is the major updated part of this patchset
version. intercept_id3() contains most of the new code, other parts
were mostly present in one of the previous versions of the patchset.

For other parts I've applied the changes previously mentioned and/or
discussed.

The full patchset is at:
https://github.com/anssih/FFmpeg/commits/hls-patchset-wip-3
and with extra commits (mostly subtitles) that will remain in TODO:
https://github.com/anssih/FFmpeg/commits/hls-patchset-wip-3-extra

What is missing from the subtitle parts is to see if they could be
handled in a cleaner way (i.e. without the demuxer reopening hacks),
and some additional testing. I don't likely have time to work on that
for at least the next few weeks, though, but patches welcome of course.


 libavformat/hls.c | 315 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 304 insertions(+), 11 deletions(-)

diff --git a/libavformat/hls.c b/libavformat/hls.c
index fe14255..814c596 100644
--- a/libavformat/hls.c
+++ b/libavformat/hls.c
@@ -27,6 +27,7 @@
  */
 
 #include "libavutil/avstring.h"
+#include "libavutil/avassert.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/opt.h"
@@ -36,12 +37,16 @@
 #include "internal.h"
 #include "avio_internal.h"
 #include "url.h"
+#include "id3v2.h"
 
 #define INITIAL_BUFFER_SIZE 32768
 
 #define MAX_FIELD_LEN 64
 #define MAX_CHARACTERISTICS_LEN 512
 
+#define MPEG_TIME_BASE 90000
+#define MPEG_TIME_BASE_Q (AVRational){1, MPEG_TIME_BASE}
+
 /*
  * An apple http stream consists of a playlist with media segment files,
  * played sequentially. There may be several playlists with the same
@@ -100,6 +105,18 @@ struct playlist {
     char key_url[MAX_URL_SIZE];
     uint8_t key[16];
 
+    /* ID3 timestamp handling (elementary audio streams have ID3 timestamps
+     * (and possibly other ID3 tags) in the beginning of each segment) */
+    int is_id3_timestamped; /* -1: not yet known */
+    int64_t id3_mpegts_timestamp; /* in mpegts tb */
+    int64_t id3_offset; /* in stream original tb */
+    uint8_t* id3_buf; /* temp buffer for id3 parsing */
+    unsigned int id3_buf_size;
+    AVDictionary *id3_initial; /* data from first id3 tag */
+    int id3_found; /* ID3 tag found at some point */
+    int id3_changed; /* ID3 tag data has changed at some point */
+    ID3v2ExtraMeta *id3_deferred_extra; /* stored here until subdemuxer is opened */
+
     /* Renditions associated with this playlist, if any.
      * Alternative rendition playlists have a single rendition associated
      * with them, and variant main Media Playlists may have
@@ -179,6 +196,9 @@ static void free_playlist_list(HLSContext *c)
         struct playlist *pls = c->playlists[i];
         free_segment_list(pls);
         av_freep(&pls->renditions);
+        av_freep(&pls->id3_buf);
+        av_dict_free(&pls->id3_initial);
+        ff_id3v2_free_extra_meta(&pls->id3_deferred_extra);
         av_free_packet(&pls->pkt);
         av_free(pls->pb.buffer);
         if (pls->input)
@@ -234,6 +254,10 @@ static struct playlist *new_playlist(HLSContext *c, const char *url,
         return NULL;
     reset_packet(&pls->pkt);
     ff_make_absolute_url(pls->url, sizeof(pls->url), base, url);
+
+    pls->is_id3_timestamped = -1;
+    pls->id3_mpegts_timestamp = AV_NOPTS_VALUE;
+
     dynarray_add(&c->playlists, &c->n_playlists, pls);
     return pls;
 }
@@ -600,8 +624,14 @@ fail:
     return ret;
 }
 
+enum ReadFromURLMode {
+    READ_NORMAL,
+    READ_COMPLETE,
+};
+
 /* read from URLContext, limiting read to current segment */
-static int read_from_url(struct playlist *pls, uint8_t *buf, int buf_size)
+static int read_from_url(struct playlist *pls, uint8_t *buf, int buf_size,
+                         enum ReadFromURLMode mode)
 {
     int ret;
     struct segment *seg = pls->segments[pls->cur_seq_no - pls->start_seq_no];
@@ -610,7 +640,10 @@ static int read_from_url(struct playlist *pls, uint8_t *buf, int buf_size)
     if (seg->size >= 0)
         buf_size = FFMIN(buf_size, seg->size - pls->cur_seg_offset);
 
-    ret = ffurl_read(pls->input, buf, buf_size);
+    if (mode == READ_COMPLETE)
+        ret = ffurl_read_complete(pls->input, buf, buf_size);
+    else
+        ret = ffurl_read(pls->input, buf, buf_size);
 
     if (ret > 0)
         pls->cur_seg_offset += ret;
@@ -618,6 +651,204 @@ static int read_from_url(struct playlist *pls, uint8_t *buf, int buf_size)
     return ret;
 }
 
+/* Parse the raw ID3 data and pass contents to caller */
+static void parse_id3(AVFormatContext *s, AVIOContext *pb,
+                      AVDictionary **metadata, int64_t *dts,
+                      ID3v2ExtraMetaAPIC **apic, ID3v2ExtraMeta **extra_meta)
+{
+    static const char id3_priv_owner_ts[] = "com.apple.streaming.transportStreamTimestamp";
+    ID3v2ExtraMeta *meta;
+
+    ff_id3v2_read_dict(pb, metadata, ID3v2_DEFAULT_MAGIC, extra_meta);
+    for (meta = *extra_meta; meta; meta = meta->next) {
+        if (!strcmp(meta->tag, "PRIV")) {
+            ID3v2ExtraMetaPRIV *priv = meta->data;
+            if (priv->datasize == 8 && !strcmp(priv->owner, id3_priv_owner_ts)) {
+                /* 33-bit MPEG timestamp */
+                int64_t ts = AV_RB64(priv->data);
+                av_log(s, AV_LOG_DEBUG, "HLS ID3 audio timestamp %"PRId64"\n", ts);
+                if ((ts & ~((1ULL << 33) - 1)) == 0)
+                    *dts = ts;
+                else
+                    av_log(s, AV_LOG_ERROR, "Invalid HLS ID3 audio timestamp %"PRId64"\n", ts);
+            }
+        } else if (!strcmp(meta->tag, "APIC") && apic)
+            *apic = meta->data;
+    }
+}
+
+/* Check if the ID3 metadata contents have changed */
+static int id3_has_changed_values(struct playlist *pls, AVDictionary *metadata,
+                                  ID3v2ExtraMetaAPIC *apic)
+{
+    AVDictionaryEntry *entry = NULL;
+    AVDictionaryEntry *oldentry;
+    /* check that no keys have changed values */
+    while ((entry = av_dict_get(metadata, "", entry, AV_DICT_IGNORE_SUFFIX))) {
+        oldentry = av_dict_get(pls->id3_initial, entry->key, NULL, AV_DICT_MATCH_CASE);
+        if (!oldentry || strcmp(oldentry->value, entry->value) != 0)
+            return 1;
+    }
+
+    /* check if apic appeared */
+    if (apic && (pls->ctx->nb_streams != 2 || !pls->ctx->streams[1]->attached_pic.data))
+        return 1;
+
+    if (apic) {
+        int size = pls->ctx->streams[1]->attached_pic.size;
+        if (size != apic->buf->size - FF_INPUT_BUFFER_PADDING_SIZE)
+            return 1;
+
+        if (memcmp(apic->buf->data, pls->ctx->streams[1]->attached_pic.data, size) != 0)
+            return 1;
+    }
+
+    return 0;
+}
+
+/* Parse ID3 data and handle the found data */
+static void handle_id3(AVIOContext *pb, struct playlist *pls)
+{
+    AVDictionary *metadata = NULL;
+    ID3v2ExtraMetaAPIC *apic = NULL;
+    ID3v2ExtraMeta *extra_meta = NULL;
+    int64_t timestamp = AV_NOPTS_VALUE;
+
+    parse_id3(pls->ctx, pb, &metadata, &timestamp, &apic, &extra_meta);
+
+    if (timestamp != AV_NOPTS_VALUE) {
+        pls->id3_mpegts_timestamp = timestamp;
+        pls->id3_offset = 0;
+    }
+
+    if (!pls->id3_found) {
+        /* initial ID3 tags */
+        av_assert0(!pls->id3_deferred_extra);
+        pls->id3_found = 1;
+
+        /* get picture attachment and set text metadata */
+        if (pls->ctx->nb_streams)
+            ff_id3v2_parse_apic(pls->ctx, &extra_meta);
+        else
+            /* demuxer not yet opened, defer picture attachment */
+            pls->id3_deferred_extra = extra_meta;
+
+        av_dict_copy(&pls->ctx->metadata, metadata, 0);
+        pls->id3_initial = metadata;
+
+    } else {
+        if (!pls->id3_changed && id3_has_changed_values(pls, metadata, apic)) {
+            avpriv_report_missing_feature(pls->ctx, "Changing ID3 metadata in HLS audio elementary stream");
+            pls->id3_changed = 1;
+        }
+        av_dict_free(&metadata);
+    }
+
+    if (!pls->id3_deferred_extra)
+        ff_id3v2_free_extra_meta(&extra_meta);
+}
+
+/* Intercept and handle ID3 tags between URLContext and AVIOContext */
+static void intercept_id3(struct playlist *pls, uint8_t *buf,
+                         int buf_size, int *len)
+{
+    /* intercept id3 tags, we do not want to pass them to the raw
+     * demuxer on all segment switches */
+    int bytes;
+    int id3_buf_pos = 0;
+    int fill_buf = 0;
+
+    /* gather all the id3 tags */
+    while (1) {
+        /* see if we can retrieve enough data for ID3 header */
+        if (*len < ID3v2_HEADER_SIZE && buf_size >= ID3v2_HEADER_SIZE) {
+            bytes = read_from_url(pls, buf + *len, ID3v2_HEADER_SIZE - *len, READ_COMPLETE);
+            if (bytes > 0) {
+
+                if (bytes == ID3v2_HEADER_SIZE - *len)
+                    /* no EOF yet, so fill the caller buffer again after
+                     * we have stripped the ID3 tags */
+                    fill_buf = 1;
+
+                *len += bytes;
+
+            } else if (*len <= 0) {
+                /* error/EOF */
+                *len = bytes;
+                fill_buf = 0;
+            }
+        }
+
+        if (*len < ID3v2_HEADER_SIZE)
+            break;
+
+        if (ff_id3v2_match(buf, ID3v2_DEFAULT_MAGIC)) {
+            struct segment *seg = pls->segments[pls->cur_seq_no - pls->start_seq_no];
+            int64_t segsize = seg->size >= 0 ? seg->size : ffurl_size(pls->input);
+            int taglen = ff_id3v2_tag_len(buf);
+            int tag_got_bytes = FFMIN(taglen, *len);
+            int remaining = taglen - tag_got_bytes;
+
+            if (taglen > segsize) {
+                av_log(pls->ctx, AV_LOG_ERROR, "Too large HLS ID3 tag (%d vs %"PRId64")\n",
+                       taglen, segsize);
+                break;
+            }
+
+            /*
+             * Copy the id3 tag to our temporary id3 buffer.
+             * We could read a small id3 tag directly without memcpy, but
+             * we would still need to copy the large tags, and handling
+             * both of those cases together with the possibility for multiple
+             * tags would make the handling a bit complex.
+             */
+            pls->id3_buf = av_fast_realloc(pls->id3_buf, &pls->id3_buf_size, id3_buf_pos + taglen);
+            if (!pls->id3_buf)
+                break;
+            memcpy(pls->id3_buf + id3_buf_pos, buf, tag_got_bytes);
+            id3_buf_pos += tag_got_bytes;
+
+            /* strip the intercepted bytes */
+            *len -= tag_got_bytes;
+            memmove(buf, buf + tag_got_bytes, *len);
+            av_log(pls->ctx, AV_LOG_DEBUG, "Stripped %d HLS ID3 bytes\n", tag_got_bytes);
+
+            if (remaining > 0) {
+                /* read the rest of the tag in */
+                if (read_from_url(pls, pls->id3_buf + id3_buf_pos, remaining, READ_COMPLETE) != remaining)
+                    break;
+                id3_buf_pos += remaining;
+                av_log(pls->ctx, AV_LOG_DEBUG, "Stripped additional %d HLS ID3 bytes\n", remaining);
+            }
+
+        } else {
+            /* no more ID3 tags */
+            break;
+        }
+    }
+
+    /* re-fill buffer for the caller unless EOF */
+    if (*len >= 0 && (fill_buf || *len == 0)) {
+        bytes = read_from_url(pls, buf + *len, buf_size - *len, READ_NORMAL);
+
+        /* ignore error if we already had some data */
+        if (bytes >= 0)
+            *len += bytes;
+        else if (*len == 0)
+            *len = bytes;
+    }
+
+    if (pls->id3_buf) {
+        /* Now parse all the ID3 tags */
+        AVIOContext id3ioctx;
+        ffio_init_context(&id3ioctx, pls->id3_buf, id3_buf_pos, 0, NULL, NULL, NULL, NULL);
+        handle_id3(&id3ioctx, pls);
+    }
+
+    if (pls->is_id3_timestamped == -1)
+        pls->is_id3_timestamped = (pls->id3_mpegts_timestamp != AV_NOPTS_VALUE);
+}
+
 static int open_input(HLSContext *c, struct playlist *pls)
 {
     AVDictionary *opts = NULL;
@@ -720,6 +951,7 @@ static int read_data(void *opaque, uint8_t *buf, int buf_size)
     struct playlist *v = opaque;
     HLSContext *c = v->parent->priv_data;
     int ret, i;
+    int just_opened = 0;
 
     if (!v->needed)
         return AVERROR_EOF;
@@ -769,10 +1001,17 @@ reload:
                    v->index);
             return ret;
         }
+        just_opened = 1;
     }
 
-    ret = read_from_url(v, buf, buf_size);
+    ret = read_from_url(v, buf, buf_size, READ_NORMAL);
     if (ret > 0) {
+        if (just_opened && v->is_id3_timestamped != 0) {
+            /* Intercept ID3 tags here, elementary audio streams are required
+             * to convey timestamps using them in the beginning of each segment. */
+            intercept_id3(v, buf, buf_size, &ret);
+        }
+
         return ret;
     }
     ffurl_close(v->input);
@@ -991,11 +1230,21 @@ static int hls_read_header(AVFormatContext *s)
         if (ret < 0)
             goto fail;
 
+        if (pls->id3_deferred_extra && pls->ctx->nb_streams == 1) {
+            ff_id3v2_parse_apic(pls->ctx, &pls->id3_deferred_extra);
+            avformat_queue_attached_pictures(pls->ctx);
+            ff_id3v2_free_extra_meta(&pls->id3_deferred_extra);
+            pls->id3_deferred_extra = NULL;
+        }
+
         pls->ctx->ctx_flags &= ~AVFMTCTX_NOHEADER;
         ret = avformat_find_stream_info(pls->ctx, NULL);
         if (ret < 0)
             goto fail;
 
+        if (pls->is_id3_timestamped == -1)
+            av_log(s, AV_LOG_WARNING, "No expected HTTP requests have been made\n");
+
         /* Create new AVStreams for each stream in this playlist */
         for (j = 0; j < pls->ctx->nb_streams; j++) {
             AVStream *st = avformat_new_stream(s, NULL);
@@ -1005,8 +1254,13 @@ static int hls_read_header(AVFormatContext *s)
                 goto fail;
             }
             st->id = i;
-            avpriv_set_pts_info(st, ist->pts_wrap_bits, ist->time_base.num, ist->time_base.den);
+
             avcodec_copy_context(st->codec, pls->ctx->streams[j]->codec);
+
+            if (pls->is_id3_timestamped) /* custom timestamps via id3 */
+                avpriv_set_pts_info(st, 33, 1, MPEG_TIME_BASE);
+            else
+                avpriv_set_pts_info(st, ist->pts_wrap_bits, ist->time_base.num, ist->time_base.den);
         }
 
         add_metadata_from_renditions(s, pls, AVMEDIA_TYPE_AUDIO);
@@ -1093,6 +1347,39 @@ static int recheck_discard_flags(AVFormatContext *s, int first)
     return changed;
 }
 
+static void fill_timing_for_id3_timestamped_stream(struct playlist *pls)
+{
+    if (pls->id3_offset >= 0) {
+        pls->pkt.dts = pls->id3_mpegts_timestamp +
+                                 av_rescale_q(pls->id3_offset,
+                                              pls->ctx->streams[pls->pkt.stream_index]->time_base,
+                                              MPEG_TIME_BASE_Q);
+        if (pls->pkt.duration)
+            pls->id3_offset += pls->pkt.duration;
+        else
+            pls->id3_offset = -1;
+    } else {
+        /* there have been packets with unknown duration
+         * since the last id3 tag, should not normally happen */
+        pls->pkt.dts = AV_NOPTS_VALUE;
+    }
+
+    if (pls->pkt.duration)
+        pls->pkt.duration = av_rescale_q(pls->pkt.duration,
+                                         pls->ctx->streams[pls->pkt.stream_index]->time_base,
+                                         MPEG_TIME_BASE_Q);
+
+    pls->pkt.pts = AV_NOPTS_VALUE;
+}
+
+static AVRational get_timebase(struct playlist *pls)
+{
+    if (pls->is_id3_timestamped)
+        return MPEG_TIME_BASE_Q;
+
+    return pls->ctx->streams[pls->pkt.stream_index]->time_base;
+}
+
 static int hls_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     HLSContext *c = s->priv_data;
@@ -1112,7 +1399,7 @@ start:
         if (pls->needed && !pls->pkt.data) {
             while (1) {
                 int64_t ts_diff;
-                AVStream *st;
+                AVRational tb;
                 ret = av_read_frame(pls->ctx, &pls->pkt);
                 if (ret < 0) {
                     if (!url_feof(&pls->pb) && ret != AVERROR_EOF)
@@ -1120,11 +1407,15 @@ start:
                     reset_packet(&pls->pkt);
                     break;
                 } else {
+                    if (pls->is_id3_timestamped) {
+                        /* audio elementary streams are id3 timestamped */
+                        fill_timing_for_id3_timestamped_stream(pls);
+                    }
+
                     if (c->first_timestamp == AV_NOPTS_VALUE &&
                         pls->pkt.dts       != AV_NOPTS_VALUE)
                         c->first_timestamp = av_rescale_q(pls->pkt.dts,
-                            pls->ctx->streams[pls->pkt.stream_index]->time_base,
-                            AV_TIME_BASE_Q);
+                            get_timebase(pls), AV_TIME_BASE_Q);
                 }
 
                 if (c->seek_timestamp == AV_NOPTS_VALUE)
@@ -1135,9 +1426,9 @@ start:
                     break;
                 }
 
-                st = pls->ctx->streams[pls->pkt.stream_index];
+                tb = get_timebase(pls);
                 ts_diff = av_rescale_rnd(pls->pkt.dts, AV_TIME_BASE,
-                                         st->time_base.den, AV_ROUND_DOWN) -
+                                         tb.den, AV_ROUND_DOWN) -
                           c->seek_timestamp;
                 if (ts_diff >= 0 && (c->seek_flags  & AVSEEK_FLAG_ANY ||
                                      pls->pkt.flags & AV_PKT_FLAG_KEY)) {
@@ -1160,6 +1451,8 @@ start:
                 int64_t mindts  = minpls->pkt.dts;
                 AVStream *st    =    pls->ctx->streams[pls->pkt.stream_index];
                 AVStream *minst = minpls->ctx->streams[minpls->pkt.stream_index];
+                AVRational tb    = get_timebase(   pls);
+                AVRational mintb = get_timebase(minpls);
 
                 if (dts == AV_NOPTS_VALUE) {
                     minplaylist = i;
@@ -1169,8 +1462,8 @@ start:
                     if (minst->start_time != AV_NOPTS_VALUE)
                         mindts -= minst->start_time;
 
-                    if (av_compare_ts(dts, st->time_base,
-                                      mindts, minst->time_base) < 0)
+                    if (av_compare_ts(dts, tb,
+                                      mindts, mintb) < 0)
                         minplaylist = i;
                 }
             }
-- 
1.8.4.5