[FFmpeg-devel] [PATCH] lavf: JSON captions demuxer.

Nicolas George nicolas.george at normalesup.org
Wed Nov 21 13:55:34 CET 2012


TODO version bump.

Signed-off-by: Nicolas George <nicolas.george at normalesup.org>
---
 Changelog                    |    1 +
 doc/demuxers.texi            |   21 +++
 doc/general.texi             |    1 +
 libavformat/Makefile         |    1 +
 libavformat/allformats.c     |    1 +
 libavformat/tedcaptionsdec.c |  356 ++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 381 insertions(+)
 create mode 100644 libavformat/tedcaptionsdec.c

diff --git a/Changelog b/Changelog
index 783c4c3..a460cfb 100644
--- a/Changelog
+++ b/Changelog
@@ -26,6 +26,7 @@ version <next>:
 - remove ffserver daemon mode
 - AST demuxer
 - new expansion syntax for drawtext
+- JSON captions (used in TED talks) decoding support
 
 
 version 1.0:
diff --git a/doc/demuxers.texi b/doc/demuxers.texi
index aea4c54..6482f65 100644
--- a/doc/demuxers.texi
+++ b/doc/demuxers.texi
@@ -184,4 +184,25 @@ the script is directly played, the actual times will match the absolute
 timestamps up to the sound controller's clock accuracy, but if the user
 somehow pauses the playback or seeks, all times will be shifted accordingly.
 
+@section tedcaptions
+
+JSON captions used for @url{http://www.ted.com/, TED Talks}.
+
+TED does not provide links to the captions, but they can be guessed from the
+page. The following bookmarklet can find them:
+@url{javascript:(function()%7bd%3Dwindow.open%28%22%22%2C%22sub%22%2C%22width%3D256%2Cheight%3D512%2Cresizable%3Dyes%2Cscrollbars%3Dyes%22%29.document%3B%20l%3Ddocument.getElementById%28%22languageCode%22%29.getElementsByTagName%28%22option%22%29%3B%20for%28i%3D1%3Bi%3Cl.length%3Bi++%29%7B%20d.body.appendChild%28p%3Dd.createElement%28%22p%22%29%29%3B%20p.appendChild%28a%3Dd.createElement%28%22a%22%29%29%3B%20a.appendChild%28d.createTextNode%28l%5Bi%5D.textContent%29%29%3B%20a.href%3D%22http%3A//www.ted.com/talks/subtitles/id/%22%20+%20talkID+%22/lang/%22+l%5Bi%5D.value%3B%20%7D%20%7d)();void%200, TED Talks captions}.
+
+This demuxer accepts the following option:
+@table @option
+@item start_time
+Set the start time of the TED talk, in milliseconds. The default is 15000
+(15s). It is used to sync the captions with the downloadable videos, because
+they include a 15s intro.
+@end table
+
+Example: convert the captions to a format most players understand:
+@example
+ffmpeg -i http://www.ted.com/talks/subtitles/id/1/lang/en talk1-en.srt
+@end example
+
 @c man end INPUT DEVICES
diff --git a/doc/general.texi b/doc/general.texi
index d947597..1669ab2 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -368,6 +368,7 @@ library:
 @item Sony Wave64 (W64)         @tab   @tab X
 @item SoX native format         @tab X @tab X
 @item SUN AU format             @tab X @tab X
+@item TED Talks captions        @tab   @tab X
 @item Text files                @tab   @tab X
 @item THP                       @tab   @tab X
     @tab Used on the Nintendo GameCube.
diff --git a/libavformat/Makefile b/libavformat/Makefile
index 136ada8..c5b8090 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -340,6 +340,7 @@ OBJS-$(CONFIG_SUBVIEWER_DEMUXER)         += subviewerdec.o
 OBJS-$(CONFIG_SWF_DEMUXER)               += swfdec.o swf.o
 OBJS-$(CONFIG_SWF_MUXER)                 += swfenc.o swf.o
 OBJS-$(CONFIG_TAK_DEMUXER)               += takdec.o apetag.o img2.o rawdec.o
+OBJS-$(CONFIG_TEDCAPTIONS_DEMUXER)       += tedcaptionsdec.o
 OBJS-$(CONFIG_THP_DEMUXER)               += thp.o
 OBJS-$(CONFIG_TIERTEXSEQ_DEMUXER)        += tiertexseq.o
 OBJS-$(CONFIG_MKVTIMESTAMP_V2_MUXER)     += mkvtimestamp_v2.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index eaeb51a..57f628f 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -238,6 +238,7 @@ void av_register_all(void)
     REGISTER_DEMUXER  (SUBVIEWER, subviewer);
     REGISTER_MUXDEMUX (SWF, swf);
     REGISTER_DEMUXER  (TAK, tak);
+    REGISTER_DEMUXER  (TEDCAPTIONS, tedcaptions);
     REGISTER_MUXER    (TG2, tg2);
     REGISTER_MUXER    (TGP, tgp);
     REGISTER_DEMUXER  (THP, thp);
diff --git a/libavformat/tedcaptionsdec.c b/libavformat/tedcaptionsdec.c
new file mode 100644
index 0000000..55b0baa
--- /dev/null
+++ b/libavformat/tedcaptionsdec.c
@@ -0,0 +1,356 @@
+/*
+ * TED Talks captions format decoder
+ * Copyright (c) 2012 Nicolas George
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/bprint.h"
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
+#include "avformat.h"
+#include "internal.h"
+#include "subtitles.h"
+
+/**
+ * Private context of the TED captions demuxer; its size is advertised
+ * through .priv_data_size in ff_tedcaptions_demuxer.
+ */
+typedef struct {
+    AVClass *class;             /* not accessed in this file; presumably set from .priv_class for option handling */
+    int64_t start_time;         /* "start_time" option: offset, in ms, added to each caption pts */
+    FFDemuxSubtitlesQueue subs; /* captions parsed by read_header, served by read_packet */
+} TEDCaptionsDemuxer;
+
+/**
+ * Options exposed by the demuxer.
+ *
+ * start_time: offset, in milliseconds, stored in TEDCaptionsDemuxer.start_time;
+ * the default is 15000.
+ */
+static const AVOption tedcaptions_options[] = {
+    { "start_time", "set the start time (offset) of the subtitles, in ms",
+      offsetof(TEDCaptionsDemuxer, start_time), AV_OPT_TYPE_INT64,
+      { .i64 = 15000 }, INT64_MIN, INT64_MAX,
+      AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM },
+    { NULL },
+};
+
+/** AVClass tying the option table above to the demuxer's private context. */
+static const AVClass tedcaptions_demuxer_class = {
+    .version    = LIBAVUTIL_VERSION_INT,
+    .class_name = "tedcaptions_demuxer",
+    .option     = tedcaptions_options,
+    .item_name  = av_default_item_name,
+};
+
+/* True if amin <= a <= amax. The unsigned subtraction folds both bounds
+ * into one comparison; negative values of a (error codes) never match. */
+#define BETWEEN(a, amin, amax) ((unsigned)((a) - (amin)) <= (amax) - (amin))
+
+/* "| 32" maps ASCII upper-case letters onto lower case. Only a-f are
+ * hexadecimal digits. */
+#define HEX_DIGIT_TEST(c) (BETWEEN(c, '0', '9') || BETWEEN((c) | 32, 'a', 'f'))
+/* Value of a character already validated with HEX_DIGIT_TEST(). */
+#define HEX_DIGIT_VAL(c) ((c) <= '9' ? (c) - '0' : ((c) | 32) - 'a' + 10)
+/* Keep a negative (read error / EOF) code, turn any real byte into a
+ * syntax error. */
+#define ERR_CODE(c) ((c) < 0 ? (c) : AVERROR_INVALIDDATA)
+
+/**
+ * Append the UTF-8 encoding of a code point to a bprint buffer.
+ *
+ * @param bp  destination buffer
+ * @param c   code point to encode
+ */
+static void av_bprint_utf8(AVBPrint *bp, unsigned c)
+{
+    int bytes, i;
+
+    if (c <= 0x7F) {
+        av_bprint_chars(bp, c, 1);
+        return;
+    }
+    /* Number of continuation bytes: 1 for up to 11 significant bits, 2 for
+     * up to 16, 3 for up to 21. With av_log2(c) being the index of the
+     * highest set bit, that is (av_log2(c) - 1) / 5; subtracting 2 instead
+     * would give 1 for U+0800..U+0FFF, which need 2. */
+    bytes = (av_log2(c) - 1) / 5;
+    /* Lead byte: "bytes + 1" high bits set, followed by the top payload bits. */
+    av_bprint_chars(bp, (c >> (bytes * 6)) | ((0xFF80 >> bytes) & 0xFF), 1);
+    /* Continuation bytes: 10xxxxxx, six payload bits each, high bits first. */
+    for (i = bytes - 1; i >= 0; i--)
+        av_bprint_chars(bp, ((c >> (i * 6)) & 0x3F) | 0x80, 1);
+}
+
+/**
+ * Fetch the next input byte into *cur_byte.
+ *
+ * When avio_read() delivers no byte, *cur_byte receives AVERROR_EOF (return
+ * value 0) or the negative value avio_read() returned.
+ */
+static void next_byte(AVIOContext *pb, int *cur_byte)
+{
+    uint8_t byte;
+    int nread = avio_read(pb, &byte, 1);
+
+    if (nread > 0)
+        *cur_byte = byte;
+    else if (nread == 0)
+        *cur_byte = AVERROR_EOF;
+    else
+        *cur_byte = nread;
+}
+
+/**
+ * Advance the input until *cur_byte is something other than a space, tab,
+ * line feed or carriage return.
+ */
+static void skip_spaces(AVIOContext *pb, int *cur_byte)
+{
+    while (1) {
+        int c = *cur_byte;
+
+        if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
+            return;
+        next_byte(pb, cur_byte);
+    }
+}
+
+/**
+ * Skip whitespace, then consume the byte c.
+ *
+ * @return 0 if c was found and consumed, otherwise ERR_CODE() of the byte
+ *         actually present (nothing is consumed in that case)
+ */
+static int expect_byte(AVIOContext *pb, int *cur_byte, uint8_t c)
+{
+    skip_spaces(pb, cur_byte);
+    if (*cur_byte == c) {
+        next_byte(pb, cur_byte);
+        return 0;
+    }
+    return ERR_CODE(*cur_byte);
+}
+
+/**
+ * Parse a JSON string literal into a bprint buffer.
+ *
+ * Leading whitespace is skipped. Backslash escapes are decoded: \uXXXX is
+ * emitted as UTF-8, \b \f \n \r \t become the corresponding control
+ * characters, and any other escaped byte (\" \\ \/ ...) is copied as is.
+ * Each \uXXXX is encoded on its own; UTF-16 surrogate pairs are not
+ * recombined.
+ *
+ * @param pb        input
+ * @param cur_byte  current look-ahead byte, updated as input is consumed
+ * @param bp        output buffer; initialized here, and finalized here if
+ *                  the function fails
+ * @param full      non-zero: initialize bp with size_max -1 and report a
+ *                  truncated result as AVERROR(ENOMEM); zero: initialize bp
+ *                  with size_max 1 and do not check for truncation
+ * @return 0 on success, a negative error code on failure
+ */
+static int parse_string(AVIOContext *pb, int *cur_byte, AVBPrint *bp, int full)
+{
+    int ret;
+
+    av_bprint_init(bp, 0, full ? -1 : 1);
+    ret = expect_byte(pb, cur_byte, '"');
+    if (ret < 0)
+        goto fail;
+    while (*cur_byte > 0 && *cur_byte != '"') {
+        if (*cur_byte == '\\') {
+            next_byte(pb, cur_byte);
+            if (*cur_byte < 0) {
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+            if ((*cur_byte | 32) == 'u') {
+                unsigned chr = 0, i;
+                for (i = 0; i < 4; i++) {
+                    next_byte(pb, cur_byte);
+                    if (!HEX_DIGIT_TEST(*cur_byte)) {
+                        ret = ERR_CODE(*cur_byte);
+                        goto fail;
+                    }
+                    chr = chr * 16 + HEX_DIGIT_VAL(*cur_byte);
+                }
+                av_bprint_utf8(bp, chr);
+            } else {
+                int c = *cur_byte;
+
+                /* Single-letter JSON escapes stand for control characters;
+                 * every other escaped byte stands for itself. */
+                switch (c) {
+                case 'b': c = '\b'; break;
+                case 'f': c = '\f'; break;
+                case 'n': c = '\n'; break;
+                case 'r': c = '\r'; break;
+                case 't': c = '\t'; break;
+                }
+                av_bprint_chars(bp, c, 1);
+            }
+        } else {
+            av_bprint_chars(bp, *cur_byte, 1);
+        }
+        next_byte(pb, cur_byte);
+    }
+    /* The loop also stops on a NUL byte or a read error; both are rejected
+     * here because they are not the closing quote. */
+    ret = expect_byte(pb, cur_byte, '"');
+    if (ret < 0)
+        goto fail;
+    if (full && !av_bprint_is_complete(bp)) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    return 0;
+
+fail:
+    av_bprint_finalize(bp, NULL);
+    return ret;
+}
+
+/**
+ * Parse an object key: a string (stored in bp, parsed in non-"full" mode)
+ * followed by a colon.
+ *
+ * @return 0 on success, a negative error code on failure
+ */
+static int parse_label(AVIOContext *pb, int *cur_byte, AVBPrint *bp)
+{
+    int err = parse_string(pb, cur_byte, bp, 0);
+
+    if (err < 0)
+        return err;
+    err = expect_byte(pb, cur_byte, ':');
+    return err < 0 ? err : 0;
+}
+
+/**
+ * Parse the literal "false" or "true" after optional whitespace.
+ *
+ * @param result  receives 0 for "false" and 1 for "true"; left untouched on
+ *                failure
+ * @return 0 on success, AVERROR_INVALIDDATA otherwise
+ */
+static int parse_boolean(AVIOContext *pb, int *cur_byte, int *result)
+{
+    static const char *const literals[] = { "false", "true" };
+    const char *lit;
+    int value;
+
+    skip_spaces(pb, cur_byte);
+    /* The first character alone selects which literal must follow. */
+    if (*cur_byte == 'f')
+        value = 0;
+    else if (*cur_byte == 't')
+        value = 1;
+    else
+        return AVERROR_INVALIDDATA;
+
+    for (lit = literals[value]; *lit; lit++) {
+        if (*cur_byte != *lit)
+            return AVERROR_INVALIDDATA;
+        next_byte(pb, cur_byte);
+    }
+    /* Reject a literal immediately followed by another letter ("truex"). */
+    if (BETWEEN(*cur_byte | 32, 'a', 'z'))
+        return AVERROR_INVALIDDATA;
+    *result = value;
+    return 0;
+}
+
+/**
+ * Parse a non-negative decimal integer after optional whitespace.
+ *
+ * @param result  receives the parsed value; left untouched on failure
+ * @return 0 on success, AVERROR_INVALIDDATA if no digit is present or the
+ *         value does not fit in an int64_t
+ */
+static int parse_int(AVIOContext *pb, int *cur_byte, int64_t *result)
+{
+    int64_t val = 0;
+
+    skip_spaces(pb, cur_byte);
+    if (!BETWEEN(*cur_byte, '0', '9'))
+        return AVERROR_INVALIDDATA;
+    while (BETWEEN(*cur_byte, '0', '9')) {
+        int digit = *cur_byte - '0';
+
+        /* Refuse values that would overflow: signed overflow is undefined
+         * behaviour and the input is untrusted. */
+        if (val > (INT64_MAX - digit) / 10)
+            return AVERROR_INVALIDDATA;
+        val = val * 10 + digit;
+        next_byte(pb, cur_byte);
+    }
+    *result = val;
+    return 0;
+}
+
+/**
+ * Parse a whole captions file and queue one packet per caption.
+ *
+ * The expected layout is an object whose single key "captions" holds a
+ * non-empty array of objects; each of those may carry the keys
+ * "startOfParagraph" (boolean, parsed and ignored), "content" (string),
+ * "startTime" and "duration" (integers). "content", "startTime" and
+ * "duration" are mandatory; any other key is a syntax error.
+ *
+ * Packets already inserted into subs are left there on failure; the caller
+ * is responsible for cleaning the queue.
+ *
+ * @param pb    input
+ * @param subs  queue receiving the captions
+ * @return 0 on success, a negative error code on failure
+ */
+static int parse_file(AVIOContext *pb, FFDemuxSubtitlesQueue *subs)
+{
+    int ret, cur_byte, start_of_par;
+    int have_content = 0; /* content holds a parsed string not yet finalized */
+    AVBPrint label, content;
+    int64_t pos, start, duration;
+    AVPacket *pkt;
+
+    next_byte(pb, &cur_byte);
+    ret = expect_byte(pb, &cur_byte, '{');
+    if (ret < 0)
+        return AVERROR_INVALIDDATA;
+    ret = parse_label(pb, &cur_byte, &label);
+    if (ret < 0 || strcmp(label.str, "captions"))
+        return AVERROR_INVALIDDATA;
+    ret = expect_byte(pb, &cur_byte, '[');
+    if (ret < 0)
+        return AVERROR_INVALIDDATA;
+    while (1) {
+        start = duration = AV_NOPTS_VALUE;
+        ret = expect_byte(pb, &cur_byte, '{');
+        if (ret < 0)
+            goto fail;
+        /* cur_byte is one byte ahead of the '{' that was just consumed. */
+        pos = avio_tell(pb) - 1;
+        while (1) {
+            ret = parse_label(pb, &cur_byte, &label);
+            if (ret < 0)
+                goto fail;
+            if (!strcmp(label.str, "startOfParagraph")) {
+                ret = parse_boolean(pb, &cur_byte, &start_of_par);
+                if (ret < 0)
+                    goto fail;
+            } else if (!strcmp(label.str, "content")) {
+                /* A repeated key replaces the earlier value; release the
+                 * earlier buffer before it is re-initialized. */
+                if (have_content) {
+                    av_bprint_finalize(&content, NULL);
+                    have_content = 0;
+                }
+                ret = parse_string(pb, &cur_byte, &content, 1);
+                if (ret < 0)
+                    goto fail; /* parse_string() finalized content itself */
+                have_content = 1;
+            } else if (!strcmp(label.str, "startTime")) {
+                ret = parse_int(pb, &cur_byte, &start);
+                if (ret < 0)
+                    goto fail;
+            } else if (!strcmp(label.str, "duration")) {
+                ret = parse_int(pb, &cur_byte, &duration);
+                if (ret < 0)
+                    goto fail;
+            } else {
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+            skip_spaces(pb, &cur_byte);
+            if (cur_byte != ',')
+                break;
+            next_byte(pb, &cur_byte);
+        }
+        ret = expect_byte(pb, &cur_byte, '}');
+        if (ret < 0)
+            goto fail;
+
+        if (!have_content || start == AV_NOPTS_VALUE ||
+            duration == AV_NOPTS_VALUE) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+        pkt = ff_subtitles_queue_insert(subs, content.str, content.len, 0);
+        if (!pkt) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        pkt->pos      = pos;
+        pkt->dts      =
+        pkt->pts      = start;
+        pkt->duration = duration;
+        av_bprint_finalize(&content, NULL);
+        have_content = 0;
+
+        skip_spaces(pb, &cur_byte);
+        if (cur_byte != ',')
+            break;
+        next_byte(pb, &cur_byte);
+    }
+    /* have_content is 0 from here on, so plain returns leak nothing. */
+    ret = expect_byte(pb, &cur_byte, ']');
+    if (ret < 0)
+        return ret;
+    ret = expect_byte(pb, &cur_byte, '}');
+    if (ret < 0)
+        return ret;
+    skip_spaces(pb, &cur_byte);
+    if (cur_byte != AVERROR_EOF)
+        return ERR_CODE(cur_byte);
+    return 0;
+
+fail:
+    if (have_content)
+        av_bprint_finalize(&content, NULL);
+    return ret;
+}
+
+/**
+ * Read the whole file, build the caption queue and create the single
+ * subtitle stream (text codec, millisecond time base).
+ *
+ * On any failure the caption queue is cleaned before returning.
+ *
+ * @return 0 on success, a negative error code on failure
+ */
+static av_cold int tedcaptions_read_header(AVFormatContext *avf)
+{
+    TEDCaptionsDemuxer *tc = avf->priv_data;
+    AVStream *st;
+    int ret, i;
+    AVPacket *last;
+
+    ret = parse_file(avf->pb, &tc->subs);
+    if (ret < 0) {
+        if (ret == AVERROR_INVALIDDATA)
+            av_log(avf, AV_LOG_ERROR, "Syntax error near offset %"PRId64".\n",
+                   avio_tell(avf->pb));
+        ff_subtitles_queue_clean(&tc->subs);
+        return ret;
+    }
+    ff_subtitles_queue_finalize(&tc->subs);
+    /* Apply the user-selected offset. Only pts is shifted here.
+     * NOTE(review): dts keeps the unshifted value set by parse_file();
+     * confirm that ff_subtitles_queue_read_packet() does not rely on it. */
+    for (i = 0; i < tc->subs.nb_subs; i++)
+        tc->subs.subs[i].pts += tc->start_time;
+
+    /* parse_file() returns 0 only after at least one
+     * ff_subtitles_queue_insert() call, so the queue is not expected to be
+     * empty here. */
+    last = &tc->subs.subs[tc->subs.nb_subs - 1];
+    st = avformat_new_stream(avf, NULL);
+    if (!st) {
+        /* Release the queue here, as the parse-failure path above does. */
+        ff_subtitles_queue_clean(&tc->subs);
+        return AVERROR(ENOMEM);
+    }
+    st->codec->codec_type     = AVMEDIA_TYPE_SUBTITLE;
+    st->codec->codec_id       = AV_CODEC_ID_TEXT;
+    avpriv_set_pts_info(st, 64, 1, 1000);
+    st->probe_packets = 0;
+    st->start_time    = 0;
+    st->duration      = last->pts + last->duration;
+    st->cur_dts       = 0;
+
+    return 0;
+}
+
+/** Hand out the next queued caption through ff_subtitles_queue_read_packet(). */
+static int tedcaptions_read_packet(AVFormatContext *avf, AVPacket *packet)
+{
+    FFDemuxSubtitlesQueue *queue = &((TEDCaptionsDemuxer *)avf->priv_data)->subs;
+
+    return ff_subtitles_queue_read_packet(queue, packet);
+}
+
+/** Release the caption queue; always returns 0. */
+static int tedcaptions_read_close(AVFormatContext *avf)
+{
+    FFDemuxSubtitlesQueue *queue = &((TEDCaptionsDemuxer *)avf->priv_data)->subs;
+
+    ff_subtitles_queue_clean(queue);
+    return 0;
+}
+
+/**
+ * Probe for TED captions: the buffer must start (after whitespace) with '{';
+ * the score then depends on how many of the known keys occur followed by a
+ * colon.
+ *
+ * @return AVPROBE_SCORE_MAX if all keys are found, AVPROBE_SCORE_MAX / 2 if
+ *         at least one is found, 0 otherwise
+ */
+static av_cold int tedcaptions_read_probe(AVProbeData *p)
+{
+    static const char *const tags[] = {
+        "\"captions\"", "\"duration\"", "\"content\"",
+        "\"startOfParagraph\"", "\"startTime\"",
+    };
+    const char *buf = (const char *)p->buf;
+    unsigned idx, matched = 0;
+
+    if (buf[strspn(buf, " \t\r\n")] != '{')
+        return 0;
+    for (idx = 0; idx < FF_ARRAY_ELEMS(tags); idx++) {
+        /* Only the first occurrence of each key is examined. */
+        const char *after = strstr(buf, tags[idx]);
+
+        if (!after)
+            continue;
+        after += strlen(tags[idx]);
+        after += strspn(after, " \t\r\n");
+        if (*after == ':')
+            matched++;
+    }
+    if (matched == FF_ARRAY_ELEMS(tags))
+        return AVPROBE_SCORE_MAX;
+    if (matched)
+        return AVPROBE_SCORE_MAX / 2;
+    return 0;
+}
+
+/** Demuxer description for the "tedcaptions" input format. */
+AVInputFormat ff_tedcaptions_demuxer = {
+    .name           = "tedcaptions",
+    .long_name      = NULL_IF_CONFIG_SMALL("TED Talks captions"),
+    .priv_class     = &tedcaptions_demuxer_class,
+    .priv_data_size = sizeof(TEDCaptionsDemuxer),
+    .read_probe     = tedcaptions_read_probe,
+    .read_header    = tedcaptions_read_header,
+    .read_packet    = tedcaptions_read_packet,
+    .read_close     = tedcaptions_read_close,
+};
-- 
1.7.10.4



More information about the ffmpeg-devel mailing list