[FFmpeg-devel] [PATCH] lavf: JSON captions demuxer.

Nicolas George nicolas.george at normalesup.org
Tue Nov 20 20:40:43 CET 2012


TODO version bump.

Signed-off-by: Nicolas George <nicolas.george at normalesup.org>
---
 Changelog                     |    1 +
 doc/demuxers.texi             |   21 +++
 libavformat/Makefile          |    1 +
 libavformat/allformats.c      |    1 +
 libavformat/jsoncaptionsdec.c |  350 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 374 insertions(+)
 create mode 100644 libavformat/jsoncaptionsdec.c

diff --git a/Changelog b/Changelog
index 783c4c3..a460cfb 100644
--- a/Changelog
+++ b/Changelog
@@ -26,6 +26,7 @@ version <next>:
 - remove ffserver daemon mode
 - AST demuxer
 - new expansion syntax for drawtext
+- JSON captions (used in TED talks) decoding support
 
 
 version 1.0:
diff --git a/doc/demuxers.texi b/doc/demuxers.texi
index aea4c54..7b752c1 100644
--- a/doc/demuxers.texi
+++ b/doc/demuxers.texi
@@ -184,4 +184,25 @@ the script is directly played, the actual times will match the absolute
 timestamps up to the sound controller's clock accuracy, but if the user
 somehow pauses the playback or seeks, all times will be shifted accordingly.
 
+@section jsoncaptions
+
+JSON captions used for @url{http://www.ted.com/, TED Talks}.
+
+TED does not provide links to the captions, but they can be guessed from the
+page. The following bookmarklet can find them:
+@url{javascript:(function()%7bd%3Dwindow.open%28%22%22%2C%22sub%22%2C%22width%3D256%2Cheight%3D512%2Cresizable%3Dyes%2Cscrollbars%3Dyes%22%29.document%3B%20l%3Ddocument.getElementById%28%22languageCode%22%29.getElementsByTagName%28%22option%22%29%3B%20for%28i%3D1%3Bi%3Cl.length%3Bi++%29%7B%20d.body.appendChild%28p%3Dd.createElement%28%22p%22%29%29%3B%20p.appendChild%28a%3Dd.createElement%28%22a%22%29%29%3B%20a.appendChild%28d.createTextNode%28l%5Bi%5D.textContent%29%29%3B%20a.href%3D%22http%3A//www.ted.com/talks/subtitles/id/%22%20+%20talkID+%22/lang/%22+l%5Bi%5D.value%3B%20%7D%20%7d)();void%200, TED Talks captions}.
+
+This demuxer accepts the following option:
+@table @option
+@item start_time
+Set the start time of the TED talk, in milliseconds. The default is 15s.
+It is used to sync the captions with the downloadable videos, because they
+include a 15s intro.
+@end table
+
+Example: convert the captions to a format most players understand:
+@example
+ffmpeg -i http://www.ted.com/talks/subtitles/id/1/lang/en talk1-en.srt
+@end example
+
 @c man end INPUT DEVICES
diff --git a/libavformat/Makefile b/libavformat/Makefile
index 136ada8..6537307 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -149,6 +149,7 @@ OBJS-$(CONFIG_IVF_DEMUXER)               += ivfdec.o
 OBJS-$(CONFIG_IVF_MUXER)                 += ivfenc.o
 OBJS-$(CONFIG_JACOSUB_DEMUXER)           += jacosubdec.o
 OBJS-$(CONFIG_JACOSUB_MUXER)             += jacosubenc.o rawenc.o
+OBJS-$(CONFIG_JSONCAPTIONS_DEMUXER)      += jsoncaptionsdec.o
 OBJS-$(CONFIG_JV_DEMUXER)                += jvdec.o
 OBJS-$(CONFIG_LATM_DEMUXER)              += rawdec.o
 OBJS-$(CONFIG_LATM_MUXER)                += latmenc.o rawenc.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index d08c134..c292ada 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -131,6 +131,7 @@ void av_register_all(void)
     REGISTER_DEMUXER  (IV8, iv8);
     REGISTER_MUXDEMUX (IVF, ivf);
     REGISTER_MUXDEMUX (JACOSUB, jacosub);
+    REGISTER_DEMUXER  (JSONCAPTIONS, jsoncaptions);
     REGISTER_DEMUXER  (JV, jv);
     REGISTER_MUXDEMUX (LATM, latm);
     REGISTER_DEMUXER  (LMLM4, lmlm4);
diff --git a/libavformat/jsoncaptionsdec.c b/libavformat/jsoncaptionsdec.c
new file mode 100644
index 0000000..a6b496b
--- /dev/null
+++ b/libavformat/jsoncaptionsdec.c
@@ -0,0 +1,350 @@
+/*
+ * JSON captions format decoder
+ * Copyright (c) 2012 Nicolas George
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/bprint.h"
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
+#include "avformat.h"
+#include "internal.h"
+#include "subtitles.h"
+
+typedef struct {                 /* private data of the JSON captions demuxer */
+    AVClass *class;              /* AVClass pointer, first member of the context */
+    int64_t start_time;          /* "start_time" option, in ms; added to every pts */
+    FFDemuxSubtitlesQueue subs;  /* captions queued by parse_file() */
+} JSONCaptionsDemuxer;
+
+static const AVOption json_captions_options[] = { /* demuxer private options */
+    { "start_time", "set the start time (offset) of the subtitles, in ms",
+      offsetof(JSONCaptionsDemuxer, start_time), AV_OPT_TYPE_INT64,
+      { .i64 = 15000 }, INT64_MIN, INT64_MAX, /* default: 15 s, in ms */
+      AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM },
+    { NULL },
+};
+
+static const AVClass json_captions_demuxer_class = { /* used as .priv_class */
+    .class_name = "json_captions_demuxer",
+    .item_name  = av_default_item_name,
+    .option     = json_captions_options,  /* the "start_time" option table */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+#define HEX_DIGIT_TEST(c) ( (unsigned)(c)       - '0' <= 9 || \
+                           ((unsigned)(c) | 32) - 'a' <= 5) /* [0-9a-fA-F] */
+#define HEX_DIGIT_VAL(c) ((c) <= '9' ? (c) - '0' : ((c) | 32) - 'a' + 10)
+#define ERR_CODE(c) ((c) < 0 ? (c) : AVERROR_INVALIDDATA) /* keep I/O errors */
+
+/* Append c to bp as UTF-8 (1, 2, 3 trailing bytes for <= 11, 16, 21 bits). */
+static void av_bprint_utf8(AVBPrint *bp, unsigned c)
+{
+    int i, bytes = c <= 0x7F ? 0 : (av_log2(c) - 1) / 5;
+
+    if (!bytes) { /* ASCII is stored as is */
+        av_bprint_chars(bp, c, 1);
+        return;
+    }
+    av_bprint_chars(bp, (c >> (bytes * 6)) | ((0xFF80 >> bytes) & 0xFF), 1);
+    for (i = bytes - 1; i >= 0; i--)
+        av_bprint_chars(bp, ((c >> (i * 6)) & 0x3F) | 0x80, 1);
+}
+
+static void next_byte(AVIOContext *pb, int *cur_byte)
+{
+    uint8_t ch; /* next input byte; *cur_byte gets it, or a negative error */
+    int got = avio_read(pb, &ch, 1);
+    *cur_byte = got < 0 ? got : got ? ch : AVERROR_EOF;
+}
+
+static void skip_spaces(AVIOContext *pb, int *cur_byte)
+{
+    int c; /* skip JSON whitespace: stop at the first other byte or error */
+    while ((c = *cur_byte) == ' ' || c == '\t' || c == '\n' || c == '\r')
+        next_byte(pb, cur_byte);
+}
+
+static int expect_byte(AVIOContext *pb, int *cur_byte, uint8_t c)
+{
+    int match; /* after optional spaces, consume c (0) or report an error */
+    skip_spaces(pb, cur_byte);
+    if ((match = (*cur_byte == c)))
+        next_byte(pb, cur_byte);
+    return match ? 0 : ERR_CODE(*cur_byte);
+}
+
+static int parse_string(AVIOContext *pb, int *cur_byte, AVBPrint *bp, int full)
+{
+    /* Decode a JSON string into bp, which is finalized here only on failure.
+     * Each \uXXXX is encoded on its own: surrogate pairs are not combined. */
+    static const char names[] = "bfnrt", ctrl[] = "\b\f\n\r\t";
+    const char *n;
+    int ret;
+
+    av_bprint_init(bp, 0, full ? -1 : 1);
+    if ((ret = expect_byte(pb, cur_byte, '"')) < 0)
+        goto end;
+    while (*cur_byte > 0 && *cur_byte != '"') {
+        if (*cur_byte == '\\') {
+            next_byte(pb, cur_byte);
+            if (*cur_byte < 0) {
+                ret = AVERROR_INVALIDDATA;
+                goto end;
+            }
+            if ((*cur_byte | 32) == 'u') {
+                unsigned chr = 0, i;
+                for (i = 0; i < 4; i++) {
+                    next_byte(pb, cur_byte);
+                    if (!HEX_DIGIT_TEST(*cur_byte)) {
+                        ret = ERR_CODE(*cur_byte);
+                        goto end;
+                    }
+                    chr = chr * 16 + HEX_DIGIT_VAL(*cur_byte);
+                }
+                av_bprint_utf8(bp, chr);
+            } else {
+                for (n = names; *n && *n != *cur_byte; n++)
+                    ; /* look up \b \f \n \r \t; others stand for themselves */
+                av_bprint_chars(bp, *n ? ctrl[n - names] : *cur_byte, 1);
+            }
+        } else {
+            av_bprint_chars(bp, *cur_byte, 1);
+        }
+        next_byte(pb, cur_byte);
+    }
+    ret = expect_byte(pb, cur_byte, '"');
+    if (ret >= 0 && full && !av_bprint_is_complete(bp))
+        ret = AVERROR(ENOMEM);
+end:
+    if (ret < 0)
+        av_bprint_finalize(bp, NULL);
+    return ret;
+}
+
+/*
+ * Parse an object key and the ':' that follows it. The key is left in bp,
+ * set up by parse_string() in non-full mode. Returns 0 or a negative error.
+ */
+static int parse_label(AVIOContext *pb, int *cur_byte, AVBPrint *bp)
+{
+    int err = parse_string(pb, cur_byte, bp, 0);
+
+    if (err >= 0)
+        err = expect_byte(pb, cur_byte, ':');
+    return err < 0 ? err : 0;
+}
+
+/* Parse "true"/"false" into *result (1/0); reject a letter right after. */
+static int parse_boolean(AVIOContext *pb, int *cur_byte, int *result)
+{
+    const char *word;
+    int value;
+
+    skip_spaces(pb, cur_byte);
+    value = *cur_byte == 't';
+    if (!value && *cur_byte != 'f')
+        return AVERROR_INVALIDDATA;
+    for (word = value ? "true" : "false"; *word; word++) {
+        if (*cur_byte != *word)
+            return AVERROR_INVALIDDATA;
+        next_byte(pb, cur_byte);
+    }
+    if ((((unsigned)*cur_byte) | 32) - 'a' <= 'z' - 'a')
+        return AVERROR_INVALIDDATA;
+    *result = value;
+    return 0;
+}
+
+static int parse_int(AVIOContext *pb, int *cur_byte, int64_t *result)
+{
+    int64_t val = 0; /* decimal, non-negative; overflow is a syntax error */
+    skip_spaces(pb, cur_byte);
+    if ((unsigned)*cur_byte - '0' > 9)
+        return AVERROR_INVALIDDATA;
+    for (; (unsigned)*cur_byte - '0' <= 9; next_byte(pb, cur_byte)) {
+        if (val > (INT64_MAX - (*cur_byte - '0')) / 10)
+            return AVERROR_INVALIDDATA; /* val * 10 + digit would overflow */
+        val = val * 10 + (*cur_byte - '0');
+    }
+    *result = val;
+    return 0;
+}
+
+static int parse_file(AVIOContext *pb, FFDemuxSubtitlesQueue *subs)
+{
+    /* Parse {"captions":[{...},...]} and queue one packet per caption;
+     * have_content tells whether 'content' owns a buffer to release. */
+    int ret, cur_byte, start_of_par, have_content = 0;
+    AVBPrint label, content;
+    int64_t pos, start, duration;
+    AVPacket *pkt;
+
+    next_byte(pb, &cur_byte);
+    if (expect_byte(pb, &cur_byte, '{') < 0 ||
+        parse_label(pb, &cur_byte, &label) < 0 ||
+        strcmp(label.str, "captions") ||
+        expect_byte(pb, &cur_byte, '[') < 0)
+        return AVERROR_INVALIDDATA;
+    while (1) {
+        start = duration = AV_NOPTS_VALUE;
+        ret = expect_byte(pb, &cur_byte, '{');
+        if (ret < 0)
+            return ret;
+        pos = avio_tell(pb) - 1;
+        while (1) {
+            ret = parse_label(pb, &cur_byte, &label);
+            if (ret < 0)
+                goto fail;
+            if (!strcmp(label.str, "startOfParagraph")) {
+                ret = parse_boolean(pb, &cur_byte, &start_of_par);
+            } else if (!strcmp(label.str, "content")) {
+                if (have_content) /* repeated key: drop the earlier text */
+                    av_bprint_finalize(&content, NULL);
+                ret = parse_string(pb, &cur_byte, &content, 1);
+                have_content = ret >= 0; /* on error it is already finalized */
+            } else if (!strcmp(label.str, "startTime")) {
+                ret = parse_int(pb, &cur_byte, &start);
+            } else if (!strcmp(label.str, "duration")) {
+                ret = parse_int(pb, &cur_byte, &duration);
+            } else {
+                ret = AVERROR_INVALIDDATA;
+            }
+            if (ret < 0)
+                goto fail;
+            skip_spaces(pb, &cur_byte);
+            if (cur_byte != ',')
+                break;
+            next_byte(pb, &cur_byte);
+        }
+        ret = expect_byte(pb, &cur_byte, '}');
+        if (ret < 0)
+            goto fail;
+
+        if (!have_content || start == AV_NOPTS_VALUE ||
+            duration == AV_NOPTS_VALUE) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+        pkt = ff_subtitles_queue_insert(subs, content.str, content.len, 0);
+        /* content is no longer needed once it has been handed to the queue */
+        av_bprint_finalize(&content, NULL);
+        have_content = 0;
+        if (!pkt)
+            return AVERROR(ENOMEM);
+        pkt->pos      = pos;
+        pkt->dts      =
+        pkt->pts      = start;
+        pkt->duration = duration;
+
+        skip_spaces(pb, &cur_byte);
+        if (cur_byte != ',')
+            break;
+        next_byte(pb, &cur_byte);
+    }
+    if ((ret = expect_byte(pb, &cur_byte, ']')) < 0 ||
+        (ret = expect_byte(pb, &cur_byte, '}')) < 0)
+        return ret;
+    skip_spaces(pb, &cur_byte);
+    if (cur_byte != AVERROR_EOF)
+        return ERR_CODE(cur_byte);
+    return 0;
+
+fail:
+    if (have_content)
+        av_bprint_finalize(&content, NULL);
+    return ret;
+}
+
+static av_cold int json_captions_read_header(AVFormatContext *avf)
+{
+    /* Parse the whole input into the queue and expose it as one text stream;
+     * the queue is released on every failure path. */
+    JSONCaptionsDemuxer *jc = avf->priv_data;
+    AVStream *st = NULL;
+    int ret, i;
+    AVPacket *last;
+
+    ret = parse_file(avf->pb, &jc->subs);
+    if (ret == AVERROR_INVALIDDATA)
+        av_log(avf, AV_LOG_ERROR, "Syntax error near offset %"PRId64".\n",
+               avio_tell(avf->pb));
+    if (ret >= 0 && !(st = avformat_new_stream(avf, NULL)))
+        ret = AVERROR(ENOMEM);
+    if (ret < 0) {
+        ff_subtitles_queue_clean(&jc->subs);
+        return ret;
+    }
+    ff_subtitles_queue_finalize(&jc->subs);
+    for (i = 0; i < jc->subs.nb_subs; i++) /* shift dts along with pts */
+        jc->subs.subs[i].dts = jc->subs.subs[i].pts += jc->start_time;
+
+    last = &jc->subs.subs[jc->subs.nb_subs - 1];
+    st->codec->codec_type     = AVMEDIA_TYPE_SUBTITLE;
+    st->codec->codec_id       = AV_CODEC_ID_TEXT;
+    avpriv_set_pts_info(st, 64, 1, 1000);
+    st->probe_packets = 0;
+    st->start_time    = 0;
+    st->duration      = last->pts + last->duration;
+    st->cur_dts       = 0;
+
+    return 0;
+}
+
+/* Delegate to the subtitles queue that read_header filled. */
+static int json_captions_read_packet(AVFormatContext *avf, AVPacket *packet)
+{
+    JSONCaptionsDemuxer *ctx = avf->priv_data;
+    return ff_subtitles_queue_read_packet(&ctx->subs, packet);
+}
+
+/* Clean the caption queue held in the demuxer context; always returns 0. */
+static int json_captions_read_close(AVFormatContext *avf)
+{
+    JSONCaptionsDemuxer *ctx = avf->priv_data;
+    ff_subtitles_queue_clean(&ctx->subs);
+    return 0;
+}
+
+static av_cold int json_captions_read_probe(AVProbeData *p)
+{
+    FFDemuxSubtitlesQueue subs = { 0 };
+    AVIOContext *input = avio_alloc_context(p->buf, p->buf_size, 0,
+                                            NULL, NULL, NULL, NULL);
+    int score = 0; /* 0 unless a caption was parsed, fully or up to buffer end */
+
+    if (!input)
+        return 0;
+    if (!parse_file(input, &subs) || avio_tell(input) == p->buf_size)
+        score = subs.nb_subs > 1 ? AVPROBE_SCORE_MAX :
+                subs.nb_subs     ? AVPROBE_SCORE_MAX / 2 : 0;
+    ff_subtitles_queue_clean(&subs);
+    av_free(input);
+    return score;
+}
+
+AVInputFormat ff_jsoncaptions_demuxer = { /* see REGISTER_DEMUXER (JSONCAPTIONS, jsoncaptions) */
+    .name           = "json_captions", /* the doc section is titled "jsoncaptions" */
+    .long_name      = NULL_IF_CONFIG_SMALL("JSON captions (used in TED talks)"),
+    .priv_data_size = sizeof(JSONCaptionsDemuxer),
+    .read_header    = json_captions_read_header, /* parses the whole file */
+    .read_packet    = json_captions_read_packet,
+    .read_close     = json_captions_read_close,
+    .read_probe     = json_captions_read_probe,
+    .priv_class     = &json_captions_demuxer_class, /* carries the options */
+};
-- 
1.7.10.4



More information about the ffmpeg-devel mailing list