[FFmpeg-devel] [PATCH 2/2] lavf: youtube support

Lukasz Marek lukasz.m.luki at gmail.com
Fri Aug 23 23:45:57 CEST 2013


Signed-off-by: Lukasz Marek <lukasz.m.luki at gmail.com>
---
 Changelog                |    1 +
 MAINTAINERS              |    1 +
 configure                |    1 +
 doc/protocols.texi       |   45 +++++
 libavformat/Makefile     |    1 +
 libavformat/allformats.c |    1 +
 libavformat/youtube.c    |  463 ++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 513 insertions(+), 0 deletions(-)
 create mode 100644 libavformat/youtube.c

diff --git a/Changelog b/Changelog
index 4a6c60c..22de825 100644
--- a/Changelog
+++ b/Changelog
@@ -15,6 +15,7 @@ version <next>
   data read from an input file
 - incomplete Voxware MetaSound decoder
 - read EXIF metadata from JPEG
+- YouTube support
 
 
 version 2.0:
diff --git a/MAINTAINERS b/MAINTAINERS
index 098b430..6b21360 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -433,6 +433,7 @@ Protocols:
   http.c                                Ronald S. Bultje
   mms*.c                                Ronald S. Bultje
   udp.c                                 Luca Abeni
+  youtube.c                             Lukasz Marek
 
 
 libswresample
diff --git a/configure b/configure
index ad9e03d..0754853 100755
--- a/configure
+++ b/configure
@@ -2171,6 +2171,7 @@ tls_protocol_select="tcp_protocol"
 udp_protocol_select="network"
 unix_protocol_deps="sys_un_h"
 unix_protocol_select="network"
+youtube_protocol_select="http_protocol"
 
 # filters
 aconvert_filter_deps="swresample"
diff --git a/doc/protocols.texi b/doc/protocols.texi
index 5c43f01..630e2f6 100644
--- a/doc/protocols.texi
+++ b/doc/protocols.texi
@@ -970,4 +970,49 @@ Timeout in ms.
 Create the Unix socket in listening mode.
 @end table
 
+@section youtube
+
+YouTube service protocol.
+
+Allows reading media stored on the YouTube service.
+
+The accepted options are:
+@table @option
+@item timeout
+Set timeout of socket I/O operations used by the underlying low level
+operation. By default it is set to -1, which means that the timeout is
+not specified.
+@item youtube-stream
+Select the preferred stream from all those available for a given @var{YouTubeURL}.
+Different @var{YouTubeURL}s may offer different sets of streams.
+If not provided, the first available stream is used (usually the best available quality).
+The list of all available streams is displayed when no preferred stream is selected.
+
+Allowed values:
+@table @samp
+@item 144p, 240p, 270p, 360p, 480p, 520p, 720p, 1080p, 3072p
+@item itag=@var{number}
+@end table
+@end table
+
+Accepted URL formats:
+@example
+youtube:@var{YouTubeURL}
+youtube+@var{YouTubeURL}
+@end example
+
+Examples:
+
+Play video with 720p quality:
+@example
+ffplay -youtube-stream 720p youtube:http://www.youtube.com/watch?v=@var{VideoID}
+@end example
+
+Reencode video from flv into mp4 container:
+@example
+ffmpeg -youtube-stream "itag=34" -i youtube:http://www.youtube.com/watch?v=@var{VideoID} -strict -2 output.mp4
+@end example
+
+NOTE: This protocol is EXPERIMENTAL.
+
 @c man end PROTOCOLS
diff --git a/libavformat/Makefile b/libavformat/Makefile
index 35d49f7..24ab5a6 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -458,6 +458,7 @@ OBJS-$(CONFIG_TCP_PROTOCOL)              += tcp.o
 OBJS-$(CONFIG_TLS_PROTOCOL)              += tls.o
 OBJS-$(CONFIG_UDP_PROTOCOL)              += udp.o
 OBJS-$(CONFIG_UNIX_PROTOCOL)             += unix.o
+OBJS-$(CONFIG_YOUTUBE_PROTOCOL)          += youtube.o
 
 SKIPHEADERS-$(CONFIG_FFRTMPCRYPT_PROTOCOL) += rtmpdh.h
 SKIPHEADERS-$(CONFIG_NETWORK)            += network.h rtsp.h
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index b3b2a3b..4ef8cfe 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -336,6 +336,7 @@ void av_register_all(void)
     REGISTER_PROTOCOL(TLS,              tls);
     REGISTER_PROTOCOL(UDP,              udp);
     REGISTER_PROTOCOL(UNIX,             unix);
+    REGISTER_PROTOCOL(YOUTUBE,          youtube);
 
     /* external libraries */
     REGISTER_DEMUXER (LIBGME,           libgme);
diff --git a/libavformat/youtube.c b/libavformat/youtube.c
new file mode 100644
index 0000000..ba6d977
--- /dev/null
+++ b/libavformat/youtube.c
@@ -0,0 +1,463 @@
+/*
+ * Copyright (c) 2013 Lukasz Marek <lukasz.m.luki at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avstring.h"
+#include "avformat.h"
+#include "url.h"
+#include "libavutil/opt.h"
+#include "libavutil/bprint.h"
+#include "libavutil/log.h"
+
+/* Static description of one stream format, identified by its "itag" number. */
+typedef struct {
+    int itag;                       /* numeric itag; 0 marks the end of the format table */
+    const char *video_resolution1;  /* resolution label such as "720p"; may be NULL */
+    const char *video_resolution2;  /* alternative resolution label; may be NULL */
+    const char *container;          /* container name shown in the stream listing */
+    const char *video_codec;        /* video codec name shown in the stream listing */
+    const char *audio_codec;        /* audio codec name shown in the stream listing */
+} YouTubeFormat;
+
+/*
+ * Table of known formats, used to translate a resolution label into an itag
+ * and to describe the streams in the log output.
+ * Several entries may share a resolution label; lookups by label walk the
+ * table in order. The all-zero entry terminates the table.
+ */
+static YouTubeFormat youtube_formats[] = {
+    { 5,   "240p",   NULL,   "FLV",  "Sorenson H.263", "MP3" },
+    { 6,   "270p",   NULL,   "FLV",  "Sorenson H.263", "MP3" },
+    { 13,  NULL,     NULL,   "3GP",  "MPEG-4 Visual",  "AAC" },
+    { 17,  "144p",   NULL,   "3GP",  "MPEG-4 Visual",  "AAC" },
+    { 18,  "270p",   "360p", "MP4",  "H.264",          "AAC" },
+    { 22,  "720p",   NULL,   "MP4",  "H.264",          "AAC" },
+    { 34,  "360p",   NULL,   "FLV",  "H.264",          "AAC" },
+    { 35,  "480p",   NULL,   "FLV",  "H.264",          "AAC" },
+    { 36,  "240p",   NULL,   "3GP",  "MPEG-4 Visual",  "AAC" },
+    { 37,  "1080p",  NULL,   "MP4",  "H.264",          "AAC" },
+    { 38,  "3072p",  NULL,   "MP4",  "H.264",          "AAC" },
+    { 43,  "360p",   NULL,   "WebM", "VP8",            "Vorbis" },
+    { 44,  "480p",   NULL,   "WebM", "VP8",            "Vorbis" },
+    { 45,  "720p",   NULL,   "WebM", "VP8",            "Vorbis" },
+    { 46,  "1080p",  NULL,   "WebM", "VP8",            "Vorbis" },
+    { 82,  "360p",   NULL,   "MP4",  "H.264",          "AAC" },
+    { 83,  "240p",   NULL,   "MP4",  "H.264",          "AAC" },
+    { 84,  "720p",   NULL,   "MP4",  "H.264",          "AAC" },
+    { 85,  "520p",   NULL,   "MP4",  "H.264",          "AAC" },
+    { 100, "360p",   NULL,   "WebM", "VP8",            "Vorbis" },
+    { 101, "360p",   NULL,   "WebM", "VP8",            "Vorbis" },
+    { 102, "720p",   NULL,   "WebM", "VP8",            "Vorbis" },
+    { 120, "720p",   NULL,   "FLV",  "AVC",            "AAC" },
+    { 0,   NULL,     NULL,   NULL,   NULL,             NULL }
+};
+
+
+/* Private data of the youtube protocol: user options plus the inner connection. */
+typedef struct {
+    const AVClass *class;
+    int rw_timeout;                     /**< Network timeout; -1 means the option is not forwarded. */
+    const char *prefferred_stream;      /**< Preferred stream: a resolution label or "itag=N"; NULL if unset. */
+    URLContext *media;                  /**< Connection to video stream */
+} YouTubeContext;
+
+/* User-settable options; each entry stores its value into a YouTubeContext field. */
+#define OFFSET(x) offsetof(YouTubeContext, x)
+#define D AV_OPT_FLAG_DECODING_PARAM
+static const AVOption options[] = {
+    /* default -1: the timeout is then left to the underlying protocol */
+    {"timeout", "set timeout of socket I/O operations", OFFSET(rw_timeout), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, D },
+    /* resolution label (e.g. "720p") or "itag=N"; unset by default */
+    {"youtube-stream", "Preferred stream.", OFFSET(prefferred_stream), AV_OPT_TYPE_STRING, { 0 }, 0, 0, D },
+    {NULL}
+};
+
+/* AVClass tying the option table above to YouTubeContext. */
+static const AVClass youtube_context_class = {
+    .class_name     = "youtube",
+    .item_name      = av_default_item_name,
+    .option         = options,
+    .version        = LIBAVUTIL_VERSION_INT,
+};
+
+/**
+ * Look up the format table entry that describes an itag.
+ *
+ * @param itag itag number to search for
+ * @return matching table entry, or NULL when the itag is not in the table
+ */
+static av_cold YouTubeFormat* youtube_find_format_by_itag(int itag)
+{
+    YouTubeFormat *entry = youtube_formats;
+
+    /* the table ends with an entry whose itag is 0 */
+    while (entry->itag) {
+        if (entry->itag == itag)
+            return entry;
+        ++entry;
+    }
+    return NULL;
+}
+
+/**
+ * Replace every occurrence of a substring, editing the buffer in place.
+ *
+ * The buffer is never reallocated, so the replacement must not be longer than
+ * the text it replaces. Text that was just inserted is not scanned again.
+ *
+ * @param str NUL-terminated buffer to modify
+ * @param old substring to search for; nothing is done when it is empty
+ * @param new replacement text; nothing is done when it is longer than old
+ */
+static void replace_string_with_string(char *str, const char *old, const char *new)
+{
+    const size_t len_old = strlen(old);
+    const size_t len_new = strlen(new);
+    char *pos;
+
+    /* An empty pattern matches at every position and would never terminate;
+     * a longer replacement would write past the end of the buffer. */
+    if (!len_old || len_new > len_old)
+        return;
+
+    while ((pos = strstr(str, old))) {
+        memmove(pos, new, len_new);
+        /* close the gap left by the shorter replacement, including the NUL */
+        memmove(pos + len_new, pos + len_old, strlen(pos + len_old) + 1);
+        str = pos + len_new;
+    }
+}
+
+/**
+ * Remove every parameter with the given name from a URL, editing it in place.
+ *
+ * A parameter spans from its name up to and including the next '&'. When it
+ * is the last component, the string is cut at the name and a '&' directly in
+ * front of it is dropped as well.
+ *
+ * A match only counts when it begins a parameter, i.e. it sits at the start
+ * of the string or directly after '&' or '?'. Without that check removing
+ * "type=" would also destroy a parameter such as "mimetype=".
+ *
+ * @param str    NUL-terminated URL to modify
+ * @param remove parameter name including the trailing '=', e.g. "quality=";
+ *               nothing is done when it is empty
+ */
+static void remove_parameter_from_url(char *str, const char *remove)
+{
+    char *pos1 = str, *pos2;
+
+    if (!*remove)
+        return;
+
+    while ((pos1 = strstr(pos1, remove))) {
+        if (pos1 > str && pos1[-1] != '&' && pos1[-1] != '?') {
+            /* hit inside another parameter's name or value; keep looking */
+            ++pos1;
+            continue;
+        }
+        pos2 = strchr(pos1, '&');
+        if (!pos2) { /* This is the last component */
+            if (pos1 > str && pos1[-1] == '&')
+                --pos1;
+            *pos1 = '\0';
+            return;
+        }
+        ++pos2;
+        /* pull the rest of the URL over the removed parameter, NUL included */
+        memmove(pos1, pos2, strlen(pos2) + 1);
+    }
+}
+
+/**
+ * Turn one entry of the stream map into a URL that can be opened directly.
+ *
+ * The entry is edited in place: escaped characters are restored, the "type=",
+ * "fallback_host=" and "quality=" parameters and the signature parameter are
+ * removed. The result is everything after "url=" with the signature appended
+ * again as "&signature=...".
+ *
+ * @param url one stream map entry; its content is destroyed
+ * @return newly allocated URL, to be released with av_free(), or NULL when the
+ *         entry has no signature, has no "url=" part, or memory ran out
+ */
+static av_cold char* youtube_clean_url(char *url)
+{
+    static const char *signature_token1 = "sig=";
+    static const char *signature_token2 = "s=";
+    static const char *url_token = "url=";
+    static const char *itag_token = "itag=";
+    const char *signature_token = signature_token1;
+    char *signature, *result;
+    char *pos1, *pos2;
+
+    static const struct { const char *ori; const char *new; } restore_char_map[] = {
+        { "%3A", ":" },
+        { "%2F", "/" },
+        { "%3F", "?" },
+        { "%3D", "=" },
+        { "%252C", "%2C" },
+        { "%26", "&" },
+        { "\\u0026", "&" },
+        {NULL, NULL}
+    };
+
+    static const char* remove[] = {
+        "type=",
+        "fallback_host=",
+        "quality=",
+        NULL
+    };
+
+    /* restore special characters */
+    for (int i = 0; restore_char_map[i].ori; ++i)
+        replace_string_with_string(url, restore_char_map[i].ori, restore_char_map[i].new);
+
+    /* remove not needed parts */
+    for (int i = 0; remove[i]; ++i)
+        remove_parameter_from_url(url, remove[i]);
+
+    /* extract signature; it is stored under one of two parameter names */
+    pos1 = strstr(url, signature_token);
+    if (!pos1) {
+        signature_token = signature_token2;
+        pos1 = strstr(url, signature_token);
+    }
+    if (!pos1)
+        return NULL;
+    pos1 += strlen(signature_token);
+    pos2 = strchr(pos1, '&');
+    if (pos2) /* signature may be the last component in url */
+        *pos2 = '\0';
+    signature = av_strdup(pos1);
+    if (pos2)
+        *pos2 = '&';
+    if (!signature)
+        return NULL;
+
+    /* remove signature from url */
+    remove_parameter_from_url(url, signature_token);
+
+    /* remove last itag component in case it's the last component of the url */
+    pos1 = url;
+    pos2 = NULL;
+    while ((pos1 = strstr(pos1, itag_token))) {
+        pos2 = pos1;
+        pos1 += strlen(itag_token);
+    }
+    /* pos2 stays NULL when there is no itag at all */
+    if (pos2 && pos2 > url && !strchr(pos2, '&'))
+        pos2[-1] = '\0';
+
+    replace_string_with_string(url, "&&", "&");
+
+    /* find beginning of the real url */
+    pos1 = strstr(url, url_token);
+    if (!pos1) {
+        av_free(signature);
+        return NULL;
+    }
+    pos1 += strlen(url_token);
+
+    /* build "<real url>&signature=<signature>" in freshly allocated memory */
+    result = av_asprintf("%s&signature=%s", pos1, signature);
+    av_free(signature);
+    return result;
+}
+
+/* Find an "itag=N" token in the stream map. A hit that is followed by another
+ * digit belongs to a different, longer itag (e.g. "itag=5" inside "itag=52")
+ * and is skipped. Returns NULL when the token does not occur. */
+static char *youtube_find_itag(char *urls, const char *itag)
+{
+    const size_t len = strlen(itag);
+    char *pos = urls;
+
+    if (!len)
+        return NULL;
+    while ((pos = strstr(pos, itag))) {
+        if (pos[len] < '0' || pos[len] > '9')
+            return pos;
+        pos += len;
+    }
+    return NULL;
+}
+
+/**
+ * Choose one entry from the comma separated stream map and clean it up.
+ *
+ * The preferred stream option is either "itag=N" or a resolution label that is
+ * translated through the format table. When no preference is set, or the
+ * preferred stream is not present, the first entry of the list is used.
+ *
+ * @param h         protocol context, provides the options and the log context
+ * @param urls      comma separated stream map; it is modified in place
+ * @param media_url receives the newly allocated media URL, or NULL on failure
+ * @return 0 on success, -1 when no usable URL could be produced
+ */
+static av_cold int youtube_pick_url(URLContext *h, char *urls, char **media_url)
+{
+    char *url = NULL;
+    char *pos = NULL;
+    YouTubeContext *s = h->priv_data;
+
+    /* find position of itag parameter */
+    if (s->prefferred_stream) {
+        if (av_strstart(s->prefferred_stream, "itag=", NULL)) {
+            pos = youtube_find_itag(urls, s->prefferred_stream);
+        } else {
+            char itag[20];
+            for (int f = 0; youtube_formats[f].itag; ++f) {
+                const YouTubeFormat *format = &youtube_formats[f];
+                if ((format->video_resolution1 && !strcmp(format->video_resolution1, s->prefferred_stream)) ||
+                    (format->video_resolution2 && !strcmp(format->video_resolution2, s->prefferred_stream)))
+                {
+                    snprintf(itag, sizeof(itag), "itag=%d", format->itag);
+                    if ((pos = youtube_find_itag(urls, itag)))
+                        break;
+                }
+            }
+        }
+    }
+
+    if (pos) {
+        /* terminate the string on first ',' character after current position */
+        char *sep = strchr(pos, ',');
+        if (sep)
+            *sep = '\0';
+        /* set url beginning after last ',' character if any */
+        sep = strrchr(urls, ',');
+        url = sep ? sep + 1 : urls;
+    } else if (s->prefferred_stream)
+        av_log(h, AV_LOG_ERROR, "Selected stream '%s' not found.\n", s->prefferred_stream);
+
+    if (!url) {
+        /* use first url on the list if not found specified */
+        url = strchr(urls, ',');
+        if (url)
+            *url = '\0';
+        url = urls;
+    }
+
+    *media_url = youtube_clean_url(url);
+    if (!*media_url)
+        return -1;
+    /* logged only on success so that "%s" never receives NULL */
+    av_dlog(h, "download url: %s\n", *media_url);
+    return 0;
+}
+
+/**
+ * Download the page behind a "youtube:" or "youtube+" URL into memory.
+ *
+ * The prefix is stripped and the remainder, which must start with "http:" or
+ * "https:", is opened for reading. The timeout option is forwarded only when
+ * the user set it.
+ *
+ * @param h           protocol context, provides options, interrupt callback
+ *                    and the log context
+ * @param youtube_url URL as given to the protocol, including its prefix
+ * @param html        receives the page as a newly allocated string; it is
+ *                    NULL whenever an error is returned
+ * @return 0 on success, a negative error code otherwise
+ */
+static av_cold int youtube_download_html(URLContext *h, const char *youtube_url, char **html)
+{
+#define BUFFER_SIZE 4096
+    YouTubeContext *s = h->priv_data;
+    URLContext *url_ctx;
+    char buffer[BUFFER_SIZE];
+    AVDictionary *opts = NULL;
+    int err;
+    AVBPrint html_buffer;
+
+    *html = NULL;
+
+    if (!av_strstart(youtube_url, "youtube:", &youtube_url) &&
+        !av_strstart(youtube_url, "youtube+", &youtube_url)) {
+        av_log(h, AV_LOG_ERROR, "Unsupported url %s\n", youtube_url);
+        return AVERROR(EINVAL);
+    }
+    if (!av_strstart(youtube_url, "http:", NULL) &&
+        !av_strstart(youtube_url, "https:", NULL)) {
+        av_log(h, AV_LOG_ERROR, "Unsupported url %s\n", youtube_url);
+        return AVERROR(EINVAL);
+    }
+
+    if (s->rw_timeout != -1) {
+        snprintf(buffer, BUFFER_SIZE, "%d", s->rw_timeout);
+        av_dict_set(&opts, "timeout", buffer, 0);
+    } /* if option is not given, don't pass it and let http use its own default */
+    err = ffurl_open(&url_ctx, youtube_url, AVIO_FLAG_READ, &h->interrupt_callback, &opts);
+    av_dict_free(&opts);
+    if (err < 0)
+        return err;
+
+    /* TODO: this loop may be broken earlier - when media urls are downloaded */
+    av_bprint_init(&html_buffer, 100 * 1024, AV_BPRINT_SIZE_UNLIMITED);
+    while ((err = ffurl_read(url_ctx, buffer, BUFFER_SIZE)) > 0) {
+        av_bprint_append_data(&html_buffer, buffer, err);
+        if (!av_bprint_is_complete(&html_buffer)) {
+            av_log(h, AV_LOG_WARNING, "Not complete html downloaded\n");
+            break;
+        }
+    }
+    ffurl_close(url_ctx);
+
+    /* a failed read must not be mistaken for the end of the page */
+    if (err < 0 && err != AVERROR_EOF) {
+        av_log(h, AV_LOG_ERROR, "Error while downloading html\n");
+        av_bprint_finalize(&html_buffer, NULL);
+        return err;
+    }
+
+    if ((err = av_bprint_finalize(&html_buffer, html)) < 0) {
+        av_freep(html);
+        return err;
+    }
+
+#undef BUFFER_SIZE
+
+    return 0;
+}
+
+/**
+ * Download the page and extract the stream map from it.
+ *
+ * The map is the second quoted string at or after the first occurrence of
+ * "url_encoded_fmt_stream_map": the first quote character found closes the
+ * key, the next two delimit the value.
+ *
+ * @param h           protocol context
+ * @param youtube_url URL as given to the protocol, including its prefix
+ * @param urls        receives the newly allocated stream map; it is NULL
+ *                    whenever an error is returned
+ * @return 0 on success, a negative error code otherwise
+ */
+static av_cold int youtube_get_media_urls(URLContext *h, const char* youtube_url, char **urls)
+{
+    static const char *needle = "url_encoded_fmt_stream_map";
+    char *begin, *end, *html = NULL;
+    int ret;
+
+    *urls = NULL;
+
+    if ((ret = youtube_download_html(h, youtube_url, &html)) < 0)
+        goto out;
+
+    /* every parse failure below means the page lacks a usable stream map */
+    ret = AVERROR_INVALIDDATA;
+    if (!html)
+        goto out;
+    if (!(begin = strstr(html, needle)))
+        goto out;
+    if (!(begin = strchr(begin, '"')))
+        goto out;
+    begin++;
+    if (!(begin = strchr(begin, '"')))
+        goto out;
+    begin++;
+
+    if (!(end = strchr(begin, '"')))
+        goto out;
+
+    *end = '\0';
+    *urls = av_strdup(begin);
+    /* callers scan the map with strstr(), so never report success with NULL */
+    ret = *urls ? 0 : AVERROR(ENOMEM);
+
+  out:
+    av_free(html);
+    return ret;
+}
+
+/**
+ * Log one line for every stream found in the stream map.
+ *
+ * Entries are separated by ','. For each entry the first "itag=" value is
+ * looked up in the format table; unknown itags are listed without details.
+ *
+ * @param h    log context
+ * @param urls comma separated stream map; it is not modified
+ */
+static void youtube_print_urls(URLContext *h, const char *urls)
+{
+    int stream_no = 1, itag;
+    const char *pos;
+
+    pos = urls;
+    while ((pos = strstr(pos, "itag="))) {
+        if (sscanf(pos, "itag=%d", &itag) == 1) {
+            YouTubeFormat *format = youtube_find_format_by_itag(itag);
+            if (format) {
+                /* some table entries have no resolution label; "%s" must not get NULL */
+                const char *resolution = format->video_resolution1 ? format->video_resolution1 : "";
+                av_log(h, AV_LOG_INFO,
+                       "Stream %2d: itag=%-3d %-5s Container:%-4s Video: %-14s Audio: %s\n",
+                       stream_no++, itag, resolution,
+                       format->container, format->video_codec, format->audio_codec);
+            } else {
+                av_log(h, AV_LOG_INFO, "Stream %2d: itag=%-3d Unknown details\n", stream_no++, itag);
+            }
+        }
+        /* continue with the next entry of the list */
+        pos = strchr(pos, ',');
+        if (!pos)
+            break;
+    }
+}
+
+/**
+ * Open a YouTube page URL: fetch the stream map, choose a stream and connect
+ * to it. The protocol can only be opened for reading.
+ *
+ * When no preferred stream is set, the available streams are listed in the
+ * log before the first one is used.
+ *
+ * @param h     protocol context; on success its media connection is open
+ * @param url   URL as given by the user, including the protocol prefix
+ * @param flags AVIO_FLAG_* open flags
+ * @return 0 on success, a negative error code otherwise
+ */
+static int youtube_open(URLContext *h, const char *url, int flags)
+{
+    int ret;
+    AVDictionary *opts = NULL;
+    char *urls = NULL, *media_url = NULL;
+    YouTubeContext *s = h->priv_data;
+
+    s->media = NULL;
+
+    if (flags & AVIO_FLAG_WRITE) {
+        av_log(h, AV_LOG_ERROR, "YouTube protocol is readonly.\n");
+        return AVERROR(EIO);
+    }
+
+    if ((ret = youtube_get_media_urls(h, url, &urls)) < 0)
+        goto err;
+    if (!urls) {
+        /* the list is scanned with strstr() below, which must not get NULL */
+        ret = AVERROR(ENOMEM);
+        goto err;
+    }
+
+    if (!s->prefferred_stream) {
+        av_log(h, AV_LOG_INFO, "YouTube stream not selected. "
+                               "First available is used. "
+                               "There are available as follows:\n");
+        youtube_print_urls(h, urls);
+    }
+
+    if ((ret = youtube_pick_url(h, urls, &media_url)) < 0)
+        goto err;
+
+    if (s->rw_timeout != -1) {
+        char buffer[20];
+        snprintf(buffer, sizeof(buffer), "%d", s->rw_timeout);
+        av_dict_set(&opts, "timeout", buffer, 0);
+    } /* if option is not given, don't pass it and let http use its own default */
+    ret = ffurl_open(&s->media, media_url, AVIO_FLAG_READ, &h->interrupt_callback, &opts);
+    av_dict_free(&opts);
+    if (ret < 0)
+        goto err;
+
+    ret = 0;
+  err:
+    /* both strings are only needed while opening, on success and on failure */
+    av_free(urls);
+    av_free(media_url);
+    return ret;
+}
+
+/* Read callback: forwards the request to the media connection. */
+static int youtube_read(URLContext *h, unsigned char *buf, int size)
+{
+    YouTubeContext *ctx = h->priv_data;
+    URLContext *media = ctx->media;
+
+    return ffurl_read(media, buf, size);
+}
+
+/* Seek callback: forwards the request to the media connection. */
+static int64_t youtube_seek(URLContext *h, int64_t pos, int whence)
+{
+    YouTubeContext *ctx = h->priv_data;
+    URLContext *media = ctx->media;
+
+    return ffurl_seek(media, pos, whence);
+}
+
+/* Close callback: closes the media connection through ffurl_closep(). */
+static int youtube_close(URLContext *h)
+{
+    YouTubeContext *ctx = h->priv_data;
+    URLContext **media = &ctx->media;
+
+    return ffurl_closep(media);
+}
+
+/* File handle callback: reports the handle of the media connection. */
+static int youtube_get_file_handle(URLContext *h)
+{
+    YouTubeContext *ctx = h->priv_data;
+    URLContext *media = ctx->media;
+
+    return ffurl_get_file_handle(media);
+}
+
+/* Shutdown callback: forwards the request to the media connection. */
+static int youtube_shutdown(URLContext *h, int flags)
+{
+    YouTubeContext *ctx = h->priv_data;
+    URLContext *media = ctx->media;
+
+    return ffurl_shutdown(media, flags);
+}
+
+/* Protocol descriptor for the "youtube" scheme; the callbacks other than
+ * url_open forward to the media connection stored in YouTubeContext. */
+URLProtocol ff_youtube_protocol = {
+    .name                = "youtube",
+    .url_open            = youtube_open,
+    .url_read            = youtube_read,
+    .url_seek            = youtube_seek,
+    .url_close           = youtube_close,
+    .url_get_file_handle = youtube_get_file_handle,
+    .url_shutdown        = youtube_shutdown,
+    .priv_data_size      = sizeof(YouTubeContext),
+    .priv_data_class     = &youtube_context_class,
+    .flags               = URL_PROTOCOL_FLAG_NETWORK
+};
-- 
1.7.2.5



More information about the ffmpeg-devel mailing list