[FFmpeg-devel] [PATCH] lavf/http: implement directory listing callbacks for Apache

Mariusz Szczepańczyk mszczepanczyk at gmail.com
Thu Aug 20 23:32:54 CEST 2015

 configure          |   3 +
 libavformat/http.c | 194 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 197 insertions(+)

diff --git a/configure b/configure
index e67ddf6..401e041 100755
--- a/configure
+++ b/configure
@@ -265,6 +265,7 @@ External library support:
   --enable-libxcb-shm      enable X11 grabbing shm communication [autodetect]
   --enable-libxcb-xfixes   enable X11 grabbing mouse rendering [autodetect]
   --enable-libxcb-shape    enable X11 grabbing shape rendering [autodetect]
+  --enable-libxml2         enable HTML parsing via libxml2 [no]
   --enable-libxvid         enable Xvid encoding via xvidcore,
                            native MPEG-4/Xvid encoder exists [no]
   --enable-libzmq          enable message passing via libzmq [no]
@@ -1428,6 +1429,7 @@ EXTERNAL_LIBRARY_LIST="
+    libxml2
@@ -5309,6 +5311,7 @@ enabled libx265           && require_pkg_config x265 x265.h x265_api_get &&
                              { check_cpp_condition x265.h "X265_BUILD >= 57" ||
                                die "ERROR: libx265 version must be >= 57."; }
 enabled libxavs           && require libxavs xavs.h xavs_encoder_encode -lxavs
+enabled libxml2           && require_pkg_config libxml-2.0 libxml/parser.h xmlInitParser
 enabled libxvid           && require libxvid xvid.h xvid_global -lxvidcore
 enabled libzmq            && require_pkg_config libzmq zmq.h zmq_ctx_new
 enabled libzvbi           && require libzvbi libzvbi.h vbi_decoder_new -lzvbi
diff --git a/libavformat/http.c b/libavformat/http.c
index 1eb716b..df45958 100644
--- a/libavformat/http.c
+++ b/libavformat/http.c
@@ -21,6 +21,10 @@
 #include "config.h"
+#include <libxml/HTMLparser.h>
+#endif /* CONFIG_LIBXML2 */
 #include <zlib.h>
 #endif /* CONFIG_ZLIB */
@@ -54,6 +58,16 @@ typedef enum {
+typedef struct AVIODirEntryQueueNode { /* one node of a singly linked queue of directory entries */
+    struct AVIODirEntry *entry; /* entry carried by this node */
+    struct AVIODirEntryQueueNode *next; /* node queued after this one; NULL for the last node */
+} AVIODirEntryQueueNode;
+typedef struct AVIODirEntryQueue { /* FIFO of directory entries: pushed at rear, popped from front */
+    struct AVIODirEntryQueueNode *front; /* oldest node, the next one to be popped; NULL when empty */
+    struct AVIODirEntryQueueNode *rear; /* newest node, where pushes are appended; NULL when empty */
+} AVIODirEntryQueue;
 typedef struct HTTPContext {
     const AVClass *class;
     URLContext *hd;
@@ -70,6 +84,7 @@ typedef struct HTTPContext {
     char *mime_type;
     char *user_agent;
     char *content_type;
+    char *server;
     /* Set if the server correctly handles Connection: close and will close
      * the connection after feeding us the content. */
     int willclose;
@@ -111,6 +126,11 @@ typedef struct HTTPContext {
     int is_multi_client;
     HandshakeState handshake_step;
     int is_connected_server;
+    htmlParserCtxtPtr html_parser;
+    AVIODirEntryQueue *entry_queue;
+    AVIODirEntry *entry;
+#endif /* CONFIG_LIBXML2 */
 } HTTPContext;
 #define OFFSET(x) offsetof(HTTPContext, x)
@@ -808,6 +828,8 @@ static int process_line(URLContext *h, char *line, int line_count,
             if (!strcmp(p, "close"))
                 s->willclose = 1;
         } else if (!av_strcasecmp(tag, "Server")) {
+            av_free(s->server);
+            s->server = av_strdup(p);
             if (!av_strcasecmp(p, "AkamaiGHost")) {
                 s->is_akamai = 1;
             } else if (!av_strncasecmp(p, "MediaGateway", 12)) {
@@ -1409,6 +1431,7 @@ static int http_close(URLContext *h)
     if (s->hd)
+    av_freep(&s->server);
     return ret;
@@ -1471,6 +1494,167 @@ static int http_get_file_handle(URLContext *h)
     return ffurl_get_file_handle(s->hd);
+/**
+ * Append a directory entry to the rear of the queue.
+ *
+ * Ownership of entry passes to this function. Because it cannot report
+ * failure, the entry is released here when there is no queue or when the
+ * node cannot be allocated; the caller drops its own pointer right after
+ * pushing, so the entry would otherwise be leaked.
+ *
+ * @param queue queue to append to, may be NULL
+ * @param entry entry to enqueue
+ */
+static void avio_dir_entry_queue_push(AVIODirEntryQueue *queue, AVIODirEntry *entry)
+{
+    AVIODirEntryQueueNode *node;
+
+    node = queue ? av_mallocz(sizeof(*node)) : NULL;
+    if (!node) {
+        avio_free_directory_entry(&entry);
+        return;
+    }
+
+    node->entry = entry;
+    if (!queue->front) {
+        /* first element: it is both the front and the rear */
+        queue->front = queue->rear = node;
+    } else {
+        queue->rear->next = node;
+        queue->rear = node;
+    }
+}
+/**
+ * Remove the oldest entry from the queue and hand it to the caller.
+ *
+ * Only the queue node is freed; the returned entry stays allocated.
+ *
+ * @param queue queue to take from, may be NULL
+ * @return the entry at the front, or NULL if queue is NULL or empty
+ */
+static AVIODirEntry *avio_dir_entry_queue_pop(AVIODirEntryQueue *queue)
+{
+    AVIODirEntryQueueNode *head;
+    AVIODirEntry *result;
+
+    if (!queue)
+        return NULL;
+
+    head = queue->front;
+    if (!head)
+        return NULL;
+
+    result = head->entry;
+    if (head == queue->rear) {
+        /* that was the only node: the queue becomes empty */
+        queue->front = NULL;
+        queue->rear  = NULL;
+    } else {
+        queue->front = head->next;
+    }
+    av_free(head);
+
+    return result;
+}
+/**
+ * Look up an attribute value in a SAX attribute array.
+ *
+ * The array is read as consecutive name/value pairs and the scan stops at
+ * the first NULL name.
+ *
+ * @param attrs attribute array from the startElement callback, may be NULL
+ * @param key   attribute name to search for (exact, case-sensitive match)
+ * @return the value stored next to the first matching name (which may itself
+ *         be NULL), or NULL if attrs is NULL or the name is not present
+ */
+static const char *get_attr(const xmlChar **attrs, const char *key)
+{
+    int i;
+
+    if (!attrs)
+        return NULL;
+
+    /* no artificial cap on the number of pairs: the NULL name ends the scan */
+    for (i = 0; attrs[i]; i += 2) {
+        /* xmlChar is an unsigned char type, so cast for the str* functions */
+        if (!strcmp((const char *) attrs[i], key))
+            return (const char *) attrs[i + 1];
+    }
+    return NULL;
+}
+/**
+ * SAX startElement callback that extracts entries from a directory index page.
+ *
+ * The callback keeps at most one pending entry in s->entry:
+ *  - an <img> whose alt text has the form "[...]" (other than "[PARENTDIR]")
+ *    and whose src is not "/icons/back.gif" starts a new pending entry,
+ *    typed as a directory for "[DIR]" and as a file otherwise;
+ *  - the next <a> supplies the entry name from its href (one trailing '/'
+ *    is stripped) and the entry is pushed onto s->entry_queue;
+ *  - a <th>, or an <a> without a usable href, discards the pending entry.
+ *
+ * A SAX callback cannot return an error, so on allocation failure the
+ * pending entry is dropped.
+ *
+ * @param ctx   user data given to the parser, the URLContext
+ * @param tag   name of the element being opened
+ * @param attrs attribute name/value array, may be NULL
+ */
+static void parse_apache(void *ctx, const xmlChar *tag, const xmlChar **attrs)
+{
+    URLContext *h = ctx;
+    HTTPContext *s = h->priv_data;
+    const char *elem = (const char *) tag;
+    const char *url, *alt, *src;
+    size_t len;
+
+    if (!strcmp(elem, "img")) {
+        av_freep(&s->entry);
+        alt = get_attr(attrs, "alt");
+        src = get_attr(attrs, "src");
+        if (alt && alt[0] == '['
+                && alt[strlen(alt) - 1] == ']'
+                && strcmp(alt, "[PARENTDIR]")) {
+            if (!src || strcmp(src, "/icons/back.gif")) {
+                s->entry = ff_alloc_dir_entry();
+                if (!s->entry)
+                    return;
+                if (!strcmp(alt, "[DIR]"))
+                    s->entry->type = AVIO_ENTRY_DIRECTORY;
+                else
+                    s->entry->type = AVIO_ENTRY_FILE;
+            }
+        }
+    } else if (!strcmp(elem, "a")) {
+        url = s->entry ? get_attr(attrs, "href") : NULL;
+        /* an empty href would make the trailing-slash check index name[-1] */
+        if (url && url[0] && strcmp(url, "/")) {
+            s->entry->name = av_strdup(url);
+            if (!s->entry->name) {
+                av_freep(&s->entry);
+                return;
+            }
+            len = strlen(s->entry->name);
+            if (s->entry->name[len - 1] == '/')
+                s->entry->name[len - 1] = 0;
+            /* the queue owns the entry from here on */
+            avio_dir_entry_queue_push(s->entry_queue, s->entry);
+            s->entry = NULL;
+        } else {
+            av_freep(&s->entry);
+        }
+    } else if (!strcmp(elem, "th")) {
+        av_freep(&s->entry);
+    }
+}
+/**
+ * Open an HTTP URL as a directory listing.
+ *
+ * Opens the resource, requires a "text/html" response from a server whose
+ * Server header contains "Apache", then sets up the entry queue and an HTML
+ * push parser that http_read_dir() feeds.
+ *
+ * @param h URL context; h->filename is the URL to list
+ * @return 0 on success; the negative error code from http_open(),
+ *         AVERROR(ENOSYS) if the response is not a supported listing,
+ *         AVERROR(ENOMEM) if the queue cannot be allocated, or
+ *         AVERROR(EIO) if the parser cannot be created
+ */
+static int http_open_dir(URLContext *h)
+{
+    HTTPContext *s = h->priv_data;
+    xmlSAXHandler handlers = { 0 };
+    int ret;
+
+    /* parenthesize the assignment: without it ret receives the result of the
+     * comparison and a failure would be returned as +1 */
+    if ((ret = http_open(h, h->filename, 0, NULL)) < 0)
+        goto fail;
+
+    if (!s->mime_type || !strstr(s->mime_type, "text/html")) {
+        ret = AVERROR(ENOSYS);
+        goto fail;
+    }
+
+    /* pick the page parser matching the server software */
+    if (s->server && strstr(s->server, "Apache"))
+        handlers.startElement = parse_apache;
+
+    if (!handlers.startElement) {
+        ret = AVERROR(ENOSYS);
+        goto fail;
+    }
+
+    s->entry_queue = av_mallocz(sizeof(*s->entry_queue));
+    if (!s->entry_queue) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    s->html_parser = htmlCreatePushParserCtxt(&handlers, h, NULL, 0, h->filename, XML_CHAR_ENCODING_UTF8);
+    if (!s->html_parser) {
+        ret = AVERROR(EIO);
+        goto fail;
+    }
+
+    return 0;
+
+fail:
+    /* free the queue here so a failed open does not depend on a later
+     * http_close_dir() call to release it */
+    av_freep(&s->entry_queue);
+    http_close(h);
+    return ret;
+}
+/**
+ * Fetch the next directory entry.
+ *
+ * Returns an already queued entry if there is one; otherwise reads more of
+ * the response body and feeds it to the HTML parser until the parser
+ * callback queues an entry or the input ends.
+ *
+ * @param h    URL context opened with http_open_dir()
+ * @param next receives the next entry, or NULL when the listing is exhausted
+ *             or an error occurred
+ * @return 0 on success (including end of listing), a negative error code
+ *         if reading failed
+ */
+static int http_read_dir(URLContext *h, AVIODirEntry **next)
+{
+    HTTPContext *s = h->priv_data;
+    char buf[BUFFER_SIZE];
+    int ret;
+
+    for (;;) {
+        if ((*next = avio_dir_entry_queue_pop(s->entry_queue)))
+            return 0;
+
+        ret = ffurl_read(h, (unsigned char *) buf, BUFFER_SIZE - 1);
+        if (ret <= 0)
+            break;
+
+        htmlParseChunk(s->html_parser, (const char *) buf, ret, 0);
+    }
+
+    /* reaching the end of the body means the listing is complete, which is
+     * reported as success with *next == NULL rather than as an error */
+    if (ret < 0 && ret != AVERROR_EOF)
+        return ret;
+
+    return 0;
+}
+/**
+ * Release everything set up by http_open_dir() and close the connection.
+ *
+ * Frees entries still waiting in the queue, the queue itself, any entry
+ * still pending in the parser callback, and the HTML parser.
+ *
+ * @param h URL context opened with http_open_dir()
+ * @return 0
+ */
+static int http_close_dir(URLContext *h)
+{
+    HTTPContext *s = h->priv_data;
+    AVIODirEntry *entry;
+
+    /* queued entries carry an allocated name, so free them with the
+     * directory-entry helper instead of a bare av_freep() */
+    while (s->entry_queue && (entry = avio_dir_entry_queue_pop(s->entry_queue)))
+        avio_free_directory_entry(&entry);
+    av_freep(&s->entry_queue);
+
+    avio_free_directory_entry(&s->entry);
+
+    htmlFreeParserCtxt(s->html_parser);
+    s->html_parser = NULL;
+
+    http_close(h);
+    return 0;
+}
+#endif /* CONFIG_LIBXML2 */
 #define HTTP_CLASS(flavor)                          \
 static const AVClass flavor ## _context_class = {   \
     .class_name = # flavor,                         \
@@ -1493,6 +1677,11 @@ URLProtocol ff_http_protocol = {
     .url_close           = http_close,
     .url_get_file_handle = http_get_file_handle,
     .url_shutdown        = http_shutdown,
+    .url_open_dir        = http_open_dir,
+    .url_read_dir        = http_read_dir,
+    .url_close_dir       = http_close_dir,
+#endif /* CONFIG_LIBXML2 */
     .priv_data_size      = sizeof(HTTPContext),
     .priv_data_class     = &http_context_class,
     .flags               = URL_PROTOCOL_FLAG_NETWORK,
@@ -1511,6 +1700,11 @@ URLProtocol ff_https_protocol = {
     .url_close           = http_close,
     .url_get_file_handle = http_get_file_handle,
     .url_shutdown        = http_shutdown,
+    .url_open_dir        = http_open_dir,
+    .url_read_dir        = http_read_dir,
+    .url_close_dir       = http_close_dir,
+#endif /* CONFIG_LIBXML2 */
     .priv_data_size      = sizeof(HTTPContext),
     .priv_data_class     = &https_context_class,
     .flags               = URL_PROTOCOL_FLAG_NETWORK,

More information about the ffmpeg-devel mailing list