[FFmpeg-devel] [RFC][PATCH] ffmpeg: add option to transform metadata using iconv

James Darnley james.darnley at gmail.com
Tue Sep 22 19:10:27 CEST 2015


At present it only converts global metadata as that is what I wanted to do.  It
should be possible to extend it so that the conversion can be different for
different files or streams.
---
 doc/ffmpeg.texi |   6 +++
 ffmpeg.c        |  15 ++++++
 ffmpeg.h        |   1 +
 ffmpeg_opt.c    | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 169 insertions(+), 2 deletions(-)

diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi
index f4ffc6c..d4c1c23 100644
--- a/doc/ffmpeg.texi
+++ b/doc/ffmpeg.texi
@@ -855,6 +855,12 @@ such streams is attempted.
 Allow input streams with unknown type to be copied instead of failing if copying
 such streams is attempted.
 
+@item -metadata_iconv_code_page_list @var{code_page_list}
+Force the metadata from input files to be converted through the given code pages
+using iconv. This allows the user to correct
+@uref{https://en.wikipedia.org/wiki/Mojibake, mojibake}, provided they know the
+correct code pages to use.
+
 @item -map_channel [@var{input_file_id}.@var{stream_specifier}.@var{channel_id}|-1][:@var{output_file_id}.@var{stream_specifier}]
 Map an audio channel from a given input to an output. If
 @var{output_file_id}.@var{stream_specifier} is not set, the audio channel will
diff --git a/ffmpeg.c b/ffmpeg.c
index e31a2c6..5c04571 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -101,6 +101,10 @@
 #include <pthread.h>
 #endif
 
+#if CONFIG_ICONV
+#include <iconv.h>
+#endif
+
 #include <time.h>
 
 #include "ffmpeg.h"
@@ -564,6 +568,17 @@ static void ffmpeg_cleanup(int ret)
         fclose(vstats_file);
     av_freep(&vstats_filename);
 
+#if CONFIG_ICONV
+    if (metadata_iconv_contexts) {
+        iconv_t *cd = metadata_iconv_contexts;
+        for (i = 0; cd[i]; i++) {
+            iconv_close(cd[i]);
+            cd[i] = NULL;
+        }
+    }
+    av_freep(&metadata_iconv_contexts);
+#endif
+
     av_freep(&input_streams);
     av_freep(&input_files);
     av_freep(&output_streams);
diff --git a/ffmpeg.h b/ffmpeg.h
index 6544e6f..2b8cbd7 100644
--- a/ffmpeg.h
+++ b/ffmpeg.h
@@ -495,6 +495,7 @@ extern int        nb_filtergraphs;
 
 extern char *vstats_filename;
 extern char *sdp_filename;
+extern void *metadata_iconv_contexts;
 
 extern float audio_drift_threshold;
 extern float dts_delta_threshold;
diff --git a/ffmpeg_opt.c b/ffmpeg_opt.c
index 4edd118..d226f78 100644
--- a/ffmpeg_opt.c
+++ b/ffmpeg_opt.c
@@ -42,6 +42,10 @@
 #include "libavutil/pixfmt.h"
 #include "libavutil/time_internal.h"
 
+#if CONFIG_ICONV
+# include <iconv.h>
+#endif
+
 #define DEFAULT_PASS_LOGFILENAME_PREFIX "ffmpeg2pass"
 
 #define MATCH_PER_STREAM_OPT(name, type, outvar, fmtctx, st)\
@@ -84,6 +88,7 @@ const HWAccel hwaccels[] = {
 
 char *vstats_filename;
 char *sdp_filename;
+void *metadata_iconv_contexts;
 
 float audio_drift_threshold = 0.1;
 float dts_delta_threshold   = 10;
@@ -120,6 +125,7 @@ static int input_stream_potentially_available = 0;
 static int ignore_unknown_streams = 0;
 static int copy_unknown_streams = 0;
 
+
 static void uninit_options(OptionsContext *o)
 {
     const OptionDef *po = options;
@@ -196,6 +202,58 @@ static AVDictionary *strip_specifiers(AVDictionary *dict)
     return ret;
 }
 
+/**
+ * Option handler for -metadata_iconv_code_page_list.
+ *
+ * Splits arg at commas and opens one iconv context for every adjacent pair of
+ * entries, i.e. iconv_open(list[i], list[i+1]). On success the resulting
+ * NULL-terminated array is stored in metadata_iconv_contexts.
+ *
+ * @param optctx unused
+ * @param opt    unused
+ * @param arg    comma-separated list of at least two code page names
+ * @return 0 on success, a negative AVERROR code on failure; nothing stays
+ *         allocated or opened by this call when it fails
+ */
+static int opt_metadata_iconv(void *optctx, const char *opt, const char *arg)
+{
+#if !CONFIG_ICONV
+    av_log(NULL, AV_LOG_ERROR, "converting metadata through codepages requires "
+            "ffmpeg to be built with iconv support\n");
+    return AVERROR(EINVAL);
+#else
+    char *saveptr = NULL;
+    char *token;
+    char **list = NULL;
+    char *code_page_list = av_strdup(arg);
+    int i, token_count = 0;
+    int ret = AVERROR(ENOMEM);
+    iconv_t *cd = NULL;
+
+    if (!code_page_list)
+        return ret;
+
+    for (token = av_strtok(code_page_list, ",", &saveptr); token;
+         token = av_strtok(NULL, ",", &saveptr)) {
+        /* On failure av_dynarray_add() frees the array and sets it to NULL. */
+        av_dynarray_add(&list, &token_count, token);
+        if (!list)
+            goto end;
+    }
+
+    if (token_count < 2) {
+        /* Print arg: code_page_list has already been cut up by av_strtok(). */
+        av_log(NULL, AV_LOG_ERROR, "too few code pages (%d) in list (%s)\n", token_count, arg);
+        ret = AVERROR(EINVAL);
+        goto end;
+    }
+
+    /* token_count - 1 contexts plus one zeroed slot as the terminator. */
+    cd = av_mallocz_array(token_count, sizeof(*cd));
+    if (!cd)
+        goto end;
+
+    for (i = 0; i < token_count - 1; i++) {
+        av_log(NULL, AV_LOG_DEBUG, "Opening iconv with code pages: %s, %s\n", list[i], list[i+1]);
+
+        cd[i] = iconv_open(list[i], list[i+1]);
+        if (cd[i] == (iconv_t)(-1)) {
+            av_log(NULL, AV_LOG_ERROR, "error opening iconv with code pages (%s, %s)\n", list[i], list[i+1]);
+            /* Close the contexts opened so far; cd[i] itself is not open. */
+            while (i-- > 0) {
+                iconv_close(cd[i]);
+            }
+            av_freep(&cd);
+            ret = AVERROR(EINVAL);
+            goto end;
+        }
+    }
+
+    metadata_iconv_contexts = cd;
+    ret = 0;
+
+end:
+    /* The tokens point into code_page_list, so both go away together. */
+    av_freep(&list);
+    av_freep(&code_page_list);
+
+    return ret;
+#endif /* !CONFIG_ICONV */
+}
+
 static int opt_sameq(void *optctx, const char *opt, const char *arg)
 {
     av_log(NULL, AV_LOG_ERROR, "Option '%s' was removed. "
@@ -454,6 +512,84 @@ static void parse_meta_type(char *arg, char *type, int *index, const char **stre
         *type = 'g';
 }
 
+#if CONFIG_ICONV
+/**
+ * Convert one complete string through a single iconv context.
+ *
+ * @param cd       iconv context to use
+ * @param buf1     input bytes
+ * @param len      number of input bytes
+ * @param buf2     output buffer; a terminating 0 is written after the
+ *                 converted bytes, so it must hold buf2_len + 1 bytes
+ * @param buf2_len space available in buf2 for converted bytes
+ * @return 0 on success, E2BIG if buf2 is too small (nothing is terminated in
+ *         that case), or another positive errno value if iconv() failed, in
+ *         which case buf2 is terminated after the bytes converted so far
+ */
+static int run_one_iconv(iconv_t cd, char *buf1, size_t len, char *buf2, size_t buf2_len)
+{
+    int err = 0;
+
+    /* Every call starts at the beginning of a string (callers retry from
+     * scratch after E2BIG), so return cd to its initial conversion state. */
+    iconv(cd, NULL, NULL, NULL, NULL);
+
+    if (iconv(cd, &buf1, &len, &buf2, &buf2_len) == (size_t)(-1))
+        err = errno;
+
+    if (err == E2BIG)
+        return E2BIG;
+
+    /* iconv() advanced buf2 past the converted bytes. */
+    buf2[0] = 0;
+    return err;
+}
+
+/* Double the size of both conversion buffers. av_realloc_f() frees its input
+ * when it fails, so after an error each pointer is either valid or NULL. */
+static int grow_iconv_buffers(char **buffer1, char **buffer2, size_t *buf_len)
+{
+    *buffer1 = av_realloc_f(*buffer1, *buf_len, 2);
+    *buffer2 = av_realloc_f(*buffer2, *buf_len, 2);
+    if (!*buffer1 || !*buffer2)
+        return AVERROR(ENOMEM);
+    *buf_len *= 2;
+    return 0;
+}
+
+/**
+ * Copy the entries of src into dst, passing every value through each iconv
+ * context in cd, in array order.
+ *
+ * @param dst   destination dictionary
+ * @param src   source dictionary
+ * @param flags flags passed to av_dict_set() for every entry
+ * @param cd    NULL-terminated array of iconv contexts
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int metadata_iconv_internal(AVDictionary **dst, const AVDictionary *src, int flags,
+        iconv_t *cd)
+{
+    AVDictionaryEntry *t = NULL;
+    size_t buf_len = 256;
+    char *buffer1 = av_realloc(NULL, buf_len);
+    char *buffer2 = av_realloc(NULL, buf_len);
+    int ret = 0;
+
+    if (!buffer1 || !buffer2) {
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    while ((t = av_dict_get(src, "", t, AV_DICT_IGNORE_SUFFIX))) {
+        size_t tag_len = strlen(t->value);
+        int i;
+
+        /* The buffers must hold the value and its terminator. */
+        while (buf_len <= tag_len) {
+            ret = grow_iconv_buffers(&buffer1, &buffer2, &buf_len);
+            if (ret < 0)
+                goto end;
+        }
+
+        memcpy(buffer1, t->value, tag_len + 1);
+
+        for (i = 0; cd[i]; i++) {
+            char *temp;
+
+            /* Growing keeps the input in buffer1, so the step is simply
+             * retried with more output space. */
+            while (run_one_iconv(cd[i], buffer1, tag_len, buffer2, buf_len - 1) == E2BIG) {
+                ret = grow_iconv_buffers(&buffer1, &buffer2, &buf_len);
+                if (ret < 0)
+                    goto end;
+            }
+
+            /* NOTE(review): strlen() assumes the intermediate encoding never
+             * contains 0 bytes -- confirm for the code pages of interest. */
+            tag_len = strlen(buffer2);
+
+            /* The output of this step is the input of the next one. */
+            temp = buffer1;
+            buffer1 = buffer2;
+            buffer2 = temp;
+        }
+
+        ret = av_dict_set(dst, t->key, buffer1, flags);
+        if (ret < 0)
+            goto end;
+    }
+
+    ret = 0;
+
+end:
+    av_freep(&buffer1);
+    av_freep(&buffer2);
+    return ret;
+}
+#endif /* CONFIG_ICONV */
+
+/**
+ * Copy src into dst, converting the values with iconv when contexts are given.
+ *
+ * @param dst                     destination dictionary
+ * @param src                     source dictionary
+ * @param flags                   flags used when setting entries in dst
+ * @param metadata_iconv_contexts NULL-terminated array of iconv contexts, or
+ *                                NULL to copy the entries unchanged
+ * @return 0 on success, a negative AVERROR code if the conversion failed
+ */
+static int av_dict_copy_with_iconv(AVDictionary **dst, const AVDictionary *src, int flags,
+        void *metadata_iconv_contexts)
+{
+#if CONFIG_ICONV
+    /* Only enter the conversion path with a real context array: it walks the
+     * array and must never be handed NULL. */
+    if (metadata_iconv_contexts)
+        return metadata_iconv_internal(dst, src, flags, metadata_iconv_contexts);
+#endif
+
+    av_dict_copy(dst, src, flags);
+    return 0;
+}
+
 static int copy_metadata(char *outspec, char *inspec, AVFormatContext *oc, AVFormatContext *ic, OptionsContext *o)
 {
     AVDictionary **meta_in = NULL;
@@ -2306,8 +2442,8 @@ loop_end:
 
     /* copy global metadata by default */
     if (!o->metadata_global_manual && nb_input_files){
-        av_dict_copy(&oc->metadata, input_files[0]->ctx->metadata,
-                     AV_DICT_DONT_OVERWRITE);
+        av_dict_copy_with_iconv(&oc->metadata, input_files[0]->ctx->metadata,
+                     AV_DICT_DONT_OVERWRITE, metadata_iconv_contexts);
         if(o->recording_time != INT64_MAX)
             av_dict_set(&oc->metadata, "duration", NULL, 0);
         av_dict_set(&oc->metadata, "creation_time", NULL, 0);
@@ -3094,6 +3230,15 @@ const OptionDef options[] = {
     { "map_chapters",   HAS_ARG | OPT_INT | OPT_EXPERT | OPT_OFFSET |
                         OPT_OUTPUT,                                  { .off = OFFSET(chapters_input_file) },
         "set chapters mapping", "input_file_index" },
+
+    {
+        "metadata_iconv_code_page_list",
+        HAS_ARG | OPT_EXPERT,
+        { .func_arg = opt_metadata_iconv },
+        "convert metadata through the comma-separated list of code pages",
+        "code_page_list",
+    },
+
     { "t",              HAS_ARG | OPT_TIME | OPT_OFFSET |
                         OPT_INPUT | OPT_OUTPUT,                      { .off = OFFSET(recording_time) },
         "record or transcode \"duration\" seconds of audio/video",
-- 
2.5.1



More information about the ffmpeg-devel mailing list