[FFmpeg-devel] [RFC][PATCH] ffmpeg: add option to transform metadata using iconv
James Darnley
james.darnley at gmail.com
Tue Sep 22 19:10:27 CEST 2015
At present it only converts global metadata as that is what I wanted to do. It
should be possible to extend it so that the conversion can be different for
different files or streams.
---
doc/ffmpeg.texi | 6 +++
ffmpeg.c | 15 ++++++
ffmpeg.h | 1 +
ffmpeg_opt.c | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 169 insertions(+), 2 deletions(-)
diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi
index f4ffc6c..d4c1c23 100644
--- a/doc/ffmpeg.texi
+++ b/doc/ffmpeg.texi
@@ -855,6 +855,12 @@ such streams is attempted.
Allow input streams with unknown type to be copied instead of failing if copying
such streams is attempted.
+@item -metadata_iconv_code_page_list @var{code_page_list}
+Force the metadata from input files to be converted through the given code
+pages using iconv. @var{code_page_list} is a comma-separated list of at least
+two code pages. This allows the user to correct
+@uref{https://en.wikipedia.org/wiki/Mojibake, mojibake}, provided they know the
+correct code pages to use.
+
@item -map_channel [@var{input_file_id}.@var{stream_specifier}.@var{channel_id}|-1][:@var{output_file_id}.@var{stream_specifier}]
Map an audio channel from a given input to an output. If
@var{output_file_id}.@var{stream_specifier} is not set, the audio channel will
diff --git a/ffmpeg.c b/ffmpeg.c
index e31a2c6..5c04571 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -101,6 +101,10 @@
#include <pthread.h>
#endif
+#if CONFIG_ICONV
+#include <iconv.h>
+#endif
+
#include <time.h>
#include "ffmpeg.h"
@@ -564,6 +568,17 @@ static void ffmpeg_cleanup(int ret)
fclose(vstats_file);
av_freep(&vstats_filename);
+#if CONFIG_ICONV
+ if (metadata_iconv_contexts) {
+ iconv_t *cd = metadata_iconv_contexts;
+ for (i = 0; cd[i]; i++) {
+ iconv_close(cd[i]);
+ cd[i] = NULL;
+ }
+ }
+ av_freep(&metadata_iconv_contexts);
+#endif
+
av_freep(&input_streams);
av_freep(&input_files);
av_freep(&output_streams);
diff --git a/ffmpeg.h b/ffmpeg.h
index 6544e6f..2b8cbd7 100644
--- a/ffmpeg.h
+++ b/ffmpeg.h
@@ -495,6 +495,7 @@ extern int nb_filtergraphs;
extern char *vstats_filename;
extern char *sdp_filename;
+extern void *metadata_iconv_contexts;
extern float audio_drift_threshold;
extern float dts_delta_threshold;
diff --git a/ffmpeg_opt.c b/ffmpeg_opt.c
index 4edd118..d226f78 100644
--- a/ffmpeg_opt.c
+++ b/ffmpeg_opt.c
@@ -42,6 +42,10 @@
#include "libavutil/pixfmt.h"
#include "libavutil/time_internal.h"
+#if CONFIG_ICONV
+# include <iconv.h>
+#endif
+
#define DEFAULT_PASS_LOGFILENAME_PREFIX "ffmpeg2pass"
#define MATCH_PER_STREAM_OPT(name, type, outvar, fmtctx, st)\
@@ -84,6 +88,7 @@ const HWAccel hwaccels[] = {
char *vstats_filename;
char *sdp_filename;
+void *metadata_iconv_contexts;
float audio_drift_threshold = 0.1;
float dts_delta_threshold = 10;
@@ -120,6 +125,7 @@ static int input_stream_potentially_available = 0;
static int ignore_unknown_streams = 0;
static int copy_unknown_streams = 0;
+
static void uninit_options(OptionsContext *o)
{
const OptionDef *po = options;
@@ -196,6 +202,58 @@ static AVDictionary *strip_specifiers(AVDictionary *dict)
return ret;
}
+/**
+ * Option handler for -metadata_iconv_code_page_list.
+ *
+ * Splits @p arg on commas and opens one iconv descriptor for every adjacent
+ * pair of entries. For the pair (list[i], list[i + 1]) the call is
+ * iconv_open(list[i], list[i + 1]), i.e. list[i] is used as the target
+ * encoding and list[i + 1] as the source encoding. The descriptors are stored
+ * in a NULL-terminated array published through metadata_iconv_contexts; a
+ * list installed by an earlier use of the option is closed and freed.
+ *
+ * NOTE(review): the NULL terminator relies on a valid iconv_t never comparing
+ * equal to NULL -- confirm for every supported iconv implementation.
+ *
+ * @param optctx unused
+ * @param opt    unused
+ * @param arg    comma-separated list of at least two code page names
+ * @return 0 on success, AVERROR(EINVAL) if the list is too short, a
+ *         descriptor cannot be opened or iconv support is not compiled in,
+ *         AVERROR(ENOMEM) on allocation failure
+ */
+static int opt_metadata_iconv(void *optctx, const char *opt, const char *arg)
+{
+#if !CONFIG_ICONV
+    av_log(NULL, AV_LOG_ERROR, "converting metadata through codepages requires "
+           "ffmpeg to be built with iconv support\n");
+    return AVERROR(EINVAL);
+#else
+    char *saveptr = NULL;
+    char **list = NULL;
+    char *code_page_list = av_strdup(arg);
+    char *token;
+    int i, token_count = 0;
+    int ret = 0;
+    iconv_t *cd = NULL;
+    iconv_t *previous;
+
+    if (!code_page_list)
+        return AVERROR(ENOMEM);
+
+    /* The tokens point into code_page_list, so it must outlive list. */
+    for (token = av_strtok(code_page_list, ",", &saveptr); token;
+         token = av_strtok(NULL, ",", &saveptr)) {
+        av_dynarray_add(&list, &token_count, token);
+        if (!list) {
+            ret = AVERROR(ENOMEM);
+            goto end;
+        }
+    }
+
+    if (token_count < 2) {
+        /* Print the untouched argument: av_strtok() has already cut the copy. */
+        av_log(NULL, AV_LOG_ERROR, "too few code pages (%d) in list (%s)\n", token_count, arg);
+        ret = AVERROR(EINVAL);
+        goto end;
+    }
+
+    /* token_count - 1 descriptors plus one zeroed slot as terminator. */
+    cd = av_mallocz_array(sizeof(iconv_t), token_count);
+    if (!cd) {
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    for (i = 0; i < token_count - 1; i++) {
+        iconv_t handle;
+
+        av_log(NULL, AV_LOG_DEBUG, "Opening iconv with code pages: %s, %s\n", list[i], list[i+1]);
+
+        handle = iconv_open(list[i], list[i+1]);
+        if (handle == (iconv_t)(-1)) {
+            av_log(NULL, AV_LOG_ERROR, "error opening iconv with code pages (%s, %s)\n", list[i], list[i+1]);
+            ret = AVERROR(EINVAL);
+            goto end;
+            /* TODO: check for other errors. */
+        }
+        cd[i] = handle;
+    }
+
+    /* Publish the new list and let the cleanup below dispose of the old one. */
+    previous = metadata_iconv_contexts;
+    metadata_iconv_contexts = cd;
+    cd = previous;
+
+end:
+    /* cd is either a partially built list (error) or the replaced list. */
+    if (cd) {
+        for (i = 0; cd[i]; i++)
+            iconv_close(cd[i]);
+        av_free(cd);
+    }
+    av_freep(&code_page_list);
+    av_freep(&list);
+
+    return ret;
+#endif /* !CONFIG_ICONV */
+}
+
static int opt_sameq(void *optctx, const char *opt, const char *arg)
{
av_log(NULL, AV_LOG_ERROR, "Option '%s' was removed. "
@@ -454,6 +512,84 @@ static void parse_meta_type(char *arg, char *type, int *index, const char **stre
*type = 'g';
}
+#if CONFIG_ICONV
+/**
+ * Convert one buffer with an already opened iconv descriptor.
+ *
+ * The descriptor is first put back into its initial shift state, because the
+ * same descriptor is reused for every tag and the caller restarts a
+ * conversion from the beginning of the input after an E2BIG result.
+ *
+ * @param cd       conversion descriptor
+ * @param buf1     input bytes
+ * @param len      number of input bytes
+ * @param buf2     output buffer; must have room for one byte more than
+ *                 @p buf2_len so that the terminator written here fits
+ * @param buf2_len number of bytes iconv may write to @p buf2
+ * @return E2BIG if the output buffer was too small, 0 otherwise (including
+ *         when iconv failed for another reason; the output is then terminated
+ *         after whatever was converted)
+ */
+static int run_one_iconv(iconv_t cd, char *buf1, size_t len, char *buf2, size_t buf2_len)
+{
+    /* Discard any state left over from a previous or aborted conversion. */
+    iconv(cd, NULL, NULL, NULL, NULL);
+
+    /* TODO: maybe handle other errors. */
+    if (iconv(cd, &buf1, &len, &buf2, &buf2_len) == (size_t)(-1)
+        && errno == E2BIG)
+        return E2BIG;
+
+    /* iconv() advanced buf2 past the converted bytes: terminate there. */
+    buf2[0] = 0;
+    return 0;
+}
+
+/* Double both work buffers; on failure the buffers stay valid for freeing. */
+static int grow_iconv_buffers(char **buf1, char **buf2, size_t *len)
+{
+    size_t new_len = *len * 2;
+    char *tmp;
+
+    tmp = av_realloc(*buf1, new_len);
+    if (!tmp)
+        return AVERROR(ENOMEM);
+    *buf1 = tmp;
+
+    tmp = av_realloc(*buf2, new_len);
+    if (!tmp)
+        return AVERROR(ENOMEM);
+    *buf2 = tmp;
+
+    *len = new_len;
+    return 0;
+}
+
+/**
+ * Copy every entry of @p src into @p dst, passing each value through the
+ * chain of iconv descriptors in @p cd (in array order).
+ *
+ * Values are treated as NUL-terminated strings between conversion steps.
+ * NOTE(review): an intermediate encoding that produces embedded zero bytes
+ * would therefore be cut short -- confirm that this cannot happen for the
+ * code pages users are expected to pass.
+ *
+ * @param dst   destination dictionary
+ * @param src   source dictionary
+ * @param flags flags passed on to av_dict_set()
+ * @param cd    NULL-terminated array of iconv descriptors; must not be NULL
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int metadata_iconv_internal(AVDictionary **dst, const AVDictionary *src, int flags,
+                                   iconv_t *cd)
+{
+    AVDictionaryEntry *t = NULL;
+    size_t buf_len = 256;
+    char *buffer1 = av_malloc(buf_len);
+    char *buffer2 = av_malloc(buf_len);
+    int ret = 0;
+
+    if (!buffer1 || !buffer2) {
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    while ((t = av_dict_get(src, "", t, AV_DICT_IGNORE_SUFFIX))) {
+        size_t tag_len = strlen(t->value);
+        int i;
+
+        /* Need room for the value and its terminating NUL. */
+        while (buf_len <= tag_len) {
+            ret = grow_iconv_buffers(&buffer1, &buffer2, &buf_len);
+            if (ret < 0)
+                goto end;
+        }
+
+        memcpy(buffer1, t->value, tag_len + 1);
+
+        for (i = 0; cd[i]; i++) {
+            char *temp;
+
+            /* One byte of buffer2 is held back for the terminator. */
+            while (run_one_iconv(cd[i], buffer1, tag_len, buffer2, buf_len - 1) == E2BIG) {
+                ret = grow_iconv_buffers(&buffer1, &buffer2, &buf_len);
+                if (ret < 0)
+                    goto end;
+            }
+
+            /* The output of this step is the input of the next one. */
+            tag_len = strlen(buffer2);
+            temp = buffer1;
+            buffer1 = buffer2;
+            buffer2 = temp;
+        }
+
+        ret = av_dict_set(dst, t->key, buffer1, flags);
+        if (ret < 0)
+            goto end;
+    }
+
+end:
+    av_free(buffer1);
+    av_free(buffer2);
+    return ret;
+}
+#endif /* CONFIG_ICONV */
+
+/**
+ * Copy @p src into @p dst, converting the values through iconv when a list of
+ * conversion descriptors is supplied.
+ *
+ * @param dst                     destination dictionary
+ * @param src                     source dictionary
+ * @param flags                   flags passed on to the dictionary functions
+ * @param metadata_iconv_contexts NULL-terminated array of iconv descriptors,
+ *                                or NULL to perform a plain av_dict_copy()
+ * @return 0 for a plain copy or when iconv support is not compiled in,
+ *         otherwise the result of metadata_iconv_internal()
+ */
+static int av_dict_copy_with_iconv(AVDictionary **dst, const AVDictionary *src, int flags,
+                                   void *metadata_iconv_contexts)
+{
+    if (!metadata_iconv_contexts) {
+        av_dict_copy(dst, src, flags);
+        /* Stop here: the conversion path dereferences the context list. */
+        return 0;
+    }
+
+#if CONFIG_ICONV
+    return metadata_iconv_internal(dst, src, flags, metadata_iconv_contexts);
+#else
+    return 0;
+#endif
+}
+
static int copy_metadata(char *outspec, char *inspec, AVFormatContext *oc, AVFormatContext *ic, OptionsContext *o)
{
AVDictionary **meta_in = NULL;
@@ -2306,8 +2442,8 @@ loop_end:
/* copy global metadata by default */
if (!o->metadata_global_manual && nb_input_files){
- av_dict_copy(&oc->metadata, input_files[0]->ctx->metadata,
- AV_DICT_DONT_OVERWRITE);
+ av_dict_copy_with_iconv(&oc->metadata, input_files[0]->ctx->metadata,
+ AV_DICT_DONT_OVERWRITE, metadata_iconv_contexts);
if(o->recording_time != INT64_MAX)
av_dict_set(&oc->metadata, "duration", NULL, 0);
av_dict_set(&oc->metadata, "creation_time", NULL, 0);
@@ -3094,6 +3230,15 @@ const OptionDef options[] = {
{ "map_chapters", HAS_ARG | OPT_INT | OPT_EXPERT | OPT_OFFSET |
OPT_OUTPUT, { .off = OFFSET(chapters_input_file) },
"set chapters mapping", "input_file_index" },
+
+ {
+ "metadata_iconv_code_page_list",
+ HAS_ARG | OPT_EXPERT,
+ { .func_arg = opt_metadata_iconv },
+ "convert metadata through the comma-separated list of code pages",
+ "code_page_list",
+ },
+
{ "t", HAS_ARG | OPT_TIME | OPT_OFFSET |
OPT_INPUT | OPT_OUTPUT, { .off = OFFSET(recording_time) },
"record or transcode \"duration\" seconds of audio/video",
--
2.5.1
More information about the ffmpeg-devel
mailing list