[FFmpeg-devel] [PATCH] avcodec/webvttdec: strip classes
Andreas Rheinhardt
andreas.rheinhardt at outlook.com
Thu Mar 20 10:02:44 EET 2025
Leon Grutters:
> If a supported tag has a class, e.g. "<i.bold>", it is ignored entirely;
> so for example "<i.bold>Hello</i>" would be converted to "Hello{\i0}"
> instead of the intended "{\i1}Hello{\i0}".
>
> Signed-off-by: Leon Grutters <gruttersleonbot2 at gmail.com>
> ---
> libavcodec/webvttdec.c | 51 +++++++++++++++++++++++++++++++++---------
> 1 file changed, 40 insertions(+), 11 deletions(-)
>
> diff --git a/libavcodec/webvttdec.c b/libavcodec/webvttdec.c
> index 35bdbe805d..4111d138c4 100644
> --- a/libavcodec/webvttdec.c
> +++ b/libavcodec/webvttdec.c
> @@ -29,25 +29,53 @@
> #include "ass.h"
> #include "codec_internal.h"
> #include "libavutil/bprint.h"
> +#include "libavutil/mem.h"
>
> static const struct {
> const char *from;
> const char *to;
> } webvtt_tag_replace[] = {
> - {"<i>", "{\\i1}"}, {"</i>", "{\\i0}"},
> - {"<b>", "{\\b1}"}, {"</b>", "{\\b0}"},
> - {"<u>", "{\\u1}"}, {"</u>", "{\\u0}"},
> {"{", "\\{{}"}, {"\\", "\\\xe2\x81\xa0"}, // escape to avoid ASS markup conflicts
> {"&gt;", ">"}, {"&lt;", "<"},
> {"&lrm;", "\xe2\x80\x8e"}, {"&rlm;", "\xe2\x80\x8f"},
> {"&amp;", "&"}, {"&nbsp;", "\\h"},
> };
> +static const struct {
> + const char *from;
> + const char *to;
> +} webvtt_valid_tags[] = {
> + {"i", "{\\i1}"}, {"/i", "{\\i0}"},
> + {"b", "{\\b1}"}, {"/b", "{\\b0}"},
> + {"u", "{\\u1}"}, {"/u", "{\\u0}"},
> +};
These strings are so small that one can avoid the relocations by using
fixed-size buffers.
>
> static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
> {
> - int i, again = 0, skip = 0;
> + int i, again = 0/*, skip = 0*/;
>
> while (*p) {
> + if (*p == '<') {
> + const char *tag_end = strchr(p, '>');
> + char *tag_body, *tag_name, *saveptr = NULL;
> + ptrdiff_t len;
> + if (!tag_end)
> + break;
> + len = tag_end - p + 1;
> + tag_body = av_strndup(p + 1, len - 2);
> + if (!tag_body)
> + return AVERROR(ENOMEM);
This allocation seems unnecessary. You can inspect the string without
modifying it by using strncmp() below and by passing a precision (i.e. a
maximum length, as in "%.*s") for the %s directive as a parameter.
> + tag_name = av_strtok(tag_body, ".", &saveptr);
> + for (i = 0; i < FF_ARRAY_ELEMS(webvtt_valid_tags); i++) {
> + const char *from = webvtt_valid_tags[i].from;
> + if(!strcmp(tag_name, from)) {
> + av_bprintf(buf, "%s", webvtt_valid_tags[i].to);
> + break;
> + }
> + }
> + p += len;
> + again = 1;
> + av_freep(&tag_body);
> + }
>
> for (i = 0; i < FF_ARRAY_ELEMS(webvtt_tag_replace); i++) {
> const char *from = webvtt_tag_replace[i].from;
> @@ -59,21 +87,22 @@ static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
> break;
> }
> }
> +
> if (!*p)
> break;
>
> if (again) {
> again = 0;
> - skip = 0;
> + // skip = 0;
> continue;
> }
> - if (*p == '<')
> - skip = 1;
> - else if (*p == '>')
> - skip = 0;
> - else if (p[0] == '\n' && p[1])
> + // if (*p == '<')
> + // skip = 1;
> + // else if (*p == '>')
> + // skip = 0;
> + if (p[0] == '\n' && p[1])
> av_bprintf(buf, "\\N");
> - else if (!skip && *p != '\r')
> + else if (/*!skip && */*p != '\r')
> av_bprint_chars(buf, *p, 1);
> p++;
> }
More information about the ffmpeg-devel
mailing list