[FFmpeg-devel] [PATCH] avcodec/webvttdec: strip classes
Leon Grutters
gruttersleonbot2 at gmail.com
Thu Mar 20 20:16:12 EET 2025
On 3/20/25 9:02 AM, Andreas Rheinhardt wrote:
> Leon Grutters:
>> If a supported tag has a class, e.g. "<i.bold>", it is ignored entirely;
>> so for example "<i.bold>Hello</i>" would be converted to "Hello{\i0}"
>> instead of the intended "{\i1}Hello{\i0}".
>>
>> Signed-off-by: Leon Grutters<gruttersleonbot2 at gmail.com>
>> ---
>> libavcodec/webvttdec.c | 51 +++++++++++++++++++++++++++++++++---------
>> 1 file changed, 40 insertions(+), 11 deletions(-)
>>
>> diff --git a/libavcodec/webvttdec.c b/libavcodec/webvttdec.c
>> index 35bdbe805d..4111d138c4 100644
>> --- a/libavcodec/webvttdec.c
>> +++ b/libavcodec/webvttdec.c
>> @@ -29,25 +29,53 @@
>> #include "ass.h"
>> #include "codec_internal.h"
>> #include "libavutil/bprint.h"
>> +#include "libavutil/mem.h"
>>
>> static const struct {
>> const char *from;
>> const char *to;
>> } webvtt_tag_replace[] = {
>> - {"<i>", "{\\i1}"}, {"</i>", "{\\i0}"},
>> - {"<b>", "{\\b1}"}, {"</b>", "{\\b0}"},
>> - {"<u>", "{\\u1}"}, {"</u>", "{\\u0}"},
>> {"{", "\\{{}"}, {"\\", "\\\xe2\x81\xa0"}, // escape to avoid ASS markup conflicts
>> {"&gt;", ">"}, {"&lt;", "<"},
>> {"&lrm;", "\xe2\x80\x8e"}, {"&rlm;", "\xe2\x80\x8f"},
>> {"&amp;", "&"}, {"&nbsp;", "\\h"},
>> };
>> +static const struct {
>> + const char *from;
>> + const char *to;
>> +} webvtt_valid_tags[] = {
>> + {"i", "{\\i1}"}, {"/i", "{\\i0}"},
>> + {"b", "{\\b1}"}, {"/b", "{\\b0}"},
>> + {"u", "{\\u1}"}, {"/u", "{\\u0}"},
>> +};
> These strings are so small that one can avoid the relocations by using
> fixed-size buffers.
What do you mean exactly? I'm not sure I understand.
>>
>> static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
>> {
>> - int i, again = 0, skip = 0;
>> + int i, again = 0/*, skip = 0*/;
>>
>> while (*p) {
>> + if (*p == '<') {
>> + const char *tag_end = strchr(p, '>');
>> + char *tag_body, *tag_name, *saveptr = NULL;
>> + ptrdiff_t len;
>> + if (!tag_end)
>> + break;
>> + len = tag_end - p + 1;
>> + tag_body = av_strndup(p + 1, len - 2);
>> + if (!tag_body)
>> + return AVERROR(ENOMEM);
> This allocation seems unnecessary. You can inspect the string without
> modifying it by using strncmp() below and by using a maximum field width
> for the %s directive in a parameter.
My thought process here was that if it's done this way, you have saveptr
for possibly getting the classes (and tag annotation) later on.
>> + tag_name = av_strtok(tag_body, ".", &saveptr);
>> + for (i = 0; i < FF_ARRAY_ELEMS(webvtt_valid_tags); i++) {
>> + const char *from = webvtt_valid_tags[i].from;
>> + if(!strcmp(tag_name, from)) {
>> + av_bprintf(buf, "%s", webvtt_valid_tags[i].to);
>> + break;
>> + }
>> + }
>> + p += len;
>> + again = 1;
>> + av_freep(&tag_body);
>> + }
>>
>> for (i = 0; i < FF_ARRAY_ELEMS(webvtt_tag_replace); i++) {
>> const char *from = webvtt_tag_replace[i].from;
>> @@ -59,21 +87,22 @@ static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
>> break;
>> }
>> }
>> +
>> if (!*p)
>> break;
>>
>> if (again) {
>> again = 0;
>> - skip = 0;
>> + // skip = 0;
>> continue;
>> }
>> - if (*p == '<')
>> - skip = 1;
>> - else if (*p == '>')
>> - skip = 0;
>> - else if (p[0] == '\n' && p[1])
>> + // if (*p == '<')
>> + // skip = 1;
>> + // else if (*p == '>')
>> + // skip = 0;
>> + if (p[0] == '\n' && p[1])
>> av_bprintf(buf, "\\N");
>> - else if (!skip && *p != '\r')
>> + else if (/*!skip && */*p != '\r')
>> av_bprint_chars(buf, *p, 1);
>> p++;
>> }
More information about the ffmpeg-devel
mailing list