[FFmpeg-devel] [PATCH] avcodec/ccaption_dec: Avoid relocations for strings

Thu Mar 7 10:13:34 EET 2024

Andreas Rheinhardt:
> The longest string here takes four bytes (including the terminating
> zero), so using an array of pointers is wasteful even when ignoring
> the cost of relocations; the lack of relocations also implies that
> this array will now be put into .rodata and not into .data.rel.ro.
> 
> Static asserts are used to ensure that every string fits into its
> array element together with its terminator, so that all strings are
> always properly zero-terminated.
> 
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at outlook.com>
> ---
> Now using static asserts to address the main point of
> criticism in https://patchwork.ffmpeg.org/project/ffmpeg/patch/20210106081702.2495510-1-andreas.rheinhardt@gmail.com/
> 
>  libavcodec/ccaption_dec.c | 215 ++++++++++++++++++++------------------
>  1 file changed, 115 insertions(+), 100 deletions(-)
> 
> diff --git a/libavcodec/ccaption_dec.c b/libavcodec/ccaption_dec.c
> index 95143e7e46..1550e4b253 100644
> --- a/libavcodec/ccaption_dec.c
> +++ b/libavcodec/ccaption_dec.c
> @@ -67,108 +67,123 @@ enum cc_charset {
>      CCSET_EXTENDED_PORTUGUESE_GERMAN_DANISH,
>  };
>  
> -static const char *charset_overrides[4][128] =
> +#define CHARSET_OVERRIDE_LIST(START_SET, ENTRY, END_SET) \
> +    START_SET(CCSET_BASIC_AMERICAN)                      \
> +        ENTRY(0x27, "\u2019")                            \
> +        ENTRY(0x2a, "\u00e1")                            \
> +        ENTRY(0x5c, "\u00e9")                            \
> +        ENTRY(0x5e, "\u00ed")                            \
> +        ENTRY(0x5f, "\u00f3")                            \
> +        ENTRY(0x60, "\u00fa")                            \
> +        ENTRY(0x7b, "\u00e7")                            \
> +        ENTRY(0x7c, "\u00f7")                            \
> +        ENTRY(0x7d, "\u00d1")                            \
> +        ENTRY(0x7e, "\u00f1")                            \
> +        ENTRY(0x7f, "\u2588")                            \
> +    END_SET                                              \
> +    START_SET(CCSET_SPECIAL_AMERICAN)                    \
> +        ENTRY(0x30, "\u00ae")                            \
> +        ENTRY(0x31, "\u00b0")                            \
> +        ENTRY(0x32, "\u00bd")                            \
> +        ENTRY(0x33, "\u00bf")                            \
> +        ENTRY(0x34, "\u2122")                            \
> +        ENTRY(0x35, "\u00a2")                            \
> +        ENTRY(0x36, "\u00a3")                            \
> +        ENTRY(0x37, "\u266a")                            \
> +        ENTRY(0x38, "\u00e0")                            \
> +        ENTRY(0x39, "\u00A0")                            \
> +        ENTRY(0x3a, "\u00e8")                            \
> +        ENTRY(0x3b, "\u00e2")                            \
> +        ENTRY(0x3c, "\u00ea")                            \
> +        ENTRY(0x3d, "\u00ee")                            \
> +        ENTRY(0x3e, "\u00f4")                            \
> +        ENTRY(0x3f, "\u00fb")                            \
> +    END_SET                                              \
> +    START_SET(CCSET_EXTENDED_SPANISH_FRENCH_MISC)        \
> +        ENTRY(0x20, "\u00c1")                            \
> +        ENTRY(0x21, "\u00c9")                            \
> +        ENTRY(0x22, "\u00d3")                            \
> +        ENTRY(0x23, "\u00da")                            \
> +        ENTRY(0x24, "\u00dc")                            \
> +        ENTRY(0x25, "\u00fc")                            \
> +        ENTRY(0x26, "\u00b4")                            \
> +        ENTRY(0x27, "\u00a1")                            \
> +        ENTRY(0x28, "*")                                 \
> +        ENTRY(0x29, "\u2018")                            \
> +        ENTRY(0x2a, "-")                                 \
> +        ENTRY(0x2b, "\u00a9")                            \
> +        ENTRY(0x2c, "\u2120")                            \
> +        ENTRY(0x2d, "\u00b7")                            \
> +        ENTRY(0x2e, "\u201c")                            \
> +        ENTRY(0x2f, "\u201d")                            \
> +        ENTRY(0x30, "\u00c0")                            \
> +        ENTRY(0x31, "\u00c2")                            \
> +        ENTRY(0x32, "\u00c7")                            \
> +        ENTRY(0x33, "\u00c8")                            \
> +        ENTRY(0x34, "\u00ca")                            \
> +        ENTRY(0x35, "\u00cb")                            \
> +        ENTRY(0x36, "\u00eb")                            \
> +        ENTRY(0x37, "\u00ce")                            \
> +        ENTRY(0x38, "\u00cf")                            \
> +        ENTRY(0x39, "\u00ef")                            \
> +        ENTRY(0x3a, "\u00d4")                            \
> +        ENTRY(0x3b, "\u00d9")                            \
> +        ENTRY(0x3c, "\u00f9")                            \
> +        ENTRY(0x3d, "\u00db")                            \
> +        ENTRY(0x3e, "\u00ab")                            \
> +        ENTRY(0x3f, "\u00bb")                            \
> +    END_SET                                              \
> +    START_SET(CCSET_EXTENDED_PORTUGUESE_GERMAN_DANISH)   \
> +        ENTRY(0x20, "\u00c3")                            \
> +        ENTRY(0x21, "\u00e3")                            \
> +        ENTRY(0x22, "\u00cd")                            \
> +        ENTRY(0x23, "\u00cc")                            \
> +        ENTRY(0x24, "\u00ec")                            \
> +        ENTRY(0x25, "\u00d2")                            \
> +        ENTRY(0x26, "\u00f2")                            \
> +        ENTRY(0x27, "\u00d5")                            \
> +        ENTRY(0x28, "\u00f5")                            \
> +        ENTRY(0x29, "{")                                 \
> +        ENTRY(0x2a, "}")                                 \
> +        ENTRY(0x2b, "\\")                                \
> +        ENTRY(0x2c, "^")                                 \
> +        ENTRY(0x2d, "_")                                 \
> +        ENTRY(0x2e, "|")                                 \
> +        ENTRY(0x2f, "~")                                 \
> +        ENTRY(0x30, "\u00c4")                            \
> +        ENTRY(0x31, "\u00e4")                            \
> +        ENTRY(0x32, "\u00d6")                            \
> +        ENTRY(0x33, "\u00f6")                            \
> +        ENTRY(0x34, "\u00df")                            \
> +        ENTRY(0x35, "\u00a5")                            \
> +        ENTRY(0x36, "\u00a4")                            \
> +        ENTRY(0x37, "\u00a6")                            \
> +        ENTRY(0x38, "\u00c5")                            \
> +        ENTRY(0x39, "\u00e5")                            \
> +        ENTRY(0x3a, "\u00d8")                            \
> +        ENTRY(0x3b, "\u00f8")                            \
> +        ENTRY(0x3c, "\u250c")                            \
> +        ENTRY(0x3d, "\u2510")                            \
> +        ENTRY(0x3e, "\u2514")                            \
> +        ENTRY(0x3f, "\u2518")                            \
> +    END_SET                                              \
> +
> +static const char charset_overrides[4][128][sizeof("\u266a")] =
>  {
> -    [CCSET_BASIC_AMERICAN] = {
> -        [0x27] = "\u2019",
> -        [0x2a] = "\u00e1",
> -        [0x5c] = "\u00e9",
> -        [0x5e] = "\u00ed",
> -        [0x5f] = "\u00f3",
> -        [0x60] = "\u00fa",
> -        [0x7b] = "\u00e7",
> -        [0x7c] = "\u00f7",
> -        [0x7d] = "\u00d1",
> -        [0x7e] = "\u00f1",
> -        [0x7f] = "\u2588"
> -    },
> -    [CCSET_SPECIAL_AMERICAN] = {
> -        [0x30] = "\u00ae",
> -        [0x31] = "\u00b0",
> -        [0x32] = "\u00bd",
> -        [0x33] = "\u00bf",
> -        [0x34] = "\u2122",
> -        [0x35] = "\u00a2",
> -        [0x36] = "\u00a3",
> -        [0x37] = "\u266a",
> -        [0x38] = "\u00e0",
> -        [0x39] = "\u00A0",
> -        [0x3a] = "\u00e8",
> -        [0x3b] = "\u00e2",
> -        [0x3c] = "\u00ea",
> -        [0x3d] = "\u00ee",
> -        [0x3e] = "\u00f4",
> -        [0x3f] = "\u00fb",
> -    },
> -    [CCSET_EXTENDED_SPANISH_FRENCH_MISC] = {
> -        [0x20] = "\u00c1",
> -        [0x21] = "\u00c9",
> -        [0x22] = "\u00d3",
> -        [0x23] = "\u00da",
> -        [0x24] = "\u00dc",
> -        [0x25] = "\u00fc",
> -        [0x26] = "\u00b4",
> -        [0x27] = "\u00a1",
> -        [0x28] = "*",
> -        [0x29] = "\u2018",
> -        [0x2a] = "-",
> -        [0x2b] = "\u00a9",
> -        [0x2c] = "\u2120",
> -        [0x2d] = "\u00b7",
> -        [0x2e] = "\u201c",
> -        [0x2f] = "\u201d",
> -        [0x30] = "\u00c0",
> -        [0x31] = "\u00c2",
> -        [0x32] = "\u00c7",
> -        [0x33] = "\u00c8",
> -        [0x34] = "\u00ca",
> -        [0x35] = "\u00cb",
> -        [0x36] = "\u00eb",
> -        [0x37] = "\u00ce",
> -        [0x38] = "\u00cf",
> -        [0x39] = "\u00ef",
> -        [0x3a] = "\u00d4",
> -        [0x3b] = "\u00d9",
> -        [0x3c] = "\u00f9",
> -        [0x3d] = "\u00db",
> -        [0x3e] = "\u00ab",
> -        [0x3f] = "\u00bb",
> -    },
> -    [CCSET_EXTENDED_PORTUGUESE_GERMAN_DANISH] = {
> -        [0x20] = "\u00c3",
> -        [0x21] = "\u00e3",
> -        [0x22] = "\u00cd",
> -        [0x23] = "\u00cc",
> -        [0x24] = "\u00ec",
> -        [0x25] = "\u00d2",
> -        [0x26] = "\u00f2",
> -        [0x27] = "\u00d5",
> -        [0x28] = "\u00f5",
> -        [0x29] = "{",
> -        [0x2a] = "}",
> -        [0x2b] = "\\",
> -        [0x2c] = "^",
> -        [0x2d] = "_",
> -        [0x2e] = "|",
> -        [0x2f] = "~",
> -        [0x30] = "\u00c4",
> -        [0x31] = "\u00e4",
> -        [0x32] = "\u00d6",
> -        [0x33] = "\u00f6",
> -        [0x34] = "\u00df",
> -        [0x35] = "\u00a5",
> -        [0x36] = "\u00a4",
> -        [0x37] = "\u00a6",
> -        [0x38] = "\u00c5",
> -        [0x39] = "\u00e5",
> -        [0x3a] = "\u00d8",
> -        [0x3b] = "\u00f8",
> -        [0x3c] = "\u250c",
> -        [0x3d] = "\u2510",
> -        [0x3e] = "\u2514",
> -        [0x3f] = "\u2518",
> +#define START_SET(IDX) \
> +    [IDX] = {
> +#define ENTRY(idx, string) \
> +        [idx] = string,
> +#define END_SET \
>      },
> +    CHARSET_OVERRIDE_LIST(START_SET, ENTRY, END_SET)
>  };
> +#define EMPTY_START(IDX)
> +#define EMPTY_END
> +#define ASSERT_ENTRY(IDX, str)                                     \
> +    _Static_assert(sizeof(str) <= sizeof(charset_overrides[0][0]), \
> +                   "'" str "' string takes too much space");
> +CHARSET_OVERRIDE_LIST(EMPTY_START, ASSERT_ENTRY, EMPTY_END)
>  
>  static const unsigned char bg_attribs[8] = // Color
>  {
> @@ -571,7 +586,7 @@ static int capture_screen(CCaptionSubContext *ctx)
>                  prev_color = color[j];
>                  prev_bg_color = bg[j];
>                  override = charset_overrides[(int)charset[j]][(int)row[j]];
> -                if (override) {
> +                if (override[0]) {
>                      av_bprintf(&ctx->buffer[bidx], "%s%s%s%s%s", e_tag, s_tag, c_tag, b_tag, override);
>                      seen_char = 1;
>                  } else if (row[j] == ' ' && !seen_char) {

Will apply this patch tomorrow unless there are objections.

- Andreas