diff mbox series

[FFmpeg-devel] avcodec/ccaption_dec: Avoid relocations for strings

Message ID AS8P250MB07446AE07339477C060B599E8F222@AS8P250MB0744.EURP250.PROD.OUTLOOK.COM
State Accepted
Commit ec1b6e0cd404b2f7f4d202802b1c0a40d52fc9b0
Headers show
Series [FFmpeg-devel] avcodec/ccaption_dec: Avoid relocations for strings | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Andreas Rheinhardt March 5, 2024, 8:29 p.m. UTC
The longest string here takes four bytes, so using an array
of pointers is wasteful even when ignoring the cost of relocations;
the lack of relocations also implies that this array
will now be put into .rodata and not into .data.rel.ro.

Static asserts are used to ensure that all strings are always
properly zero-terminated.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
Now using static asserts to address the main point of
criticism in https://patchwork.ffmpeg.org/project/ffmpeg/patch/20210106081702.2495510-1-andreas.rheinhardt@gmail.com/

 libavcodec/ccaption_dec.c | 215 ++++++++++++++++++++------------------
 1 file changed, 115 insertions(+), 100 deletions(-)

Comments

Marth64 March 5, 2024, 8:56 p.m. UTC | #1
I did a quick validation with ATSC, DVD, and SCTE sources. Results were as
expected.

On Tue, Mar 5, 2024 at 2:30 PM Andreas Rheinhardt <
andreas.rheinhardt@outlook.com> wrote:

> The longest string here takes four bytes, so using an array
> of pointers is wasteful even when ignoring the cost of relocations;
> the lack of relocations also implies that this array
> will now be put into .rodata and not into .data.rel.ro.
>
> Static asserts are used to ensure that all strings are always
> properly zero-terminated.
>
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
> ---
> Now using static asserts to address the main point of
> criticism in
> https://patchwork.ffmpeg.org/project/ffmpeg/patch/20210106081702.2495510-1-andreas.rheinhardt@gmail.com/
>
>  libavcodec/ccaption_dec.c | 215 ++++++++++++++++++++------------------
>  1 file changed, 115 insertions(+), 100 deletions(-)
>
> diff --git a/libavcodec/ccaption_dec.c b/libavcodec/ccaption_dec.c
> index 95143e7e46..1550e4b253 100644
> --- a/libavcodec/ccaption_dec.c
> +++ b/libavcodec/ccaption_dec.c
> @@ -67,108 +67,123 @@ enum cc_charset {
>      CCSET_EXTENDED_PORTUGUESE_GERMAN_DANISH,
>  };
>
> -static const char *charset_overrides[4][128] =
> +#define CHARSET_OVERRIDE_LIST(START_SET, ENTRY, END_SET) \
> +    START_SET(CCSET_BASIC_AMERICAN)                      \
> +        ENTRY(0x27, "\u2019")                            \
> +        ENTRY(0x2a, "\u00e1")                            \
> +        ENTRY(0x5c, "\u00e9")                            \
> +        ENTRY(0x5e, "\u00ed")                            \
> +        ENTRY(0x5f, "\u00f3")                            \
> +        ENTRY(0x60, "\u00fa")                            \
> +        ENTRY(0x7b, "\u00e7")                            \
> +        ENTRY(0x7c, "\u00f7")                            \
> +        ENTRY(0x7d, "\u00d1")                            \
> +        ENTRY(0x7e, "\u00f1")                            \
> +        ENTRY(0x7f, "\u2588")                            \
> +    END_SET                                              \
> +    START_SET(CCSET_SPECIAL_AMERICAN)                    \
> +        ENTRY(0x30, "\u00ae")                            \
> +        ENTRY(0x31, "\u00b0")                            \
> +        ENTRY(0x32, "\u00bd")                            \
> +        ENTRY(0x33, "\u00bf")                            \
> +        ENTRY(0x34, "\u2122")                            \
> +        ENTRY(0x35, "\u00a2")                            \
> +        ENTRY(0x36, "\u00a3")                            \
> +        ENTRY(0x37, "\u266a")                            \
> +        ENTRY(0x38, "\u00e0")                            \
> +        ENTRY(0x39, "\u00A0")                            \
> +        ENTRY(0x3a, "\u00e8")                            \
> +        ENTRY(0x3b, "\u00e2")                            \
> +        ENTRY(0x3c, "\u00ea")                            \
> +        ENTRY(0x3d, "\u00ee")                            \
> +        ENTRY(0x3e, "\u00f4")                            \
> +        ENTRY(0x3f, "\u00fb")                            \
> +    END_SET                                              \
> +    START_SET(CCSET_EXTENDED_SPANISH_FRENCH_MISC)        \
> +        ENTRY(0x20, "\u00c1")                            \
> +        ENTRY(0x21, "\u00c9")                            \
> +        ENTRY(0x22, "\u00d3")                            \
> +        ENTRY(0x23, "\u00da")                            \
> +        ENTRY(0x24, "\u00dc")                            \
> +        ENTRY(0x25, "\u00fc")                            \
> +        ENTRY(0x26, "\u00b4")                            \
> +        ENTRY(0x27, "\u00a1")                            \
> +        ENTRY(0x28, "*")                                 \
> +        ENTRY(0x29, "\u2018")                            \
> +        ENTRY(0x2a, "-")                                 \
> +        ENTRY(0x2b, "\u00a9")                            \
> +        ENTRY(0x2c, "\u2120")                            \
> +        ENTRY(0x2d, "\u00b7")                            \
> +        ENTRY(0x2e, "\u201c")                            \
> +        ENTRY(0x2f, "\u201d")                            \
> +        ENTRY(0x30, "\u00c0")                            \
> +        ENTRY(0x31, "\u00c2")                            \
> +        ENTRY(0x32, "\u00c7")                            \
> +        ENTRY(0x33, "\u00c8")                            \
> +        ENTRY(0x34, "\u00ca")                            \
> +        ENTRY(0x35, "\u00cb")                            \
> +        ENTRY(0x36, "\u00eb")                            \
> +        ENTRY(0x37, "\u00ce")                            \
> +        ENTRY(0x38, "\u00cf")                            \
> +        ENTRY(0x39, "\u00ef")                            \
> +        ENTRY(0x3a, "\u00d4")                            \
> +        ENTRY(0x3b, "\u00d9")                            \
> +        ENTRY(0x3c, "\u00f9")                            \
> +        ENTRY(0x3d, "\u00db")                            \
> +        ENTRY(0x3e, "\u00ab")                            \
> +        ENTRY(0x3f, "\u00bb")                            \
> +    END_SET                                              \
> +    START_SET(CCSET_EXTENDED_PORTUGUESE_GERMAN_DANISH)   \
> +        ENTRY(0x20, "\u00c3")                            \
> +        ENTRY(0x21, "\u00e3")                            \
> +        ENTRY(0x22, "\u00cd")                            \
> +        ENTRY(0x23, "\u00cc")                            \
> +        ENTRY(0x24, "\u00ec")                            \
> +        ENTRY(0x25, "\u00d2")                            \
> +        ENTRY(0x26, "\u00f2")                            \
> +        ENTRY(0x27, "\u00d5")                            \
> +        ENTRY(0x28, "\u00f5")                            \
> +        ENTRY(0x29, "{")                                 \
> +        ENTRY(0x2a, "}")                                 \
> +        ENTRY(0x2b, "\\")                                \
> +        ENTRY(0x2c, "^")                                 \
> +        ENTRY(0x2d, "_")                                 \
> +        ENTRY(0x2e, "|")                                 \
> +        ENTRY(0x2f, "~")                                 \
> +        ENTRY(0x30, "\u00c4")                            \
> +        ENTRY(0x31, "\u00e4")                            \
> +        ENTRY(0x32, "\u00d6")                            \
> +        ENTRY(0x33, "\u00f6")                            \
> +        ENTRY(0x34, "\u00df")                            \
> +        ENTRY(0x35, "\u00a5")                            \
> +        ENTRY(0x36, "\u00a4")                            \
> +        ENTRY(0x37, "\u00a6")                            \
> +        ENTRY(0x38, "\u00c5")                            \
> +        ENTRY(0x39, "\u00e5")                            \
> +        ENTRY(0x3a, "\u00d8")                            \
> +        ENTRY(0x3b, "\u00f8")                            \
> +        ENTRY(0x3c, "\u250c")                            \
> +        ENTRY(0x3d, "\u2510")                            \
> +        ENTRY(0x3e, "\u2514")                            \
> +        ENTRY(0x3f, "\u2518")                            \
> +    END_SET                                              \
> +
> +static const char charset_overrides[4][128][sizeof("\u266a")] =
>  {
> -    [CCSET_BASIC_AMERICAN] = {
> -        [0x27] = "\u2019",
> -        [0x2a] = "\u00e1",
> -        [0x5c] = "\u00e9",
> -        [0x5e] = "\u00ed",
> -        [0x5f] = "\u00f3",
> -        [0x60] = "\u00fa",
> -        [0x7b] = "\u00e7",
> -        [0x7c] = "\u00f7",
> -        [0x7d] = "\u00d1",
> -        [0x7e] = "\u00f1",
> -        [0x7f] = "\u2588"
> -    },
> -    [CCSET_SPECIAL_AMERICAN] = {
> -        [0x30] = "\u00ae",
> -        [0x31] = "\u00b0",
> -        [0x32] = "\u00bd",
> -        [0x33] = "\u00bf",
> -        [0x34] = "\u2122",
> -        [0x35] = "\u00a2",
> -        [0x36] = "\u00a3",
> -        [0x37] = "\u266a",
> -        [0x38] = "\u00e0",
> -        [0x39] = "\u00A0",
> -        [0x3a] = "\u00e8",
> -        [0x3b] = "\u00e2",
> -        [0x3c] = "\u00ea",
> -        [0x3d] = "\u00ee",
> -        [0x3e] = "\u00f4",
> -        [0x3f] = "\u00fb",
> -    },
> -    [CCSET_EXTENDED_SPANISH_FRENCH_MISC] = {
> -        [0x20] = "\u00c1",
> -        [0x21] = "\u00c9",
> -        [0x22] = "\u00d3",
> -        [0x23] = "\u00da",
> -        [0x24] = "\u00dc",
> -        [0x25] = "\u00fc",
> -        [0x26] = "\u00b4",
> -        [0x27] = "\u00a1",
> -        [0x28] = "*",
> -        [0x29] = "\u2018",
> -        [0x2a] = "-",
> -        [0x2b] = "\u00a9",
> -        [0x2c] = "\u2120",
> -        [0x2d] = "\u00b7",
> -        [0x2e] = "\u201c",
> -        [0x2f] = "\u201d",
> -        [0x30] = "\u00c0",
> -        [0x31] = "\u00c2",
> -        [0x32] = "\u00c7",
> -        [0x33] = "\u00c8",
> -        [0x34] = "\u00ca",
> -        [0x35] = "\u00cb",
> -        [0x36] = "\u00eb",
> -        [0x37] = "\u00ce",
> -        [0x38] = "\u00cf",
> -        [0x39] = "\u00ef",
> -        [0x3a] = "\u00d4",
> -        [0x3b] = "\u00d9",
> -        [0x3c] = "\u00f9",
> -        [0x3d] = "\u00db",
> -        [0x3e] = "\u00ab",
> -        [0x3f] = "\u00bb",
> -    },
> -    [CCSET_EXTENDED_PORTUGUESE_GERMAN_DANISH] = {
> -        [0x20] = "\u00c3",
> -        [0x21] = "\u00e3",
> -        [0x22] = "\u00cd",
> -        [0x23] = "\u00cc",
> -        [0x24] = "\u00ec",
> -        [0x25] = "\u00d2",
> -        [0x26] = "\u00f2",
> -        [0x27] = "\u00d5",
> -        [0x28] = "\u00f5",
> -        [0x29] = "{",
> -        [0x2a] = "}",
> -        [0x2b] = "\\",
> -        [0x2c] = "^",
> -        [0x2d] = "_",
> -        [0x2e] = "|",
> -        [0x2f] = "~",
> -        [0x30] = "\u00c4",
> -        [0x31] = "\u00e4",
> -        [0x32] = "\u00d6",
> -        [0x33] = "\u00f6",
> -        [0x34] = "\u00df",
> -        [0x35] = "\u00a5",
> -        [0x36] = "\u00a4",
> -        [0x37] = "\u00a6",
> -        [0x38] = "\u00c5",
> -        [0x39] = "\u00e5",
> -        [0x3a] = "\u00d8",
> -        [0x3b] = "\u00f8",
> -        [0x3c] = "\u250c",
> -        [0x3d] = "\u2510",
> -        [0x3e] = "\u2514",
> -        [0x3f] = "\u2518",
> +#define START_SET(IDX) \
> +    [IDX] = {
> +#define ENTRY(idx, string) \
> +        [idx] = string,
> +#define END_SET \
>      },
> +    CHARSET_OVERRIDE_LIST(START_SET, ENTRY, END_SET)
>  };
> +#define EMPTY_START(IDX)
> +#define EMPTY_END
> +#define ASSERT_ENTRY(IDX, str)                                     \
> +    _Static_assert(sizeof(str) <= sizeof(charset_overrides[0][0]), \
> +                   "'" str "' string takes too much space");
> +CHARSET_OVERRIDE_LIST(EMPTY_START, ASSERT_ENTRY, EMPTY_END)
>
>  static const unsigned char bg_attribs[8] = // Color
>  {
> @@ -571,7 +586,7 @@ static int capture_screen(CCaptionSubContext *ctx)
>                  prev_color = color[j];
>                  prev_bg_color = bg[j];
>                  override =
> charset_overrides[(int)charset[j]][(int)row[j]];
> -                if (override) {
> +                if (override[0]) {
>                      av_bprintf(&ctx->buffer[bidx], "%s%s%s%s%s", e_tag,
> s_tag, c_tag, b_tag, override);
>                      seen_char = 1;
>                  } else if (row[j] == ' ' && !seen_char) {
> --
> 2.40.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Andreas Rheinhardt March 7, 2024, 8:13 a.m. UTC | #2
Andreas Rheinhardt:
> The longest string here takes four bytes, so using an array
> of pointers is wasteful even when ignoring the cost of relocations;
> the lack of relocations also implies that this array
> will now be put into .rodata and not into .data.rel.ro.
> 
> Static asserts are used to ensure that all strings are always
> properly zero-terminated.
> 
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
> ---
> Now using static asserts to address the main point of
> criticism in https://patchwork.ffmpeg.org/project/ffmpeg/patch/20210106081702.2495510-1-andreas.rheinhardt@gmail.com/
> 
>  libavcodec/ccaption_dec.c | 215 ++++++++++++++++++++------------------
>  1 file changed, 115 insertions(+), 100 deletions(-)
> 
> diff --git a/libavcodec/ccaption_dec.c b/libavcodec/ccaption_dec.c
> index 95143e7e46..1550e4b253 100644
> --- a/libavcodec/ccaption_dec.c
> +++ b/libavcodec/ccaption_dec.c
> @@ -67,108 +67,123 @@ enum cc_charset {
>      CCSET_EXTENDED_PORTUGUESE_GERMAN_DANISH,
>  };
>  
> -static const char *charset_overrides[4][128] =
> +#define CHARSET_OVERRIDE_LIST(START_SET, ENTRY, END_SET) \
> +    START_SET(CCSET_BASIC_AMERICAN)                      \
> +        ENTRY(0x27, "\u2019")                            \
> +        ENTRY(0x2a, "\u00e1")                            \
> +        ENTRY(0x5c, "\u00e9")                            \
> +        ENTRY(0x5e, "\u00ed")                            \
> +        ENTRY(0x5f, "\u00f3")                            \
> +        ENTRY(0x60, "\u00fa")                            \
> +        ENTRY(0x7b, "\u00e7")                            \
> +        ENTRY(0x7c, "\u00f7")                            \
> +        ENTRY(0x7d, "\u00d1")                            \
> +        ENTRY(0x7e, "\u00f1")                            \
> +        ENTRY(0x7f, "\u2588")                            \
> +    END_SET                                              \
> +    START_SET(CCSET_SPECIAL_AMERICAN)                    \
> +        ENTRY(0x30, "\u00ae")                            \
> +        ENTRY(0x31, "\u00b0")                            \
> +        ENTRY(0x32, "\u00bd")                            \
> +        ENTRY(0x33, "\u00bf")                            \
> +        ENTRY(0x34, "\u2122")                            \
> +        ENTRY(0x35, "\u00a2")                            \
> +        ENTRY(0x36, "\u00a3")                            \
> +        ENTRY(0x37, "\u266a")                            \
> +        ENTRY(0x38, "\u00e0")                            \
> +        ENTRY(0x39, "\u00A0")                            \
> +        ENTRY(0x3a, "\u00e8")                            \
> +        ENTRY(0x3b, "\u00e2")                            \
> +        ENTRY(0x3c, "\u00ea")                            \
> +        ENTRY(0x3d, "\u00ee")                            \
> +        ENTRY(0x3e, "\u00f4")                            \
> +        ENTRY(0x3f, "\u00fb")                            \
> +    END_SET                                              \
> +    START_SET(CCSET_EXTENDED_SPANISH_FRENCH_MISC)        \
> +        ENTRY(0x20, "\u00c1")                            \
> +        ENTRY(0x21, "\u00c9")                            \
> +        ENTRY(0x22, "\u00d3")                            \
> +        ENTRY(0x23, "\u00da")                            \
> +        ENTRY(0x24, "\u00dc")                            \
> +        ENTRY(0x25, "\u00fc")                            \
> +        ENTRY(0x26, "\u00b4")                            \
> +        ENTRY(0x27, "\u00a1")                            \
> +        ENTRY(0x28, "*")                                 \
> +        ENTRY(0x29, "\u2018")                            \
> +        ENTRY(0x2a, "-")                                 \
> +        ENTRY(0x2b, "\u00a9")                            \
> +        ENTRY(0x2c, "\u2120")                            \
> +        ENTRY(0x2d, "\u00b7")                            \
> +        ENTRY(0x2e, "\u201c")                            \
> +        ENTRY(0x2f, "\u201d")                            \
> +        ENTRY(0x30, "\u00c0")                            \
> +        ENTRY(0x31, "\u00c2")                            \
> +        ENTRY(0x32, "\u00c7")                            \
> +        ENTRY(0x33, "\u00c8")                            \
> +        ENTRY(0x34, "\u00ca")                            \
> +        ENTRY(0x35, "\u00cb")                            \
> +        ENTRY(0x36, "\u00eb")                            \
> +        ENTRY(0x37, "\u00ce")                            \
> +        ENTRY(0x38, "\u00cf")                            \
> +        ENTRY(0x39, "\u00ef")                            \
> +        ENTRY(0x3a, "\u00d4")                            \
> +        ENTRY(0x3b, "\u00d9")                            \
> +        ENTRY(0x3c, "\u00f9")                            \
> +        ENTRY(0x3d, "\u00db")                            \
> +        ENTRY(0x3e, "\u00ab")                            \
> +        ENTRY(0x3f, "\u00bb")                            \
> +    END_SET                                              \
> +    START_SET(CCSET_EXTENDED_PORTUGUESE_GERMAN_DANISH)   \
> +        ENTRY(0x20, "\u00c3")                            \
> +        ENTRY(0x21, "\u00e3")                            \
> +        ENTRY(0x22, "\u00cd")                            \
> +        ENTRY(0x23, "\u00cc")                            \
> +        ENTRY(0x24, "\u00ec")                            \
> +        ENTRY(0x25, "\u00d2")                            \
> +        ENTRY(0x26, "\u00f2")                            \
> +        ENTRY(0x27, "\u00d5")                            \
> +        ENTRY(0x28, "\u00f5")                            \
> +        ENTRY(0x29, "{")                                 \
> +        ENTRY(0x2a, "}")                                 \
> +        ENTRY(0x2b, "\\")                                \
> +        ENTRY(0x2c, "^")                                 \
> +        ENTRY(0x2d, "_")                                 \
> +        ENTRY(0x2e, "|")                                 \
> +        ENTRY(0x2f, "~")                                 \
> +        ENTRY(0x30, "\u00c4")                            \
> +        ENTRY(0x31, "\u00e4")                            \
> +        ENTRY(0x32, "\u00d6")                            \
> +        ENTRY(0x33, "\u00f6")                            \
> +        ENTRY(0x34, "\u00df")                            \
> +        ENTRY(0x35, "\u00a5")                            \
> +        ENTRY(0x36, "\u00a4")                            \
> +        ENTRY(0x37, "\u00a6")                            \
> +        ENTRY(0x38, "\u00c5")                            \
> +        ENTRY(0x39, "\u00e5")                            \
> +        ENTRY(0x3a, "\u00d8")                            \
> +        ENTRY(0x3b, "\u00f8")                            \
> +        ENTRY(0x3c, "\u250c")                            \
> +        ENTRY(0x3d, "\u2510")                            \
> +        ENTRY(0x3e, "\u2514")                            \
> +        ENTRY(0x3f, "\u2518")                            \
> +    END_SET                                              \
> +
> +static const char charset_overrides[4][128][sizeof("\u266a")] =
>  {
> -    [CCSET_BASIC_AMERICAN] = {
> -        [0x27] = "\u2019",
> -        [0x2a] = "\u00e1",
> -        [0x5c] = "\u00e9",
> -        [0x5e] = "\u00ed",
> -        [0x5f] = "\u00f3",
> -        [0x60] = "\u00fa",
> -        [0x7b] = "\u00e7",
> -        [0x7c] = "\u00f7",
> -        [0x7d] = "\u00d1",
> -        [0x7e] = "\u00f1",
> -        [0x7f] = "\u2588"
> -    },
> -    [CCSET_SPECIAL_AMERICAN] = {
> -        [0x30] = "\u00ae",
> -        [0x31] = "\u00b0",
> -        [0x32] = "\u00bd",
> -        [0x33] = "\u00bf",
> -        [0x34] = "\u2122",
> -        [0x35] = "\u00a2",
> -        [0x36] = "\u00a3",
> -        [0x37] = "\u266a",
> -        [0x38] = "\u00e0",
> -        [0x39] = "\u00A0",
> -        [0x3a] = "\u00e8",
> -        [0x3b] = "\u00e2",
> -        [0x3c] = "\u00ea",
> -        [0x3d] = "\u00ee",
> -        [0x3e] = "\u00f4",
> -        [0x3f] = "\u00fb",
> -    },
> -    [CCSET_EXTENDED_SPANISH_FRENCH_MISC] = {
> -        [0x20] = "\u00c1",
> -        [0x21] = "\u00c9",
> -        [0x22] = "\u00d3",
> -        [0x23] = "\u00da",
> -        [0x24] = "\u00dc",
> -        [0x25] = "\u00fc",
> -        [0x26] = "\u00b4",
> -        [0x27] = "\u00a1",
> -        [0x28] = "*",
> -        [0x29] = "\u2018",
> -        [0x2a] = "-",
> -        [0x2b] = "\u00a9",
> -        [0x2c] = "\u2120",
> -        [0x2d] = "\u00b7",
> -        [0x2e] = "\u201c",
> -        [0x2f] = "\u201d",
> -        [0x30] = "\u00c0",
> -        [0x31] = "\u00c2",
> -        [0x32] = "\u00c7",
> -        [0x33] = "\u00c8",
> -        [0x34] = "\u00ca",
> -        [0x35] = "\u00cb",
> -        [0x36] = "\u00eb",
> -        [0x37] = "\u00ce",
> -        [0x38] = "\u00cf",
> -        [0x39] = "\u00ef",
> -        [0x3a] = "\u00d4",
> -        [0x3b] = "\u00d9",
> -        [0x3c] = "\u00f9",
> -        [0x3d] = "\u00db",
> -        [0x3e] = "\u00ab",
> -        [0x3f] = "\u00bb",
> -    },
> -    [CCSET_EXTENDED_PORTUGUESE_GERMAN_DANISH] = {
> -        [0x20] = "\u00c3",
> -        [0x21] = "\u00e3",
> -        [0x22] = "\u00cd",
> -        [0x23] = "\u00cc",
> -        [0x24] = "\u00ec",
> -        [0x25] = "\u00d2",
> -        [0x26] = "\u00f2",
> -        [0x27] = "\u00d5",
> -        [0x28] = "\u00f5",
> -        [0x29] = "{",
> -        [0x2a] = "}",
> -        [0x2b] = "\\",
> -        [0x2c] = "^",
> -        [0x2d] = "_",
> -        [0x2e] = "|",
> -        [0x2f] = "~",
> -        [0x30] = "\u00c4",
> -        [0x31] = "\u00e4",
> -        [0x32] = "\u00d6",
> -        [0x33] = "\u00f6",
> -        [0x34] = "\u00df",
> -        [0x35] = "\u00a5",
> -        [0x36] = "\u00a4",
> -        [0x37] = "\u00a6",
> -        [0x38] = "\u00c5",
> -        [0x39] = "\u00e5",
> -        [0x3a] = "\u00d8",
> -        [0x3b] = "\u00f8",
> -        [0x3c] = "\u250c",
> -        [0x3d] = "\u2510",
> -        [0x3e] = "\u2514",
> -        [0x3f] = "\u2518",
> +#define START_SET(IDX) \
> +    [IDX] = {
> +#define ENTRY(idx, string) \
> +        [idx] = string,
> +#define END_SET \
>      },
> +    CHARSET_OVERRIDE_LIST(START_SET, ENTRY, END_SET)
>  };
> +#define EMPTY_START(IDX)
> +#define EMPTY_END
> +#define ASSERT_ENTRY(IDX, str)                                     \
> +    _Static_assert(sizeof(str) <= sizeof(charset_overrides[0][0]), \
> +                   "'" str "' string takes too much space");
> +CHARSET_OVERRIDE_LIST(EMPTY_START, ASSERT_ENTRY, EMPTY_END)
>  
>  static const unsigned char bg_attribs[8] = // Color
>  {
> @@ -571,7 +586,7 @@ static int capture_screen(CCaptionSubContext *ctx)
>                  prev_color = color[j];
>                  prev_bg_color = bg[j];
>                  override = charset_overrides[(int)charset[j]][(int)row[j]];
> -                if (override) {
> +                if (override[0]) {
>                      av_bprintf(&ctx->buffer[bidx], "%s%s%s%s%s", e_tag, s_tag, c_tag, b_tag, override);
>                      seen_char = 1;
>                  } else if (row[j] == ' ' && !seen_char) {

Will apply this patch tomorrow unless there are objections.

- Andreas
diff mbox series

Patch

diff --git a/libavcodec/ccaption_dec.c b/libavcodec/ccaption_dec.c
index 95143e7e46..1550e4b253 100644
--- a/libavcodec/ccaption_dec.c
+++ b/libavcodec/ccaption_dec.c
@@ -67,108 +67,123 @@  enum cc_charset {
     CCSET_EXTENDED_PORTUGUESE_GERMAN_DANISH,
 };
 
-static const char *charset_overrides[4][128] =
+#define CHARSET_OVERRIDE_LIST(START_SET, ENTRY, END_SET) \
+    START_SET(CCSET_BASIC_AMERICAN)                      \
+        ENTRY(0x27, "\u2019")                            \
+        ENTRY(0x2a, "\u00e1")                            \
+        ENTRY(0x5c, "\u00e9")                            \
+        ENTRY(0x5e, "\u00ed")                            \
+        ENTRY(0x5f, "\u00f3")                            \
+        ENTRY(0x60, "\u00fa")                            \
+        ENTRY(0x7b, "\u00e7")                            \
+        ENTRY(0x7c, "\u00f7")                            \
+        ENTRY(0x7d, "\u00d1")                            \
+        ENTRY(0x7e, "\u00f1")                            \
+        ENTRY(0x7f, "\u2588")                            \
+    END_SET                                              \
+    START_SET(CCSET_SPECIAL_AMERICAN)                    \
+        ENTRY(0x30, "\u00ae")                            \
+        ENTRY(0x31, "\u00b0")                            \
+        ENTRY(0x32, "\u00bd")                            \
+        ENTRY(0x33, "\u00bf")                            \
+        ENTRY(0x34, "\u2122")                            \
+        ENTRY(0x35, "\u00a2")                            \
+        ENTRY(0x36, "\u00a3")                            \
+        ENTRY(0x37, "\u266a")                            \
+        ENTRY(0x38, "\u00e0")                            \
+        ENTRY(0x39, "\u00A0")                            \
+        ENTRY(0x3a, "\u00e8")                            \
+        ENTRY(0x3b, "\u00e2")                            \
+        ENTRY(0x3c, "\u00ea")                            \
+        ENTRY(0x3d, "\u00ee")                            \
+        ENTRY(0x3e, "\u00f4")                            \
+        ENTRY(0x3f, "\u00fb")                            \
+    END_SET                                              \
+    START_SET(CCSET_EXTENDED_SPANISH_FRENCH_MISC)        \
+        ENTRY(0x20, "\u00c1")                            \
+        ENTRY(0x21, "\u00c9")                            \
+        ENTRY(0x22, "\u00d3")                            \
+        ENTRY(0x23, "\u00da")                            \
+        ENTRY(0x24, "\u00dc")                            \
+        ENTRY(0x25, "\u00fc")                            \
+        ENTRY(0x26, "\u00b4")                            \
+        ENTRY(0x27, "\u00a1")                            \
+        ENTRY(0x28, "*")                                 \
+        ENTRY(0x29, "\u2018")                            \
+        ENTRY(0x2a, "-")                                 \
+        ENTRY(0x2b, "\u00a9")                            \
+        ENTRY(0x2c, "\u2120")                            \
+        ENTRY(0x2d, "\u00b7")                            \
+        ENTRY(0x2e, "\u201c")                            \
+        ENTRY(0x2f, "\u201d")                            \
+        ENTRY(0x30, "\u00c0")                            \
+        ENTRY(0x31, "\u00c2")                            \
+        ENTRY(0x32, "\u00c7")                            \
+        ENTRY(0x33, "\u00c8")                            \
+        ENTRY(0x34, "\u00ca")                            \
+        ENTRY(0x35, "\u00cb")                            \
+        ENTRY(0x36, "\u00eb")                            \
+        ENTRY(0x37, "\u00ce")                            \
+        ENTRY(0x38, "\u00cf")                            \
+        ENTRY(0x39, "\u00ef")                            \
+        ENTRY(0x3a, "\u00d4")                            \
+        ENTRY(0x3b, "\u00d9")                            \
+        ENTRY(0x3c, "\u00f9")                            \
+        ENTRY(0x3d, "\u00db")                            \
+        ENTRY(0x3e, "\u00ab")                            \
+        ENTRY(0x3f, "\u00bb")                            \
+    END_SET                                              \
+    START_SET(CCSET_EXTENDED_PORTUGUESE_GERMAN_DANISH)   \
+        ENTRY(0x20, "\u00c3")                            \
+        ENTRY(0x21, "\u00e3")                            \
+        ENTRY(0x22, "\u00cd")                            \
+        ENTRY(0x23, "\u00cc")                            \
+        ENTRY(0x24, "\u00ec")                            \
+        ENTRY(0x25, "\u00d2")                            \
+        ENTRY(0x26, "\u00f2")                            \
+        ENTRY(0x27, "\u00d5")                            \
+        ENTRY(0x28, "\u00f5")                            \
+        ENTRY(0x29, "{")                                 \
+        ENTRY(0x2a, "}")                                 \
+        ENTRY(0x2b, "\\")                                \
+        ENTRY(0x2c, "^")                                 \
+        ENTRY(0x2d, "_")                                 \
+        ENTRY(0x2e, "|")                                 \
+        ENTRY(0x2f, "~")                                 \
+        ENTRY(0x30, "\u00c4")                            \
+        ENTRY(0x31, "\u00e4")                            \
+        ENTRY(0x32, "\u00d6")                            \
+        ENTRY(0x33, "\u00f6")                            \
+        ENTRY(0x34, "\u00df")                            \
+        ENTRY(0x35, "\u00a5")                            \
+        ENTRY(0x36, "\u00a4")                            \
+        ENTRY(0x37, "\u00a6")                            \
+        ENTRY(0x38, "\u00c5")                            \
+        ENTRY(0x39, "\u00e5")                            \
+        ENTRY(0x3a, "\u00d8")                            \
+        ENTRY(0x3b, "\u00f8")                            \
+        ENTRY(0x3c, "\u250c")                            \
+        ENTRY(0x3d, "\u2510")                            \
+        ENTRY(0x3e, "\u2514")                            \
+        ENTRY(0x3f, "\u2518")                            \
+    END_SET                                              \
+
+static const char charset_overrides[4][128][sizeof("\u266a")] =
 {
-    [CCSET_BASIC_AMERICAN] = {
-        [0x27] = "\u2019",
-        [0x2a] = "\u00e1",
-        [0x5c] = "\u00e9",
-        [0x5e] = "\u00ed",
-        [0x5f] = "\u00f3",
-        [0x60] = "\u00fa",
-        [0x7b] = "\u00e7",
-        [0x7c] = "\u00f7",
-        [0x7d] = "\u00d1",
-        [0x7e] = "\u00f1",
-        [0x7f] = "\u2588"
-    },
-    [CCSET_SPECIAL_AMERICAN] = {
-        [0x30] = "\u00ae",
-        [0x31] = "\u00b0",
-        [0x32] = "\u00bd",
-        [0x33] = "\u00bf",
-        [0x34] = "\u2122",
-        [0x35] = "\u00a2",
-        [0x36] = "\u00a3",
-        [0x37] = "\u266a",
-        [0x38] = "\u00e0",
-        [0x39] = "\u00A0",
-        [0x3a] = "\u00e8",
-        [0x3b] = "\u00e2",
-        [0x3c] = "\u00ea",
-        [0x3d] = "\u00ee",
-        [0x3e] = "\u00f4",
-        [0x3f] = "\u00fb",
-    },
-    [CCSET_EXTENDED_SPANISH_FRENCH_MISC] = {
-        [0x20] = "\u00c1",
-        [0x21] = "\u00c9",
-        [0x22] = "\u00d3",
-        [0x23] = "\u00da",
-        [0x24] = "\u00dc",
-        [0x25] = "\u00fc",
-        [0x26] = "\u00b4",
-        [0x27] = "\u00a1",
-        [0x28] = "*",
-        [0x29] = "\u2018",
-        [0x2a] = "-",
-        [0x2b] = "\u00a9",
-        [0x2c] = "\u2120",
-        [0x2d] = "\u00b7",
-        [0x2e] = "\u201c",
-        [0x2f] = "\u201d",
-        [0x30] = "\u00c0",
-        [0x31] = "\u00c2",
-        [0x32] = "\u00c7",
-        [0x33] = "\u00c8",
-        [0x34] = "\u00ca",
-        [0x35] = "\u00cb",
-        [0x36] = "\u00eb",
-        [0x37] = "\u00ce",
-        [0x38] = "\u00cf",
-        [0x39] = "\u00ef",
-        [0x3a] = "\u00d4",
-        [0x3b] = "\u00d9",
-        [0x3c] = "\u00f9",
-        [0x3d] = "\u00db",
-        [0x3e] = "\u00ab",
-        [0x3f] = "\u00bb",
-    },
-    [CCSET_EXTENDED_PORTUGUESE_GERMAN_DANISH] = {
-        [0x20] = "\u00c3",
-        [0x21] = "\u00e3",
-        [0x22] = "\u00cd",
-        [0x23] = "\u00cc",
-        [0x24] = "\u00ec",
-        [0x25] = "\u00d2",
-        [0x26] = "\u00f2",
-        [0x27] = "\u00d5",
-        [0x28] = "\u00f5",
-        [0x29] = "{",
-        [0x2a] = "}",
-        [0x2b] = "\\",
-        [0x2c] = "^",
-        [0x2d] = "_",
-        [0x2e] = "|",
-        [0x2f] = "~",
-        [0x30] = "\u00c4",
-        [0x31] = "\u00e4",
-        [0x32] = "\u00d6",
-        [0x33] = "\u00f6",
-        [0x34] = "\u00df",
-        [0x35] = "\u00a5",
-        [0x36] = "\u00a4",
-        [0x37] = "\u00a6",
-        [0x38] = "\u00c5",
-        [0x39] = "\u00e5",
-        [0x3a] = "\u00d8",
-        [0x3b] = "\u00f8",
-        [0x3c] = "\u250c",
-        [0x3d] = "\u2510",
-        [0x3e] = "\u2514",
-        [0x3f] = "\u2518",
+#define START_SET(IDX) \
+    [IDX] = {
+#define ENTRY(idx, string) \
+        [idx] = string,
+#define END_SET \
     },
+    CHARSET_OVERRIDE_LIST(START_SET, ENTRY, END_SET)
 };
+#define EMPTY_START(IDX)
+#define EMPTY_END
+#define ASSERT_ENTRY(IDX, str)                                     \
+    _Static_assert(sizeof(str) <= sizeof(charset_overrides[0][0]), \
+                   "'" str "' string takes too much space");
+CHARSET_OVERRIDE_LIST(EMPTY_START, ASSERT_ENTRY, EMPTY_END)
 
 static const unsigned char bg_attribs[8] = // Color
 {
@@ -571,7 +586,7 @@  static int capture_screen(CCaptionSubContext *ctx)
                 prev_color = color[j];
                 prev_bg_color = bg[j];
                 override = charset_overrides[(int)charset[j]][(int)row[j]];
-                if (override) {
+                if (override[0]) {
                     av_bprintf(&ctx->buffer[bidx], "%s%s%s%s%s", e_tag, s_tag, c_tag, b_tag, override);
                     seen_char = 1;
                 } else if (row[j] == ' ' && !seen_char) {