diff mbox series

[FFmpeg-devel,v2] Replace arrays of pointers to strings by arrays of strings

Message ID 20210106081702.2495510-1-andreas.rheinhardt@gmail.com
State New
Headers show
Series [FFmpeg-devel,v2] Replace arrays of pointers to strings by arrays of strings | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Andreas Rheinhardt Jan. 6, 2021, 8:17 a.m. UTC
When the difference of the longest size and the average size of
collection of strings is smaller than the size of a pointer, it makes
sense to store the strings directly in an array instead of using an
array of pointers to strings (unless doing so precludes deduplicating
strings); doing so also avoids relocations. This requirement is
fulfilled for several arrays of pointers to strings that are changed in
this commit.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
---
Now used sizeof("longest string") for the array element size.

I'd still like to know whether I can simply work under the assumption
that pointers are 64bit when estimating whether other patches of this
kind are worth it (some changes that are worth it in 64bit could
actually be counterproductive size-wise for 32bit systems; but given
that these patches avoid relocations, overestimating sizeof(char*)
seems actually more accurate).

 libavcodec/ccaption_dec.c | 4 ++--
 libavcodec/vaapi_encode.c | 2 +-
 libavfilter/af_hdcd.c     | 2 +-
 libavfilter/f_perms.c     | 4 ++--
 libavformat/iff.c         | 4 ++--
 libavformat/matroskadec.c | 2 +-
 libavformat/sdp.c         | 2 +-
 libavutil/parseutils.c    | 2 +-
 8 files changed, 11 insertions(+), 11 deletions(-)

Comments

Marton Balint Jan. 6, 2021, 9:34 a.m. UTC | #1
On Wed, 6 Jan 2021, Andreas Rheinhardt wrote:

> When the difference of the longest size and the average size of
> collection of strings is smaller than the size of a pointer, it makes
> sense to store the strings directly in an array instead of using an
> array of pointers to strings (unless doing so precludes deduplicating
> strings); doing so also avoids relocations. This requirement is
> fulfilled for several arrays of pointers to strings that are changed in
> this commit.
>
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
> ---
> Now used sizeof("longest string") for the array element size.

Duplicating the "longest string" is very ugly, I don't find it better than 
a constant number, because a sizeof still does not say it tries to find 
the longest string.

TBH I am not a fan of these kind of patches for a few byte of savings, and 
when changing the list of strings it can actually become worse which won't 
be checked by anybody...

Regards,
Marton

>
> I'd still like to know whether I can simply work under the assumption
> that pointers are 64bit when estimating whether other patches of this
> kind are worth it (some changes that are worth it in 64bit could
> actually be counterproductive size-wise for 32bit systems; but given
> that these patches avoid relocations, overestimating sizeof(char*)
> seems actually more accurate).
>
> libavcodec/ccaption_dec.c | 4 ++--
> libavcodec/vaapi_encode.c | 2 +-
> libavfilter/af_hdcd.c     | 2 +-
> libavfilter/f_perms.c     | 4 ++--
> libavformat/iff.c         | 4 ++--
> libavformat/matroskadec.c | 2 +-
> libavformat/sdp.c         | 2 +-
> libavutil/parseutils.c    | 2 +-
> 8 files changed, 11 insertions(+), 11 deletions(-)
>
> diff --git a/libavcodec/ccaption_dec.c b/libavcodec/ccaption_dec.c
> index a208e19b95..0198158c38 100644
> --- a/libavcodec/ccaption_dec.c
> +++ b/libavcodec/ccaption_dec.c
> @@ -66,7 +66,7 @@ enum cc_charset {
>     CCSET_EXTENDED_PORTUGUESE_GERMAN_DANISH,
> };
> 
> -static const char *charset_overrides[4][128] =
> +static const char charset_overrides[4][128][sizeof("\u266a")] =
> {
>     [CCSET_BASIC_AMERICAN] = {
>         [0x27] = "\u2019",
> @@ -575,7 +575,7 @@ static int capture_screen(CCaptionSubContext *ctx)
>                 prev_color = color[j];
>                 prev_bg_color = bg[j];
>                 override = charset_overrides[(int)charset[j]][(int)row[j]];
> -                if (override) {
> +                if (override[0]) {
>                     av_bprintf(&ctx->buffer[bidx], "%s%s%s%s%s", e_tag, s_tag, c_tag, b_tag, override);
>                     seen_char = 1;
>                 } else if (row[j] == ' ' && !seen_char) {
> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
> index 518e5b2c00..d643046b5d 100644
> --- a/libavcodec/vaapi_encode.c
> +++ b/libavcodec/vaapi_encode.c
> @@ -33,7 +33,7 @@ const AVCodecHWConfigInternal *const ff_vaapi_encode_hw_configs[] = {
>     NULL,
> };
> 
> -static const char * const picture_type_name[] = { "IDR", "I", "P", "B" };
> +static const char picture_type_name[][sizeof("IDR")] = { "IDR", "I", "P", "B" };
> 
> static int vaapi_encode_make_packed_header(AVCodecContext *avctx,
>                                            VAAPIEncodePicture *pic,
> diff --git a/libavfilter/af_hdcd.c b/libavfilter/af_hdcd.c
> index 251d03229a..fc281a4620 100644
> --- a/libavfilter/af_hdcd.c
> +++ b/libavfilter/af_hdcd.c
> @@ -900,7 +900,7 @@ typedef enum {
>     HDCD_PVER_MIX        = 3, /**< Packets of type A and B discovered, most likely an encoding error */
> } hdcd_pf;
> 
> -static const char * const pf_str[] = {
> +static const char pf_str[][sizeof("A+B")] = {
>     "?", "A", "B", "A+B"
> };
> 
> diff --git a/libavfilter/f_perms.c b/libavfilter/f_perms.c
> index d984e5b150..ae4264038f 100644
> --- a/libavfilter/f_perms.c
> +++ b/libavfilter/f_perms.c
> @@ -72,7 +72,7 @@ static av_cold int init(AVFilterContext *ctx)
> }
> 
> enum perm                        {  RO,   RW  };
> -static const char * const perm_str[2] = { "RO", "RW" };
> +static const char perm_str[] = { 'O', 'W' };
> 
> static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
> {
> @@ -91,7 +91,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
>     default:            out_perm = in_perm;                                 break;
>     }
> 
> -    av_log(ctx, AV_LOG_VERBOSE, "%s -> %s%s\n",
> +    av_log(ctx, AV_LOG_VERBOSE, "R%c -> R%c%s\n",
>            perm_str[in_perm], perm_str[out_perm],
>            in_perm == out_perm ? " (no-op)" : "");
> 
> diff --git a/libavformat/iff.c b/libavformat/iff.c
> index 2dba121f6f..ad39e08649 100644
> --- a/libavformat/iff.c
> +++ b/libavformat/iff.c
> @@ -199,13 +199,13 @@ static const uint64_t dsd_loudspeaker_config[] = {
>     AV_CH_LAYOUT_5POINT0, AV_CH_LAYOUT_5POINT1,
> };
> 
> -static const char * dsd_source_comment[] = {
> +static const char dsd_source_comment[][sizeof("analogue_source_comment")] = {
>     "dsd_source_comment",
>     "analogue_source_comment",
>     "pcm_source_comment",
> };
> 
> -static const char * dsd_history_comment[] = {
> +static const char dsd_history_comment[][sizeof("creating_machine")] = {
>     "general_remark",
>     "operator_name",
>     "creating_machine",
> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
> index 374831baa3..7b26face08 100644
> --- a/libavformat/matroskadec.c
> +++ b/libavformat/matroskadec.c
> @@ -1946,7 +1946,7 @@ static void matroska_parse_cues(MatroskaDemuxContext *matroska) {
> 
> static int matroska_aac_profile(char *codec_id)
> {
> -    static const char *const aac_profiles[] = { "MAIN", "LC", "SSR" };
> +    static const char aac_profiles[][sizeof("MAIN")] = { "MAIN", "LC", "SSR" };
>     int profile;
>
>     for (profile = 0; profile < FF_ARRAY_ELEMS(aac_profiles); profile++)
> diff --git a/libavformat/sdp.c b/libavformat/sdp.c
> index 95f3fbb876..69565e326d 100644
> --- a/libavformat/sdp.c
> +++ b/libavformat/sdp.c
> @@ -230,7 +230,7 @@ static char *extradata2psets_hevc(AVCodecParameters *par)
>     int extradata_size = par->extradata_size;
>     uint8_t *tmpbuf = NULL;
>     int ps_pos[3] = { 0 };
> -    static const char * const ps_names[3] = { "vps", "sps", "pps" };
> +    static const char ps_names[3][sizeof("vps")] = { "vps", "sps", "pps" };
>     int num_arrays, num_nalus;
>     int pos, i, j;
> 
> diff --git a/libavutil/parseutils.c b/libavutil/parseutils.c
> index 167e822648..e43c249cee 100644
> --- a/libavutil/parseutils.c
> +++ b/libavutil/parseutils.c
> @@ -140,7 +140,7 @@ static const VideoRateAbbr video_rate_abbrs[]= {
>     { "ntsc-film", { 24000, 1001 } },
> };
> 
> -static const char *months[12] = {
> +static const char months[12][sizeof("september")] = {
>     "january", "february", "march", "april", "may", "june", "july", "august",
>     "september", "october", "november", "december"
> };
> -- 
> 2.25.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Mark Thompson Jan. 6, 2021, 6:37 p.m. UTC | #2
On 06/01/2021 09:34, Marton Balint wrote:
> On Wed, 6 Jan 2021, Andreas Rheinhardt wrote:
>> When the difference of the longest size and the average size of
>> collection of strings is smaller than the size of a pointer, it makes
>> sense to store the strings directly in an array instead of using an
>> array of pointers to strings (unless doing so precludes deduplicating
>> strings); doing so also avoids relocations. This requirement is
>> fulfilled for several arrays of pointers to strings that are changed in
>> this commit.
>>
>> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
>> ---
>> Now used sizeof("longest string") for the array element size.
> 
> Duplicating the "longest string" is very ugly, I don't find it better than a constant number, because a sizeof still does not say it tries to find the longest string.
> 
> TBH I am not a fan of these kind of patches for a few byte of savings, and when changing the list of strings it can actually become worse which won't be checked by anybody...

I'm inclined to agree.

This case is particularly evil because the compiler is unable to detect off-by-one errors, so missing the longest element may silently result in messed-up strings and crashes.

- Mark
Andreas Rheinhardt Jan. 12, 2021, 1:41 a.m. UTC | #3
Marton Balint:
> 
> 
> On Wed, 6 Jan 2021, Andreas Rheinhardt wrote:
> 
>> When the difference of the longest size and the average size of
>> collection of strings is smaller than the size of a pointer, it makes
>> sense to store the strings directly in an array instead of using an
>> array of pointers to strings (unless doing so precludes deduplicating
>> strings); doing so also avoids relocations. This requirement is
>> fulfilled for several arrays of pointers to strings that are changed in
>> this commit.
>>
>> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
>> ---
>> Now used sizeof("longest string") for the array element size.
> 
> Duplicating the "longest string" is very ugly, I don't find it better
> than a constant number, because a sizeof still does not say it tries to
> find the longest string.
> 
> TBH I am not a fan of these kind of patches for a few byte of savings,
> and when changing the list of strings it can actually become worse which
> won't be checked by anybody...
> 
When using position independent code, each of these pointers that is
different from NULL requires a relocation (on ELF, these are quite
expensive: 24 byte when using 64 byte systems), so that these arrays of
pointers can't be in .rodata, but only in .data (or .data.rel.ro). Even
worse, these static const objects aren't initialized in a lazy manner,*
so that pages containing pointers to be relocated are automatically
dirty even when they are actually never used. I think it is of the
utmost importance to counter this and this patch does so in a few instances.
Of course I am all ears for how to make it clear that someone who
modifies the strings also needs to check the array dimensions.

(Here is a snippet for those who want to test this for themselves:

+typedef struct Entry {
+    const char *str;
+    char array[4088];
+} Entry;
+
+#define REPEAT(...) __VA_ARGS__, __VA_ARGS__, __VA_ARGS__, __VA_ARGS__
+#define REPEAT2(...)
REPEAT(REPEAT(REPEAT(REPEAT(REPEAT(REPEAT(REPEAT(__VA_ARGS__)))))))
+
+const Entry unused[16384] = { REPEAT2({ "", "" }) };
+
)

- Andreas

*: It seems that Windows does better:
https://devblogs.microsoft.com/oldnewthing/20160413-00/?p=93301
Reimar Döffinger Jan. 12, 2021, 7:14 p.m. UTC | #4
> On 12 Jan 2021, at 02:41, Andreas Rheinhardt <andreas.rheinhardt@gmail.com> wrote:

> Of course I am all ears for how to make it clear that someone who
> modifies the strings also needs to check the array dimensions.

I think I kind of agree with the other comments, this would/should
rather have to be something that can be checked in an automated way.
In principle I also do not much like this solution, it does not
work very well when the strings are of very different sizes.
I’ve not found a solution that is really worth the effort needed,
but I think it would be better to have an approach that is more
along the lines of what PIC does: encode only offsets.
In assembler you can just encode the strings and then have an array
with the offsets to them, but in C that is undefined behaviour…
Where performance doesn’t matter, what you definitely can do
is to just dump one string after the other in an array and then
use a function to scan for the desired index.
The ideal solution from my point of view, would be to have
such an array of all strings together and then an array with
the offsets to each.
With some kind of special preprocessor or code generator that
would be easy, but that makes the code messy.
Doing it in pure C also ends up quite a mess, the below is about as
far as I got, no idea if someone knows some magic to make it actually
bearable…


#include <stdio.h>

#define STRINGS \
X(0, 1, "test1") \
X(1, 2, "test2") \
X(2, 3, "testteststeste2") \

#define X(n, m, x) x "\0"
static const char stringdata[] = STRINGS ;
#undef X

#define X(n, m, x) static const int strpos##m = strpos##n + sizeof(x);
static const int strpos0 = 0;
STRINGS
#undef X
int main()
{
printf("0: %s\n", stringdata + strpos0);
printf("1: %s\n", stringdata + strpos1);
printf("2: %s\n", stringdata + strpos2);
}
diff mbox series

Patch

diff --git a/libavcodec/ccaption_dec.c b/libavcodec/ccaption_dec.c
index a208e19b95..0198158c38 100644
--- a/libavcodec/ccaption_dec.c
+++ b/libavcodec/ccaption_dec.c
@@ -66,7 +66,7 @@  enum cc_charset {
     CCSET_EXTENDED_PORTUGUESE_GERMAN_DANISH,
 };
 
-static const char *charset_overrides[4][128] =
+static const char charset_overrides[4][128][sizeof("\u266a")] =
 {
     [CCSET_BASIC_AMERICAN] = {
         [0x27] = "\u2019",
@@ -575,7 +575,7 @@  static int capture_screen(CCaptionSubContext *ctx)
                 prev_color = color[j];
                 prev_bg_color = bg[j];
                 override = charset_overrides[(int)charset[j]][(int)row[j]];
-                if (override) {
+                if (override[0]) {
                     av_bprintf(&ctx->buffer[bidx], "%s%s%s%s%s", e_tag, s_tag, c_tag, b_tag, override);
                     seen_char = 1;
                 } else if (row[j] == ' ' && !seen_char) {
diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index 518e5b2c00..d643046b5d 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -33,7 +33,7 @@  const AVCodecHWConfigInternal *const ff_vaapi_encode_hw_configs[] = {
     NULL,
 };
 
-static const char * const picture_type_name[] = { "IDR", "I", "P", "B" };
+static const char picture_type_name[][sizeof("IDR")] = { "IDR", "I", "P", "B" };
 
 static int vaapi_encode_make_packed_header(AVCodecContext *avctx,
                                            VAAPIEncodePicture *pic,
diff --git a/libavfilter/af_hdcd.c b/libavfilter/af_hdcd.c
index 251d03229a..fc281a4620 100644
--- a/libavfilter/af_hdcd.c
+++ b/libavfilter/af_hdcd.c
@@ -900,7 +900,7 @@  typedef enum {
     HDCD_PVER_MIX        = 3, /**< Packets of type A and B discovered, most likely an encoding error */
 } hdcd_pf;
 
-static const char * const pf_str[] = {
+static const char pf_str[][sizeof("A+B")] = {
     "?", "A", "B", "A+B"
 };
 
diff --git a/libavfilter/f_perms.c b/libavfilter/f_perms.c
index d984e5b150..ae4264038f 100644
--- a/libavfilter/f_perms.c
+++ b/libavfilter/f_perms.c
@@ -72,7 +72,7 @@  static av_cold int init(AVFilterContext *ctx)
 }
 
 enum perm                        {  RO,   RW  };
-static const char * const perm_str[2] = { "RO", "RW" };
+static const char perm_str[] = { 'O', 'W' };
 
 static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 {
@@ -91,7 +91,7 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
     default:            out_perm = in_perm;                                 break;
     }
 
-    av_log(ctx, AV_LOG_VERBOSE, "%s -> %s%s\n",
+    av_log(ctx, AV_LOG_VERBOSE, "R%c -> R%c%s\n",
            perm_str[in_perm], perm_str[out_perm],
            in_perm == out_perm ? " (no-op)" : "");
 
diff --git a/libavformat/iff.c b/libavformat/iff.c
index 2dba121f6f..ad39e08649 100644
--- a/libavformat/iff.c
+++ b/libavformat/iff.c
@@ -199,13 +199,13 @@  static const uint64_t dsd_loudspeaker_config[] = {
     AV_CH_LAYOUT_5POINT0, AV_CH_LAYOUT_5POINT1,
 };
 
-static const char * dsd_source_comment[] = {
+static const char dsd_source_comment[][sizeof("analogue_source_comment")] = {
     "dsd_source_comment",
     "analogue_source_comment",
     "pcm_source_comment",
 };
 
-static const char * dsd_history_comment[] = {
+static const char dsd_history_comment[][sizeof("creating_machine")] = {
     "general_remark",
     "operator_name",
     "creating_machine",
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 374831baa3..7b26face08 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -1946,7 +1946,7 @@  static void matroska_parse_cues(MatroskaDemuxContext *matroska) {
 
 static int matroska_aac_profile(char *codec_id)
 {
-    static const char *const aac_profiles[] = { "MAIN", "LC", "SSR" };
+    static const char aac_profiles[][sizeof("MAIN")] = { "MAIN", "LC", "SSR" };
     int profile;
 
     for (profile = 0; profile < FF_ARRAY_ELEMS(aac_profiles); profile++)
diff --git a/libavformat/sdp.c b/libavformat/sdp.c
index 95f3fbb876..69565e326d 100644
--- a/libavformat/sdp.c
+++ b/libavformat/sdp.c
@@ -230,7 +230,7 @@  static char *extradata2psets_hevc(AVCodecParameters *par)
     int extradata_size = par->extradata_size;
     uint8_t *tmpbuf = NULL;
     int ps_pos[3] = { 0 };
-    static const char * const ps_names[3] = { "vps", "sps", "pps" };
+    static const char ps_names[3][sizeof("vps")] = { "vps", "sps", "pps" };
     int num_arrays, num_nalus;
     int pos, i, j;
 
diff --git a/libavutil/parseutils.c b/libavutil/parseutils.c
index 167e822648..e43c249cee 100644
--- a/libavutil/parseutils.c
+++ b/libavutil/parseutils.c
@@ -140,7 +140,7 @@  static const VideoRateAbbr video_rate_abbrs[]= {
     { "ntsc-film", { 24000, 1001 } },
 };
 
-static const char *months[12] = {
+static const char months[12][sizeof("september")] = {
     "january", "february", "march", "april", "may", "june", "july", "august",
     "september", "october", "november", "december"
 };