Message ID | CAPUDrwf4wu0ZjR+NZi6be_4Npb6MzFACGT-s-0x+LH7aiVhWJQ@mail.gmail.com |
---|---|
State | New |
Headers | show |
2018-09-08 1:50 GMT+02:00, Dale Curtis <dalecurtis@chromium.org>:
> These tables represent ~70k so moving the allocation to when
> they're actually used reduces memory usage in cases where the
> h264 decoder isn't used.

Is there a performance penalty?

Carl Eugen
On 9/7/2018 8:50 PM, Dale Curtis wrote: > These tables represent ~70k so moving the allocation to when > they're actually used reduces memory usage in cases where the > h264 decoder isn't used. > > > cavlc-size.patch > > > From e1cbe52a1f41a39698136efb4695d8d019117853 Mon Sep 17 00:00:00 2001 > From: Dale Curtis <dalecurtis@chromium.org> > Date: Fri, 31 Aug 2018 16:50:23 -0700 > Subject: [PATCH] Reduce static table size for VLC tables in h264_cavlc.c > > These tables represent ~70k so moving the allocation to when > they're actually used reduces memory usage in cases where the > h264 decoder isn't used. > > Signed-off-by: Dale Curtis <dalecurtis@chromium.org> > --- > libavcodec/h264_cavlc.c | 43 +++++++++++++++++++++++++---------------- > 1 file changed, 26 insertions(+), 17 deletions(-) > > diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c > index a7e60676a0..7769202401 100644 > --- a/libavcodec/h264_cavlc.c > +++ b/libavcodec/h264_cavlc.c > @@ -236,35 +236,35 @@ static const uint8_t run_bits[7][16]={ > }; > > static VLC coeff_token_vlc[4]; > -static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2]; > +static VLC_TYPE (*coeff_token_vlc_tables)[520+332+280+256][2]; > static const int coeff_token_vlc_tables_size[4]={520,332,280,256}; > > static VLC chroma_dc_coeff_token_vlc; > -static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2]; > +static VLC_TYPE (*chroma_dc_coeff_token_vlc_table)[256][2]; > static const int chroma_dc_coeff_token_vlc_table_size = 256; > > static VLC chroma422_dc_coeff_token_vlc; > -static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2]; > +static VLC_TYPE (*chroma422_dc_coeff_token_vlc_table)[8192][2]; > static const int chroma422_dc_coeff_token_vlc_table_size = 8192; > > static VLC total_zeros_vlc[15+1]; > -static VLC_TYPE total_zeros_vlc_tables[15][512][2]; > +static VLC_TYPE (*total_zeros_vlc_tables)[15][512][2]; > static const int total_zeros_vlc_tables_size = 512; > > static VLC chroma_dc_total_zeros_vlc[3+1]; > 
-static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2]; > +static VLC_TYPE (*chroma_dc_total_zeros_vlc_tables)[3][8][2]; > static const int chroma_dc_total_zeros_vlc_tables_size = 8; > > static VLC chroma422_dc_total_zeros_vlc[7+1]; > -static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2]; > +static VLC_TYPE (*chroma422_dc_total_zeros_vlc_tables)[7][32][2]; > static const int chroma422_dc_total_zeros_vlc_tables_size = 32; > > static VLC run_vlc[6+1]; > -static VLC_TYPE run_vlc_tables[6][8][2]; > +static VLC_TYPE (*run_vlc_tables)[6][8][2]; > static const int run_vlc_tables_size = 8; > > static VLC run7_vlc; > -static VLC_TYPE run7_vlc_table[96][2]; > +static VLC_TYPE (*run7_vlc_table)[96][2]; > static const int run7_vlc_table_size = 96; > > #define LEVEL_TAB_BITS 8 > @@ -331,14 +331,23 @@ av_cold void ff_h264_decode_init_vlc(void){ > int offset; > done = 1; > > - chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table; > + coeff_token_vlc_tables = av_mallocz(sizeof(*coeff_token_vlc_tables)); > + chroma_dc_coeff_token_vlc_table = av_mallocz(sizeof(*chroma_dc_coeff_token_vlc_table)); > + chroma422_dc_coeff_token_vlc_table = av_mallocz(sizeof(*chroma422_dc_coeff_token_vlc_table)); > + total_zeros_vlc_tables = av_mallocz(sizeof(*total_zeros_vlc_tables)); > + chroma_dc_total_zeros_vlc_tables = av_mallocz(sizeof(*chroma_dc_total_zeros_vlc_tables)); > + chroma422_dc_total_zeros_vlc_tables = av_mallocz(sizeof(*chroma422_dc_total_zeros_vlc_tables)); > + run_vlc_tables = av_mallocz(sizeof(*run_vlc_tables)); > + run7_vlc_table = av_mallocz(sizeof(*run7_vlc_table)); Missing allocation checks. 
> + > + chroma_dc_coeff_token_vlc.table = *chroma_dc_coeff_token_vlc_table; > chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size; > init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5, > &chroma_dc_coeff_token_len [0], 1, 1, > &chroma_dc_coeff_token_bits[0], 1, 1, > INIT_VLC_USE_NEW_STATIC); > > - chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table; > + chroma422_dc_coeff_token_vlc.table = *chroma422_dc_coeff_token_vlc_table; > chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size; > init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9, > &chroma422_dc_coeff_token_len [0], 1, 1, > @@ -347,7 +356,7 @@ av_cold void ff_h264_decode_init_vlc(void){ > > offset = 0; > for(i=0; i<4; i++){ > - coeff_token_vlc[i].table = coeff_token_vlc_tables+offset; > + coeff_token_vlc[i].table = (*coeff_token_vlc_tables)+offset; > coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i]; > init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17, > &coeff_token_len [i][0], 1, 1, > @@ -360,10 +369,10 @@ av_cold void ff_h264_decode_init_vlc(void){ > * the packed static coeff_token_vlc table sizes > * were initialized correctly. 
> */ > - av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables)); > + av_assert0(offset == FF_ARRAY_ELEMS(*coeff_token_vlc_tables)); > > for(i=0; i<3; i++){ > - chroma_dc_total_zeros_vlc[i+1].table = chroma_dc_total_zeros_vlc_tables[i]; > + chroma_dc_total_zeros_vlc[i+1].table = (*chroma_dc_total_zeros_vlc_tables)[i]; > chroma_dc_total_zeros_vlc[i+1].table_allocated = chroma_dc_total_zeros_vlc_tables_size; > init_vlc(&chroma_dc_total_zeros_vlc[i+1], > CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4, > @@ -373,7 +382,7 @@ av_cold void ff_h264_decode_init_vlc(void){ > } > > for(i=0; i<7; i++){ > - chroma422_dc_total_zeros_vlc[i+1].table = chroma422_dc_total_zeros_vlc_tables[i]; > + chroma422_dc_total_zeros_vlc[i+1].table = (*chroma422_dc_total_zeros_vlc_tables)[i]; > chroma422_dc_total_zeros_vlc[i+1].table_allocated = chroma422_dc_total_zeros_vlc_tables_size; > init_vlc(&chroma422_dc_total_zeros_vlc[i+1], > CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8, > @@ -383,7 +392,7 @@ av_cold void ff_h264_decode_init_vlc(void){ > } > > for(i=0; i<15; i++){ > - total_zeros_vlc[i+1].table = total_zeros_vlc_tables[i]; > + total_zeros_vlc[i+1].table = (*total_zeros_vlc_tables)[i]; > total_zeros_vlc[i+1].table_allocated = total_zeros_vlc_tables_size; > init_vlc(&total_zeros_vlc[i+1], > TOTAL_ZEROS_VLC_BITS, 16, > @@ -393,7 +402,7 @@ av_cold void ff_h264_decode_init_vlc(void){ > } > > for(i=0; i<6; i++){ > - run_vlc[i+1].table = run_vlc_tables[i]; > + run_vlc[i+1].table = (*run_vlc_tables)[i]; > run_vlc[i+1].table_allocated = run_vlc_tables_size; > init_vlc(&run_vlc[i+1], > RUN_VLC_BITS, 7, > @@ -401,7 +410,7 @@ av_cold void ff_h264_decode_init_vlc(void){ > &run_bits[i][0], 1, 1, > INIT_VLC_USE_NEW_STATIC); > } > - run7_vlc.table = run7_vlc_table, > + run7_vlc.table = *run7_vlc_table, > run7_vlc.table_allocated = run7_vlc_table_size; > init_vlc(&run7_vlc, RUN7_VLC_BITS, 16, > &run_len [6][0], 1, 1, You're not freeing the allocated tables anywhere. 
> -- 2.19.0.rc2.392.g5ba43deb5a-goog > > > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel >
On Fri, Sep 07, 2018 at 04:50:57PM -0700, Dale Curtis wrote:
> These tables represent ~70k so moving the allocation to when
> they're actually used reduces memory usage in cases where the
> h264 decoder isn't used.
> h264_cavlc.c | 43 ++++++++++++++++++++++++++-----------------
> 1 file changed, 26 insertions(+), 17 deletions(-)
> c7d67012a2207911076717c1667e698842fa101a cavlc-size.patch
> From e1cbe52a1f41a39698136efb4695d8d019117853 Mon Sep 17 00:00:00 2001
> From: Dale Curtis <dalecurtis@chromium.org>
> Date: Fri, 31 Aug 2018 16:50:23 -0700
> Subject: [PATCH] Reduce static table size for VLC tables in h264_cavlc.c
>
> These tables represent ~70k so moving the allocation to when
> they're actually used reduces memory usage in cases where the
> h264 decoder isn't used.
>
> Signed-off-by: Dale Curtis <dalecurtis@chromium.org>
> ---
> libavcodec/h264_cavlc.c | 43 +++++++++++++++++++++++++----------------
> 1 file changed, 26 insertions(+), 17 deletions(-)
>
> diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
> index a7e60676a0..7769202401 100644
> --- a/libavcodec/h264_cavlc.c
> +++ b/libavcodec/h264_cavlc.c
> @@ -236,35 +236,35 @@ static const uint8_t run_bits[7][16]={
> };
>
> static VLC coeff_token_vlc[4];
> -static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
> +static VLC_TYPE (*coeff_token_vlc_tables)[520+332+280+256][2];
> static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

dont all modern OS assign physical memory only once something is stored
in these tables?

[...]
On Sat, Sep 8, 2018 at 5:49 PM Michael Niedermayer <michael@niedermayer.cc> wrote:
> dont all modern OS assign physical memory only once something is stored
> in these tables?
>

This seems to be correct. I was misreading the tooling which indicated these were taking up size. So this patch can be abandoned. Sorry for the noise!

- dale
From e1cbe52a1f41a39698136efb4695d8d019117853 Mon Sep 17 00:00:00 2001 From: Dale Curtis <dalecurtis@chromium.org> Date: Fri, 31 Aug 2018 16:50:23 -0700 Subject: [PATCH] Reduce static table size for VLC tables in h264_cavlc.c These tables represent ~70k so moving the allocation to when they're actually used reduces memory usage in cases where the h264 decoder isn't used. Signed-off-by: Dale Curtis <dalecurtis@chromium.org> --- libavcodec/h264_cavlc.c | 43 +++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c index a7e60676a0..7769202401 100644 --- a/libavcodec/h264_cavlc.c +++ b/libavcodec/h264_cavlc.c @@ -236,35 +236,35 @@ static const uint8_t run_bits[7][16]={ }; static VLC coeff_token_vlc[4]; -static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2]; +static VLC_TYPE (*coeff_token_vlc_tables)[520+332+280+256][2]; static const int coeff_token_vlc_tables_size[4]={520,332,280,256}; static VLC chroma_dc_coeff_token_vlc; -static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2]; +static VLC_TYPE (*chroma_dc_coeff_token_vlc_table)[256][2]; static const int chroma_dc_coeff_token_vlc_table_size = 256; static VLC chroma422_dc_coeff_token_vlc; -static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2]; +static VLC_TYPE (*chroma422_dc_coeff_token_vlc_table)[8192][2]; static const int chroma422_dc_coeff_token_vlc_table_size = 8192; static VLC total_zeros_vlc[15+1]; -static VLC_TYPE total_zeros_vlc_tables[15][512][2]; +static VLC_TYPE (*total_zeros_vlc_tables)[15][512][2]; static const int total_zeros_vlc_tables_size = 512; static VLC chroma_dc_total_zeros_vlc[3+1]; -static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2]; +static VLC_TYPE (*chroma_dc_total_zeros_vlc_tables)[3][8][2]; static const int chroma_dc_total_zeros_vlc_tables_size = 8; static VLC chroma422_dc_total_zeros_vlc[7+1]; -static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2]; +static VLC_TYPE 
(*chroma422_dc_total_zeros_vlc_tables)[7][32][2]; static const int chroma422_dc_total_zeros_vlc_tables_size = 32; static VLC run_vlc[6+1]; -static VLC_TYPE run_vlc_tables[6][8][2]; +static VLC_TYPE (*run_vlc_tables)[6][8][2]; static const int run_vlc_tables_size = 8; static VLC run7_vlc; -static VLC_TYPE run7_vlc_table[96][2]; +static VLC_TYPE (*run7_vlc_table)[96][2]; static const int run7_vlc_table_size = 96; #define LEVEL_TAB_BITS 8 @@ -331,14 +331,23 @@ av_cold void ff_h264_decode_init_vlc(void){ int offset; done = 1; - chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table; + coeff_token_vlc_tables = av_mallocz(sizeof(*coeff_token_vlc_tables)); + chroma_dc_coeff_token_vlc_table = av_mallocz(sizeof(*chroma_dc_coeff_token_vlc_table)); + chroma422_dc_coeff_token_vlc_table = av_mallocz(sizeof(*chroma422_dc_coeff_token_vlc_table)); + total_zeros_vlc_tables = av_mallocz(sizeof(*total_zeros_vlc_tables)); + chroma_dc_total_zeros_vlc_tables = av_mallocz(sizeof(*chroma_dc_total_zeros_vlc_tables)); + chroma422_dc_total_zeros_vlc_tables = av_mallocz(sizeof(*chroma422_dc_total_zeros_vlc_tables)); + run_vlc_tables = av_mallocz(sizeof(*run_vlc_tables)); + run7_vlc_table = av_mallocz(sizeof(*run7_vlc_table)); + + chroma_dc_coeff_token_vlc.table = *chroma_dc_coeff_token_vlc_table; chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size; init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5, &chroma_dc_coeff_token_len [0], 1, 1, &chroma_dc_coeff_token_bits[0], 1, 1, INIT_VLC_USE_NEW_STATIC); - chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table; + chroma422_dc_coeff_token_vlc.table = *chroma422_dc_coeff_token_vlc_table; chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size; init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9, &chroma422_dc_coeff_token_len [0], 1, 1, @@ -347,7 +356,7 @@ av_cold void ff_h264_decode_init_vlc(void){ offset = 0; 
for(i=0; i<4; i++){ - coeff_token_vlc[i].table = coeff_token_vlc_tables+offset; + coeff_token_vlc[i].table = (*coeff_token_vlc_tables)+offset; coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i]; init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17, &coeff_token_len [i][0], 1, 1, @@ -360,10 +369,10 @@ av_cold void ff_h264_decode_init_vlc(void){ * the packed static coeff_token_vlc table sizes * were initialized correctly. */ - av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables)); + av_assert0(offset == FF_ARRAY_ELEMS(*coeff_token_vlc_tables)); for(i=0; i<3; i++){ - chroma_dc_total_zeros_vlc[i+1].table = chroma_dc_total_zeros_vlc_tables[i]; + chroma_dc_total_zeros_vlc[i+1].table = (*chroma_dc_total_zeros_vlc_tables)[i]; chroma_dc_total_zeros_vlc[i+1].table_allocated = chroma_dc_total_zeros_vlc_tables_size; init_vlc(&chroma_dc_total_zeros_vlc[i+1], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4, @@ -373,7 +382,7 @@ av_cold void ff_h264_decode_init_vlc(void){ } for(i=0; i<7; i++){ - chroma422_dc_total_zeros_vlc[i+1].table = chroma422_dc_total_zeros_vlc_tables[i]; + chroma422_dc_total_zeros_vlc[i+1].table = (*chroma422_dc_total_zeros_vlc_tables)[i]; chroma422_dc_total_zeros_vlc[i+1].table_allocated = chroma422_dc_total_zeros_vlc_tables_size; init_vlc(&chroma422_dc_total_zeros_vlc[i+1], CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8, @@ -383,7 +392,7 @@ av_cold void ff_h264_decode_init_vlc(void){ } for(i=0; i<15; i++){ - total_zeros_vlc[i+1].table = total_zeros_vlc_tables[i]; + total_zeros_vlc[i+1].table = (*total_zeros_vlc_tables)[i]; total_zeros_vlc[i+1].table_allocated = total_zeros_vlc_tables_size; init_vlc(&total_zeros_vlc[i+1], TOTAL_ZEROS_VLC_BITS, 16, @@ -393,7 +402,7 @@ av_cold void ff_h264_decode_init_vlc(void){ } for(i=0; i<6; i++){ - run_vlc[i+1].table = run_vlc_tables[i]; + run_vlc[i+1].table = (*run_vlc_tables)[i]; run_vlc[i+1].table_allocated = run_vlc_tables_size; init_vlc(&run_vlc[i+1], RUN_VLC_BITS, 7, @@ -401,7 +410,7 @@ av_cold void 
ff_h264_decode_init_vlc(void){ &run_bits[i][0], 1, 1, INIT_VLC_USE_NEW_STATIC); } - run7_vlc.table = run7_vlc_table, + run7_vlc.table = *run7_vlc_table, run7_vlc.table_allocated = run7_vlc_table_size; init_vlc(&run7_vlc, RUN7_VLC_BITS, 16, &run_len [6][0], 1, 1, -- 2.19.0.rc2.392.g5ba43deb5a-goog