[FFmpeg-devel] Reduce static table size for VLC tables in h264_cavlc.c

Submitted by Dale Curtis on Sept. 7, 2018, 11:50 p.m.

Details

Message ID CAPUDrwf4wu0ZjR+NZi6be_4Npb6MzFACGT-s-0x+LH7aiVhWJQ@mail.gmail.com
State New
Headers show

Commit Message

Dale Curtis Sept. 7, 2018, 11:50 p.m.
These tables represent ~70k so moving the allocation to when
they're actually used reduces memory usage in cases where the
h264 decoder isn't used.

Comments

Carl Eugen Hoyos Sept. 8, 2018, 12:16 a.m.
2018-09-08 1:50 GMT+02:00, Dale Curtis <dalecurtis@chromium.org>:
> These tables represent ~70k so moving the allocation to when
> they're actually used reduces memory usage in cases where the
> h264 decoder isn't used.

Is there a performance penalty?

Carl Eugen
James Almer Sept. 8, 2018, 9:13 p.m.
On 9/7/2018 8:50 PM, Dale Curtis wrote:
> These tables represent ~70k so moving the allocation to when
> they're actually used reduces memory usage in cases where the
> h264 decoder isn't used.
> 
> 
> cavlc-size.patch
> 
> 
> From e1cbe52a1f41a39698136efb4695d8d019117853 Mon Sep 17 00:00:00 2001
> From: Dale Curtis <dalecurtis@chromium.org>
> Date: Fri, 31 Aug 2018 16:50:23 -0700
> Subject: [PATCH] Reduce static table size for VLC tables in h264_cavlc.c
> 
> These tables represent ~70k so moving the allocation to when
> they're actually used reduces memory usage in cases where the
> h264 decoder isn't used.
> 
> Signed-off-by: Dale Curtis <dalecurtis@chromium.org>
> ---
>  libavcodec/h264_cavlc.c | 43 +++++++++++++++++++++++++----------------
>  1 file changed, 26 insertions(+), 17 deletions(-)
> 
> diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
> index a7e60676a0..7769202401 100644
> --- a/libavcodec/h264_cavlc.c
> +++ b/libavcodec/h264_cavlc.c
> @@ -236,35 +236,35 @@ static const uint8_t run_bits[7][16]={
>  };
>  
>  static VLC coeff_token_vlc[4];
> -static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
> +static VLC_TYPE (*coeff_token_vlc_tables)[520+332+280+256][2];
>  static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
>  
>  static VLC chroma_dc_coeff_token_vlc;
> -static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
> +static VLC_TYPE (*chroma_dc_coeff_token_vlc_table)[256][2];
>  static const int chroma_dc_coeff_token_vlc_table_size = 256;
>  
>  static VLC chroma422_dc_coeff_token_vlc;
> -static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
> +static VLC_TYPE (*chroma422_dc_coeff_token_vlc_table)[8192][2];
>  static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
>  
>  static VLC total_zeros_vlc[15+1];
> -static VLC_TYPE total_zeros_vlc_tables[15][512][2];
> +static VLC_TYPE (*total_zeros_vlc_tables)[15][512][2];
>  static const int total_zeros_vlc_tables_size = 512;
>  
>  static VLC chroma_dc_total_zeros_vlc[3+1];
> -static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
> +static VLC_TYPE (*chroma_dc_total_zeros_vlc_tables)[3][8][2];
>  static const int chroma_dc_total_zeros_vlc_tables_size = 8;
>  
>  static VLC chroma422_dc_total_zeros_vlc[7+1];
> -static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
> +static VLC_TYPE (*chroma422_dc_total_zeros_vlc_tables)[7][32][2];
>  static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
>  
>  static VLC run_vlc[6+1];
> -static VLC_TYPE run_vlc_tables[6][8][2];
> +static VLC_TYPE (*run_vlc_tables)[6][8][2];
>  static const int run_vlc_tables_size = 8;
>  
>  static VLC run7_vlc;
> -static VLC_TYPE run7_vlc_table[96][2];
> +static VLC_TYPE (*run7_vlc_table)[96][2];
>  static const int run7_vlc_table_size = 96;
>  
>  #define LEVEL_TAB_BITS 8
> @@ -331,14 +331,23 @@ av_cold void ff_h264_decode_init_vlc(void){
>          int offset;
>          done = 1;
>  
> -        chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
> +        coeff_token_vlc_tables = av_mallocz(sizeof(*coeff_token_vlc_tables));
> +        chroma_dc_coeff_token_vlc_table = av_mallocz(sizeof(*chroma_dc_coeff_token_vlc_table));
> +        chroma422_dc_coeff_token_vlc_table = av_mallocz(sizeof(*chroma422_dc_coeff_token_vlc_table));
> +        total_zeros_vlc_tables = av_mallocz(sizeof(*total_zeros_vlc_tables));
> +        chroma_dc_total_zeros_vlc_tables = av_mallocz(sizeof(*chroma_dc_total_zeros_vlc_tables));
> +        chroma422_dc_total_zeros_vlc_tables = av_mallocz(sizeof(*chroma422_dc_total_zeros_vlc_tables));
> +        run_vlc_tables = av_mallocz(sizeof(*run_vlc_tables));
> +        run7_vlc_table = av_mallocz(sizeof(*run7_vlc_table));

Missing allocation checks.

> +
> +        chroma_dc_coeff_token_vlc.table = *chroma_dc_coeff_token_vlc_table;
>          chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
>          init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
>                   &chroma_dc_coeff_token_len [0], 1, 1,
>                   &chroma_dc_coeff_token_bits[0], 1, 1,
>                   INIT_VLC_USE_NEW_STATIC);
>  
> -        chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
> +        chroma422_dc_coeff_token_vlc.table = *chroma422_dc_coeff_token_vlc_table;
>          chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
>          init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
>                   &chroma422_dc_coeff_token_len [0], 1, 1,
> @@ -347,7 +356,7 @@ av_cold void ff_h264_decode_init_vlc(void){
>  
>          offset = 0;
>          for(i=0; i<4; i++){
> -            coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
> +            coeff_token_vlc[i].table = (*coeff_token_vlc_tables)+offset;
>              coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
>              init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
>                       &coeff_token_len [i][0], 1, 1,
> @@ -360,10 +369,10 @@ av_cold void ff_h264_decode_init_vlc(void){
>           * the packed static coeff_token_vlc table sizes
>           * were initialized correctly.
>           */
> -        av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
> +        av_assert0(offset == FF_ARRAY_ELEMS(*coeff_token_vlc_tables));
>  
>          for(i=0; i<3; i++){
> -            chroma_dc_total_zeros_vlc[i+1].table = chroma_dc_total_zeros_vlc_tables[i];
> +            chroma_dc_total_zeros_vlc[i+1].table = (*chroma_dc_total_zeros_vlc_tables)[i];
>              chroma_dc_total_zeros_vlc[i+1].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
>              init_vlc(&chroma_dc_total_zeros_vlc[i+1],
>                       CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
> @@ -373,7 +382,7 @@ av_cold void ff_h264_decode_init_vlc(void){
>          }
>  
>          for(i=0; i<7; i++){
> -            chroma422_dc_total_zeros_vlc[i+1].table = chroma422_dc_total_zeros_vlc_tables[i];
> +            chroma422_dc_total_zeros_vlc[i+1].table = (*chroma422_dc_total_zeros_vlc_tables)[i];
>              chroma422_dc_total_zeros_vlc[i+1].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
>              init_vlc(&chroma422_dc_total_zeros_vlc[i+1],
>                       CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
> @@ -383,7 +392,7 @@ av_cold void ff_h264_decode_init_vlc(void){
>          }
>  
>          for(i=0; i<15; i++){
> -            total_zeros_vlc[i+1].table = total_zeros_vlc_tables[i];
> +            total_zeros_vlc[i+1].table = (*total_zeros_vlc_tables)[i];
>              total_zeros_vlc[i+1].table_allocated = total_zeros_vlc_tables_size;
>              init_vlc(&total_zeros_vlc[i+1],
>                       TOTAL_ZEROS_VLC_BITS, 16,
> @@ -393,7 +402,7 @@ av_cold void ff_h264_decode_init_vlc(void){
>          }
>  
>          for(i=0; i<6; i++){
> -            run_vlc[i+1].table = run_vlc_tables[i];
> +            run_vlc[i+1].table = (*run_vlc_tables)[i];
>              run_vlc[i+1].table_allocated = run_vlc_tables_size;
>              init_vlc(&run_vlc[i+1],
>                       RUN_VLC_BITS, 7,
> @@ -401,7 +410,7 @@ av_cold void ff_h264_decode_init_vlc(void){
>                       &run_bits[i][0], 1, 1,
>                       INIT_VLC_USE_NEW_STATIC);
>          }
> -        run7_vlc.table = run7_vlc_table,
> +        run7_vlc.table = *run7_vlc_table,
>          run7_vlc.table_allocated = run7_vlc_table_size;
>          init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
>                   &run_len [6][0], 1, 1,

You're not freeing the allocated tables anywhere.

> -- 2.19.0.rc2.392.g5ba43deb5a-goog
> 
> 
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
Michael Niedermayer Sept. 9, 2018, 12:49 a.m.
On Fri, Sep 07, 2018 at 04:50:57PM -0700, Dale Curtis wrote:
> These tables represent ~70k so moving the allocation to when
> they're actually used reduces memory usage in cases where the
> h264 decoder isn't used.

>  h264_cavlc.c |   43 ++++++++++++++++++++++++++-----------------
>  1 file changed, 26 insertions(+), 17 deletions(-)
> c7d67012a2207911076717c1667e698842fa101a  cavlc-size.patch
> From e1cbe52a1f41a39698136efb4695d8d019117853 Mon Sep 17 00:00:00 2001
> From: Dale Curtis <dalecurtis@chromium.org>
> Date: Fri, 31 Aug 2018 16:50:23 -0700
> Subject: [PATCH] Reduce static table size for VLC tables in h264_cavlc.c
> 
> These tables represent ~70k so moving the allocation to when
> they're actually used reduces memory usage in cases where the
> h264 decoder isn't used.
> 
> Signed-off-by: Dale Curtis <dalecurtis@chromium.org>
> ---
>  libavcodec/h264_cavlc.c | 43 +++++++++++++++++++++++++----------------
>  1 file changed, 26 insertions(+), 17 deletions(-)
> 
> diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
> index a7e60676a0..7769202401 100644
> --- a/libavcodec/h264_cavlc.c
> +++ b/libavcodec/h264_cavlc.c
> @@ -236,35 +236,35 @@ static const uint8_t run_bits[7][16]={
>  };
>  
>  static VLC coeff_token_vlc[4];
> -static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
> +static VLC_TYPE (*coeff_token_vlc_tables)[520+332+280+256][2];
>  static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

Don't all modern OSes assign physical memory only once something is stored
in these tables?

[...]
Dale Curtis Sept. 17, 2018, 9:13 p.m.
On Sat, Sep 8, 2018 at 5:49 PM Michael Niedermayer <michael@niedermayer.cc>
wrote:

> Don't all modern OSes assign physical memory only once something is stored
> in these tables?
>

This seems to be correct. I was misreading the tooling, which indicated
that these tables were taking up space. So this patch can be abandoned.
Sorry for the noise!

- dale

Patch hide | download patch | download mbox

From e1cbe52a1f41a39698136efb4695d8d019117853 Mon Sep 17 00:00:00 2001
From: Dale Curtis <dalecurtis@chromium.org>
Date: Fri, 31 Aug 2018 16:50:23 -0700
Subject: [PATCH] Reduce static table size for VLC tables in h264_cavlc.c

These tables represent ~70k so moving the allocation to when
they're actually used reduces memory usage in cases where the
h264 decoder isn't used.

Signed-off-by: Dale Curtis <dalecurtis@chromium.org>
---
 libavcodec/h264_cavlc.c | 43 +++++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index a7e60676a0..7769202401 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -236,35 +236,35 @@  static const uint8_t run_bits[7][16]={
 };
 
 static VLC coeff_token_vlc[4];
-static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
+static VLC_TYPE (*coeff_token_vlc_tables)[520+332+280+256][2];
 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
 
 static VLC chroma_dc_coeff_token_vlc;
-static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
+static VLC_TYPE (*chroma_dc_coeff_token_vlc_table)[256][2];
 static const int chroma_dc_coeff_token_vlc_table_size = 256;
 
 static VLC chroma422_dc_coeff_token_vlc;
-static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
+static VLC_TYPE (*chroma422_dc_coeff_token_vlc_table)[8192][2];
 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
 
 static VLC total_zeros_vlc[15+1];
-static VLC_TYPE total_zeros_vlc_tables[15][512][2];
+static VLC_TYPE (*total_zeros_vlc_tables)[15][512][2];
 static const int total_zeros_vlc_tables_size = 512;
 
 static VLC chroma_dc_total_zeros_vlc[3+1];
-static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
+static VLC_TYPE (*chroma_dc_total_zeros_vlc_tables)[3][8][2];
 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
 
 static VLC chroma422_dc_total_zeros_vlc[7+1];
-static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
+static VLC_TYPE (*chroma422_dc_total_zeros_vlc_tables)[7][32][2];
 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
 
 static VLC run_vlc[6+1];
-static VLC_TYPE run_vlc_tables[6][8][2];
+static VLC_TYPE (*run_vlc_tables)[6][8][2];
 static const int run_vlc_tables_size = 8;
 
 static VLC run7_vlc;
-static VLC_TYPE run7_vlc_table[96][2];
+static VLC_TYPE (*run7_vlc_table)[96][2];
 static const int run7_vlc_table_size = 96;
 
 #define LEVEL_TAB_BITS 8
@@ -331,14 +331,23 @@  av_cold void ff_h264_decode_init_vlc(void){
         int offset;
         done = 1;
 
-        chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
+        coeff_token_vlc_tables = av_mallocz(sizeof(*coeff_token_vlc_tables));
+        chroma_dc_coeff_token_vlc_table = av_mallocz(sizeof(*chroma_dc_coeff_token_vlc_table));
+        chroma422_dc_coeff_token_vlc_table = av_mallocz(sizeof(*chroma422_dc_coeff_token_vlc_table));
+        total_zeros_vlc_tables = av_mallocz(sizeof(*total_zeros_vlc_tables));
+        chroma_dc_total_zeros_vlc_tables = av_mallocz(sizeof(*chroma_dc_total_zeros_vlc_tables));
+        chroma422_dc_total_zeros_vlc_tables = av_mallocz(sizeof(*chroma422_dc_total_zeros_vlc_tables));
+        run_vlc_tables = av_mallocz(sizeof(*run_vlc_tables));
+        run7_vlc_table = av_mallocz(sizeof(*run7_vlc_table));
+
+        chroma_dc_coeff_token_vlc.table = *chroma_dc_coeff_token_vlc_table;
         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
                  &chroma_dc_coeff_token_len [0], 1, 1,
                  &chroma_dc_coeff_token_bits[0], 1, 1,
                  INIT_VLC_USE_NEW_STATIC);
 
-        chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
+        chroma422_dc_coeff_token_vlc.table = *chroma422_dc_coeff_token_vlc_table;
         chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
         init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
                  &chroma422_dc_coeff_token_len [0], 1, 1,
@@ -347,7 +356,7 @@  av_cold void ff_h264_decode_init_vlc(void){
 
         offset = 0;
         for(i=0; i<4; i++){
-            coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
+            coeff_token_vlc[i].table = (*coeff_token_vlc_tables)+offset;
             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
                      &coeff_token_len [i][0], 1, 1,
@@ -360,10 +369,10 @@  av_cold void ff_h264_decode_init_vlc(void){
          * the packed static coeff_token_vlc table sizes
          * were initialized correctly.
          */
-        av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
+        av_assert0(offset == FF_ARRAY_ELEMS(*coeff_token_vlc_tables));
 
         for(i=0; i<3; i++){
-            chroma_dc_total_zeros_vlc[i+1].table = chroma_dc_total_zeros_vlc_tables[i];
+            chroma_dc_total_zeros_vlc[i+1].table = (*chroma_dc_total_zeros_vlc_tables)[i];
             chroma_dc_total_zeros_vlc[i+1].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
             init_vlc(&chroma_dc_total_zeros_vlc[i+1],
                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
@@ -373,7 +382,7 @@  av_cold void ff_h264_decode_init_vlc(void){
         }
 
         for(i=0; i<7; i++){
-            chroma422_dc_total_zeros_vlc[i+1].table = chroma422_dc_total_zeros_vlc_tables[i];
+            chroma422_dc_total_zeros_vlc[i+1].table = (*chroma422_dc_total_zeros_vlc_tables)[i];
             chroma422_dc_total_zeros_vlc[i+1].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
             init_vlc(&chroma422_dc_total_zeros_vlc[i+1],
                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
@@ -383,7 +392,7 @@  av_cold void ff_h264_decode_init_vlc(void){
         }
 
         for(i=0; i<15; i++){
-            total_zeros_vlc[i+1].table = total_zeros_vlc_tables[i];
+            total_zeros_vlc[i+1].table = (*total_zeros_vlc_tables)[i];
             total_zeros_vlc[i+1].table_allocated = total_zeros_vlc_tables_size;
             init_vlc(&total_zeros_vlc[i+1],
                      TOTAL_ZEROS_VLC_BITS, 16,
@@ -393,7 +402,7 @@  av_cold void ff_h264_decode_init_vlc(void){
         }
 
         for(i=0; i<6; i++){
-            run_vlc[i+1].table = run_vlc_tables[i];
+            run_vlc[i+1].table = (*run_vlc_tables)[i];
             run_vlc[i+1].table_allocated = run_vlc_tables_size;
             init_vlc(&run_vlc[i+1],
                      RUN_VLC_BITS, 7,
@@ -401,7 +410,7 @@  av_cold void ff_h264_decode_init_vlc(void){
                      &run_bits[i][0], 1, 1,
                      INIT_VLC_USE_NEW_STATIC);
         }
-        run7_vlc.table = run7_vlc_table,
+        run7_vlc.table = *run7_vlc_table,
         run7_vlc.table_allocated = run7_vlc_table_size;
         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
                  &run_len [6][0], 1, 1,
-- 
2.19.0.rc2.392.g5ba43deb5a-goog