diff mbox series

[FFmpeg-devel,19/21] avcodec/smacker: Avoid allocations for decoding Smacker

Message ID 20200801134704.3647-11-andreas.rheinhardt@gmail.com
State Accepted
Commit 010e345afe5d9744956dbc6253a4999e6851e7e9
Headers show
Series [FFmpeg-devel,1/8] avcodec/smacker: Remove write-only and unused variables | expand

Checks

Context Check Description
andriy/default pending
andriy/make success Make finished
andriy/make_fate success Make fate finished

Commit Message

Andreas Rheinhardt Aug. 1, 2020, 1:47 p.m. UTC
by using buffers on the stack instead. The fact that the effective
lifetime of most of the allocated buffers doesn't overlap enables one to
limit the stack space used to a fairly modest size (about 1.5 KiB).

That all the buffers used in HuffContexts have always the same number of
elements (namely 256) makes it possible to include the buffers directly
in the HuffContext. Doing so also makes the length field redundant; it has
therefore been removed.

This is beneficial for performance: For GCC 9 the time for one call to
smka_decode_frame() for the sample in ticket #2425 went down from
1794494 to 1709043 decicyles; for Clang 9 it decreased from 1449420 to
1355273 decicycles.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
---
 libavcodec/smacker.c | 69 +++++++++++++++-----------------------------
 1 file changed, 23 insertions(+), 46 deletions(-)

Comments

Paul B Mahol Aug. 1, 2020, 2 p.m. UTC | #1
On 8/1/20, Andreas Rheinhardt <andreas.rheinhardt@gmail.com> wrote:
> by using buffers on the stack instead. The fact that the effective
> lifetime of most of the allocated buffers doesn't overlap enables one to
> limit the stack space used to a fairly modest size (about 1.5 KiB).
>
> That all the buffers used in HuffContexts have always the same number of
> elements (namely 256) makes it possible to include the buffers directly
> in the HuffContext. Doing so also makes the length field redundant; it has
> therefore been removed.
>
> This is beneficial for performance: For GCC 9 the time for one call to
> smka_decode_frame() for the sample in ticket #2425 went down from
> 1794494 to 1709043 decicyles; for Clang 9 it decreased from 1449420 to
> 1355273 decicycles.
>
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
> ---
>  libavcodec/smacker.c | 69 +++++++++++++++-----------------------------
>  1 file changed, 23 insertions(+), 46 deletions(-)
>
> diff --git a/libavcodec/smacker.c b/libavcodec/smacker.c
> index 15c8856e69..e588b03820 100644
> --- a/libavcodec/smacker.c
> +++ b/libavcodec/smacker.c
> @@ -66,11 +66,10 @@ typedef struct SmackVContext {
>   * Context used for code reconstructing
>   */
>  typedef struct HuffContext {
> -    int length;
>      int current;
> -    uint32_t *bits;
> -    uint8_t *lengths;
> -    uint8_t *values;
> +    uint32_t bits[256];
> +    uint8_t lengths[256];
> +    uint8_t values[256];
>  } HuffContext;
>
>  /* common parameters used for decode_bigtree */
> @@ -114,7 +113,7 @@ static int smacker_decode_tree(GetBitContext *gb,
> HuffContext *hc, uint32_t pref
>      }
>
>      if(!get_bits1(gb)){ //Leaf
> -        if(hc->current >= hc->length){
> +        if (hc->current >= 256) {
>              av_log(NULL, AV_LOG_ERROR, "Tree size exceeded!\n");
>              return AVERROR_INVALIDDATA;
>          }
> @@ -198,7 +197,6 @@ static int smacker_decode_bigtree(GetBitContext *gb,
> DBCtx *ctx, int length)
>   */
>  static int smacker_decode_header_tree(SmackVContext *smk, GetBitContext
> *gb, int **recodes, int *last, int size)
>  {
> -    HuffContext h[2] = { 0 };
>      VLC vlc[2] = { { 0 } };
>      int escapes[3];
>      DBCtx ctx;
> @@ -210,37 +208,30 @@ static int smacker_decode_header_tree(SmackVContext
> *smk, GetBitContext *gb, int
>      }
>
>      for (int i = 0; i < 2; i++) {
> -        h[i].length  = 256;
> -        h[i].current = 0;
> -        h[i].bits    = av_malloc(256 * sizeof(h[i].bits[0]));
> -        h[i].lengths = av_malloc(256 * sizeof(h[i].lengths[0]));
> -        h[i].values  = av_malloc(256 * sizeof(h[i].values[0]));
> -        if (!h[i].bits || !h[i].lengths || !h[i].values) {
> -            err = AVERROR(ENOMEM);
> -            goto error;
> -        }
> +        HuffContext h;
> +        h.current = 0;
>          if (!get_bits1(gb)) {
>              ctx.vals[i] = 0;
>              av_log(smk->avctx, AV_LOG_ERROR, "Skipping %s bytes tree\n",
>                     i ? "high" : "low");
>              continue;
>          }
> -        err = smacker_decode_tree(gb, &h[i], 0, 0);
> +        err = smacker_decode_tree(gb, &h, 0, 0);
>          if (err < 0)
>              goto error;
>          skip_bits1(gb);
> -        if (h[i].current > 1) {
> -            err = ff_init_vlc_sparse(&vlc[i], SMKTREE_BITS, h[i].current,
> -                           INIT_VLC_DEFAULT_SIZES(h[i].lengths),
> -                           INIT_VLC_DEFAULT_SIZES(h[i].bits),
> -                                     INIT_VLC_DEFAULT_SIZES(h[i].values),
> -                           INIT_VLC_LE);
> +        if (h.current > 1) {
> +            err = ff_init_vlc_sparse(&vlc[i], SMKTREE_BITS, h.current,
> +                                     INIT_VLC_DEFAULT_SIZES(h.lengths),
> +                                     INIT_VLC_DEFAULT_SIZES(h.bits),
> +                                     INIT_VLC_DEFAULT_SIZES(h.values),
> +                                     INIT_VLC_LE);
>              if (err < 0) {
>                  av_log(smk->avctx, AV_LOG_ERROR, "Cannot build VLC
> table\n");
>                  goto error;
>              }
>          } else
> -            ctx.vals[i] = h[i].values[0];
> +            ctx.vals[i] = h.values[0];
>      }
>
>      escapes[0]  = get_bits(gb, 16);
> @@ -276,9 +267,6 @@ static int smacker_decode_header_tree(SmackVContext
> *smk, GetBitContext *gb, int
>  error:
>      for (int i = 0; i < 2; i++) {
>          ff_free_vlc(&vlc[i]);
> -        av_free(h[i].bits);
> -        av_free(h[i].lengths);
> -        av_free(h[i].values);
>      }
>
>      return err;
> @@ -603,7 +591,6 @@ static int smka_decode_frame(AVCodecContext *avctx, void
> *data,
>      const uint8_t *buf = avpkt->data;
>      int buf_size = avpkt->size;
>      GetBitContext gb;
> -    HuffContext h[4] = { { 0 } };
>      VLC vlc[4]       = { { 0 } };
>      int16_t *samples;
>      uint8_t *samples8;
> @@ -659,31 +646,24 @@ static int smka_decode_frame(AVCodecContext *avctx,
> void *data,
>
>      // Initialize
>      for(i = 0; i < (1 << (bits + stereo)); i++) {
> -        h[i].length = 256;
> -        h[i].current = 0;
> -        h[i].bits    = av_malloc(256 * sizeof(h[i].bits));
> -        h[i].lengths = av_malloc(256 * sizeof(h[i].lengths));
> -        h[i].values  = av_malloc(256 * sizeof(h[i].values));
> -        if (!h[i].bits || !h[i].lengths || !h[i].values) {
> -            ret = AVERROR(ENOMEM);
> -            goto error;
> -        }
> +        HuffContext h;
> +        h.current = 0;
>          skip_bits1(&gb);
> -        if ((ret = smacker_decode_tree(&gb, &h[i], 0, 0)) < 0)
> +        if ((ret = smacker_decode_tree(&gb, &h, 0, 0)) < 0)
>              goto error;
>          skip_bits1(&gb);
> -        if(h[i].current > 1) {
> -            ret = ff_init_vlc_sparse(&vlc[i], SMKTREE_BITS, h[i].current,
> -                           INIT_VLC_DEFAULT_SIZES(h[i].lengths),
> -                                     INIT_VLC_DEFAULT_SIZES(h[i].bits),
> -                                     INIT_VLC_DEFAULT_SIZES(h[i].values),
> +        if (h.current > 1) {
> +            ret = ff_init_vlc_sparse(&vlc[i], SMKTREE_BITS, h.current,
> +                                     INIT_VLC_DEFAULT_SIZES(h.lengths),
> +                                     INIT_VLC_DEFAULT_SIZES(h.bits),
> +                                     INIT_VLC_DEFAULT_SIZES(h.values),
>                                       INIT_VLC_LE);
>              if (ret < 0) {
>                  av_log(avctx, AV_LOG_ERROR, "Cannot build VLC table\n");
>                  goto error;
>              }
>          } else
> -            values[i] = h[i].values[0];
> +            values[i] = h.values[0];
>      }
>      /* this codec relies on wraparound instead of clipping audio */
>      if(bits) { //decode 16-bit data
> @@ -758,9 +738,6 @@ static int smka_decode_frame(AVCodecContext *avctx, void
> *data,
>  error:
>      for(i = 0; i < 4; i++) {
>          ff_free_vlc(&vlc[i]);
> -        av_free(h[i].bits);
> -        av_free(h[i].lengths);
> -        av_free(h[i].values);
>      }
>
>      return ret;
> --
> 2.20.1
>


LGTM

> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox series

Patch

diff --git a/libavcodec/smacker.c b/libavcodec/smacker.c
index 15c8856e69..e588b03820 100644
--- a/libavcodec/smacker.c
+++ b/libavcodec/smacker.c
@@ -66,11 +66,10 @@  typedef struct SmackVContext {
  * Context used for code reconstructing
  */
 typedef struct HuffContext {
-    int length;
     int current;
-    uint32_t *bits;
-    uint8_t *lengths;
-    uint8_t *values;
+    uint32_t bits[256];
+    uint8_t lengths[256];
+    uint8_t values[256];
 } HuffContext;
 
 /* common parameters used for decode_bigtree */
@@ -114,7 +113,7 @@  static int smacker_decode_tree(GetBitContext *gb, HuffContext *hc, uint32_t pref
     }
 
     if(!get_bits1(gb)){ //Leaf
-        if(hc->current >= hc->length){
+        if (hc->current >= 256) {
             av_log(NULL, AV_LOG_ERROR, "Tree size exceeded!\n");
             return AVERROR_INVALIDDATA;
         }
@@ -198,7 +197,6 @@  static int smacker_decode_bigtree(GetBitContext *gb, DBCtx *ctx, int length)
  */
 static int smacker_decode_header_tree(SmackVContext *smk, GetBitContext *gb, int **recodes, int *last, int size)
 {
-    HuffContext h[2] = { 0 };
     VLC vlc[2] = { { 0 } };
     int escapes[3];
     DBCtx ctx;
@@ -210,37 +208,30 @@  static int smacker_decode_header_tree(SmackVContext *smk, GetBitContext *gb, int
     }
 
     for (int i = 0; i < 2; i++) {
-        h[i].length  = 256;
-        h[i].current = 0;
-        h[i].bits    = av_malloc(256 * sizeof(h[i].bits[0]));
-        h[i].lengths = av_malloc(256 * sizeof(h[i].lengths[0]));
-        h[i].values  = av_malloc(256 * sizeof(h[i].values[0]));
-        if (!h[i].bits || !h[i].lengths || !h[i].values) {
-            err = AVERROR(ENOMEM);
-            goto error;
-        }
+        HuffContext h;
+        h.current = 0;
         if (!get_bits1(gb)) {
             ctx.vals[i] = 0;
             av_log(smk->avctx, AV_LOG_ERROR, "Skipping %s bytes tree\n",
                    i ? "high" : "low");
             continue;
         }
-        err = smacker_decode_tree(gb, &h[i], 0, 0);
+        err = smacker_decode_tree(gb, &h, 0, 0);
         if (err < 0)
             goto error;
         skip_bits1(gb);
-        if (h[i].current > 1) {
-            err = ff_init_vlc_sparse(&vlc[i], SMKTREE_BITS, h[i].current,
-                           INIT_VLC_DEFAULT_SIZES(h[i].lengths),
-                           INIT_VLC_DEFAULT_SIZES(h[i].bits),
-                                     INIT_VLC_DEFAULT_SIZES(h[i].values),
-                           INIT_VLC_LE);
+        if (h.current > 1) {
+            err = ff_init_vlc_sparse(&vlc[i], SMKTREE_BITS, h.current,
+                                     INIT_VLC_DEFAULT_SIZES(h.lengths),
+                                     INIT_VLC_DEFAULT_SIZES(h.bits),
+                                     INIT_VLC_DEFAULT_SIZES(h.values),
+                                     INIT_VLC_LE);
             if (err < 0) {
                 av_log(smk->avctx, AV_LOG_ERROR, "Cannot build VLC table\n");
                 goto error;
             }
         } else
-            ctx.vals[i] = h[i].values[0];
+            ctx.vals[i] = h.values[0];
     }
 
     escapes[0]  = get_bits(gb, 16);
@@ -276,9 +267,6 @@  static int smacker_decode_header_tree(SmackVContext *smk, GetBitContext *gb, int
 error:
     for (int i = 0; i < 2; i++) {
         ff_free_vlc(&vlc[i]);
-        av_free(h[i].bits);
-        av_free(h[i].lengths);
-        av_free(h[i].values);
     }
 
     return err;
@@ -603,7 +591,6 @@  static int smka_decode_frame(AVCodecContext *avctx, void *data,
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
     GetBitContext gb;
-    HuffContext h[4] = { { 0 } };
     VLC vlc[4]       = { { 0 } };
     int16_t *samples;
     uint8_t *samples8;
@@ -659,31 +646,24 @@  static int smka_decode_frame(AVCodecContext *avctx, void *data,
 
     // Initialize
     for(i = 0; i < (1 << (bits + stereo)); i++) {
-        h[i].length = 256;
-        h[i].current = 0;
-        h[i].bits    = av_malloc(256 * sizeof(h[i].bits));
-        h[i].lengths = av_malloc(256 * sizeof(h[i].lengths));
-        h[i].values  = av_malloc(256 * sizeof(h[i].values));
-        if (!h[i].bits || !h[i].lengths || !h[i].values) {
-            ret = AVERROR(ENOMEM);
-            goto error;
-        }
+        HuffContext h;
+        h.current = 0;
         skip_bits1(&gb);
-        if ((ret = smacker_decode_tree(&gb, &h[i], 0, 0)) < 0)
+        if ((ret = smacker_decode_tree(&gb, &h, 0, 0)) < 0)
             goto error;
         skip_bits1(&gb);
-        if(h[i].current > 1) {
-            ret = ff_init_vlc_sparse(&vlc[i], SMKTREE_BITS, h[i].current,
-                           INIT_VLC_DEFAULT_SIZES(h[i].lengths),
-                                     INIT_VLC_DEFAULT_SIZES(h[i].bits),
-                                     INIT_VLC_DEFAULT_SIZES(h[i].values),
+        if (h.current > 1) {
+            ret = ff_init_vlc_sparse(&vlc[i], SMKTREE_BITS, h.current,
+                                     INIT_VLC_DEFAULT_SIZES(h.lengths),
+                                     INIT_VLC_DEFAULT_SIZES(h.bits),
+                                     INIT_VLC_DEFAULT_SIZES(h.values),
                                      INIT_VLC_LE);
             if (ret < 0) {
                 av_log(avctx, AV_LOG_ERROR, "Cannot build VLC table\n");
                 goto error;
             }
         } else
-            values[i] = h[i].values[0];
+            values[i] = h.values[0];
     }
     /* this codec relies on wraparound instead of clipping audio */
     if(bits) { //decode 16-bit data
@@ -758,9 +738,6 @@  static int smka_decode_frame(AVCodecContext *avctx, void *data,
 error:
     for(i = 0; i < 4; i++) {
         ff_free_vlc(&vlc[i]);
-        av_free(h[i].bits);
-        av_free(h[i].lengths);
-        av_free(h[i].values);
     }
 
     return ret;