diff mbox series

[FFmpeg-devel,15/25] avcodec/utvideodec: Avoid qsort when creating Huffman tables

Message ID 20200926102804.228089-15-andreas.rheinhardt@gmail.com
State Accepted
Commit 341914495e5c2f60bc920b0c6f660e5948a47f5a
Headers show
Series [FFmpeg-devel,01/25] avcodec/photocd: Simplify parsing Huffman tables a bit | expand

Checks

Context Check Description
andriy/default pending
andriy/make success Make finished
andriy/make_fate success Make fate finished

Commit Message

Andreas Rheinhardt Sept. 26, 2020, 10:27 a.m. UTC
The Ut video format uses Huffman trees which are only implicitly coded
in the bitstream: Only the lengths of the codes are coded, the rest has
to be inferred by the decoder according to the rule that the longer
codes are to the left of shorter codes in the tree and on each level the
symbols are descending from left to right.

Because longer codes are to the left of shorter codes, one needs to know
how many non-leaf nodes there are on each level in order to know the
code of the next left-most leaf (which belongs to the highest symbol on
that level). The current code does this by sorting the entries to be
ascending according to length and (for entries with the same length)
ascending according to their symbols. This array is then traversed in
reverse order, so that the lowest level is dealt with first, so that the
number of non-leaf nodes of the next higher level is known when
processing said level.

But this can also be calculated without sorting: Simply count how many
leaf nodes there are on each level. Then one can calculate the number of
non-leaf nodes on each level iteratively from the lowest level upwards:
It is just half the number of nodes of the level below.

This improves performance: For the sample from ticket #4044 the amount
of decicycles for one call to build_huff() decreased from 1055489 to
446310 for Clang 10 and from 1080306 to 535155 for GCC 9.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
---
 libavcodec/utvideo.c    |  6 -----
 libavcodec/utvideo.h    |  1 -
 libavcodec/utvideodec.c | 53 ++++++++++++++++++++---------------------
 3 files changed, 26 insertions(+), 34 deletions(-)

Comments

Paul B Mahol Sept. 26, 2020, 11 a.m. UTC | #1
On Sat, Sep 26, 2020 at 12:27:54PM +0200, Andreas Rheinhardt wrote:
> The Ut video format uses Huffman trees which are only implicitly coded
> in the bitstream: Only the lengths of the codes are coded, the rest has
> to be inferred by the decoder according to the rule that the longer
> codes are to the left of shorter codes in the tree and on each level the
> symbols are descending from left to right.
> 
> Because longer codes are to the left of shorter codes, one needs to know
> how many non-leaf nodes there are on each level in order to know the
> code of the next left-most leaf (which belongs to the highest symbol on
> that level). The current code does this by sorting the entries to be
> ascending according to length and (for entries with the same length)
> ascending according to their symbols. This array is then traversed in
> reverse order, so that the lowest level is dealt with first, so that the
> number of non-leaf nodes of the next higher level is known when
> processing said level.
> 
> But this can also be calculated without sorting: Simply count how many
> leaf nodes there are on each level. Then one can calculate the number of
> non-leaf nodes on each level iteratively from the lowest level upwards:
> It is just half the number of nodes of the level below.
> 
> This improves performance: For the sample from ticket #4044 the amount
> of decicycles for one call to build_huff() decreased from 1055489 to
> 446310 for Clang 10 and from 1080306 to 535155 for GCC 9.
> 
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
> ---
>  libavcodec/utvideo.c    |  6 -----
>  libavcodec/utvideo.h    |  1 -
>  libavcodec/utvideodec.c | 53 ++++++++++++++++++++---------------------
>  3 files changed, 26 insertions(+), 34 deletions(-)
>

ok, i guess fate passes.

> diff --git a/libavcodec/utvideo.c b/libavcodec/utvideo.c
> index 5828d5ec0d..b14e56e0d8 100644
> --- a/libavcodec/utvideo.c
> +++ b/libavcodec/utvideo.c
> @@ -39,9 +39,3 @@ int ff_ut_huff_cmp_len(const void *a, const void *b)
>      const HuffEntry *aa = a, *bb = b;
>      return (aa->len - bb->len)*256 + aa->sym - bb->sym;
>  }
> -
> -int ff_ut10_huff_cmp_len(const void *a, const void *b)
> -{
> -    const HuffEntry *aa = a, *bb = b;
> -    return (aa->len - bb->len)*1024 + aa->sym - bb->sym;
> -}
> diff --git a/libavcodec/utvideo.h b/libavcodec/utvideo.h
> index cf0bb28c44..2975f287a7 100644
> --- a/libavcodec/utvideo.h
> +++ b/libavcodec/utvideo.h
> @@ -99,6 +99,5 @@ typedef struct HuffEntry {
>  
>  /* Compare huffman tree nodes */
>  int ff_ut_huff_cmp_len(const void *a, const void *b);
> -int ff_ut10_huff_cmp_len(const void *a, const void *b);
>  
>  #endif /* AVCODEC_UTVIDEO_H */
> diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c
> index f014e90606..8b47c14d98 100644
> --- a/libavcodec/utvideodec.c
> +++ b/libavcodec/utvideodec.c
> @@ -43,45 +43,44 @@
>  static int build_huff(const uint8_t *src, VLC *vlc, int *fsym, unsigned nb_elems)
>  {
>      int i;
> -    HuffEntry he[1024];
> -    int last;
>      uint32_t codes[1024];
>      uint8_t bits[1024];
> -    uint16_t syms[1024];
> -    uint32_t code;
> +    uint16_t codes_count[33] = { 0 };
>  
>      *fsym = -1;
>      for (i = 0; i < nb_elems; i++) {
> -        he[i].sym = i;
> -        he[i].len = *src++;
> -    }
> -    qsort(he, nb_elems, sizeof(*he), ff_ut10_huff_cmp_len);
> +        if (src[i] == 0) {
> +            *fsym = i;
> +            return 0;
> +        } else if (src[i] == 255) {
> +            bits[i] = 0;
> +        } else if (src[i] <= 32) {
> +            bits[i] = src[i];
> +        } else
> +            return AVERROR_INVALIDDATA;
>  
> -    if (!he[0].len) {
> -        *fsym = he[0].sym;
> -        return 0;
> +        codes_count[bits[i]]++;
>      }
> +    if (codes_count[0] == nb_elems)
> +        return AVERROR_INVALIDDATA;
>  
> -    last = nb_elems - 1;
> -    while (he[last].len == 255 && last)
> -        last--;
> -
> -    if (he[last].len > 32) {
> -        return -1;
> +    for (unsigned i = 32, nb_codes = 0; i > 0; i--) {
> +        uint16_t curr = codes_count[i];   // # of leafs of length i
> +        codes_count[i] = nb_codes / 2;    // # of non-leaf nodes on level i
> +        nb_codes = codes_count[i] + curr; // # of nodes on level i
>      }
>  
> -    code = 0;
> -    for (i = last; i >= 0; i--) {
> -        codes[i] = code >> (32 - he[i].len);
> -        bits[i]  = he[i].len;
> -        syms[i]  = he[i].sym;
> -        code += 0x80000000u >> (he[i].len - 1);
> +    for (unsigned i = nb_elems; i-- > 0;) {
> +        if (!bits[i]) {
> +            codes[i] = 0;
> +            continue;
> +        }
> +        codes[i] = codes_count[bits[i]]++;
>      }
>  #define VLC_BITS 11
> -    return ff_init_vlc_sparse(vlc, VLC_BITS, last + 1,
> -                              bits,  sizeof(*bits),  sizeof(*bits),
> -                              codes, sizeof(*codes), sizeof(*codes),
> -                              syms,  sizeof(*syms),  sizeof(*syms), 0);
> +    return init_vlc(vlc, VLC_BITS, nb_elems,
> +                    bits,  sizeof(*bits),  sizeof(*bits),
> +                    codes, sizeof(*codes), sizeof(*codes), 0);
>  }
>  
>  static int decode_plane10(UtvideoContext *c, int plane_no,
> -- 
> 2.25.1
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox series

Patch

diff --git a/libavcodec/utvideo.c b/libavcodec/utvideo.c
index 5828d5ec0d..b14e56e0d8 100644
--- a/libavcodec/utvideo.c
+++ b/libavcodec/utvideo.c
@@ -39,9 +39,3 @@  int ff_ut_huff_cmp_len(const void *a, const void *b)
     const HuffEntry *aa = a, *bb = b;
     return (aa->len - bb->len)*256 + aa->sym - bb->sym;
 }
-
-int ff_ut10_huff_cmp_len(const void *a, const void *b)
-{
-    const HuffEntry *aa = a, *bb = b;
-    return (aa->len - bb->len)*1024 + aa->sym - bb->sym;
-}
diff --git a/libavcodec/utvideo.h b/libavcodec/utvideo.h
index cf0bb28c44..2975f287a7 100644
--- a/libavcodec/utvideo.h
+++ b/libavcodec/utvideo.h
@@ -99,6 +99,5 @@  typedef struct HuffEntry {
 
 /* Compare huffman tree nodes */
 int ff_ut_huff_cmp_len(const void *a, const void *b);
-int ff_ut10_huff_cmp_len(const void *a, const void *b);
 
 #endif /* AVCODEC_UTVIDEO_H */
diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c
index f014e90606..8b47c14d98 100644
--- a/libavcodec/utvideodec.c
+++ b/libavcodec/utvideodec.c
@@ -43,45 +43,44 @@ 
 static int build_huff(const uint8_t *src, VLC *vlc, int *fsym, unsigned nb_elems)
 {
     int i;
-    HuffEntry he[1024];
-    int last;
     uint32_t codes[1024];
     uint8_t bits[1024];
-    uint16_t syms[1024];
-    uint32_t code;
+    uint16_t codes_count[33] = { 0 };
 
     *fsym = -1;
     for (i = 0; i < nb_elems; i++) {
-        he[i].sym = i;
-        he[i].len = *src++;
-    }
-    qsort(he, nb_elems, sizeof(*he), ff_ut10_huff_cmp_len);
+        if (src[i] == 0) {
+            *fsym = i;
+            return 0;
+        } else if (src[i] == 255) {
+            bits[i] = 0;
+        } else if (src[i] <= 32) {
+            bits[i] = src[i];
+        } else
+            return AVERROR_INVALIDDATA;
 
-    if (!he[0].len) {
-        *fsym = he[0].sym;
-        return 0;
+        codes_count[bits[i]]++;
     }
+    if (codes_count[0] == nb_elems)
+        return AVERROR_INVALIDDATA;
 
-    last = nb_elems - 1;
-    while (he[last].len == 255 && last)
-        last--;
-
-    if (he[last].len > 32) {
-        return -1;
+    for (unsigned i = 32, nb_codes = 0; i > 0; i--) {
+        uint16_t curr = codes_count[i];   // # of leafs of length i
+        codes_count[i] = nb_codes / 2;    // # of non-leaf nodes on level i
+        nb_codes = codes_count[i] + curr; // # of nodes on level i
     }
 
-    code = 0;
-    for (i = last; i >= 0; i--) {
-        codes[i] = code >> (32 - he[i].len);
-        bits[i]  = he[i].len;
-        syms[i]  = he[i].sym;
-        code += 0x80000000u >> (he[i].len - 1);
+    for (unsigned i = nb_elems; i-- > 0;) {
+        if (!bits[i]) {
+            codes[i] = 0;
+            continue;
+        }
+        codes[i] = codes_count[bits[i]]++;
     }
 #define VLC_BITS 11
-    return ff_init_vlc_sparse(vlc, VLC_BITS, last + 1,
-                              bits,  sizeof(*bits),  sizeof(*bits),
-                              codes, sizeof(*codes), sizeof(*codes),
-                              syms,  sizeof(*syms),  sizeof(*syms), 0);
+    return init_vlc(vlc, VLC_BITS, nb_elems,
+                    bits,  sizeof(*bits),  sizeof(*bits),
+                    codes, sizeof(*codes), sizeof(*codes), 0);
 }
 
 static int decode_plane10(UtvideoContext *c, int plane_no,