diff mbox series

[FFmpeg-devel,7/7] prores: use VLC LUTs

Message ID 20230908081508.510-7-christophe.gisquet@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/7] proresdec2: port and fix for cached reader | expand

Checks

Context Check Description
andriy/make_x86 fail Make failed

Commit Message

Christophe Gisquet Sept. 8, 2023, 8:15 a.m. UTC
One indirection less, around 1% speedup.
---
 libavcodec/proresdec2.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

Comments

Andreas Rheinhardt Sept. 8, 2023, 9:20 a.m. UTC | #1
Christophe Gisquet:
> One indirection less, around 1% speedup.
> ---
>  libavcodec/proresdec2.c | 16 +++++++++-------
>  1 file changed, 9 insertions(+), 7 deletions(-)
> 
> diff --git a/libavcodec/proresdec2.c b/libavcodec/proresdec2.c
> index b20021c622..85f81d92d3 100644
> --- a/libavcodec/proresdec2.c
> +++ b/libavcodec/proresdec2.c
> @@ -561,12 +561,18 @@ static av_always_inline int decode_dc_coeffs(GetBitContext *gb, int16_t *out,
>          prev_dc += (((code + 1) >> 1) ^ sign) - sign;
>          out[0] = prev_dc;
>      }
> -    return 0;
> +    return 0;	

You are adding trailing whitespace.

>  }
>  
> +#include "libavutil/timer.h"

You really need to look over your patches once more before you send
them. Both of these changes are obviously not ok to commit.

> +
> +
>  static av_always_inline int decode_ac_coeffs(AVCodecContext *avctx, GetBitContext *gb,
>                                               int16_t *out, int blocks_per_slice)
>  {
> +	static VLC* lvl_vlc[9] = { &ac_vlc[0], &ac_vlc[1], &ac_vlc[2], &ac_vlc[3], &ac_vlc[0], &ac_vlc[4], &ac_vlc[4], &ac_vlc[4], &ac_vlc[4], };
> +	static VLC* run_vlc[15] = { &ac_vlc[3], &ac_vlc[3], &ac_vlc[2], &ac_vlc[2], &ac_vlc[0], &ac_vlc[5], &ac_vlc[5], &ac_vlc[5], &ac_vlc[5],
> +	                            &ac_vlc[4], &ac_vlc[4], &ac_vlc[4], &ac_vlc[4], &ac_vlc[4], &ac_vlc[4], };

This still incurs an unnecessary indirection. The LUT should not point
to the VLCs, but rather to the VLC tables (as this is the only thing
needed from them later on, given that the number of bits is a compile-time
constant). The LUT should be initialized when the VLCs are initialized.

In fact, this is so common that I always pondered adding an explicit
function for it. Will probably do so soon.

(Apart from that: This could be "static const VLC *const run_vlc[15]".)

>      const ProresContext *ctx = avctx->priv_data;
>      int block_mask, sign;
>      unsigned pos, run, level;
> @@ -585,9 +591,7 @@ static av_always_inline int decode_ac_coeffs(AVCodecContext *avctx, GetBitContex
>              break;
>  
>          if (run < 15) {
> -            static const uint8_t ctx_to_tbl[] = { 3, 3, 2, 2, 0, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4 };
> -            const VLC* tbl = ac_vlc + ctx_to_tbl[run];
> -            run = get_vlc2(gb, tbl->table, PRORES_LEV_BITS, 3);
> +            run = get_vlc2(gb, run_vlc[run]->table, PRORES_LEV_BITS, 3);
>          } else {
>              unsigned int bits = 21 - 2*av_log2(show_bits(gb, 10));
>              run = READ_BITS(gb, bits) - 4; // up to 17 bits
> @@ -599,9 +603,7 @@ static av_always_inline int decode_ac_coeffs(AVCodecContext *avctx, GetBitContex
>          }
>  
>          if (level < 9) {
> -            static const uint8_t ctx_to_tbl[] = { 0, 1, 2, 3, 0, 4, 4, 4, 4 };
> -            const VLC* tbl = ac_vlc + ctx_to_tbl[level];
> -            level = 1+get_vlc2(gb, tbl->table, PRORES_LEV_BITS, 3);
> +            level = 1+get_vlc2(gb, lvl_vlc[level]->table, PRORES_LEV_BITS, 3);

Seems like these VLCs should be offset by 1 to avoid the "1+".

>          } else {
>              unsigned int bits = 25 - 2*av_log2(show_bits(gb, 12));
>              level = READ_BITS(gb, bits) - 4 + 1; // up to 21 bits
Christophe Gisquet Sept. 8, 2023, 9:58 a.m. UTC | #2
Le ven. 8 sept. 2023 à 11:19, Andreas Rheinhardt
<andreas.rheinhardt@outlook.com> a écrit :
> > -    return 0;
> > +    return 0;
>
> You are adding trailing whitespace.

Sorry, will fix. I had to do some of this work on a misconfigured machine.

> > +#include "libavutil/timer.h"
>
> You really need to look over your patches once more before you send
> them. Both of these changes are obviously not ok to commit.

I know the drill. Again, I am trying my best to help move forward a
situation that had been rotting for 6 years.

> This still incurs an unnecessary indirection. The LUT should not point
> to the VLC's, but rather to the VLC tables (as this is the only thing
> needed from them lateron given that the number of bits is a compile-time
> constant. The LUT should be initialized when the VLCs are initialized.

You're right, and by the same logic as in my commit message, that should
yield further savings.

> Seems like these VLCs should be offset by 1 to avoid the "1+".

That's what I did in a previous commit, but that was before I could
share the tables. I didn't consider creating 5 more tables for this
beneficial.
diff mbox series

Patch

diff --git a/libavcodec/proresdec2.c b/libavcodec/proresdec2.c
index b20021c622..85f81d92d3 100644
--- a/libavcodec/proresdec2.c
+++ b/libavcodec/proresdec2.c
@@ -561,12 +561,18 @@  static av_always_inline int decode_dc_coeffs(GetBitContext *gb, int16_t *out,
         prev_dc += (((code + 1) >> 1) ^ sign) - sign;
         out[0] = prev_dc;
     }
-    return 0;
+    return 0;	
 }
 
+#include "libavutil/timer.h"
+
+
 static av_always_inline int decode_ac_coeffs(AVCodecContext *avctx, GetBitContext *gb,
                                              int16_t *out, int blocks_per_slice)
 {
+	static VLC* lvl_vlc[9] = { &ac_vlc[0], &ac_vlc[1], &ac_vlc[2], &ac_vlc[3], &ac_vlc[0], &ac_vlc[4], &ac_vlc[4], &ac_vlc[4], &ac_vlc[4], };
+	static VLC* run_vlc[15] = { &ac_vlc[3], &ac_vlc[3], &ac_vlc[2], &ac_vlc[2], &ac_vlc[0], &ac_vlc[5], &ac_vlc[5], &ac_vlc[5], &ac_vlc[5],
+	                            &ac_vlc[4], &ac_vlc[4], &ac_vlc[4], &ac_vlc[4], &ac_vlc[4], &ac_vlc[4], };
     const ProresContext *ctx = avctx->priv_data;
     int block_mask, sign;
     unsigned pos, run, level;
@@ -585,9 +591,7 @@  static av_always_inline int decode_ac_coeffs(AVCodecContext *avctx, GetBitContex
             break;
 
         if (run < 15) {
-            static const uint8_t ctx_to_tbl[] = { 3, 3, 2, 2, 0, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4 };
-            const VLC* tbl = ac_vlc + ctx_to_tbl[run];
-            run = get_vlc2(gb, tbl->table, PRORES_LEV_BITS, 3);
+            run = get_vlc2(gb, run_vlc[run]->table, PRORES_LEV_BITS, 3);
         } else {
             unsigned int bits = 21 - 2*av_log2(show_bits(gb, 10));
             run = READ_BITS(gb, bits) - 4; // up to 17 bits
@@ -599,9 +603,7 @@  static av_always_inline int decode_ac_coeffs(AVCodecContext *avctx, GetBitContex
         }
 
         if (level < 9) {
-            static const uint8_t ctx_to_tbl[] = { 0, 1, 2, 3, 0, 4, 4, 4, 4 };
-            const VLC* tbl = ac_vlc + ctx_to_tbl[level];
-            level = 1+get_vlc2(gb, tbl->table, PRORES_LEV_BITS, 3);
+            level = 1+get_vlc2(gb, lvl_vlc[level]->table, PRORES_LEV_BITS, 3);
         } else {
             unsigned int bits = 25 - 2*av_log2(show_bits(gb, 12));
             level = READ_BITS(gb, bits) - 4 + 1; // up to 21 bits