diff mbox series

[FFmpeg-devel,2/2] lavc/dxvenc: migrate DXT1 encoder to lavu hashtable

Message ID 20240205052724.26862-2-connorbworley@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/2] lavu/hashtable: create generic robin hood hash table | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Connor Worley Feb. 5, 2024, 5:27 a.m. UTC
Offers a modest performance gain due to the switch from naive linear
probling to robin hood.

Signed-off-by: Connor Worley <connorbworley@gmail.com>
---
 libavcodec/dxvenc.c | 121 +++++++++++++-------------------------------
 1 file changed, 35 insertions(+), 86 deletions(-)

Comments

Andreas Rheinhardt Feb. 5, 2024, 12:04 p.m. UTC | #1
Connor Worley:
> Offers a modest performance gain due to the switch from naive linear
> probling to robin hood.

How much would one gain if the hash function knew that key_size and
val_size are four?

> 
> Signed-off-by: Connor Worley <connorbworley@gmail.com>
> ---
>  libavcodec/dxvenc.c | 121 +++++++++++++-------------------------------
>  1 file changed, 35 insertions(+), 86 deletions(-)
> 
> diff --git a/libavcodec/dxvenc.c b/libavcodec/dxvenc.c
> index b274175689..e17b3b2c36 100644
> --- a/libavcodec/dxvenc.c
> +++ b/libavcodec/dxvenc.c
> @@ -21,7 +21,7 @@
>  
>  #include <stdint.h>
>  
> -#include "libavutil/crc.h"
> +#include "libavutil/hashtable.h"
>  #include "libavutil/imgutils.h"
>  #include "libavutil/opt.h"
>  
> @@ -44,69 +44,6 @@ enum DXVTextureFormat {
>      DXV_FMT_DXT1 = MKBETAG('D', 'X', 'T', '1'),
>  };
>  
> -typedef struct HTEntry {
> -    uint32_t key;
> -    uint32_t pos;
> -} HTEntry;
> -
> -static void ht_init(HTEntry *ht)
> -{
> -    for (size_t i = 0; i < LOOKBACK_HT_ELEMS; i++) {
> -        ht[i].pos = -1;
> -    }
> -}
> -
> -static uint32_t ht_lookup_and_upsert(HTEntry *ht, const AVCRC *hash_ctx,
> -                                    uint32_t key, uint32_t pos)
> -{
> -    uint32_t ret = -1;
> -    size_t hash = av_crc(hash_ctx, 0, (uint8_t*)&key, 4) % LOOKBACK_HT_ELEMS;
> -    for (size_t i = hash; i < hash + LOOKBACK_HT_ELEMS; i++) {
> -        size_t wrapped_index = i % LOOKBACK_HT_ELEMS;
> -        HTEntry *entry = &ht[wrapped_index];
> -        if (entry->key == key || entry->pos == -1) {
> -            ret = entry->pos;
> -            entry->key = key;
> -            entry->pos = pos;
> -            break;
> -        }
> -    }
> -    return ret;
> -}
> -
> -static void ht_delete(HTEntry *ht, const AVCRC *hash_ctx,
> -                      uint32_t key, uint32_t pos)
> -{
> -    HTEntry *removed_entry = NULL;
> -    size_t removed_hash;
> -    size_t hash = av_crc(hash_ctx, 0, (uint8_t*)&key, 4) % LOOKBACK_HT_ELEMS;
> -
> -    for (size_t i = hash; i < hash + LOOKBACK_HT_ELEMS; i++) {
> -        size_t wrapped_index = i % LOOKBACK_HT_ELEMS;
> -        HTEntry *entry = &ht[wrapped_index];
> -        if (entry->pos == -1)
> -            return;
> -        if (removed_entry) {
> -            size_t candidate_hash = av_crc(hash_ctx, 0, (uint8_t*)&entry->key, 4) % LOOKBACK_HT_ELEMS;
> -            if ((wrapped_index > removed_hash && (candidate_hash <= removed_hash || candidate_hash > wrapped_index)) ||
> -                (wrapped_index < removed_hash && (candidate_hash <= removed_hash && candidate_hash > wrapped_index))) {
> -                *removed_entry = *entry;
> -                entry->pos = -1;
> -                removed_entry = entry;
> -                removed_hash = wrapped_index;
> -            }
> -        } else if (entry->key == key) {
> -            if (entry->pos <= pos) {
> -                entry->pos = -1;
> -                removed_entry = entry;
> -                removed_hash = wrapped_index;
> -            } else {
> -                return;
> -            }
> -        }
> -    }
> -}
> -
>  typedef struct DXVEncContext {
>      AVClass *class;
>  
> @@ -123,10 +60,8 @@ typedef struct DXVEncContext {
>      enum DXVTextureFormat tex_fmt;
>      int (*compress_tex)(AVCodecContext *avctx);
>  
> -    const AVCRC *crc_ctx;
> -
> -    HTEntry color_lookback_ht[LOOKBACK_HT_ELEMS];
> -    HTEntry lut_lookback_ht[LOOKBACK_HT_ELEMS];
> +    AVHashtableContext color_ht;
> +    AVHashtableContext lut_ht;
>  } DXVEncContext;
>  
>  /* Converts an index offset value to a 2-bit opcode and pushes it to a stream.
> @@ -161,27 +96,32 @@ static int dxv_compress_dxt1(AVCodecContext *avctx)
>      DXVEncContext *ctx = avctx->priv_data;
>      PutByteContext *pbc = &ctx->pbc;
>      uint32_t *value;
> -    uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, pos = 2, op = 0;
> +    uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, pos = 0, op = 0;
>  
> -    ht_init(ctx->color_lookback_ht);
> -    ht_init(ctx->lut_lookback_ht);
> +    av_hashtable_clear(&ctx->color_ht);
> +    av_hashtable_clear(&ctx->lut_ht);
>  
>      bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data));
> +    av_hashtable_set(&ctx->color_ht, ctx->tex_data, &pos);
> +    pos++;
>      bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data + 4));
> -
> -    ht_lookup_and_upsert(ctx->color_lookback_ht, ctx->crc_ctx, AV_RL32(ctx->tex_data), 0);
> -    ht_lookup_and_upsert(ctx->lut_lookback_ht, ctx->crc_ctx, AV_RL32(ctx->tex_data + 4), 1);
> +    av_hashtable_set(&ctx->lut_ht, ctx->tex_data + 4, &pos);
> +    pos++;
>  
>      while (pos + 2 <= ctx->tex_size / 4) {
>          idx = 0;
> +        color_idx = 0;
> +        lut_idx = 0;
>  
>          color = AV_RL32(ctx->tex_data + pos * 4);
> -        prev_pos = ht_lookup_and_upsert(ctx->color_lookback_ht, ctx->crc_ctx, color, pos);
> -        color_idx = prev_pos != -1 ? pos - prev_pos : 0;
> +        if (av_hashtable_get(&ctx->color_ht, &color, &prev_pos))
> +            color_idx = pos - prev_pos;
> +        av_hashtable_set(&ctx->color_ht, &color, &pos);
> +
>          if (pos >= LOOKBACK_WORDS) {
>              uint32_t old_pos = pos - LOOKBACK_WORDS;
> -            uint32_t old_color = AV_RL32(ctx->tex_data + old_pos * 4);
> -            ht_delete(ctx->color_lookback_ht, ctx->crc_ctx, old_color, old_pos);
> +            if (av_hashtable_get(&ctx->color_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
> +                av_hashtable_delete(&ctx->color_ht, ctx->tex_data + old_pos * 4);
>          }
>          pos++;
>  
> @@ -190,13 +130,14 @@ static int dxv_compress_dxt1(AVCodecContext *avctx)
>              idx = color_idx;
>          } else {
>              idx = 0;
> -            prev_pos = ht_lookup_and_upsert(ctx->lut_lookback_ht, ctx->crc_ctx, lut, pos);
> -            lut_idx = prev_pos != -1 ? pos - prev_pos : 0;
> +            if (av_hashtable_get(&ctx->lut_ht, &lut, &prev_pos))
> +                lut_idx = pos - prev_pos;
> +            av_hashtable_set(&ctx->lut_ht, &lut, &pos);
>          }
>          if (pos >= LOOKBACK_WORDS) {
>              uint32_t old_pos = pos - LOOKBACK_WORDS;
> -            uint32_t old_lut = AV_RL32(ctx->tex_data + old_pos * 4);
> -            ht_delete(ctx->lut_lookback_ht, ctx->crc_ctx, old_lut, old_pos);
> +            if (av_hashtable_get(&ctx->lut_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
> +                av_hashtable_delete(&ctx->lut_ht, ctx->tex_data + old_pos * 4);
>          }
>          pos++;
>  
> @@ -298,10 +239,15 @@ static av_cold int dxv_init(AVCodecContext *avctx)
>          return AVERROR(ENOMEM);
>      }
>  
> -    ctx->crc_ctx = av_crc_get_table(AV_CRC_32_IEEE);
> -    if (!ctx->crc_ctx) {
> -        av_log(avctx, AV_LOG_ERROR, "Could not initialize CRC table.\n");
> -        return AVERROR_BUG;
> +    ret = av_hashtable_init(&ctx->color_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Could not initialize color lookback table.\n");
> +        return ret;
> +    }
> +    ret = av_hashtable_init(&ctx->lut_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Could not initialize LUT lookback table.\n");

These av_logs are pointless.

> +        return ret;
>      }
>  
>      return 0;
> @@ -313,6 +259,9 @@ static av_cold int dxv_close(AVCodecContext *avctx)
>  
>      av_freep(&ctx->tex_data);
>  
> +    av_hashtable_destroy(&ctx->color_ht);
> +    av_hashtable_destroy(&ctx->lut_ht);
> +
>      return 0;
>  }
>
Connor Worley Feb. 5, 2024, 7:57 p.m. UTC | #2
>
> How much would one gain if the hash function knew that key_size and
> val_size are four?
>

That yields a nice 10-20% speedup on my machine. Are you thinking of macros
to parametrize key/val size, or possibly optimized versions for common
sizes?
Andreas Rheinhardt Feb. 5, 2024, 8:08 p.m. UTC | #3
Connor Worley:
>>
>> How much would one gain if the hash function knew that key_size and
>> val_size are four?
>>
> 
> That yields a nice 10-20% speedup on my machine. Are you thinking of macros
> to parametrize key/val size, or possibly optimized versions for common
> sizes?

I am thinking about it not being public at all; as long as we have only
one user...

- Andreas

PS: Is the speedup only for the hashing part or for overall decoding? Is
the comparison the current version of your patch or something that
already has my comments incorporated?
Connor Worley Feb. 5, 2024, 8:46 p.m. UTC | #4
On Mon, Feb 5, 2024 at 12:06 PM Andreas Rheinhardt <
andreas.rheinhardt@outlook.com> wrote:

> Connor Worley:
> >>
> >> How much would one gain if the hash function knew that key_size and
> >> val_size are four?
> >>
> >
> > That yields a nice 10-20% speedup on my machine. Are you thinking of
> macros
> > to parametrize key/val size, or possibly optimized versions for common
> > sizes?
>
> I am thinking about it not being public at all; as long as we have only
> one user...
>

 That's fair. I will need some variants for future DXV work, but they can
also be private if this doesn't seem generally useful.

PS: Is the speedup only for the hashing part or for overall decoding? Is
> the comparison the current version of your patch or something that
> already has my comments incorporated?
>

Overall encoding -- current version of my patch with ctx->key/val_size vs
hardcoded sizes.
diff mbox series

Patch

diff --git a/libavcodec/dxvenc.c b/libavcodec/dxvenc.c
index b274175689..e17b3b2c36 100644
--- a/libavcodec/dxvenc.c
+++ b/libavcodec/dxvenc.c
@@ -21,7 +21,7 @@ 
 
 #include <stdint.h>
 
-#include "libavutil/crc.h"
+#include "libavutil/hashtable.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
 
@@ -44,69 +44,6 @@  enum DXVTextureFormat {
     DXV_FMT_DXT1 = MKBETAG('D', 'X', 'T', '1'),
 };
 
-typedef struct HTEntry {
-    uint32_t key;
-    uint32_t pos;
-} HTEntry;
-
-static void ht_init(HTEntry *ht)
-{
-    for (size_t i = 0; i < LOOKBACK_HT_ELEMS; i++) {
-        ht[i].pos = -1;
-    }
-}
-
-static uint32_t ht_lookup_and_upsert(HTEntry *ht, const AVCRC *hash_ctx,
-                                    uint32_t key, uint32_t pos)
-{
-    uint32_t ret = -1;
-    size_t hash = av_crc(hash_ctx, 0, (uint8_t*)&key, 4) % LOOKBACK_HT_ELEMS;
-    for (size_t i = hash; i < hash + LOOKBACK_HT_ELEMS; i++) {
-        size_t wrapped_index = i % LOOKBACK_HT_ELEMS;
-        HTEntry *entry = &ht[wrapped_index];
-        if (entry->key == key || entry->pos == -1) {
-            ret = entry->pos;
-            entry->key = key;
-            entry->pos = pos;
-            break;
-        }
-    }
-    return ret;
-}
-
-static void ht_delete(HTEntry *ht, const AVCRC *hash_ctx,
-                      uint32_t key, uint32_t pos)
-{
-    HTEntry *removed_entry = NULL;
-    size_t removed_hash;
-    size_t hash = av_crc(hash_ctx, 0, (uint8_t*)&key, 4) % LOOKBACK_HT_ELEMS;
-
-    for (size_t i = hash; i < hash + LOOKBACK_HT_ELEMS; i++) {
-        size_t wrapped_index = i % LOOKBACK_HT_ELEMS;
-        HTEntry *entry = &ht[wrapped_index];
-        if (entry->pos == -1)
-            return;
-        if (removed_entry) {
-            size_t candidate_hash = av_crc(hash_ctx, 0, (uint8_t*)&entry->key, 4) % LOOKBACK_HT_ELEMS;
-            if ((wrapped_index > removed_hash && (candidate_hash <= removed_hash || candidate_hash > wrapped_index)) ||
-                (wrapped_index < removed_hash && (candidate_hash <= removed_hash && candidate_hash > wrapped_index))) {
-                *removed_entry = *entry;
-                entry->pos = -1;
-                removed_entry = entry;
-                removed_hash = wrapped_index;
-            }
-        } else if (entry->key == key) {
-            if (entry->pos <= pos) {
-                entry->pos = -1;
-                removed_entry = entry;
-                removed_hash = wrapped_index;
-            } else {
-                return;
-            }
-        }
-    }
-}
-
 typedef struct DXVEncContext {
     AVClass *class;
 
@@ -123,10 +60,8 @@  typedef struct DXVEncContext {
     enum DXVTextureFormat tex_fmt;
     int (*compress_tex)(AVCodecContext *avctx);
 
-    const AVCRC *crc_ctx;
-
-    HTEntry color_lookback_ht[LOOKBACK_HT_ELEMS];
-    HTEntry lut_lookback_ht[LOOKBACK_HT_ELEMS];
+    AVHashtableContext color_ht;
+    AVHashtableContext lut_ht;
 } DXVEncContext;
 
 /* Converts an index offset value to a 2-bit opcode and pushes it to a stream.
@@ -161,27 +96,32 @@  static int dxv_compress_dxt1(AVCodecContext *avctx)
     DXVEncContext *ctx = avctx->priv_data;
     PutByteContext *pbc = &ctx->pbc;
     uint32_t *value;
-    uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, pos = 2, op = 0;
+    uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, pos = 0, op = 0;
 
-    ht_init(ctx->color_lookback_ht);
-    ht_init(ctx->lut_lookback_ht);
+    av_hashtable_clear(&ctx->color_ht);
+    av_hashtable_clear(&ctx->lut_ht);
 
     bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data));
+    av_hashtable_set(&ctx->color_ht, ctx->tex_data, &pos);
+    pos++;
     bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data + 4));
-
-    ht_lookup_and_upsert(ctx->color_lookback_ht, ctx->crc_ctx, AV_RL32(ctx->tex_data), 0);
-    ht_lookup_and_upsert(ctx->lut_lookback_ht, ctx->crc_ctx, AV_RL32(ctx->tex_data + 4), 1);
+    av_hashtable_set(&ctx->lut_ht, ctx->tex_data + 4, &pos);
+    pos++;
 
     while (pos + 2 <= ctx->tex_size / 4) {
         idx = 0;
+        color_idx = 0;
+        lut_idx = 0;
 
         color = AV_RL32(ctx->tex_data + pos * 4);
-        prev_pos = ht_lookup_and_upsert(ctx->color_lookback_ht, ctx->crc_ctx, color, pos);
-        color_idx = prev_pos != -1 ? pos - prev_pos : 0;
+        if (av_hashtable_get(&ctx->color_ht, &color, &prev_pos))
+            color_idx = pos - prev_pos;
+        av_hashtable_set(&ctx->color_ht, &color, &pos);
+
         if (pos >= LOOKBACK_WORDS) {
             uint32_t old_pos = pos - LOOKBACK_WORDS;
-            uint32_t old_color = AV_RL32(ctx->tex_data + old_pos * 4);
-            ht_delete(ctx->color_lookback_ht, ctx->crc_ctx, old_color, old_pos);
+            if (av_hashtable_get(&ctx->color_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
+                av_hashtable_delete(&ctx->color_ht, ctx->tex_data + old_pos * 4);
         }
         pos++;
 
@@ -190,13 +130,14 @@  static int dxv_compress_dxt1(AVCodecContext *avctx)
             idx = color_idx;
         } else {
             idx = 0;
-            prev_pos = ht_lookup_and_upsert(ctx->lut_lookback_ht, ctx->crc_ctx, lut, pos);
-            lut_idx = prev_pos != -1 ? pos - prev_pos : 0;
+            if (av_hashtable_get(&ctx->lut_ht, &lut, &prev_pos))
+                lut_idx = pos - prev_pos;
+            av_hashtable_set(&ctx->lut_ht, &lut, &pos);
         }
         if (pos >= LOOKBACK_WORDS) {
             uint32_t old_pos = pos - LOOKBACK_WORDS;
-            uint32_t old_lut = AV_RL32(ctx->tex_data + old_pos * 4);
-            ht_delete(ctx->lut_lookback_ht, ctx->crc_ctx, old_lut, old_pos);
+            if (av_hashtable_get(&ctx->lut_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
+                av_hashtable_delete(&ctx->lut_ht, ctx->tex_data + old_pos * 4);
         }
         pos++;
 
@@ -298,10 +239,15 @@  static av_cold int dxv_init(AVCodecContext *avctx)
         return AVERROR(ENOMEM);
     }
 
-    ctx->crc_ctx = av_crc_get_table(AV_CRC_32_IEEE);
-    if (!ctx->crc_ctx) {
-        av_log(avctx, AV_LOG_ERROR, "Could not initialize CRC table.\n");
-        return AVERROR_BUG;
+    ret = av_hashtable_init(&ctx->color_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Could not initialize color lookback table.\n");
+        return ret;
+    }
+    ret = av_hashtable_init(&ctx->lut_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Could not initialize LUT lookback table.\n");
+        return ret;
     }
 
     return 0;
@@ -313,6 +259,9 @@  static av_cold int dxv_close(AVCodecContext *avctx)
 
     av_freep(&ctx->tex_data);
 
+    av_hashtable_destroy(&ctx->color_ht);
+    av_hashtable_destroy(&ctx->lut_ht);
+
     return 0;
 }