Message ID | 20240205052724.26862-2-connorbworley@gmail.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,1/2] lavu/hashtable: create generic robin hood hash table | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
Connor Worley: > Offers a modest performance gain due to the switch from naive linear > probling to robin hood. How much would one gain if the hash function knew that key_size and val_size are four? > > Signed-off-by: Connor Worley <connorbworley@gmail.com> > --- > libavcodec/dxvenc.c | 121 +++++++++++++------------------------------- > 1 file changed, 35 insertions(+), 86 deletions(-) > > diff --git a/libavcodec/dxvenc.c b/libavcodec/dxvenc.c > index b274175689..e17b3b2c36 100644 > --- a/libavcodec/dxvenc.c > +++ b/libavcodec/dxvenc.c > @@ -21,7 +21,7 @@ > > #include <stdint.h> > > -#include "libavutil/crc.h" > +#include "libavutil/hashtable.h" > #include "libavutil/imgutils.h" > #include "libavutil/opt.h" > > @@ -44,69 +44,6 @@ enum DXVTextureFormat { > DXV_FMT_DXT1 = MKBETAG('D', 'X', 'T', '1'), > }; > > -typedef struct HTEntry { > - uint32_t key; > - uint32_t pos; > -} HTEntry; > - > -static void ht_init(HTEntry *ht) > -{ > - for (size_t i = 0; i < LOOKBACK_HT_ELEMS; i++) { > - ht[i].pos = -1; > - } > -} > - > -static uint32_t ht_lookup_and_upsert(HTEntry *ht, const AVCRC *hash_ctx, > - uint32_t key, uint32_t pos) > -{ > - uint32_t ret = -1; > - size_t hash = av_crc(hash_ctx, 0, (uint8_t*)&key, 4) % LOOKBACK_HT_ELEMS; > - for (size_t i = hash; i < hash + LOOKBACK_HT_ELEMS; i++) { > - size_t wrapped_index = i % LOOKBACK_HT_ELEMS; > - HTEntry *entry = &ht[wrapped_index]; > - if (entry->key == key || entry->pos == -1) { > - ret = entry->pos; > - entry->key = key; > - entry->pos = pos; > - break; > - } > - } > - return ret; > -} > - > -static void ht_delete(HTEntry *ht, const AVCRC *hash_ctx, > - uint32_t key, uint32_t pos) > -{ > - HTEntry *removed_entry = NULL; > - size_t removed_hash; > - size_t hash = av_crc(hash_ctx, 0, (uint8_t*)&key, 4) % LOOKBACK_HT_ELEMS; > - > - for (size_t i = hash; i < hash + LOOKBACK_HT_ELEMS; i++) { > - size_t wrapped_index = i % LOOKBACK_HT_ELEMS; > - HTEntry *entry = &ht[wrapped_index]; > - if (entry->pos == -1) > - return; > - if (removed_entry) { > - size_t candidate_hash = av_crc(hash_ctx, 0, (uint8_t*)&entry->key, 4) % LOOKBACK_HT_ELEMS; > - if ((wrapped_index > removed_hash && (candidate_hash <= removed_hash || candidate_hash > wrapped_index)) || > - (wrapped_index < removed_hash && (candidate_hash <= removed_hash && candidate_hash > wrapped_index))) { > - *removed_entry = *entry; > - entry->pos = -1; > - removed_entry = entry; > - removed_hash = wrapped_index; > - } > - } else if (entry->key == key) { > - if (entry->pos <= pos) { > - entry->pos = -1; > - removed_entry = entry; > - removed_hash = wrapped_index; > - } else { > - return; > - } > - } > - } > -} > - > typedef struct DXVEncContext { > AVClass *class; > > @@ -123,10 +60,8 @@ typedef struct DXVEncContext { > enum DXVTextureFormat tex_fmt; > int (*compress_tex)(AVCodecContext *avctx); > > - const AVCRC *crc_ctx; > - > - HTEntry color_lookback_ht[LOOKBACK_HT_ELEMS]; > - HTEntry lut_lookback_ht[LOOKBACK_HT_ELEMS]; > + AVHashtableContext color_ht; > + AVHashtableContext lut_ht; > } DXVEncContext; > > /* Converts an index offset value to a 2-bit opcode and pushes it to a stream. > @@ -161,27 +96,32 @@ static int dxv_compress_dxt1(AVCodecContext *avctx) > DXVEncContext *ctx = avctx->priv_data; > PutByteContext *pbc = &ctx->pbc; > uint32_t *value; > - uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, pos = 2, op = 0; > + uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, pos = 0, op = 0; > > - ht_init(ctx->color_lookback_ht); > - ht_init(ctx->lut_lookback_ht); > + av_hashtable_clear(&ctx->color_ht); > + av_hashtable_clear(&ctx->lut_ht); > > bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data)); > + av_hashtable_set(&ctx->color_ht, ctx->tex_data, &pos); > + pos++; > bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data + 4)); > - > - ht_lookup_and_upsert(ctx->color_lookback_ht, ctx->crc_ctx, AV_RL32(ctx->tex_data), 0); > - ht_lookup_and_upsert(ctx->lut_lookback_ht, ctx->crc_ctx, AV_RL32(ctx->tex_data + 4), 1); > + av_hashtable_set(&ctx->lut_ht, ctx->tex_data + 4, &pos); > + pos++; > > while (pos + 2 <= ctx->tex_size / 4) { > idx = 0; > + color_idx = 0; > + lut_idx = 0; > > color = AV_RL32(ctx->tex_data + pos * 4); > - prev_pos = ht_lookup_and_upsert(ctx->color_lookback_ht, ctx->crc_ctx, color, pos); > - color_idx = prev_pos != -1 ? pos - prev_pos : 0; > + if (av_hashtable_get(&ctx->color_ht, &color, &prev_pos)) > + color_idx = pos - prev_pos; > + av_hashtable_set(&ctx->color_ht, &color, &pos); > + > if (pos >= LOOKBACK_WORDS) { > uint32_t old_pos = pos - LOOKBACK_WORDS; > - uint32_t old_color = AV_RL32(ctx->tex_data + old_pos * 4); > - ht_delete(ctx->color_lookback_ht, ctx->crc_ctx, old_color, old_pos); > + if (av_hashtable_get(&ctx->color_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos) > + av_hashtable_delete(&ctx->color_ht, ctx->tex_data + old_pos * 4); > } > pos++; > > @@ -190,13 +130,14 @@ static int dxv_compress_dxt1(AVCodecContext *avctx) > idx = color_idx; > } else { > idx = 0; > - prev_pos = ht_lookup_and_upsert(ctx->lut_lookback_ht, ctx->crc_ctx, lut, pos); > - lut_idx = prev_pos != -1 ? pos - prev_pos : 0; > + if (av_hashtable_get(&ctx->lut_ht, &lut, &prev_pos)) > + lut_idx = pos - prev_pos; > + av_hashtable_set(&ctx->lut_ht, &lut, &pos); > } > if (pos >= LOOKBACK_WORDS) { > uint32_t old_pos = pos - LOOKBACK_WORDS; > - uint32_t old_lut = AV_RL32(ctx->tex_data + old_pos * 4); > - ht_delete(ctx->lut_lookback_ht, ctx->crc_ctx, old_lut, old_pos); > + if (av_hashtable_get(&ctx->lut_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos) > + av_hashtable_delete(&ctx->lut_ht, ctx->tex_data + old_pos * 4); > } > pos++; > > @@ -298,10 +239,15 @@ static av_cold int dxv_init(AVCodecContext *avctx) > return AVERROR(ENOMEM); > } > > - ctx->crc_ctx = av_crc_get_table(AV_CRC_32_IEEE); > - if (!ctx->crc_ctx) { > - av_log(avctx, AV_LOG_ERROR, "Could not initialize CRC table.\n"); > - return AVERROR_BUG; > + ret = av_hashtable_init(&ctx->color_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS); > + if (ret < 0) { > + av_log(avctx, AV_LOG_ERROR, "Could not initialize color lookback table.\n"); > + return ret; > + } > + ret = av_hashtable_init(&ctx->lut_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS); > + if (ret < 0) { > + av_log(avctx, AV_LOG_ERROR, "Could not initialize LUT lookback table.\n"); These av_logs are pointless. > + return ret; > } > > return 0; > @@ -313,6 +259,9 @@ static av_cold int dxv_close(AVCodecContext *avctx) > > av_freep(&ctx->tex_data); > > + av_hashtable_destroy(&ctx->color_ht); > + av_hashtable_destroy(&ctx->lut_ht); > + > return 0; > } >
> > How much would one gain if the hash function knew that key_size and > val_size are four? > That yields a nice 10-20% speedup on my machine. Are you thinking of macros to parametrize key/val size, or possibly optimized versions for common sizes?
Connor Worley: >> >> How much would one gain if the hash function knew that key_size and >> val_size are four? >> > > That yields a nice 10-20% speedup on my machine. Are you thinking of macros > to parametrize key/val size, or possibly optimized versions for common > sizes? I am thinking about it not being public at all; as long as we have only one user... - Andreas PS: Is the speedup only for the hashing part or for overall decoding? Is the comparison the current version of your patch or something that already has my comments incorporated?
On Mon, Feb 5, 2024 at 12:06 PM Andreas Rheinhardt < andreas.rheinhardt@outlook.com> wrote: > Connor Worley: > >> > >> How much would one gain if the hash function knew that key_size and > >> val_size are four? > >> > > > > That yields a nice 10-20% speedup on my machine. Are you thinking of > macros > > to parametrize key/val size, or possibly optimized versions for common > > sizes? > > I am thinking about it not being public at all; as long as we have only > one user... > That's fair. I will need some variants for future DXV work, but they can also be private if this doesn't seem generally useful. PS: Is the speedup only for the hashing part or for overall decoding? Is > the comparison the current version of your patch or something that > already has my comments incorporated? > Overall encoding -- current version of my patch with ctx->key/val_size vs hardcoded sizes.
diff --git a/libavcodec/dxvenc.c b/libavcodec/dxvenc.c index b274175689..e17b3b2c36 100644 --- a/libavcodec/dxvenc.c +++ b/libavcodec/dxvenc.c @@ -21,7 +21,7 @@ #include <stdint.h> -#include "libavutil/crc.h" +#include "libavutil/hashtable.h" #include "libavutil/imgutils.h" #include "libavutil/opt.h" @@ -44,69 +44,6 @@ enum DXVTextureFormat { DXV_FMT_DXT1 = MKBETAG('D', 'X', 'T', '1'), }; -typedef struct HTEntry { - uint32_t key; - uint32_t pos; -} HTEntry; - -static void ht_init(HTEntry *ht) -{ - for (size_t i = 0; i < LOOKBACK_HT_ELEMS; i++) { - ht[i].pos = -1; - } -} - -static uint32_t ht_lookup_and_upsert(HTEntry *ht, const AVCRC *hash_ctx, - uint32_t key, uint32_t pos) -{ - uint32_t ret = -1; - size_t hash = av_crc(hash_ctx, 0, (uint8_t*)&key, 4) % LOOKBACK_HT_ELEMS; - for (size_t i = hash; i < hash + LOOKBACK_HT_ELEMS; i++) { - size_t wrapped_index = i % LOOKBACK_HT_ELEMS; - HTEntry *entry = &ht[wrapped_index]; - if (entry->key == key || entry->pos == -1) { - ret = entry->pos; - entry->key = key; - entry->pos = pos; - break; - } - } - return ret; -} - -static void ht_delete(HTEntry *ht, const AVCRC *hash_ctx, - uint32_t key, uint32_t pos) -{ - HTEntry *removed_entry = NULL; - size_t removed_hash; - size_t hash = av_crc(hash_ctx, 0, (uint8_t*)&key, 4) % LOOKBACK_HT_ELEMS; - - for (size_t i = hash; i < hash + LOOKBACK_HT_ELEMS; i++) { - size_t wrapped_index = i % LOOKBACK_HT_ELEMS; - HTEntry *entry = &ht[wrapped_index]; - if (entry->pos == -1) - return; - if (removed_entry) { - size_t candidate_hash = av_crc(hash_ctx, 0, (uint8_t*)&entry->key, 4) % LOOKBACK_HT_ELEMS; - if ((wrapped_index > removed_hash && (candidate_hash <= removed_hash || candidate_hash > wrapped_index)) || - (wrapped_index < removed_hash && (candidate_hash <= removed_hash && candidate_hash > wrapped_index))) { - *removed_entry = *entry; - entry->pos = -1; - removed_entry = entry; - removed_hash = wrapped_index; - } - } else if (entry->key == key) { - if (entry->pos <= pos) { - entry->pos = -1; - removed_entry = entry; - removed_hash = wrapped_index; - } else { - return; - } - } - } -} - typedef struct DXVEncContext { AVClass *class; @@ -123,10 +60,8 @@ typedef struct DXVEncContext { enum DXVTextureFormat tex_fmt; int (*compress_tex)(AVCodecContext *avctx); - const AVCRC *crc_ctx; - - HTEntry color_lookback_ht[LOOKBACK_HT_ELEMS]; - HTEntry lut_lookback_ht[LOOKBACK_HT_ELEMS]; + AVHashtableContext color_ht; + AVHashtableContext lut_ht; } DXVEncContext; /* Converts an index offset value to a 2-bit opcode and pushes it to a stream. @@ -161,27 +96,32 @@ static int dxv_compress_dxt1(AVCodecContext *avctx) DXVEncContext *ctx = avctx->priv_data; PutByteContext *pbc = &ctx->pbc; uint32_t *value; - uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, pos = 2, op = 0; + uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, pos = 0, op = 0; - ht_init(ctx->color_lookback_ht); - ht_init(ctx->lut_lookback_ht); + av_hashtable_clear(&ctx->color_ht); + av_hashtable_clear(&ctx->lut_ht); bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data)); + av_hashtable_set(&ctx->color_ht, ctx->tex_data, &pos); + pos++; bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data + 4)); - - ht_lookup_and_upsert(ctx->color_lookback_ht, ctx->crc_ctx, AV_RL32(ctx->tex_data), 0); - ht_lookup_and_upsert(ctx->lut_lookback_ht, ctx->crc_ctx, AV_RL32(ctx->tex_data + 4), 1); + av_hashtable_set(&ctx->lut_ht, ctx->tex_data + 4, &pos); + pos++; while (pos + 2 <= ctx->tex_size / 4) { idx = 0; + color_idx = 0; + lut_idx = 0; color = AV_RL32(ctx->tex_data + pos * 4); - prev_pos = ht_lookup_and_upsert(ctx->color_lookback_ht, ctx->crc_ctx, color, pos); - color_idx = prev_pos != -1 ? pos - prev_pos : 0; + if (av_hashtable_get(&ctx->color_ht, &color, &prev_pos)) + color_idx = pos - prev_pos; + av_hashtable_set(&ctx->color_ht, &color, &pos); + if (pos >= LOOKBACK_WORDS) { uint32_t old_pos = pos - LOOKBACK_WORDS; - uint32_t old_color = AV_RL32(ctx->tex_data + old_pos * 4); - ht_delete(ctx->color_lookback_ht, ctx->crc_ctx, old_color, old_pos); + if (av_hashtable_get(&ctx->color_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos) + av_hashtable_delete(&ctx->color_ht, ctx->tex_data + old_pos * 4); } pos++; @@ -190,13 +130,14 @@ static int dxv_compress_dxt1(AVCodecContext *avctx) idx = color_idx; } else { idx = 0; - prev_pos = ht_lookup_and_upsert(ctx->lut_lookback_ht, ctx->crc_ctx, lut, pos); - lut_idx = prev_pos != -1 ? pos - prev_pos : 0; + if (av_hashtable_get(&ctx->lut_ht, &lut, &prev_pos)) + lut_idx = pos - prev_pos; + av_hashtable_set(&ctx->lut_ht, &lut, &pos); } if (pos >= LOOKBACK_WORDS) { uint32_t old_pos = pos - LOOKBACK_WORDS; - uint32_t old_lut = AV_RL32(ctx->tex_data + old_pos * 4); - ht_delete(ctx->lut_lookback_ht, ctx->crc_ctx, old_lut, old_pos); + if (av_hashtable_get(&ctx->lut_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos) + av_hashtable_delete(&ctx->lut_ht, ctx->tex_data + old_pos * 4); } pos++; @@ -298,10 +239,15 @@ static av_cold int dxv_init(AVCodecContext *avctx) return AVERROR(ENOMEM); } - ctx->crc_ctx = av_crc_get_table(AV_CRC_32_IEEE); - if (!ctx->crc_ctx) { - av_log(avctx, AV_LOG_ERROR, "Could not initialize CRC table.\n"); - return AVERROR_BUG; + ret = av_hashtable_init(&ctx->color_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS); + if (ret < 0) { + av_log(avctx, AV_LOG_ERROR, "Could not initialize color lookback table.\n"); + return ret; + } + ret = av_hashtable_init(&ctx->lut_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS); + if (ret < 0) { + av_log(avctx, AV_LOG_ERROR, "Could not initialize LUT lookback table.\n"); + return ret; } return 0; @@ -313,6 +259,9 @@ static av_cold int dxv_close(AVCodecContext *avctx) av_freep(&ctx->tex_data); + av_hashtable_destroy(&ctx->color_ht); + av_hashtable_destroy(&ctx->lut_ht); + return 0; }
Offers a modest performance gain due to the switch from naive linear probling to robin hood. Signed-off-by: Connor Worley <connorbworley@gmail.com> --- libavcodec/dxvenc.c | 121 +++++++++++++------------------------------- 1 file changed, 35 insertions(+), 86 deletions(-)