Message ID | CAPYw7P4LYXDZw070aFxFZFtNuJcX9ZKLfR4ov2026JWykACsVQ@mail.gmail.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel] MULTI VLC decoding boost | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | fail | Make failed |
andriy/make_x86 | fail | Make failed |
Will apply soon.
On Mon, Aug 28, 2023 at 07:36:17PM +0200, Paul B Mahol wrote: > Patches attached. > > Thanks for kurosu for pointing unmerged branches. > > The UNCACHED_PATH for x86_32 is broken with this for 2 codecs it touches. > Fix is trivial, to be fixed later. [...] > +int ff_init_vlc_multi_from_lengths(VLC *vlc, VLC_MULTI *multi, int nb_bits, int nb_elems, > + int nb_codes, const int8_t *lens, int lens_wrap, > + const void *symbols, int symbols_wrap, int symbols_size, > + int offset, int flags, void *logctx) > +{ > + VLCcode localbuf[LOCALBUF_ELEMS], *buf = localbuf; > + uint64_t code; > + int ret, j, len_max = FFMIN(32, 3 * nb_bits); > + > + ret = vlc_common_init(vlc, nb_bits, nb_codes, &buf, flags); > + if (ret < 0) > + return ret; > + > + multi->table = av_malloc(sizeof(*multi->table) << nb_bits); > + if (!multi->table) > + return AVERROR(ENOMEM); > + > + j = code = 0; > + for (int i = 0; i < nb_codes; i++, lens += lens_wrap) { > + int len = *lens; > + if (len > 0) { > + unsigned sym; > + > + buf[j].bits = len; > + if (symbols) > + GET_DATA(sym, symbols, i, symbols_wrap, symbols_size) > + else > + sym = i; > + buf[j].symbol = sym + offset; > + buf[j++].code = code; > + } else if (len < 0) { > + len = -len; > + } else > + continue; > + if (len > len_max || code & ((1U << (32 - len)) - 1)) { > + av_log(logctx, AV_LOG_ERROR, "Invalid VLC (length %u)\n", len); > + goto fail; > + } > + code += 1U << (32 - len); > + if (code > UINT32_MAX + 1ULL) { > + av_log(logctx, AV_LOG_ERROR, "Overdetermined VLC tree\n"); > + goto fail; > + } > + } > + ret = vlc_common_end(vlc, nb_bits, j, buf, flags, localbuf); > + if (ret < 0) > + goto fail; > + return vlc_multi_gen(multi->table, vlc, nb_elems, j, nb_bits, buf, logctx); > +fail: > + if (buf != localbuf) > + av_free(buf); > + return AVERROR_INVALIDDATA; > +} this is copy and pasted from int ff_vlc_init_from_lengths(VLC *vlc, int nb_bits, int nb_codes, leading to code duplication, it would be better if you could factor the duplication out thx [...]
On Fri, Sep 15, 2023 at 12:01 AM Michael Niedermayer <michael@niedermayer.cc> wrote: > On Mon, Aug 28, 2023 at 07:36:17PM +0200, Paul B Mahol wrote: > > Patches attached. > > > > Thanks for kurosu for pointing unmerged branches. > > > > The UNCACHED_PATH for x86_32 is broken with this for 2 codecs it touches. > > Fix is trivial, to be fixed later. > [...] > > > +int ff_init_vlc_multi_from_lengths(VLC *vlc, VLC_MULTI *multi, int > nb_bits, int nb_elems, > > + int nb_codes, const int8_t *lens, int > lens_wrap, > > + const void *symbols, int > symbols_wrap, int symbols_size, > > + int offset, int flags, void *logctx) > > +{ > > + VLCcode localbuf[LOCALBUF_ELEMS], *buf = localbuf; > > + uint64_t code; > > + int ret, j, len_max = FFMIN(32, 3 * nb_bits); > > + > > + ret = vlc_common_init(vlc, nb_bits, nb_codes, &buf, flags); > > + if (ret < 0) > > + return ret; > > + > > + multi->table = av_malloc(sizeof(*multi->table) << nb_bits); > > + if (!multi->table) > > + return AVERROR(ENOMEM); > > + > > + j = code = 0; > > + for (int i = 0; i < nb_codes; i++, lens += lens_wrap) { > > + int len = *lens; > > + if (len > 0) { > > + unsigned sym; > > + > > + buf[j].bits = len; > > + if (symbols) > > + GET_DATA(sym, symbols, i, symbols_wrap, symbols_size) > > + else > > + sym = i; > > + buf[j].symbol = sym + offset; > > + buf[j++].code = code; > > + } else if (len < 0) { > > + len = -len; > > + } else > > + continue; > > + if (len > len_max || code & ((1U << (32 - len)) - 1)) { > > + av_log(logctx, AV_LOG_ERROR, "Invalid VLC (length %u)\n", > len); > > + goto fail; > > + } > > + code += 1U << (32 - len); > > + if (code > UINT32_MAX + 1ULL) { > > + av_log(logctx, AV_LOG_ERROR, "Overdetermined VLC tree\n"); > > + goto fail; > > + } > > + } > > + ret = vlc_common_end(vlc, nb_bits, j, buf, flags, localbuf); > > + if (ret < 0) > > + goto fail; > > + return vlc_multi_gen(multi->table, vlc, nb_elems, j, nb_bits, buf, > logctx); > > +fail: > > + if (buf != localbuf) > > + av_free(buf); > > + return AVERROR_INVALIDDATA; > > +} > > this is copy and pasted from > > int ff_vlc_init_from_lengths(VLC *vlc, int nb_bits, int nb_codes, > > leading to code duplication, it would be better if you could > factor the duplication out > > thx > -1 > > [...] > > -- > Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB > > "You are 36 times more likely to die in a bathtub than at the hands of a > terrorist. Also, you are 2.5 times more likely to become a president and > 2 times more likely to become an astronaut, than to die in a terrorist > attack." -- Thoughty2 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
On Mon, Aug 28, 2023 at 07:36:17PM +0200, Paul B Mahol wrote: > Patches attached. > > Thanks for kurosu for pointing unmerged branches. > [...] > +static void add_level(VLC_MULTI_ELEM *table, const int nb_elems, > + const int num, const int numbits, > + const VLCcode *buf, > + uint32_t curcode, int curlen, > + int curlimit, int curlevel, > + const int minlen, const int max, > + unsigned* levelcnt, VLC_MULTI_ELEM *info) > +{ > + if (nb_elems > 256 && curlevel > 2) > + return; // No room this and > + for (int i = num-1; i > max; i--) { > + for (int j = 0; j < 2; j++) { > + int newlimit, sym; > + int t = j ? i-1 : i; > + int l = buf[t].bits; > + uint32_t code; > + > + sym = buf[t].symbol; > + if (l > curlimit) > + return; > + code = curcode + (buf[t].code >> curlen); > + newlimit = curlimit - l; > + l += curlen; > + if (nb_elems>256) AV_WN16(info->val+2*curlevel, sym); > + else info->val[curlevel] = sym&0xFF; > + > + if (curlevel) { // let's not add single entries > + uint32_t val = code >> (32 - numbits); > + uint32_t nb = val + (1U << (numbits - l)); > + info->len = l; > + info->num = curlevel+1; > + for (; val < nb; val++) > + AV_COPY64(table+val, info); > + levelcnt[curlevel-1]++; > + } > + > + if (curlevel+1 < VLC_MULTI_MAX_SYMBOLS && newlimit >= minlen) { this are 2 checks doing the same thing for 8 and 16 bit what mess is this ? for 8bit we have VLC_MULTI_MAX_SYMBOLS space (6) in the array so we skip beyond that for 16bit we have VLC_MULTI_MAX_SYMBOLS/2 space which is 3 and the skip instead is inside add_level() above with hardcoded litteral number (nb_elems > 256 is a check for if its 8 or 16bit) why is such totally hacked up code pushed with standing objections and no review ? yes, ill fix this one but i have the feeling this code has more surprises > + add_level(table, nb_elems, num, numbits, buf, > + code, l, newlimit, curlevel+1, > + minlen, max, levelcnt, info); > + } > + } > + } > +} [...]
From 4250d74dad2bfb4c8d01fc26c9635c56293fc74c Mon Sep 17 00:00:00 2001 From: Christophe Gisquet <christophe.gisquet@gmail.com> Date: Sun, 9 Jul 2017 12:56:35 +0000 Subject: [PATCH 3/3] avcodec/utvideodec: add vlc multi support Faster decoding, by average 50% faster overall. Signed-off-by: Paul B Mahol <onemda@gmail.com> --- libavcodec/utvideo.h | 1 + libavcodec/utvideodec.c | 91 ++++++++++++++++++++--------------------- 2 files changed, 45 insertions(+), 47 deletions(-) diff --git a/libavcodec/utvideo.h b/libavcodec/utvideo.h index 9da9329ff3..d274b6586d 100644 --- a/libavcodec/utvideo.h +++ b/libavcodec/utvideo.h @@ -80,6 +80,7 @@ typedef struct UtvideoContext { ptrdiff_t slice_stride; uint8_t *slice_bits, *slice_buffer[4]; + void *buffer; int slice_bits_size; const uint8_t *packed_stream[4][256]; diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c index 1f00c58950..0b0352b7ec 100644 --- a/libavcodec/utvideodec.c +++ b/libavcodec/utvideodec.c @@ -46,7 +46,7 @@ typedef struct HuffEntry { } HuffEntry; static int build_huff(UtvideoContext *c, const uint8_t *src, VLC *vlc, - int *fsym, unsigned nb_elems) + VLC_MULTI *multi, int *fsym, unsigned nb_elems) { int i; HuffEntry he[1024]; @@ -82,11 +82,34 @@ static int build_huff(UtvideoContext *c, const uint8_t *src, VLC *vlc, he[--codes_count[bits[i]]] = (HuffEntry) { bits[i], i }; #define VLC_BITS 11 - return ff_init_vlc_from_lengths(vlc, VLC_BITS, codes_count[0], + return ff_init_vlc_multi_from_lengths(vlc, multi, VLC_BITS, nb_elems, codes_count[0], &he[0].len, sizeof(*he), &he[0].sym, sizeof(*he), 2, 0, 0, c->avctx); } +#define READ_PLANE(b, end) \ +{ \ + buf = !use_pred ? dest : c->buffer; \ + for (i = 0; CACHED_BITSTREAM_READER && i < width-end && get_bits_left(&gb) > 0;) {\ + ret = get_vlc_multi(&gb, (uint8_t *)buf + i * b, multi.table, \ + vlc.table, VLC_BITS, 3); \ + if (ret > 0) \ + i += ret; \ + if (ret <= 0) \ + goto fail; \ + } \ + for (; i < width && get_bits_left(&gb) > 0; i++) \ + buf[i] = get_vlc2(&gb, vlc.table, VLC_BITS, 3); \ + if (use_pred) { \ + if (b == 2) \ + c->llviddsp.add_left_pred_int16((uint16_t *)dest, (const uint16_t *)buf, 0x3ff, width, prev); \ + else \ + c->llviddsp.add_left_pred((uint8_t *)dest, (const uint8_t *)buf, width, prev); \ + } \ + prev = dest[width-1]; \ + dest += stride; \ +} + static int decode_plane10(UtvideoContext *c, int plane_no, uint16_t *dst, ptrdiff_t stride, int width, int height, @@ -95,11 +118,12 @@ static int decode_plane10(UtvideoContext *c, int plane_no, { int i, j, slice, pix, ret; int sstart, send; + VLC_MULTI multi; VLC vlc; GetBitContext gb; int prev, fsym; - if ((ret = build_huff(c, huff, &vlc, &fsym, 1024)) < 0) { + if ((ret = build_huff(c, huff, &vlc, &multi, &fsym, 1024)) < 0) { av_log(c->avctx, AV_LOG_ERROR, "Cannot build Huffman codes\n"); return ret; } @@ -131,7 +155,7 @@ static int decode_plane10(UtvideoContext *c, int plane_no, send = 0; for (slice = 0; slice < c->slices; slice++) { - uint16_t *dest; + uint16_t *dest, *buf; int slice_data_start, slice_data_end, slice_size; sstart = send; @@ -156,37 +180,20 @@ static int decode_plane10(UtvideoContext *c, int plane_no, init_get_bits(&gb, c->slice_bits, slice_size * 8); prev = 0x200; - for (j = sstart; j < send; j++) { - for (i = 0; i < width; i++) { - pix = get_vlc2(&gb, vlc.table, VLC_BITS, 3); - if (pix < 0) { - av_log(c->avctx, AV_LOG_ERROR, "Decoding error\n"); - goto fail; - } - if (use_pred) { - prev += pix; - prev &= 0x3FF; - pix = prev; - } - dest[i] = pix; - } - dest += stride; - if (get_bits_left(&gb) < 0) { - av_log(c->avctx, AV_LOG_ERROR, - "Slice decoding ran out of bits\n"); - goto fail; - } - } + for (j = sstart; j < send; j++) + READ_PLANE(2, 3) if (get_bits_left(&gb) > 32) av_log(c->avctx, AV_LOG_WARNING, "%d bits left after decoding slice\n", get_bits_left(&gb)); } ff_free_vlc(&vlc); + ff_free_vlc_multi(&multi); return 0; fail: ff_free_vlc(&vlc); + ff_free_vlc_multi(&multi); return AVERROR_INVALIDDATA; } @@ -207,6 +214,7 @@ static int decode_plane(UtvideoContext *c, int plane_no, { int i, j, slice, pix; int sstart, send; + VLC_MULTI multi; VLC vlc; GetBitContext gb; int ret, prev, fsym; @@ -259,7 +267,7 @@ static int decode_plane(UtvideoContext *c, int plane_no, return 0; } - if (build_huff(c, src, &vlc, &fsym, 256)) { + if (build_huff(c, src, &vlc, &multi, &fsym, 256)) { av_log(c->avctx, AV_LOG_ERROR, "Cannot build Huffman codes\n"); return AVERROR_INVALIDDATA; } @@ -292,7 +300,7 @@ static int decode_plane(UtvideoContext *c, int plane_no, send = 0; for (slice = 0; slice < c->slices; slice++) { - uint8_t *dest; + uint8_t *dest, *buf; int slice_data_start, slice_data_end, slice_size; sstart = send; @@ -317,36 +325,20 @@ static int decode_plane(UtvideoContext *c, int plane_no, init_get_bits(&gb, c->slice_bits, slice_size * 8); prev = 0x80; - for (j = sstart; j < send; j++) { - for (i = 0; i < width; i++) { - pix = get_vlc2(&gb, vlc.table, VLC_BITS, 3); - if (pix < 0) { - av_log(c->avctx, AV_LOG_ERROR, "Decoding error\n"); - goto fail; - } - if (use_pred) { - prev += pix; - pix = prev; - } - dest[i] = pix; - } - if (get_bits_left(&gb) < 0) { - av_log(c->avctx, AV_LOG_ERROR, - "Slice decoding ran out of bits\n"); - goto fail; - } - dest += stride; - } + for (j = sstart; j < send; j++) + READ_PLANE(1, 5) if (get_bits_left(&gb) > 32) av_log(c->avctx, AV_LOG_WARNING, "%d bits left after decoding slice\n", get_bits_left(&gb)); } ff_free_vlc(&vlc); + ff_free_vlc_multi(&multi); return 0; fail: ff_free_vlc(&vlc); + ff_free_vlc_multi(&multi); return AVERROR_INVALIDDATA; } @@ -992,6 +984,10 @@ static av_cold int decode_init(AVCodecContext *avctx) return AVERROR_INVALIDDATA; } + c->buffer = av_calloc(avctx->width, c->pro?2:1); + if (!c->buffer) + return AVERROR(ENOMEM); + av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &h_shift, &v_shift); if ((avctx->width & ((1<<h_shift)-1)) || (avctx->height & ((1<<v_shift)-1))) { @@ -1047,6 +1043,7 @@ static av_cold int decode_end(AVCodecContext *avctx) UtvideoContext * const c = avctx->priv_data; av_freep(&c->slice_bits); + av_freep(&c->buffer); return 0; } -- 2.39.1