diff mbox

[FFmpeg-devel,1/3] avcodec/get_bits: add cached bitstream reader

Message ID 20170713102703.2354-1-onemda@gmail.com
State Superseded
Headers show

Commit Message

Paul B Mahol July 13, 2017, 10:27 a.m. UTC
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavcodec/get_bits.h | 263 +++++++++++++++++++++++++++++++++++++++++++++-----
 libavcodec/golomb.h   | 151 +++++++++++++++++++++++++++++
 2 files changed, 388 insertions(+), 26 deletions(-)

Comments

foo86 July 14, 2017, 2:08 p.m. UTC | #1
On Thu, Jul 13, 2017 at 12:27:03PM +0200, Paul B Mahol wrote:
> +static inline unsigned int get_bits(GetBitContext *s, int n)
>  {
> +#ifdef CACHED_BITSTREAM_READER
> +    register int tmp = 0;
> +#ifdef BITSTREAM_READER_LE
> +    uint64_t left = 0;
> +#endif
> +
> +    av_assert2(n>0 && n<=32);
> +    if (n > s->bits_left) {
> +        n -= s->bits_left;
> +#ifdef BITSTREAM_READER_LE
> +        left = s->bits_left;
> +#endif
> +        tmp = get_val(s, s->bits_left);
This triggers an assert in get_val() if s->bits_left == 0.

> +        refill_32(s);
> +    }
> +
> +#ifdef BITSTREAM_READER_LE
> +    tmp = get_val(s, n) << left | tmp;
> +#else
> +    tmp = get_val(s, n) | tmp << n;
This causes undefined behavior if n > 30.

> +#endif
> +
> +#else
>      register int tmp;
>      OPEN_READER(re, s);
>      av_assert2(n>0 && n<=25);
>      UPDATE_CACHE(re, s);
> -    tmp = SHOW_SBITS(re, s, n);
> +    tmp = SHOW_UBITS(re, s, n);
>      LAST_SKIP_BITS(re, s, n);
>      CLOSE_READER(re, s);
> +#endif
>      return tmp;
>  }

The code under #ifdef CACHED_BITSTREAM_READER can probably be simplified
like this (analogous to show_bits()):

    if (n > s->bits_left)
        refill_32(s);

    tmp = get_val(s, n);

This avoids UB and is simpler/faster. Or am I missing something here?
Hendrik Leppkes July 14, 2017, 3:12 p.m. UTC | #2
On Fri, Jul 14, 2017 at 4:08 PM, foo86 <foobaz86@gmail.com> wrote:
> On Thu, Jul 13, 2017 at 12:27:03PM +0200, Paul B Mahol wrote:
>> +static inline unsigned int get_bits(GetBitContext *s, int n)
>>  {
>> +#ifdef CACHED_BITSTREAM_READER
>> +    register int tmp = 0;
>> +#ifdef BITSTREAM_READER_LE
>> +    uint64_t left = 0;
>> +#endif
>> +
>> +    av_assert2(n>0 && n<=32);
>> +    if (n > s->bits_left) {
>> +        n -= s->bits_left;
>> +#ifdef BITSTREAM_READER_LE
>> +        left = s->bits_left;
>> +#endif
>> +        tmp = get_val(s, s->bits_left);
> This triggers an assert in get_val() if s->bits_left == 0.
>
>> +        refill_32(s);
>> +    }
>> +
>> +#ifdef BITSTREAM_READER_LE
>> +    tmp = get_val(s, n) << left | tmp;
>> +#else
>> +    tmp = get_val(s, n) | tmp << n;
> This causes undefined behavior if n > 30.

get_bits is only valid until n = 25 in the "non-cached" case, so it's
not a problem to impose the same limitation on the cached reader.
In fact, if they are to share the exact same API, it should probably
follow that they also share the same constraints, so that we can do
proper performance comparisons between the two, instead of having to
re-write the using code.

- Hendrik
foo86 July 14, 2017, 3:37 p.m. UTC | #3
On Fri, Jul 14, 2017 at 05:12:25PM +0200, Hendrik Leppkes wrote:
> On Fri, Jul 14, 2017 at 4:08 PM, foo86 <foobaz86@gmail.com> wrote:
> > On Thu, Jul 13, 2017 at 12:27:03PM +0200, Paul B Mahol wrote:
> >> +static inline unsigned int get_bits(GetBitContext *s, int n)
> >>  {
> >> +#ifdef CACHED_BITSTREAM_READER
> >> +    register int tmp = 0;
> >> +#ifdef BITSTREAM_READER_LE
> >> +    uint64_t left = 0;
> >> +#endif
> >> +
> >> +    av_assert2(n>0 && n<=32);
> >> +    if (n > s->bits_left) {
> >> +        n -= s->bits_left;
> >> +#ifdef BITSTREAM_READER_LE
> >> +        left = s->bits_left;
> >> +#endif
> >> +        tmp = get_val(s, s->bits_left);
> > This triggers an assert in get_val() if s->bits_left == 0.
> >
> >> +        refill_32(s);
> >> +    }
> >> +
> >> +#ifdef BITSTREAM_READER_LE
> >> +    tmp = get_val(s, n) << left | tmp;
> >> +#else
> >> +    tmp = get_val(s, n) | tmp << n;
> > This causes undefined behavior if n > 30.
> 
> get_bits is only valid until n = 25 in the "non-cached" case, so it's
> not a problem to impose the same limitation on the cached reader.
> In fact, if they are to share the exact same API, it should probably
> follow that they also share the same constraints, so that we can do
> proper performance comparisons between the two, instead of having to
> re-write the using code.

Cached bitstream reader currently uses get_bits() to implement
get_bits_long(), which means cached get_bits() must support reading
values up to 32 bits.

I agree however that cached/uncached bitstream readers should have the
same API constraints. That means cached get_bits_long() should probably
have a separate implementation.

> 
> - Hendrik
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
diff mbox

Patch

diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index c530015..dbacdda 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -1,5 +1,6 @@ 
 /*
- * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2016 Alexandra Hájková
  *
  * This file is part of FFmpeg.
  *
@@ -54,6 +55,10 @@ 
 
 typedef struct GetBitContext {
     const uint8_t *buffer, *buffer_end;
+#ifdef CACHED_BITSTREAM_READER
+    uint64_t cache;
+    unsigned bits_left;
+#endif
     int index;
     int size_in_bits;
     int size_in_bits_plus8;
@@ -106,12 +111,16 @@  typedef struct GetBitContext {
  * For examples see get_bits, show_bits, skip_bits, get_vlc.
  */
 
-#ifdef LONG_BITSTREAM_READER
+#ifdef CACHED_BITSTREAM_READER
+#   define MIN_CACHE_BITS 64
+#elif defined LONG_BITSTREAM_READER
 #   define MIN_CACHE_BITS 32
 #else
 #   define MIN_CACHE_BITS 25
 #endif
 
+#ifndef CACHED_BITSTREAM_READER
+
 #define OPEN_READER_NOSIZE(name, gb)            \
     unsigned int name ## _index = (gb)->index;  \
     unsigned int av_unused name ## _cache
@@ -196,20 +205,113 @@  typedef struct GetBitContext {
 
 #define GET_CACHE(name, gb) ((uint32_t) name ## _cache)
 
+#endif
+
 static inline int get_bits_count(const GetBitContext *s)
 {
+#ifdef CACHED_BITSTREAM_READER
+    return s->index - s->bits_left;
+#else
     return s->index;
+#endif
 }
 
-static inline void skip_bits_long(GetBitContext *s, int n)
+static inline void refill_32(GetBitContext *s)
 {
-#if UNCHECKED_BITSTREAM_READER
-    s->index += n;
+#ifdef CACHED_BITSTREAM_READER
+#if !UNCHECKED_BITSTREAM_READER
+    if (s->index >> 3 >= s->buffer_end - s->buffer)
+        return;
+#endif
+
+#ifdef BITSTREAM_READER_LE
+    s->cache       = (uint64_t)AV_RL32(s->buffer + (s->index >> 3)) << s->bits_left | s->cache;
 #else
-    s->index += av_clip(n, -s->index, s->size_in_bits_plus8 - s->index);
+    s->cache       = s->cache | (uint64_t)AV_RB32(s->buffer + (s->index >> 3)) << (32 - s->bits_left);
+#endif
+    s->index     += 32;
+    s->bits_left += 32;
+#endif
+}
+
+static inline void refill_64(GetBitContext *s)
+{
+#ifdef CACHED_BITSTREAM_READER
+#if !UNCHECKED_BITSTREAM_READER
+    if (s->index >> 3 >= s->buffer_end - s->buffer)
+        return;
+#endif
+
+#ifdef BITSTREAM_READER_LE
+    s->cache = AV_RL64(s->buffer + (s->index >> 3));
+#else
+    s->cache = AV_RB64(s->buffer + (s->index >> 3));
+#endif
+    s->index += 64;
+    s->bits_left = 64;
+#endif
+}
+
+#ifdef CACHED_BITSTREAM_READER
+static inline uint64_t get_val(GetBitContext *s, unsigned n)
+{
+    uint64_t ret;
+    av_assert2(n>0 && n<=63);
+#ifdef BITSTREAM_READER_LE
+    ret = s->cache & ((UINT64_C(1) << n) - 1);
+    s->cache >>= n;
+#else
+    ret = s->cache >> (64 - n);
+    s->cache <<= n;
+#endif
+    s->bits_left -= n;
+    return ret;
+}
+#endif
+
+#ifdef CACHED_BITSTREAM_READER
+static inline unsigned show_val(const GetBitContext *s, unsigned n)
+{
+#ifdef BITSTREAM_READER_LE
+    return s->cache & ((UINT64_C(1) << n) - 1);
+#else
+    return s->cache >> (64 - n);
+#endif
+}
+#endif
+
+/**
+ * Show 1-25 bits.
+ */
+static inline unsigned int show_bits(GetBitContext *s, int n)
+{
+    register int tmp;
+#ifdef CACHED_BITSTREAM_READER
+    if (n > s->bits_left)
+        refill_32(s);
+
+    tmp = show_val(s, n);
+#else
+    OPEN_READER_NOSIZE(re, s);
+    av_assert2(n>0 && n<=25);
+    UPDATE_CACHE(re, s);
+    tmp = SHOW_UBITS(re, s, n);
 #endif
+    return tmp;
 }
 
+#ifdef CACHED_BITSTREAM_READER
+static inline void skip_remaining(GetBitContext *s, unsigned n)
+{
+#ifdef BITSTREAM_READER_LE
+    s->cache >>= n;
+#else
+    s->cache <<= n;
+#endif
+    s->bits_left -= n;
+}
+#endif
+
 /**
  * Read MPEG-1 dc-style VLC (sign bit + mantissa with no MSB).
  * if MSB not set it is negative
@@ -217,6 +319,13 @@  static inline void skip_bits_long(GetBitContext *s, int n)
  */
 static inline int get_xbits(GetBitContext *s, int n)
 {
+#ifdef CACHED_BITSTREAM_READER
+    int32_t cache = show_bits(s, 32);
+    int sign = ~cache >> 31;
+    skip_remaining(s, n);
+
+    return ((((uint32_t)(sign ^ cache)) >> (32 - n)) ^ sign) - sign;
+#else
     register int sign;
     register int32_t cache;
     OPEN_READER(re, s);
@@ -227,8 +336,10 @@  static inline int get_xbits(GetBitContext *s, int n)
     LAST_SKIP_BITS(re, s, n);
     CLOSE_READER(re, s);
     return (NEG_USR32(sign ^ cache, n) ^ sign) - sign;
+#endif
 }
 
+#ifndef CACHED_BITSTREAM_READER
 static inline int get_xbits_le(GetBitContext *s, int n)
 {
     register int sign;
@@ -242,31 +353,61 @@  static inline int get_xbits_le(GetBitContext *s, int n)
     CLOSE_READER(re, s);
     return (zero_extend(sign ^ cache, n) ^ sign) - sign;
 }
+#endif
 
-static inline int get_sbits(GetBitContext *s, int n)
+/**
+ * Read 1-25 bits.
+ */
+static inline unsigned int get_bits(GetBitContext *s, int n)
 {
+#ifdef CACHED_BITSTREAM_READER
+    register int tmp = 0;
+#ifdef BITSTREAM_READER_LE
+    uint64_t left = 0;
+#endif
+
+    av_assert2(n>0 && n<=32);
+    if (n > s->bits_left) {
+        n -= s->bits_left;
+#ifdef BITSTREAM_READER_LE
+        left = s->bits_left;
+#endif
+        tmp = get_val(s, s->bits_left);
+        refill_32(s);
+    }
+
+#ifdef BITSTREAM_READER_LE
+    tmp = get_val(s, n) << left | tmp;
+#else
+    tmp = get_val(s, n) | tmp << n;
+#endif
+
+#else
     register int tmp;
     OPEN_READER(re, s);
     av_assert2(n>0 && n<=25);
     UPDATE_CACHE(re, s);
-    tmp = SHOW_SBITS(re, s, n);
+    tmp = SHOW_UBITS(re, s, n);
     LAST_SKIP_BITS(re, s, n);
     CLOSE_READER(re, s);
+#endif
     return tmp;
 }
 
-/**
- * Read 1-25 bits.
- */
-static inline unsigned int get_bits(GetBitContext *s, int n)
+static inline int get_sbits(GetBitContext *s, int n)
 {
     register int tmp;
+#ifdef CACHED_BITSTREAM_READER
+    av_assert2(n>0 && n<=25);
+    tmp = sign_extend(get_bits(s, n), n);
+#else
     OPEN_READER(re, s);
     av_assert2(n>0 && n<=25);
     UPDATE_CACHE(re, s);
-    tmp = SHOW_UBITS(re, s, n);
+    tmp = SHOW_SBITS(re, s, n);
     LAST_SKIP_BITS(re, s, n);
     CLOSE_READER(re, s);
+#endif
     return tmp;
 }
 
@@ -278,6 +419,7 @@  static av_always_inline int get_bitsz(GetBitContext *s, int n)
     return n ? get_bits(s, n) : 0;
 }
 
+#ifndef CACHED_BITSTREAM_READER
 static inline unsigned int get_bits_le(GetBitContext *s, int n)
 {
     register int tmp;
@@ -289,29 +431,56 @@  static inline unsigned int get_bits_le(GetBitContext *s, int n)
     CLOSE_READER(re, s);
     return tmp;
 }
-
-/**
- * Show 1-25 bits.
- */
-static inline unsigned int show_bits(GetBitContext *s, int n)
-{
-    register int tmp;
-    OPEN_READER_NOSIZE(re, s);
-    av_assert2(n>0 && n<=25);
-    UPDATE_CACHE(re, s);
-    tmp = SHOW_UBITS(re, s, n);
-    return tmp;
-}
+#endif
 
 static inline void skip_bits(GetBitContext *s, int n)
 {
+#ifdef CACHED_BITSTREAM_READER
+    if (n < s->bits_left)
+        skip_remaining(s, n);
+    else {
+        n -= s->bits_left;
+        s->cache = 0;
+        s->bits_left = 0;
+
+        if (n >= 64) {
+            unsigned skip = (n / 8) * 8;
+
+            n -= skip;
+            s->index += skip;
+        }
+        refill_64(s);
+        if (n)
+            skip_remaining(s, n);
+    }
+#else
     OPEN_READER(re, s);
     LAST_SKIP_BITS(re, s, n);
     CLOSE_READER(re, s);
+#endif
+}
+
+static inline void skip_bits_long(GetBitContext *s, int n)
+{
+#ifdef CACHED_BITSTREAM_READER
+    skip_bits(s, n);
+#else
+#if UNCHECKED_BITSTREAM_READER
+    s->index += n;
+#else
+    s->index += av_clip(n, -s->index, s->size_in_bits_plus8 - s->index);
+#endif
+#endif
 }
 
 static inline unsigned int get_bits1(GetBitContext *s)
 {
+#ifdef CACHED_BITSTREAM_READER
+    if (!s->bits_left)
+        refill_64(s);
+
+    return get_val(s, 1);
+#else
     unsigned int index = s->index;
     uint8_t result     = s->buffer[index >> 3];
 #ifdef BITSTREAM_READER_LE
@@ -328,6 +497,7 @@  static inline unsigned int get_bits1(GetBitContext *s)
     s->index = index;
 
     return result;
+#endif
 }
 
 static inline unsigned int show_bits1(GetBitContext *s)
@@ -348,6 +518,10 @@  static inline unsigned int get_bits_long(GetBitContext *s, int n)
     av_assert2(n>=0 && n<=32);
     if (!n) {
         return 0;
+#ifdef CACHED_BITSTREAM_READER
+    }
+    return get_bits(s, n);
+#else
     } else if (n <= MIN_CACHE_BITS) {
         return get_bits(s, n);
     } else {
@@ -359,6 +533,7 @@  static inline unsigned int get_bits_long(GetBitContext *s, int n)
         return ret | get_bits(s, n - 16);
 #endif
     }
+#endif
 }
 
 /**
@@ -442,6 +617,10 @@  static inline int init_get_bits(GetBitContext *s, const uint8_t *buffer,
     s->buffer_end         = buffer + buffer_size;
     s->index              = 0;
 
+#ifdef CACHED_BITSTREAM_READER
+    refill_64(s);
+#endif
+
     return ret;
 }
 
@@ -543,6 +722,19 @@  static inline const uint8_t *align_get_bits(GetBitContext *s)
         SKIP_BITS(name, gb, n);                                 \
     } while (0)
 
+/* Return the LUT element for the given bitstream configuration. */
+static inline int set_idx(GetBitContext *s, int code, int *n, int *nb_bits,
+                          VLC_TYPE (*table)[2])
+{
+    unsigned idx;
+
+    *nb_bits = -*n;
+    idx = show_bits(s, *nb_bits) + code;
+    *n = table[idx][1];
+
+    return table[idx][0];
+}
+
 /**
  * Parse a vlc code.
  * @param bits is the number of bits which will be read at once, must be
@@ -554,6 +746,24 @@  static inline const uint8_t *align_get_bits(GetBitContext *s)
 static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
                                      int bits, int max_depth)
 {
+#ifdef CACHED_BITSTREAM_READER
+    int nb_bits;
+    unsigned idx = show_bits(s, bits);
+    int code = table[idx][0];
+    int n    = table[idx][1];
+
+    if (max_depth > 1 && n < 0) {
+        skip_remaining(s, bits);
+        code = set_idx(s, code, &n, &nb_bits, table);
+        if (max_depth > 2 && n < 0) {
+            skip_remaining(s, nb_bits);
+            code = set_idx(s, code, &n, &nb_bits, table);
+        }
+    }
+    skip_remaining(s, n);
+
+    return code;
+#else
     int code;
 
     OPEN_READER(re, s);
@@ -564,6 +774,7 @@  static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
     CLOSE_READER(re, s);
 
     return code;
+#endif
 }
 
 static inline int decode012(GetBitContext *gb)
diff --git a/libavcodec/golomb.h b/libavcodec/golomb.h
index efb1eff..e66675b 100644
--- a/libavcodec/golomb.h
+++ b/libavcodec/golomb.h
@@ -54,6 +54,23 @@  static inline int get_ue_golomb(GetBitContext *gb)
 {
     unsigned int buf;
 
+#ifdef CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32);
+
+    if (buf >= (1 << 27)) {
+        buf >>= 32 - 9;
+        skip_bits_long(gb, ff_golomb_vlc_len[buf]);
+
+        return ff_ue_golomb_vlc_code[buf];
+    } else {
+        int log = 2 * av_log2(buf) - 31;
+        buf >>= log;
+        buf--;
+        skip_bits_long(gb, 32 - log);
+
+        return buf;
+    }
+#else
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf = GET_CACHE(re, gb);
@@ -77,6 +94,7 @@  static inline int get_ue_golomb(GetBitContext *gb)
 
         return buf;
     }
+#endif
 }
 
 /**
@@ -101,6 +119,13 @@  static inline int get_ue_golomb_31(GetBitContext *gb)
 {
     unsigned int buf;
 
+#ifdef CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32);
+
+    buf >>= 32 - 9;
+    skip_bits_long(gb, ff_golomb_vlc_len[buf]);
+#else
+
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf = GET_CACHE(re, gb);
@@ -108,6 +133,7 @@  static inline int get_ue_golomb_31(GetBitContext *gb)
     buf >>= 32 - 9;
     LAST_SKIP_BITS(re, gb, ff_golomb_vlc_len[buf]);
     CLOSE_READER(re, gb);
+#endif
 
     return ff_ue_golomb_vlc_code[buf];
 }
@@ -116,6 +142,33 @@  static inline unsigned get_interleaved_ue_golomb(GetBitContext *gb)
 {
     uint32_t buf;
 
+#ifdef CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32);
+
+    if (buf & 0xAA800000) {
+        buf >>= 32 - 8;
+        skip_bits_long(gb, ff_interleaved_golomb_vlc_len[buf]);
+
+        return ff_interleaved_ue_golomb_vlc_code[buf];
+    } else {
+        unsigned ret = 1;
+
+        do {
+            buf >>= 32 - 8;
+            skip_bits_long(gb, FFMIN(ff_interleaved_golomb_vlc_len[buf], 8));
+
+            if (ff_interleaved_golomb_vlc_len[buf] != 9) {
+                ret <<= (ff_interleaved_golomb_vlc_len[buf] - 1) >> 1;
+                ret  |= ff_interleaved_dirac_golomb_vlc_code[buf];
+                break;
+            }
+            ret = (ret << 4) | ff_interleaved_dirac_golomb_vlc_code[buf];
+            buf = show_bits_long(gb, 32);
+        } while (get_bits_left(gb) > 0);
+
+        return ret - 1;
+    }
+#else
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf = GET_CACHE(re, gb);
@@ -147,6 +200,7 @@  static inline unsigned get_interleaved_ue_golomb(GetBitContext *gb)
         CLOSE_READER(re, gb);
         return ret - 1;
     }
+#endif
 }
 
 /**
@@ -184,6 +238,28 @@  static inline int get_se_golomb(GetBitContext *gb)
 {
     unsigned int buf;
 
+#ifdef CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32);
+
+    if (buf >= (1 << 27)) {
+        buf >>= 32 - 9;
+        skip_bits_long(gb, ff_golomb_vlc_len[buf]);
+
+        return ff_se_golomb_vlc_code[buf];
+    } else {
+        int log = 2 * av_log2(buf) - 31;
+        buf >>= log;
+
+        skip_bits_long(gb, 32 - log);
+
+        if (buf & 1)
+            buf = -(buf >> 1);
+        else
+            buf = (buf >> 1);
+
+        return buf;
+    }
+#else
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf = GET_CACHE(re, gb);
@@ -210,6 +286,7 @@  static inline int get_se_golomb(GetBitContext *gb)
 
         return buf;
     }
+#endif
 }
 
 static inline int get_se_golomb_long(GetBitContext *gb)
@@ -223,6 +300,30 @@  static inline int get_interleaved_se_golomb(GetBitContext *gb)
 {
     unsigned int buf;
 
+#ifdef CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32);
+
+    if (buf & 0xAA800000) {
+        buf >>= 32 - 8;
+        skip_bits_long(gb, ff_interleaved_golomb_vlc_len[buf]);
+
+        return ff_interleaved_se_golomb_vlc_code[buf];
+    } else {
+        int log;
+        skip_bits(gb, 8);
+        buf |= 1 | show_bits_long(gb, 24);
+
+        if ((buf & 0xAAAAAAAA) == 0)
+            return INVALID_VLC;
+
+        for (log = 31; (buf & 0x80000000) == 0; log--)
+            buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30);
+
+        skip_bits_long(gb, 63 - 2 * log - 8);
+
+        return (signed) (((((buf << log) >> log) - 1) ^ -(buf & 0x1)) + 1) >> 1;
+    }
+#else
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf = GET_CACHE(re, gb);
@@ -250,6 +351,7 @@  static inline int get_interleaved_se_golomb(GetBitContext *gb)
 
         return (signed) (((((buf << log) >> log) - 1) ^ -(buf & 0x1)) + 1) >> 1;
     }
+#endif
 }
 
 static inline int dirac_get_se_golomb(GetBitContext *gb)
@@ -273,6 +375,24 @@  static inline int get_ur_golomb(GetBitContext *gb, int k, int limit,
     unsigned int buf;
     int log;
 
+#ifdef CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32);
+
+    log = av_log2(buf);
+
+    if (log > 31 - limit) {
+        buf >>= log - k;
+        buf  += (30 - log) << k;
+        skip_bits_long(gb, 32 + k - log);
+
+        return buf;
+    } else {
+        skip_bits_long(gb, limit);
+        buf = get_bits_long(gb, esc_len);
+
+        return buf + limit - 1;
+    }
+#else
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf = GET_CACHE(re, gb);
@@ -297,6 +417,7 @@  static inline int get_ur_golomb(GetBitContext *gb, int k, int limit,
 
         return buf + limit - 1;
     }
+#endif
 }
 
 /**
@@ -308,6 +429,35 @@  static inline int get_ur_golomb_jpegls(GetBitContext *gb, int k, int limit,
     unsigned int buf;
     int log;
 
+#ifdef CACHED_BITSTREAM_READER
+    buf = show_bits_long(gb, 32);
+
+    log = av_log2(buf);
+
+    if (log - k >= 1 && 32 - log < limit) {
+        buf >>= log - k;
+        buf  += (30 - log) << k;
+        skip_bits_long(gb, 32 + k - log);
+
+        return buf;
+    } else {
+        int i;
+        for (i = 0;
+             i < limit && get_bits1(gb) == 0 && get_bits_left(gb) > 0;
+             i++);
+
+        if (i < limit - 1) {
+            buf = get_bits_long(gb, k);
+
+            return buf + (i << k);
+        } else if (i == limit - 1) {
+            buf = get_bits_long(gb, esc_len);
+
+            return buf + 1;
+        } else
+            return -1;
+    }
+#else
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf = GET_CACHE(re, gb);
@@ -364,6 +514,7 @@  static inline int get_ur_golomb_jpegls(GetBitContext *gb, int k, int limit,
         CLOSE_READER(re, gb);
         return buf;
     }
+#endif
 }
 
 /**