diff mbox

[FFmpeg-devel,1/3] avcodec/get_bits: add cached bitstream reader

Message ID 20170707184848.20864-1-onemda@gmail.com
State Superseded
Headers show

Commit Message

Paul B Mahol July 7, 2017, 6:48 p.m. UTC
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavcodec/get_bits.h | 205 +++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 196 insertions(+), 9 deletions(-)

Comments

Ronald S. Bultje July 7, 2017, 8:25 p.m. UTC | #1
Hi,

On Fri, Jul 7, 2017 at 2:48 PM, Paul B Mahol <onemda@gmail.com> wrote:

>  typedef struct GetBitContext {
>      const uint8_t *buffer, *buffer_end;
> +#ifdef CACHED_BITSTREAM_READER
> +    uint64_t cache;
> +    unsigned bits_left;
> +#endif
>

Can you post some stats (from relevant systems, ideally, e.g. 32-bit binary
on x86, or 32-bit arm) on how a 32bit cache performs compared to a 64bit
cache on systems with HAVE_FAST_64BIT=0?


> +static inline void refill_32(GetBitContext *s)
>
[..]

> +#ifdef BITSTREAM_READER_LE
> +    s->cache       = (uint64_t)AV_RL32(s->buffer + (s->index >> 3)) <<
> s->cache | s->cache;
>

As said on IRC: middle s->cache should be s->bits_left.

Overall very nice improvement, I would in particular not be surprised if
this is generally faster for almost all users, except those using the
lower-level macros (things like SHOW_BITS() etc.) in the old interface. If
that's true, it may be positive to enable this by default and disable only
for those using the low-level interface.

(I'm assuming the low-level interface no longer works with the cached
reader, so can we prevent users from accessing these macros unless
cached=1?)

Ronald
Paul B Mahol July 7, 2017, 8:43 p.m. UTC | #2
On 7/7/17, Ronald S. Bultje <rsbultje@gmail.com> wrote:
> Hi,
>
> On Fri, Jul 7, 2017 at 2:48 PM, Paul B Mahol <onemda@gmail.com> wrote:
>
>>  typedef struct GetBitContext {
>>      const uint8_t *buffer, *buffer_end;
>> +#ifdef CACHED_BITSTREAM_READER
>> +    uint64_t cache;
>> +    unsigned bits_left;
>> +#endif
>>
>
> Can you post some stats (from relevant systems, ideally, e.g. 32-bit binary
> on x86, or 32-bit arm) on how a 32bit cache performs compared to a 64bit
> cache on systems with HAVE_FAST_64BIT=0?

I can only post results from 64bit x86 with gcc 6.2.0.

>
>
>> +static inline void refill_32(GetBitContext *s)
>>
> [..]
>
>> +#ifdef BITSTREAM_READER_LE
>> +    s->cache       = (uint64_t)AV_RL32(s->buffer + (s->index >> 3)) <<
>> s->cache | s->cache;
>>
>
> As said on IRC: middle s->cache should be s->bits_left.
>
> Overall very nice improvement, I would in particular not be surprised if
> this is generally faster for almost all users, except those using the
> lower-level macros (things like SHOW_BITS() etc.) in the old interface. If
> that's true, it may be positive to enable this by default and disable only
> for those using the low-level interface.
>
> (I'm assuming the low-level interface no longer works with the cached
> reader, so can we prevent users from accessing these macros unless
> cached=1?)

They are not supposed to work, Do you mean to not define such macros
if cached is defined?
Ronald S. Bultje July 7, 2017, 8:46 p.m. UTC | #3
Hi,

On Fri, Jul 7, 2017 at 4:43 PM, Paul B Mahol <onemda@gmail.com> wrote:

> On 7/7/17, Ronald S. Bultje <rsbultje@gmail.com> wrote:
> > (I'm assuming the low-level interface no longer works with the cached
> > reader, so can we prevent users from accessing these macros unless
> > cached=1?)
>
> They are not supposed to work, Do you mean to not define such macros
> if cached is defined?


Yes.

Ronald
Michael Niedermayer July 8, 2017, midnight UTC | #4
On Fri, Jul 07, 2017 at 08:48:46PM +0200, Paul B Mahol wrote:
> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
>  libavcodec/get_bits.h | 205 +++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 196 insertions(+), 9 deletions(-)
> 
> diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
> index c530015..8a9021a 100644
> --- a/libavcodec/get_bits.h
> +++ b/libavcodec/get_bits.h
> @@ -1,5 +1,6 @@
>  /*
> - * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
> + * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
> + * Copyright (c) 2016 Alexandra Hájková
>   *
>   * This file is part of FFmpeg.
>   *
> @@ -54,6 +55,10 @@
>  
>  typedef struct GetBitContext {
>      const uint8_t *buffer, *buffer_end;
> +#ifdef CACHED_BITSTREAM_READER
> +    uint64_t cache;
> +    unsigned bits_left;
> +#endif
>      int index;
>      int size_in_bits;
>      int size_in_bits_plus8;
> @@ -106,7 +111,9 @@ typedef struct GetBitContext {
>   * For examples see get_bits, show_bits, skip_bits, get_vlc.
>   */
>  
> -#ifdef LONG_BITSTREAM_READER
> +#ifdef CACHED_BITSTREAM_READER
> +#   define MIN_CACHE_BITS 64
> +#elif defined LONG_BITSTREAM_READER
>  #   define MIN_CACHE_BITS 32
>  #else
>  #   define MIN_CACHE_BITS 25
> @@ -198,7 +205,11 @@ typedef struct GetBitContext {
>  
>  static inline int get_bits_count(const GetBitContext *s)
>  {
> +#ifdef CACHED_BITSTREAM_READER
> +    return s->index - s->bits_left;
> +#else
>      return s->index;
> +#endif
>  }
>  
>  static inline void skip_bits_long(GetBitContext *s, int n)
> @@ -210,6 +221,68 @@ static inline void skip_bits_long(GetBitContext *s, int n)
>  #endif
>  }
>  
> +static inline void refill_32(GetBitContext *s)
> +{
> +#ifdef CACHED_BITSTREAM_READER

> +    if (s->buffer + (s->index >> 3) >= s->buffer_end)
> +        return;

should be under !UNCHECKED_BITSTREAM_READER

also this looks like it can result in intermediate invalid pointers
this should avoid that:
s->index >> 3 >= s->buffer_end - s->buffer
same issue in refill_64()

patch overall is nice

thanks

[...]
diff mbox

Patch

diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index c530015..8a9021a 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -1,5 +1,6 @@ 
 /*
- * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2016 Alexandra Hájková
  *
  * This file is part of FFmpeg.
  *
@@ -54,6 +55,10 @@ 
 
 typedef struct GetBitContext {
     const uint8_t *buffer, *buffer_end;
+#ifdef CACHED_BITSTREAM_READER
+    uint64_t cache;
+    unsigned bits_left;
+#endif
     int index;
     int size_in_bits;
     int size_in_bits_plus8;
@@ -106,7 +111,9 @@  typedef struct GetBitContext {
  * For examples see get_bits, show_bits, skip_bits, get_vlc.
  */
 
-#ifdef LONG_BITSTREAM_READER
+#ifdef CACHED_BITSTREAM_READER
+#   define MIN_CACHE_BITS 64
+#elif defined LONG_BITSTREAM_READER
 #   define MIN_CACHE_BITS 32
 #else
 #   define MIN_CACHE_BITS 25
@@ -198,7 +205,11 @@  typedef struct GetBitContext {
 
 static inline int get_bits_count(const GetBitContext *s)
 {
+#ifdef CACHED_BITSTREAM_READER
+    return s->index - s->bits_left;
+#else
     return s->index;
+#endif
 }
 
 static inline void skip_bits_long(GetBitContext *s, int n)
@@ -210,6 +221,68 @@  static inline void skip_bits_long(GetBitContext *s, int n)
 #endif
 }
 
+static inline void refill_32(GetBitContext *s)
+{
+#ifdef CACHED_BITSTREAM_READER
+    if (s->buffer + (s->index >> 3) >= s->buffer_end)
+        return;
+
+#ifdef BITSTREAM_READER_LE
+    s->cache       = (uint64_t)AV_RL32(s->buffer + (s->index >> 3)) << s->cache | s->cache;
+#else
+    s->cache       = s->cache | (uint64_t)AV_RB32(s->buffer + (s->index >> 3)) << (32 - s->bits_left);
+#endif
+    s->index     += 32;
+    s->bits_left += 32;
+#endif
+}
+
+static inline void refill_64(GetBitContext *s)
+{
+#ifdef CACHED_BITSTREAM_READER
+#if !UNCHECKED_BITSTREAM_READER
+    if (s->buffer + (s->index >> 3) >= s->buffer_end)
+        return;
+#endif
+
+#ifdef BITSTREAM_READER_LE
+    s->cache = AV_RL64(s->buffer + (s->index >> 3));
+#else
+    s->cache = AV_RB64(s->buffer + (s->index >> 3));
+#endif
+    s->index += 64;
+    s->bits_left = 64;
+#endif
+}
+
+#ifdef CACHED_BITSTREAM_READER
+static inline uint64_t get_val(GetBitContext *s, unsigned n)
+{
+    uint64_t ret;
+    av_assert2(n>0 && n<=63);
+#ifdef BITSTREAM_READER_LE
+    ret = s->cache & ((UINT64_C(1) << n) - 1);
+    s->cache >>= n;
+#else
+    ret = s->cache >> (64 - n);
+    s->cache <<= n;
+#endif
+    s->bits_left -= n;
+    return ret;
+}
+#endif
+
+#ifdef CACHED_BITSTREAM_READER
+static inline unsigned show_val(const GetBitContext *s, unsigned n)
+{
+#ifdef BITSTREAM_READER_LE
+    return s->cache & ((UINT64_C(1) << n) - 1);
+#else
+    return s->cache >> (64 - n);
+#endif
+}
+#endif
+
 /**
  * Read MPEG-1 dc-style VLC (sign bit + mantissa with no MSB).
  * if MSB not set it is negative
@@ -243,30 +316,59 @@  static inline int get_xbits_le(GetBitContext *s, int n)
     return (zero_extend(sign ^ cache, n) ^ sign) - sign;
 }
 
-static inline int get_sbits(GetBitContext *s, int n)
+/**
+ * Read 1-25 bits.
+ */
+static inline unsigned int get_bits(GetBitContext *s, int n)
 {
+#ifdef CACHED_BITSTREAM_READER
+    register int tmp = 0;
+#ifdef BITSTREAM_READER_LE
+    uint64_t left = 0;
+#endif
+
+    av_assert2(n>0 && n<=32);
+    if (n > s->bits_left) {
+        n -= s->bits_left;
+#ifdef BITSTREAM_READER_LE
+        left = s->bits_left;
+#endif
+        tmp = get_val(s, s->bits_left);
+        refill_32(s);
+    }
+
+#ifdef BITSTREAM_READER_LE
+    tmp = get_val(s, n) << left | tmp;
+#else
+    tmp = get_val(s, n) | tmp << n;
+#endif
+
+#else
     register int tmp;
     OPEN_READER(re, s);
     av_assert2(n>0 && n<=25);
     UPDATE_CACHE(re, s);
-    tmp = SHOW_SBITS(re, s, n);
+    tmp = SHOW_UBITS(re, s, n);
     LAST_SKIP_BITS(re, s, n);
     CLOSE_READER(re, s);
+#endif
     return tmp;
 }
 
-/**
- * Read 1-25 bits.
- */
-static inline unsigned int get_bits(GetBitContext *s, int n)
+static inline int get_sbits(GetBitContext *s, int n)
 {
     register int tmp;
+#ifdef CACHED_BITSTREAM_READER
+    av_assert2(n>0 && n<=25);
+    tmp = sign_extend(get_bits(s, n), n);
+#else
     OPEN_READER(re, s);
     av_assert2(n>0 && n<=25);
     UPDATE_CACHE(re, s);
-    tmp = SHOW_UBITS(re, s, n);
+    tmp = SHOW_SBITS(re, s, n);
     LAST_SKIP_BITS(re, s, n);
     CLOSE_READER(re, s);
+#endif
     return tmp;
 }
 
@@ -296,22 +398,65 @@  static inline unsigned int get_bits_le(GetBitContext *s, int n)
 static inline unsigned int show_bits(GetBitContext *s, int n)
 {
     register int tmp;
+#ifdef CACHED_BITSTREAM_READER
+    if (n > s->bits_left)
+        refill_32(s);
+
+    tmp = show_val(s, n);
+#else
     OPEN_READER_NOSIZE(re, s);
     av_assert2(n>0 && n<=25);
     UPDATE_CACHE(re, s);
     tmp = SHOW_UBITS(re, s, n);
+#endif
     return tmp;
 }
 
+#ifdef CACHED_BITSTREAM_READER
+static inline void skip_remaining(GetBitContext *s, unsigned n)
+{
+#ifdef BITSTREAM_READER_LE
+    s->cache >>= n;
+#else
+    s->cache <<= n;
+#endif
+    s->bits_left -= n;
+}
+#endif
+
 static inline void skip_bits(GetBitContext *s, int n)
 {
+#ifdef CACHED_BITSTREAM_READER
+    if (n <= s->bits_left)
+        skip_remaining(s, n);
+    else {
+        n -= s->bits_left;
+        skip_remaining(s, s->bits_left);
+        if (n >= 64) {
+            unsigned skip = n;
+
+            n -= skip;
+            s->index += skip;
+        }
+        refill_32(s);
+        if (n)
+            skip_remaining(s, n);
+    }
+#else
     OPEN_READER(re, s);
     LAST_SKIP_BITS(re, s, n);
     CLOSE_READER(re, s);
+#endif
 }
 
 static inline unsigned int get_bits1(GetBitContext *s)
 {
+#ifdef CACHED_BITSTREAM_READER
+    if (!s->bits_left)
+        refill_64(s);
+
+    return get_val(s, 1);
+#else
     unsigned int index = s->index;
     uint8_t result     = s->buffer[index >> 3];
 #ifdef BITSTREAM_READER_LE
@@ -328,6 +473,7 @@  static inline unsigned int get_bits1(GetBitContext *s)
     s->index = index;
 
     return result;
+#endif
 }
 
 static inline unsigned int show_bits1(GetBitContext *s)
@@ -348,6 +494,10 @@  static inline unsigned int get_bits_long(GetBitContext *s, int n)
     av_assert2(n>=0 && n<=32);
     if (!n) {
         return 0;
+#ifdef CACHED_BITSTREAM_READER
+    }
+    return get_bits(s, n);
+#else
     } else if (n <= MIN_CACHE_BITS) {
         return get_bits(s, n);
     } else {
@@ -359,6 +509,7 @@  static inline unsigned int get_bits_long(GetBitContext *s, int n)
         return ret | get_bits(s, n - 16);
 #endif
     }
+#endif
 }
 
 /**
@@ -442,6 +593,10 @@  static inline int init_get_bits(GetBitContext *s, const uint8_t *buffer,
     s->buffer_end         = buffer + buffer_size;
     s->index              = 0;
 
+#ifdef CACHED_BITSTREAM_READER
+    refill_64(s);
+#endif
+
     return ret;
 }
 
@@ -543,6 +698,19 @@  static inline const uint8_t *align_get_bits(GetBitContext *s)
         SKIP_BITS(name, gb, n);                                 \
     } while (0)
 
+/* Return the LUT element for the given bitstream configuration. */
+static inline int set_idx(GetBitContext *s, int code, int *n, int *nb_bits,
+                          VLC_TYPE (*table)[2])
+{
+    unsigned idx;
+
+    *nb_bits = -*n;
+    idx = show_bits(s, *nb_bits) + code;
+    *n = table[idx][1];
+
+    return table[idx][0];
+}
+
 /**
  * Parse a vlc code.
  * @param bits is the number of bits which will be read at once, must be
@@ -554,6 +722,24 @@  static inline const uint8_t *align_get_bits(GetBitContext *s)
 static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
                                      int bits, int max_depth)
 {
+#ifdef CACHED_BITSTREAM_READER
+    int nb_bits;
+    unsigned idx = show_bits(s, bits);
+    int code = table[idx][0];
+    int n    = table[idx][1];
+
+    if (max_depth > 1 && n < 0) {
+        skip_remaining(s, bits);
+        code = set_idx(s, code, &n, &nb_bits, table);
+        if (max_depth > 2 && n < 0) {
+            skip_remaining(s, nb_bits);
+            code = set_idx(s, code, &n, &nb_bits, table);
+        }
+    }
+    skip_remaining(s, n);
+
+    return code;
+#else
     int code;
 
     OPEN_READER(re, s);
@@ -564,6 +750,7 @@  static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
     CLOSE_READER(re, s);
 
     return code;
+#endif
 }
 
 static inline int decode012(GetBitContext *gb)