[FFmpeg-devel,1/3] avcodec/get_bits: add cached bitstream reader
Commit Message
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
libavcodec/get_bits.h | 205 +++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 196 insertions(+), 9 deletions(-)
Comments
Hi,
On Fri, Jul 7, 2017 at 2:48 PM, Paul B Mahol <onemda@gmail.com> wrote:
> typedef struct GetBitContext {
> const uint8_t *buffer, *buffer_end;
> +#ifdef CACHED_BITSTREAM_READER
> + uint64_t cache;
> + unsigned bits_left;
> +#endif
>
Can you post some stats (from relevant systems, ideally, e.g. 32-bit binary
on x86, or 32-bit arm) on how a 32bit cache performs compared to a 64bit
cache on systems with HAVE_FAST_64BIT=0?
> +static inline void refill_32(GetBitContext *s)
>
[..]
> +#ifdef BITSTREAM_READER_LE
> + s->cache = (uint64_t)AV_RL32(s->buffer + (s->index >> 3)) <<
> s->cache | s->cache;
>
As said on IRC: middle s->cache should be s->bits_left.
Overall very nice improvement, I would in particular not be surprised if
this is generally faster for almost all users, except those using the
lower-level macros (things like SHOW_BITS() etc.) in the old interface. If
that's true, it may be positive to enable this by default and disable only
for those using the low-level interface.
(I'm assuming the low-level interface no longer works with the cached
reader, so can we prevent users from accessing these macros unless
cached=1?)
Ronald
On 7/7/17, Ronald S. Bultje <rsbultje@gmail.com> wrote:
> Hi,
>
> On Fri, Jul 7, 2017 at 2:48 PM, Paul B Mahol <onemda@gmail.com> wrote:
>
>> typedef struct GetBitContext {
>> const uint8_t *buffer, *buffer_end;
>> +#ifdef CACHED_BITSTREAM_READER
>> + uint64_t cache;
>> + unsigned bits_left;
>> +#endif
>>
>
> Can you post some stats (from relevant systems, ideally, e.g. 32-bit binary
> on x86, or 32-bit arm) on how a 32bit cache performs compared to a 64bit
> cache on systems with HAVE_FAST_64BIT=0?
I can only post results from 64bit x86 with gcc 6.2.0.
>
>
>> +static inline void refill_32(GetBitContext *s)
>>
> [..]
>
>> +#ifdef BITSTREAM_READER_LE
>> + s->cache = (uint64_t)AV_RL32(s->buffer + (s->index >> 3)) <<
>> s->cache | s->cache;
>>
>
> As said on IRC: middle s->cache should be s->bits_left.
>
> Overall very nice improvement, I would in particular not be surprised if
> this is generally faster for almost all users, except those using the
> lower-level macros (things like SHOW_BITS() etc.) in the old interface. If
> that's true, it may be positive to enable this by default and disable only
> for those using the low-level interface.
>
> (I'm assuming the low-level interface no longer works with the cached
> reader, so can we prevent users from accessing these macros unless
> cached=1?)
They are not supposed to work, Do you mean to not define such macros
if cached is defined?
Hi,
On Fri, Jul 7, 2017 at 4:43 PM, Paul B Mahol <onemda@gmail.com> wrote:
> On 7/7/17, Ronald S. Bultje <rsbultje@gmail.com> wrote:
> > (I'm assuming the low-level interface no longer works with the cached
> > reader, so can we prevent users from accessing these macros unless
> > cached=1?)
>
> They are not supposed to work, Do you mean to not define such macros
> if cached is defined?
Yes.
Ronald
On Fri, Jul 07, 2017 at 08:48:46PM +0200, Paul B Mahol wrote:
> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
> libavcodec/get_bits.h | 205 +++++++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 196 insertions(+), 9 deletions(-)
>
> diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
> index c530015..8a9021a 100644
> --- a/libavcodec/get_bits.h
> +++ b/libavcodec/get_bits.h
> @@ -1,5 +1,6 @@
> /*
> - * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
> + * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
> + * Copyright (c) 2016 Alexandra Hájková
> *
> * This file is part of FFmpeg.
> *
> @@ -54,6 +55,10 @@
>
> typedef struct GetBitContext {
> const uint8_t *buffer, *buffer_end;
> +#ifdef CACHED_BITSTREAM_READER
> + uint64_t cache;
> + unsigned bits_left;
> +#endif
> int index;
> int size_in_bits;
> int size_in_bits_plus8;
> @@ -106,7 +111,9 @@ typedef struct GetBitContext {
> * For examples see get_bits, show_bits, skip_bits, get_vlc.
> */
>
> -#ifdef LONG_BITSTREAM_READER
> +#ifdef CACHED_BITSTREAM_READER
> +# define MIN_CACHE_BITS 64
> +#elif defined LONG_BITSTREAM_READER
> # define MIN_CACHE_BITS 32
> #else
> # define MIN_CACHE_BITS 25
> @@ -198,7 +205,11 @@ typedef struct GetBitContext {
>
> static inline int get_bits_count(const GetBitContext *s)
> {
> +#ifdef CACHED_BITSTREAM_READER
> + return s->index - s->bits_left;
> +#else
> return s->index;
> +#endif
> }
>
> static inline void skip_bits_long(GetBitContext *s, int n)
> @@ -210,6 +221,68 @@ static inline void skip_bits_long(GetBitContext *s, int n)
> #endif
> }
>
> +static inline void refill_32(GetBitContext *s)
> +{
> +#ifdef CACHED_BITSTREAM_READER
> + if (s->buffer + (s->index >> 3) >= s->buffer_end)
> + return;
should be under !UNCHECKED_BITSTREAM_READER
also this looks like it can result in intermediate invalid pointers
this should avoid that:
s->index >> 3 >= s->buffer_end - s->buffer
same issue in refill_64()
patch overall is nice
thanks
[...]
@@ -1,5 +1,6 @@
/*
- * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2016 Alexandra Hájková
*
* This file is part of FFmpeg.
*
@@ -54,6 +55,10 @@
typedef struct GetBitContext {
const uint8_t *buffer, *buffer_end;
+#ifdef CACHED_BITSTREAM_READER
+ uint64_t cache;
+ unsigned bits_left;
+#endif
int index;
int size_in_bits;
int size_in_bits_plus8;
@@ -106,7 +111,9 @@ typedef struct GetBitContext {
* For examples see get_bits, show_bits, skip_bits, get_vlc.
*/
-#ifdef LONG_BITSTREAM_READER
+#ifdef CACHED_BITSTREAM_READER
+# define MIN_CACHE_BITS 64
+#elif defined LONG_BITSTREAM_READER
# define MIN_CACHE_BITS 32
#else
# define MIN_CACHE_BITS 25
@@ -198,7 +205,11 @@ typedef struct GetBitContext {
static inline int get_bits_count(const GetBitContext *s)
{
+#ifdef CACHED_BITSTREAM_READER
+ return s->index - s->bits_left;
+#else
return s->index;
+#endif
}
static inline void skip_bits_long(GetBitContext *s, int n)
@@ -210,6 +221,68 @@ static inline void skip_bits_long(GetBitContext *s, int n)
#endif
}
+static inline void refill_32(GetBitContext *s)
+{
+#ifdef CACHED_BITSTREAM_READER
+ if (s->buffer + (s->index >> 3) >= s->buffer_end)
+ return;
+
+#ifdef BITSTREAM_READER_LE
+ s->cache = (uint64_t)AV_RL32(s->buffer + (s->index >> 3)) << s->cache | s->cache;
+#else
+ s->cache = s->cache | (uint64_t)AV_RB32(s->buffer + (s->index >> 3)) << (32 - s->bits_left);
+#endif
+ s->index += 32;
+ s->bits_left += 32;
+#endif
+}
+
+static inline void refill_64(GetBitContext *s)
+{
+#ifdef CACHED_BITSTREAM_READER
+#if !UNCHECKED_BITSTREAM_READER
+ if (s->buffer + (s->index >> 3) >= s->buffer_end)
+ return;
+#endif
+
+#ifdef BITSTREAM_READER_LE
+ s->cache = AV_RL64(s->buffer + (s->index >> 3));
+#else
+ s->cache = AV_RB64(s->buffer + (s->index >> 3));
+#endif
+ s->index += 64;
+ s->bits_left = 64;
+#endif
+}
+
+#ifdef CACHED_BITSTREAM_READER
+static inline uint64_t get_val(GetBitContext *s, unsigned n)
+{
+ uint64_t ret;
+ av_assert2(n>0 && n<=63);
+#ifdef BITSTREAM_READER_LE
+ ret = s->cache & ((UINT64_C(1) << n) - 1);
+ s->cache >>= n;
+#else
+ ret = s->cache >> (64 - n);
+ s->cache <<= n;
+#endif
+ s->bits_left -= n;
+ return ret;
+}
+#endif
+
+#ifdef CACHED_BITSTREAM_READER
+static inline unsigned show_val(const GetBitContext *s, unsigned n)
+{
+#ifdef BITSTREAM_READER_LE
+ return s->cache & ((UINT64_C(1) << n) - 1);
+#else
+ return s->cache >> (64 - n);
+#endif
+}
+#endif
+
/**
* Read MPEG-1 dc-style VLC (sign bit + mantissa with no MSB).
* if MSB not set it is negative
@@ -243,30 +316,59 @@ static inline int get_xbits_le(GetBitContext *s, int n)
return (zero_extend(sign ^ cache, n) ^ sign) - sign;
}
-static inline int get_sbits(GetBitContext *s, int n)
+/**
+ * Read 1-25 bits.
+ */
+static inline unsigned int get_bits(GetBitContext *s, int n)
{
+#ifdef CACHED_BITSTREAM_READER
+ register int tmp = 0;
+#ifdef BITSTREAM_READER_LE
+ uint64_t left = 0;
+#endif
+
+ av_assert2(n>0 && n<=32);
+ if (n > s->bits_left) {
+ n -= s->bits_left;
+#ifdef BITSTREAM_READER_LE
+ left = s->bits_left;
+#endif
+ tmp = get_val(s, s->bits_left);
+ refill_32(s);
+ }
+
+#ifdef BITSTREAM_READER_LE
+ tmp = get_val(s, n) << left | tmp;
+#else
+ tmp = get_val(s, n) | tmp << n;
+#endif
+
+#else
register int tmp;
OPEN_READER(re, s);
av_assert2(n>0 && n<=25);
UPDATE_CACHE(re, s);
- tmp = SHOW_SBITS(re, s, n);
+ tmp = SHOW_UBITS(re, s, n);
LAST_SKIP_BITS(re, s, n);
CLOSE_READER(re, s);
+#endif
return tmp;
}
-/**
- * Read 1-25 bits.
- */
-static inline unsigned int get_bits(GetBitContext *s, int n)
+static inline int get_sbits(GetBitContext *s, int n)
{
register int tmp;
+#ifdef CACHED_BITSTREAM_READER
+ av_assert2(n>0 && n<=25);
+ tmp = sign_extend(get_bits(s, n), n);
+#else
OPEN_READER(re, s);
av_assert2(n>0 && n<=25);
UPDATE_CACHE(re, s);
- tmp = SHOW_UBITS(re, s, n);
+ tmp = SHOW_SBITS(re, s, n);
LAST_SKIP_BITS(re, s, n);
CLOSE_READER(re, s);
+#endif
return tmp;
}
@@ -296,22 +398,65 @@ static inline unsigned int get_bits_le(GetBitContext *s, int n)
static inline unsigned int show_bits(GetBitContext *s, int n)
{
register int tmp;
+#ifdef CACHED_BITSTREAM_READER
+ if (n > s->bits_left)
+ refill_32(s);
+
+ tmp = show_val(s, n);
+#else
OPEN_READER_NOSIZE(re, s);
av_assert2(n>0 && n<=25);
UPDATE_CACHE(re, s);
tmp = SHOW_UBITS(re, s, n);
+#endif
return tmp;
}
+#ifdef CACHED_BITSTREAM_READER
+static inline void skip_remaining(GetBitContext *s, unsigned n)
+{
+#ifdef BITSTREAM_READER_LE
+ s->cache >>= n;
+#else
+ s->cache <<= n;
+#endif
+ s->bits_left -= n;
+}
+#endif
+
static inline void skip_bits(GetBitContext *s, int n)
{
+#ifdef CACHED_BITSTREAM_READER
+ if (n <= s->bits_left)
+ skip_remaining(s, n);
+ else {
+ n -= s->bits_left;
+ skip_remaining(s, s->bits_left);
+ if (n >= 64) {
+ unsigned skip = n;
+
+ n -= skip;
+ s->index += skip;
+ }
+ refill_32(s);
+ if (n)
+ skip_remaining(s, n);
+ }
+#else
OPEN_READER(re, s);
LAST_SKIP_BITS(re, s, n);
CLOSE_READER(re, s);
+#endif
}
static inline unsigned int get_bits1(GetBitContext *s)
{
+#ifdef CACHED_BITSTREAM_READER
+ if (!s->bits_left)
+ refill_64(s);
+
+ return get_val(s, 1);
+#else
unsigned int index = s->index;
uint8_t result = s->buffer[index >> 3];
#ifdef BITSTREAM_READER_LE
@@ -328,6 +473,7 @@ static inline unsigned int get_bits1(GetBitContext *s)
s->index = index;
return result;
+#endif
}
static inline unsigned int show_bits1(GetBitContext *s)
@@ -348,6 +494,10 @@ static inline unsigned int get_bits_long(GetBitContext *s, int n)
av_assert2(n>=0 && n<=32);
if (!n) {
return 0;
+#ifdef CACHED_BITSTREAM_READER
+ }
+ return get_bits(s, n);
+#else
} else if (n <= MIN_CACHE_BITS) {
return get_bits(s, n);
} else {
@@ -359,6 +509,7 @@ static inline unsigned int get_bits_long(GetBitContext *s, int n)
return ret | get_bits(s, n - 16);
#endif
}
+#endif
}
/**
@@ -442,6 +593,10 @@ static inline int init_get_bits(GetBitContext *s, const uint8_t *buffer,
s->buffer_end = buffer + buffer_size;
s->index = 0;
+#ifdef CACHED_BITSTREAM_READER
+ refill_64(s);
+#endif
+
return ret;
}
@@ -543,6 +698,19 @@ static inline const uint8_t *align_get_bits(GetBitContext *s)
SKIP_BITS(name, gb, n); \
} while (0)
+/* Return the LUT element for the given bitstream configuration. */
+static inline int set_idx(GetBitContext *s, int code, int *n, int *nb_bits,
+ VLC_TYPE (*table)[2])
+{
+ unsigned idx;
+
+ *nb_bits = -*n;
+ idx = show_bits(s, *nb_bits) + code;
+ *n = table[idx][1];
+
+ return table[idx][0];
+}
+
/**
* Parse a vlc code.
* @param bits is the number of bits which will be read at once, must be
@@ -554,6 +722,24 @@ static inline const uint8_t *align_get_bits(GetBitContext *s)
static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
int bits, int max_depth)
{
+#ifdef CACHED_BITSTREAM_READER
+ int nb_bits;
+ unsigned idx = show_bits(s, bits);
+ int code = table[idx][0];
+ int n = table[idx][1];
+
+ if (max_depth > 1 && n < 0) {
+ skip_remaining(s, bits);
+ code = set_idx(s, code, &n, &nb_bits, table);
+ if (max_depth > 2 && n < 0) {
+ skip_remaining(s, nb_bits);
+ code = set_idx(s, code, &n, &nb_bits, table);
+ }
+ }
+ skip_remaining(s, n);
+
+ return code;
+#else
int code;
OPEN_READER(re, s);
@@ -564,6 +750,7 @@ static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
CLOSE_READER(re, s);
return code;
+#endif
}
static inline int decode012(GetBitContext *gb)