From patchwork Sat Jul 18 14:53:02 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Steinar H. Gunderson" X-Patchwork-Id: 21171 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id F38AB44A108 for ; Sat, 18 Jul 2020 17:53:11 +0300 (EEST) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id D9D5668B771; Sat, 18 Jul 2020 17:53:11 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from pannekake.samfundet.no (pannekake.samfundet.no [193.35.52.50]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 6BFEA68B6F8 for ; Sat, 18 Jul 2020 17:53:05 +0300 (EEST) Received: from sesse by pannekake.samfundet.no with local (Exim 4.92) (envelope-from ) id 1jwoD2-0004Rv-Eg; Sat, 18 Jul 2020 16:53:04 +0200 From: "Steinar H. Gunderson" To: ffmpeg-devel@ffmpeg.org Date: Sat, 18 Jul 2020 16:53:02 +0200 Message-Id: <20200718145303.17059-1-steinar+ffmpeg@gunderson.no> X-Mailer: git-send-email 2.20.1 MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH v3 1/2] avcodec/put_bits: Parametrize bit buffer type X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: "Steinar H. Gunderson" Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Preparatory patch for making the bit buffer a different size on different platforms; introduce a typedef and turn all the hardcoded sizes into expressions derived from the size of that type. No functional change; generated assembler is near-identical.
--- libavcodec/mpegvideo_enc.c | 2 +- libavcodec/put_bits.h | 95 +++++++++++++++++++++----------------- 2 files changed, 53 insertions(+), 44 deletions(-) diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c index c3ef40556a..21c30a9f8a 100644 --- a/libavcodec/mpegvideo_enc.c +++ b/libavcodec/mpegvideo_enc.c @@ -3914,7 +3914,7 @@ static int encode_picture(MpegEncContext *s, int picture_number) s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*)); for(i=1; ipb.buf_end == s->thread_context[i]->pb.buf) - set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-32)); + set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-BUF_BITS)); merge_context_after_encode(s, s->thread_context[i]); } emms_c(); diff --git a/libavcodec/put_bits.h b/libavcodec/put_bits.h index 7d11a3576a..c6a8f3ac14 100644 --- a/libavcodec/put_bits.h +++ b/libavcodec/put_bits.h @@ -32,8 +32,14 @@ #include "libavutil/intreadwrite.h" #include "libavutil/avassert.h" +typedef uint32_t BitBuf; +#define AV_WBBUF AV_WB32 +#define AV_WLBUF AV_WL32 + +static const int BUF_BITS = 8 * sizeof(BitBuf); + typedef struct PutBitContext { - uint32_t bit_buf; + BitBuf bit_buf; int bit_left; uint8_t *buf, *buf_ptr, *buf_end; int size_in_bits; @@ -57,7 +63,7 @@ static inline void init_put_bits(PutBitContext *s, uint8_t *buffer, s->buf = buffer; s->buf_end = s->buf + buffer_size; s->buf_ptr = s->buf; - s->bit_left = 32; + s->bit_left = BUF_BITS; s->bit_buf = 0; } @@ -66,7 +72,7 @@ static inline void init_put_bits(PutBitContext *s, uint8_t *buffer, */ static inline int put_bits_count(PutBitContext *s) { - return (s->buf_ptr - s->buf) * 8 + 32 - s->bit_left; + return (s->buf_ptr - s->buf) * 8 + BUF_BITS - s->bit_left; } /** @@ -92,7 +98,7 @@ static inline void rebase_put_bits(PutBitContext *s, uint8_t *buffer, */ static inline int put_bits_left(PutBitContext* s) { - return 
(s->buf_end - s->buf_ptr) * 8 - 32 + s->bit_left; + return (s->buf_end - s->buf_ptr) * 8 - BUF_BITS + s->bit_left; } /** @@ -101,33 +107,33 @@ static inline int put_bits_left(PutBitContext* s) static inline void flush_put_bits(PutBitContext *s) { #ifndef BITSTREAM_WRITER_LE - if (s->bit_left < 32) + if (s->bit_left < BUF_BITS) s->bit_buf <<= s->bit_left; #endif - while (s->bit_left < 32) { + while (s->bit_left < BUF_BITS) { av_assert0(s->buf_ptr < s->buf_end); #ifdef BITSTREAM_WRITER_LE *s->buf_ptr++ = s->bit_buf; s->bit_buf >>= 8; #else - *s->buf_ptr++ = s->bit_buf >> 24; + *s->buf_ptr++ = s->bit_buf >> (BUF_BITS - 8); s->bit_buf <<= 8; #endif s->bit_left += 8; } - s->bit_left = 32; + s->bit_left = BUF_BITS; s->bit_buf = 0; } static inline void flush_put_bits_le(PutBitContext *s) { - while (s->bit_left < 32) { + while (s->bit_left < BUF_BITS) { av_assert0(s->buf_ptr < s->buf_end); *s->buf_ptr++ = s->bit_buf; s->bit_buf >>= 8; s->bit_left += 8; } - s->bit_left = 32; + s->bit_left = BUF_BITS; s->bit_buf = 0; } @@ -161,29 +167,29 @@ void avpriv_copy_bits(PutBitContext *pb, const uint8_t *src, int length); * Write up to 31 bits into a bitstream. * Use put_bits32 to write 32 bits. 
*/ -static inline void put_bits(PutBitContext *s, int n, unsigned int value) +static inline void put_bits(PutBitContext *s, int n, BitBuf value) { - unsigned int bit_buf; + BitBuf bit_buf; int bit_left; - av_assert2(n <= 31 && value < (1U << n)); + av_assert2(n <= 31 && value < (1UL << n)); bit_buf = s->bit_buf; bit_left = s->bit_left; /* XXX: optimize */ #ifdef BITSTREAM_WRITER_LE - bit_buf |= value << (32 - bit_left); + bit_buf |= value << (BUF_BITS - bit_left); if (n >= bit_left) { - if (3 < s->buf_end - s->buf_ptr) { - AV_WL32(s->buf_ptr, bit_buf); - s->buf_ptr += 4; + if (s->buf_end - s->buf_ptr >= sizeof(BitBuf)) { + AV_WLBUF(s->buf_ptr, bit_buf); + s->buf_ptr += sizeof(BitBuf); } else { av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n"); av_assert2(0); } bit_buf = value >> bit_left; - bit_left += 32; + bit_left += BUF_BITS; } bit_left -= n; #else @@ -193,14 +199,14 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) } else { bit_buf <<= bit_left; bit_buf |= value >> (n - bit_left); - if (3 < s->buf_end - s->buf_ptr) { - AV_WB32(s->buf_ptr, bit_buf); - s->buf_ptr += 4; + if (s->buf_end - s->buf_ptr >= sizeof(BitBuf)) { + AV_WBBUF(s->buf_ptr, bit_buf); + s->buf_ptr += sizeof(BitBuf); } else { av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n"); av_assert2(0); } - bit_left += 32 - n; + bit_left += BUF_BITS - n; bit_buf = value; } #endif @@ -209,27 +215,27 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) s->bit_left = bit_left; } -static inline void put_bits_le(PutBitContext *s, int n, unsigned int value) +static inline void put_bits_le(PutBitContext *s, int n, BitBuf value) { - unsigned int bit_buf; + BitBuf bit_buf; int bit_left; - av_assert2(n <= 31 && value < (1U << n)); + av_assert2(n <= 31 && value < (1UL << n)); bit_buf = s->bit_buf; bit_left = s->bit_left; - bit_buf |= value << (32 - bit_left); + bit_buf |= value << (BUF_BITS - bit_left); if (n >= bit_left) { 
- if (3 < s->buf_end - s->buf_ptr) { - AV_WL32(s->buf_ptr, bit_buf); - s->buf_ptr += 4; + if (s->buf_end - s->buf_ptr >= sizeof(BitBuf)) { + AV_WLBUF(s->buf_ptr, bit_buf); + s->buf_ptr += sizeof(BitBuf); } else { av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n"); av_assert2(0); } bit_buf = value >> bit_left; - bit_left += 32; + bit_left += BUF_BITS; } bit_left -= n; @@ -249,17 +255,17 @@ static inline void put_sbits(PutBitContext *pb, int n, int32_t value) */ static void av_unused put_bits32(PutBitContext *s, uint32_t value) { - unsigned int bit_buf; + BitBuf bit_buf; int bit_left; bit_buf = s->bit_buf; bit_left = s->bit_left; #ifdef BITSTREAM_WRITER_LE - bit_buf |= value << (32 - bit_left); - if (3 < s->buf_end - s->buf_ptr) { - AV_WL32(s->buf_ptr, bit_buf); - s->buf_ptr += 4; + bit_buf |= (BitBuf)value << (BUF_BITS - bit_left); + if (s->buf_end - s->buf_ptr >= sizeof(BitBuf)) { + AV_WLBUF(s->buf_ptr, bit_buf); + s->buf_ptr += sizeof(BitBuf); } else { av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n"); av_assert2(0); @@ -267,10 +273,10 @@ static void av_unused put_bits32(PutBitContext *s, uint32_t value) bit_buf = (uint64_t)value >> bit_left; #else bit_buf = (uint64_t)bit_buf << bit_left; - bit_buf |= value >> (32 - bit_left); - if (3 < s->buf_end - s->buf_ptr) { - AV_WB32(s->buf_ptr, bit_buf); - s->buf_ptr += 4; + bit_buf |= (BitBuf)value >> (BUF_BITS - bit_left); + if (s->buf_end - s->buf_ptr >= sizeof(BitBuf)) { + AV_WBBUF(s->buf_ptr, bit_buf); + s->buf_ptr += sizeof(BitBuf); } else { av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n"); av_assert2(0); @@ -333,7 +339,7 @@ static inline uint8_t *put_bits_ptr(PutBitContext *s) static inline void skip_put_bytes(PutBitContext *s, int n) { av_assert2((put_bits_count(s) & 7) == 0); - av_assert2(s->bit_left == 32); + av_assert2(s->bit_left == BUF_BITS); av_assert0(n <= s->buf_end - s->buf_ptr); s->buf_ptr += n; } @@ -346,8 +352,8 @@ static inline void 
skip_put_bytes(PutBitContext *s, int n) static inline void skip_put_bits(PutBitContext *s, int n) { s->bit_left -= n; - s->buf_ptr -= 4 * (s->bit_left >> 5); - s->bit_left &= 31; + s->buf_ptr -= sizeof(BitBuf) * ((unsigned)s->bit_left / BUF_BITS); + s->bit_left &= (BUF_BITS - 1); } /** @@ -357,9 +363,12 @@ static inline void skip_put_bits(PutBitContext *s, int n) */ static inline void set_put_bits_buffer_size(PutBitContext *s, int size) { - av_assert0(size <= INT_MAX/8 - 32); + av_assert0(size <= INT_MAX/8 - BUF_BITS); s->buf_end = s->buf + size; s->size_in_bits = 8*size; } +#undef AV_WBBUF +#undef AV_WLBUF + #endif /* AVCODEC_PUT_BITS_H */ From patchwork Sat Jul 18 14:53:03 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Steinar H. Gunderson" X-Patchwork-Id: 21172 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id C2B6E44A108 for ; Sat, 18 Jul 2020 17:53:14 +0300 (EEST) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id A875668B773; Sat, 18 Jul 2020 17:53:14 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from pannekake.samfundet.no (pannekake.samfundet.no [193.35.52.50]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 8423768B757 for ; Sat, 18 Jul 2020 17:53:06 +0300 (EEST) Received: from sesse by pannekake.samfundet.no with local (Exim 4.92) (envelope-from ) id 1jwoD3-0004SI-RR; Sat, 18 Jul 2020 16:53:05 +0200 From: "Steinar H. 
Gunderson" To: ffmpeg-devel@ffmpeg.org Date: Sat, 18 Jul 2020 16:53:03 +0200 Message-Id: <20200718145303.17059-2-steinar+ffmpeg@gunderson.no> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200718145303.17059-1-steinar+ffmpeg@gunderson.no> References: <20200718145303.17059-1-steinar+ffmpeg@gunderson.no> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH v3 2/2] avcodec/put_bits: Make bit buffers 64-bit X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: "Steinar H. Gunderson" Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Change BitBuf into uint64_t on 64-bit x86. This means we need to flush the buffer less often, which is a significant speed win. All other platforms, including all 32-bit ones, are unchanged. Output bitstream is the same. All API constraints are kept in place, e.g., you still cannot put_bits() more than 31 bits at a time. This is so that codecs cannot accidentally become 64-bit-only or similar. Benchmarking on transcoding to various formats shows consistently positive results: dnxhd 25.60 fps -> 26.26 fps ( +2.6%) dvvideo 24.88 fps -> 25.17 fps ( +1.2%) ffv1 14.32 fps -> 14.58 fps ( +1.8%) huffyuv 58.75 fps -> 63.27 fps ( +7.7%) jpegls 6.22 fps -> 6.34 fps ( +1.8%) magicyuv 57.10 fps -> 63.29 fps (+10.8%) mjpeg 48.65 fps -> 49.01 fps ( +0.7%) mpeg1video 76.41 fps -> 77.01 fps ( +0.8%) mpeg2video 75.99 fps -> 77.43 fps ( +1.9%) mpeg4 80.66 fps -> 81.37 fps ( +0.9%) prores 12.35 fps -> 12.88 fps ( +4.3%) prores_ks 16.20 fps -> 16.80 fps ( +3.7%) rv20 62.80 fps -> 62.99 fps ( +0.3%) utvideo 68.41 fps -> 76.32 fps (+11.6%) Note that these timings include video decoding and all other encoding work, such as DCTs. If you isolate the actual bit-writing routines, the speedup is likely to be much larger.
Benchmark details: Transcoding the first 30 seconds of Big Buck Bunny in 1080p, Haswell 2.1 GHz, GCC 8.3, generally quantizer locked to 5.0. (Exceptions: DNxHD needs fixed bitrate, and JPEG-LS is so slow that I only took the first 10 seconds, not 30.) All runs were done ten times and single-threaded, top and bottom two results discarded to get rid of outliers, arithmetic mean between the remaining six. --- libavcodec/asvenc.c | 1 + libavcodec/put_bits.h | 31 ++++++++++++++++++++++++------- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/libavcodec/asvenc.c b/libavcodec/asvenc.c index c2c940f365..28f7a94071 100644 --- a/libavcodec/asvenc.c +++ b/libavcodec/asvenc.c @@ -295,6 +295,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, while (put_bits_count(&a->pb) & 31) put_bits(&a->pb, 8, 0); + flush_put_bits(&a->pb); size = put_bits_count(&a->pb) / 32; if (avctx->codec_id == AV_CODEC_ID_ASV1) { diff --git a/libavcodec/put_bits.h b/libavcodec/put_bits.h index c6a8f3ac14..ddd97906b2 100644 --- a/libavcodec/put_bits.h +++ b/libavcodec/put_bits.h @@ -29,12 +29,20 @@ #include #include +#include "config.h" #include "libavutil/intreadwrite.h" #include "libavutil/avassert.h" +#if ARCH_X86_64 +// TODO: Benchmark and optionally enable on other 64-bit architectures. +typedef uint64_t BitBuf; +#define AV_WBBUF AV_WB64 +#define AV_WLBUF AV_WL64 +#else typedef uint32_t BitBuf; #define AV_WBBUF AV_WB32 #define AV_WLBUF AV_WL32 +#endif static const int BUF_BITS = 8 * sizeof(BitBuf); @@ -163,17 +171,11 @@ void avpriv_put_string(PutBitContext *pb, const char *string, void avpriv_copy_bits(PutBitContext *pb, const uint8_t *src, int length); #endif -/** - * Write up to 31 bits into a bitstream. - * Use put_bits32 to write 32 bits. 
- */ -static inline void put_bits(PutBitContext *s, int n, BitBuf value) +static inline void put_bits_no_assert(PutBitContext *s, int n, BitBuf value) { BitBuf bit_buf; int bit_left; - av_assert2(n <= 31 && value < (1UL << n)); - bit_buf = s->bit_buf; bit_left = s->bit_left; @@ -215,6 +217,16 @@ static inline void put_bits(PutBitContext *s, int n, BitBuf value) s->bit_left = bit_left; } +/** + * Write up to 31 bits into a bitstream. + * Use put_bits32 to write 32 bits. + */ +static inline void put_bits(PutBitContext *s, int n, BitBuf value) +{ + av_assert2(n <= 31 && value < (1UL << n)); + put_bits_no_assert(s, n, value); +} + static inline void put_bits_le(PutBitContext *s, int n, BitBuf value) { BitBuf bit_buf; @@ -258,6 +270,11 @@ static void av_unused put_bits32(PutBitContext *s, uint32_t value) BitBuf bit_buf; int bit_left; + if (BUF_BITS > 32) { + put_bits_no_assert(s, 32, value); + return; + } + bit_buf = s->bit_buf; bit_left = s->bit_left;