From patchwork Tue Apr 14 10:24:58 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Christophe Gisquet X-Patchwork-Id: 18946 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id C47B244B934 for ; Tue, 14 Apr 2020 13:25:20 +0300 (EEST) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id B0CDD68B682; Tue, 14 Apr 2020 13:25:20 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mail-wm1-f66.google.com (mail-wm1-f66.google.com [209.85.128.66]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 0F1F068B532 for ; Tue, 14 Apr 2020 13:25:12 +0300 (EEST) Received: by mail-wm1-f66.google.com with SMTP id a201so12958267wme.1 for ; Tue, 14 Apr 2020 03:25:12 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:subject:date:message-id:in-reply-to:references:mime-version :content-transfer-encoding; bh=N8Mr7GGO6E3/0WWy/eas5ZfNBBfLjwZHijCW90H92cY=; b=T4ryIFnzHmFuF5EG3Gi2KNye9M3320xZb9qdWIeMxfEmRsQs5pSpbZXUr/ffZEo01G gzc9fh18cqfO7FSMvXpz6EtzUPsKnO2Br5rBYhrE5y3hABNEceyyk+YylRqwxPJnQeWt k9y61rpsVv+6QWbVNyxxwwLJtGqClxOp14/qo1ZrljMbdTD9yuZWCBCNnrLq+A8vHMKb l9/iq4VD4SIU/UyFeN0t5TOMfpgiUJagBsOulit5PfSa85dmK0BGaJFmGHprWNoh+B/C kyEdZ7T0j5VHNWsQtNiqJP9K1DrC/U7jkmlp7BjYfPKizWfT/nGj6G6dtule+4kSpIlY gVQg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=N8Mr7GGO6E3/0WWy/eas5ZfNBBfLjwZHijCW90H92cY=; b=Hjms4Nbuv5wbVup2Jb6I22Gx+tERgZw34OK5UwpkbcejFQDMjmLjmL+U1Ax6ik+ecv K9DTnr1gYIpWpV1j6w/xpYoLiocOdW74SaD2RMXyQEZeMTuUgByOJiBFT0vwkLmo+058 
JjnVhDi033F38V1ANJp9BYo2FIQeaIgNDOEyA4DYJNBnFEYZ+XccN+awzqAzvg6wgJ/9 7rDX/7WhiUlRhgbLMLzxOV0YLAztRm0I6KBzxEPu6Kmamlteb5K2JOGQnp4ifsAelqiJ L4RtxzHRoHxsc2iyKiB0o6Ft4yWgEq3iXQz4WNA8DCJ29NfpzOSIXUjtSVvdAFKSpJWM BgMA== X-Gm-Message-State: AGi0PubV5QXKIZybswdnHzSSFkE+0WwrB9FdE6rZeChU123/4lSvkizg bj0np3btJraymwN8VT50yNr0XYc= X-Google-Smtp-Source: APiQypLrPBf5w7vsRTal2MWSkE8+oyxWrNBlenH0CWR7RkXx7ygSUh67MuDAh2uXmR2/0E/uD24JUQ== X-Received: by 2002:a1c:7ed7:: with SMTP id z206mr21522288wmc.64.1586859911232; Tue, 14 Apr 2020 03:25:11 -0700 (PDT) Received: from localhost.localdomain (2a01cb0881b29e00157b082460d14b2a.ipv6.abo.wanadoo.fr. [2a01:cb08:81b2:9e00:157b:824:60d1:4b2a]) by smtp.gmail.com with ESMTPSA id i17sm8574487wru.39.2020.04.14.03.25.10 for (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 14 Apr 2020 03:25:10 -0700 (PDT) From: Christophe Gisquet To: ffmpeg-devel@ffmpeg.org Date: Tue, 14 Apr 2020 10:24:58 +0000 Message-Id: <20200414102503.7858-3-christophe.gisquet@gmail.com> X-Mailer: git-send-email 2.26.0 In-Reply-To: <20200414102503.7858-1-christophe.gisquet@gmail.com> References: <20200414102503.7858-1-christophe.gisquet@gmail.com> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 2/7] get_bits: support 32bits cache X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Therefore, also activate it under ARCH_X86 (testing for more archs welcome) for the only codecs supporting said cache reader. For UTVideo, on 8-bit samples and ARCH_X86_32 (X86_64 being unaffected), timings for one line go ~19.4k -> 15.1k and 16.5k (roughly 17% speedup). 
--- libavcodec/get_bits.h | 110 ++++++++++++++++++++++++++++------------ libavcodec/mvha.c | 2 +- libavcodec/utvideodec.c | 2 +- 3 files changed, 80 insertions(+), 34 deletions(-) diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h index 66fb877599..cb4df98e54 100644 --- a/libavcodec/get_bits.h +++ b/libavcodec/get_bits.h @@ -58,10 +58,40 @@ #define CACHED_BITSTREAM_READER 0 #endif +#if CACHED_BITSTREAM_READER + +# ifndef BITSTREAM_BITS +# if HAVE_FAST_64BIT || defined(LONG_BITSTREAM_READER) +# define BITSTREAM_BITS 64 +# else +# define BITSTREAM_BITS 32 +# endif +# endif + +# if BITSTREAM_BITS == 64 +# define BITSTREAM_HBITS 32 +typedef uint64_t cache_type; +# define AV_RB_ALL AV_RB64 +# define AV_RL_ALL AV_RL64 +# define AV_RB_HALF AV_RB32 +# define AV_RL_HALF AV_RL32 +# define CACHE_TYPE(a) UINT64_C(a) +# else +# define BITSTREAM_HBITS 16 +typedef uint32_t cache_type; +# define AV_RB_ALL AV_RB32 +# define AV_RL_ALL AV_RL32 +# define AV_RB_HALF AV_RB16 +# define AV_RL_HALF AV_RL16 +# define CACHE_TYPE(a) UINT32_C(a) +#endif + +#endif + typedef struct GetBitContext { const uint8_t *buffer, *buffer_end; #if CACHED_BITSTREAM_READER - uint64_t cache; + cache_type cache; unsigned bits_left; #endif int index; @@ -121,7 +151,11 @@ static inline unsigned int show_bits(GetBitContext *s, int n); */ #if CACHED_BITSTREAM_READER -# define MIN_CACHE_BITS 64 +# if BITSTREAM_BITS == 32 +# define MIN_CACHE_BITS (32-7) +# else +# define MIN_CACHE_BITS 32 +# endif #elif defined LONG_BITSTREAM_READER # define MIN_CACHE_BITS 32 #else @@ -226,22 +260,34 @@ static inline int get_bits_count(const GetBitContext *s) } #if CACHED_BITSTREAM_READER -static inline void refill_32(GetBitContext *s, int is_le) +static inline void refill_half(GetBitContext *s, int is_le) { #if !UNCHECKED_BITSTREAM_READER if (s->index >> 3 >= s->buffer_end - s->buffer) return; #endif +#if BITSTREAM_BITS == 32 + if (s->bits_left > 16) { + if (is_le) + s->cache |= (uint32_t)s->buffer[s->index >> 3] << 
s->bits_left; + else + s->cache |= (uint32_t)s->buffer[s->index >> 3] << (32 - s->bits_left); + s->index += 8; + s->bits_left += 8; + return; + } +#endif + if (is_le) - s->cache = (uint64_t)AV_RL32(s->buffer + (s->index >> 3)) << s->bits_left | s->cache; + s->cache |= (cache_type)AV_RL_HALF(s->buffer + (s->index >> 3)) << s->bits_left; else - s->cache = s->cache | (uint64_t)AV_RB32(s->buffer + (s->index >> 3)) << (32 - s->bits_left); - s->index += 32; - s->bits_left += 32; + s->cache |= (cache_type)AV_RB_HALF(s->buffer + (s->index >> 3)) << (BITSTREAM_HBITS - s->bits_left); + s->index += BITSTREAM_HBITS; + s->bits_left += BITSTREAM_HBITS; } -static inline void refill_64(GetBitContext *s, int is_le) +static inline void refill_all(GetBitContext *s, int is_le) { #if !UNCHECKED_BITSTREAM_READER if (s->index >> 3 >= s->buffer_end - s->buffer) @@ -249,22 +295,22 @@ static inline void refill_64(GetBitContext *s, int is_le) #endif if (is_le) - s->cache = AV_RL64(s->buffer + (s->index >> 3)); + s->cache = AV_RL_ALL(s->buffer + (s->index >> 3)); else - s->cache = AV_RB64(s->buffer + (s->index >> 3)); - s->index += 64; - s->bits_left = 64; + s->cache = AV_RB_ALL(s->buffer + (s->index >> 3)); + s->index += BITSTREAM_BITS; + s->bits_left = BITSTREAM_BITS; } -static inline uint64_t get_val(GetBitContext *s, unsigned n, int is_le) +static inline cache_type get_val(GetBitContext *s, unsigned n, int is_le) { - uint64_t ret; + cache_type ret; av_assert2(n>0 && n<=63); if (is_le) { - ret = s->cache & ((UINT64_C(1) << n) - 1); + ret = s->cache & ((CACHE_TYPE(1) << n) - 1); s->cache >>= n; } else { - ret = s->cache >> (64 - n); + ret = s->cache >> (BITSTREAM_BITS - n); s->cache <<= n; } s->bits_left -= n; @@ -274,12 +320,12 @@ static inline uint64_t get_val(GetBitContext *s, unsigned n, int is_le) static inline unsigned show_val(const GetBitContext *s, unsigned n) { #ifdef BITSTREAM_READER_LE - return s->cache & ((UINT64_C(1) << n) - 1); + return s->cache & ((CACHE_TYPE(1) << n) - 1); 
#else - return s->cache >> (64 - n); + return s->cache >> (BITSTREAM_BITS - n); #endif } -#endif +#endif // ~CACHED_BITSTREAM_READER /** * Skips the specified number of bits. @@ -384,11 +430,11 @@ static inline unsigned int get_bits(GetBitContext *s, int n) av_assert2(n>0 && n<=32); if (n > s->bits_left) { #ifdef BITSTREAM_READER_LE - refill_32(s, 1); + refill_half(s, 1); #else - refill_32(s, 0); + refill_half(s, 0); #endif - if (s->bits_left < 32) + if (s->bits_left < BITSTREAM_HBITS) s->bits_left = n; } @@ -422,8 +468,8 @@ static inline unsigned int get_bits_le(GetBitContext *s, int n) #if CACHED_BITSTREAM_READER av_assert2(n>0 && n<=32); if (n > s->bits_left) { - refill_32(s, 1); - if (s->bits_left < 32) + refill_half(s, 1); + if (s->bits_left < BITSTREAM_HBITS) s->bits_left = n; } @@ -449,9 +495,9 @@ static inline unsigned int show_bits(GetBitContext *s, int n) #if CACHED_BITSTREAM_READER if (n > s->bits_left) #ifdef BITSTREAM_READER_LE - refill_32(s, 1); + refill_half(s, 1); #else - refill_32(s, 0); + refill_half(s, 0); #endif tmp = show_val(s, n); @@ -474,16 +520,16 @@ static inline void skip_bits(GetBitContext *s, int n) s->cache = 0; s->bits_left = 0; - if (n >= 64) { + if (n >= BITSTREAM_BITS) { unsigned skip = (n / 8) * 8; n -= skip; s->index += skip; } #ifdef BITSTREAM_READER_LE - refill_64(s, 1); + refill_all(s, 1); #else - refill_64(s, 0); + refill_all(s, 0); #endif if (n) skip_remaining(s, n); @@ -500,9 +546,9 @@ static inline unsigned int get_bits1(GetBitContext *s) #if CACHED_BITSTREAM_READER if (!s->bits_left) #ifdef BITSTREAM_READER_LE - refill_64(s, 1); + refill_all(s, 1); #else - refill_64(s, 0); + refill_all(s, 0); #endif #ifdef BITSTREAM_READER_LE @@ -642,7 +688,7 @@ static inline int init_get_bits_xe(GetBitContext *s, const uint8_t *buffer, #if CACHED_BITSTREAM_READER s->cache = 0; s->bits_left = 0; - refill_64(s, is_le); + refill_all(s, is_le); #endif return ret; diff --git a/libavcodec/mvha.c b/libavcodec/mvha.c index c603ef6975..9d6dd9352a 
100644 --- a/libavcodec/mvha.c +++ b/libavcodec/mvha.c @@ -24,7 +24,7 @@ #include #include -#define CACHED_BITSTREAM_READER !ARCH_X86_32 +#define CACHED_BITSTREAM_READER ARCH_X86 #include "libavutil/intreadwrite.h" #include "avcodec.h" diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c index 4da257fc61..36dd661e90 100644 --- a/libavcodec/utvideodec.c +++ b/libavcodec/utvideodec.c @@ -27,7 +27,7 @@ #include #include -#define CACHED_BITSTREAM_READER !ARCH_X86_32 +#define CACHED_BITSTREAM_READER ARCH_X86 #define UNCHECKED_BITSTREAM_READER 1 #include "libavutil/intreadwrite.h"