Message ID | 20201019142501.6867-3-michael@niedermayer.cc |
---|---|
State | Accepted |
Headers | show |
Series | [FFmpeg-devel,1/7] avcodec/fits: Check bscale | expand |
Context | Check | Description |
---|---|---|
andriy/x86_make | success | Make finished |
andriy/x86_make_fate | success | Make fate finished |
Michael Niedermayer: > No benchmark because this is not used in any speed relevant pathes nor is it > used where __builtin_add_overflow is available. > So I do not know how to realistically benchmark it. > > Signed-off-by: Michael Niedermayer <michael@niedermayer.cc> > --- > libavutil/common.h | 9 ++++----- > 1 file changed, 4 insertions(+), 5 deletions(-) > > diff --git a/libavutil/common.h b/libavutil/common.h > index 92b721a59c..a48c0648f8 100644 > --- a/libavutil/common.h > +++ b/libavutil/common.h > @@ -303,11 +303,10 @@ static av_always_inline int64_t av_sat_add64_c(int64_t a, int64_t b) { > int64_t tmp; > return !__builtin_add_overflow(a, b, &tmp) ? tmp : (tmp < 0 ? INT64_MAX : INT64_MIN); > #else > - if (b >= 0 && a >= INT64_MAX - b) > - return INT64_MAX; > - if (b <= 0 && a <= INT64_MIN - b) > - return INT64_MIN; > - return a + b; > + int64_t s = a+(uint64_t)b; > + if ((int64_t)(a^b | ~s^b) >= 0) > + return b < 0 ? INT64_MIN : INT64_MAX; > + return s; > #endif > } > >
Returning INT64_MAX ^ (b >> 63) would have even fewer branches (untested). See https://godbolt.org/z/oYadG3 - Andreas
On Wed, Oct 21, 2020 at 12:17:41PM +0200, Andreas Rheinhardt wrote: > Michael Niedermayer: > > No benchmark because this is not used in any speed relevant pathes nor is it > > used where __builtin_add_overflow is available. > > So I do not know how to realistically benchmark it. > > > > Signed-off-by: Michael Niedermayer <michael@niedermayer.cc> > > --- > > libavutil/common.h | 9 ++++----- > > 1 file changed, 4 insertions(+), 5 deletions(-) > > > > diff --git a/libavutil/common.h b/libavutil/common.h > > index 92b721a59c..a48c0648f8 100644 > > --- a/libavutil/common.h > > +++ b/libavutil/common.h > > @@ -303,11 +303,10 @@ static av_always_inline int64_t av_sat_add64_c(int64_t a, int64_t b) { > > int64_t tmp; > > return !__builtin_add_overflow(a, b, &tmp) ? tmp : (tmp < 0 ? INT64_MAX : INT64_MIN); > > #else > > - if (b >= 0 && a >= INT64_MAX - b) > > - return INT64_MAX; > > - if (b <= 0 && a <= INT64_MIN - b) > > - return INT64_MIN; > > - return a + b; > > + int64_t s = a+(uint64_t)b; > > + if ((int64_t)(a^b | ~s^b) >= 0) > > + return b < 0 ? INT64_MIN : INT64_MAX; > > + return s; > > #endif > > } > > > > > Returning INT64_MAX ^ (b >> 63) would have even less branches
will do that in case no one objects to the patch itself [...] thx
On Wed, Oct 21, 2020 at 12:56:55PM +0200, Michael Niedermayer wrote: > On Wed, Oct 21, 2020 at 12:17:41PM +0200, Andreas Rheinhardt wrote: > > Michael Niedermayer: > > > No benchmark because this is not used in any speed relevant pathes nor is it > > > used where __builtin_add_overflow is available. > > > So I do not know how to realistically benchmark it. > > > > > > Signed-off-by: Michael Niedermayer <michael@niedermayer.cc> > > > --- > > > libavutil/common.h | 9 ++++----- > > > 1 file changed, 4 insertions(+), 5 deletions(-) > > > > > > diff --git a/libavutil/common.h b/libavutil/common.h > > > index 92b721a59c..a48c0648f8 100644 > > > --- a/libavutil/common.h > > > +++ b/libavutil/common.h > > > @@ -303,11 +303,10 @@ static av_always_inline int64_t av_sat_add64_c(int64_t a, int64_t b) { > > > int64_t tmp; > > > return !__builtin_add_overflow(a, b, &tmp) ? tmp : (tmp < 0 ? INT64_MAX : INT64_MIN); > > > #else > > > - if (b >= 0 && a >= INT64_MAX - b) > > > - return INT64_MAX; > > > - if (b <= 0 && a <= INT64_MIN - b) > > > - return INT64_MIN; > > > - return a + b; > > > + int64_t s = a+(uint64_t)b; > > > + if ((int64_t)(a^b | ~s^b) >= 0) > > > + return b < 0 ? INT64_MIN : INT64_MAX; > > > + return s; > > > #endif > > > } > > > > > > > > Returning INT64_MAX ^ (b >> 63) would have even less branches > > will do that in case noone objects to the patch itself
will apply [...]
diff --git a/libavutil/common.h b/libavutil/common.h index 92b721a59c..a48c0648f8 100644 --- a/libavutil/common.h +++ b/libavutil/common.h @@ -303,11 +303,10 @@ static av_always_inline int64_t av_sat_add64_c(int64_t a, int64_t b) { int64_t tmp; return !__builtin_add_overflow(a, b, &tmp) ? tmp : (tmp < 0 ? INT64_MAX : INT64_MIN); #else - if (b >= 0 && a >= INT64_MAX - b) - return INT64_MAX; - if (b <= 0 && a <= INT64_MIN - b) - return INT64_MIN; - return a + b; + int64_t s = a+(uint64_t)b; + if ((int64_t)(a^b | ~s^b) >= 0) + return b < 0 ? INT64_MIN : INT64_MAX; + return s; #endif }
No benchmark because this is not used in any speed-relevant paths nor is it used where __builtin_add_overflow is available. So I do not know how to realistically benchmark it. Signed-off-by: Michael Niedermayer <michael@niedermayer.cc> --- libavutil/common.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-)