diff mbox series

[FFmpeg-devel,3/7] avutil/common: Implement av_sat_add64_c() with fewer branches

Message ID 20201019142501.6867-3-michael@niedermayer.cc
State Accepted
Headers show
Series [FFmpeg-devel,1/7] avcodec/fits: Check bscale
Related show

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished

Commit Message

Michael Niedermayer Oct. 19, 2020, 2:24 p.m. UTC
No benchmark because this is not used in any speed-relevant paths, nor is it
used where __builtin_add_overflow is available.
So I do not know how to realistically benchmark it.

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
---
 libavutil/common.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

Comments

Andreas Rheinhardt Oct. 21, 2020, 10:17 a.m. UTC | #1
Michael Niedermayer:
> No benchmark because this is not used in any speed-relevant paths, nor is it
> used where __builtin_add_overflow is available.
> So I do not know how to realistically benchmark it.
> 
> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> ---
>  libavutil/common.h | 9 ++++-----
>  1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/libavutil/common.h b/libavutil/common.h
> index 92b721a59c..a48c0648f8 100644
> --- a/libavutil/common.h
> +++ b/libavutil/common.h
> @@ -303,11 +303,10 @@ static av_always_inline int64_t av_sat_add64_c(int64_t a, int64_t b) {
>      int64_t tmp;
>      return !__builtin_add_overflow(a, b, &tmp) ? tmp : (tmp < 0 ? INT64_MAX : INT64_MIN);
>  #else
> -    if (b >= 0 && a >= INT64_MAX - b)
> -        return INT64_MAX;
> -    if (b <= 0 && a <= INT64_MIN - b)
> -        return INT64_MIN;
> -    return a + b;
> +    int64_t s = a+(uint64_t)b;
> +    if ((int64_t)(a^b | ~s^b) >= 0)
> +        return b < 0 ? INT64_MIN : INT64_MAX;
> +    return s;
>  #endif
>  }
>  
> 
Returning INT64_MAX ^ (b >> 63) would have even fewer branches
(untested). See https://godbolt.org/z/oYadG3

- Andreas
Michael Niedermayer Oct. 21, 2020, 10:56 a.m. UTC | #2
On Wed, Oct 21, 2020 at 12:17:41PM +0200, Andreas Rheinhardt wrote:
> Michael Niedermayer:
> > No benchmark because this is not used in any speed-relevant paths, nor is it
> > used where __builtin_add_overflow is available.
> > So I do not know how to realistically benchmark it.
> > 
> > Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> > ---
> >  libavutil/common.h | 9 ++++-----
> >  1 file changed, 4 insertions(+), 5 deletions(-)
> > 
> > diff --git a/libavutil/common.h b/libavutil/common.h
> > index 92b721a59c..a48c0648f8 100644
> > --- a/libavutil/common.h
> > +++ b/libavutil/common.h
> > @@ -303,11 +303,10 @@ static av_always_inline int64_t av_sat_add64_c(int64_t a, int64_t b) {
> >      int64_t tmp;
> >      return !__builtin_add_overflow(a, b, &tmp) ? tmp : (tmp < 0 ? INT64_MAX : INT64_MIN);
> >  #else
> > -    if (b >= 0 && a >= INT64_MAX - b)
> > -        return INT64_MAX;
> > -    if (b <= 0 && a <= INT64_MIN - b)
> > -        return INT64_MIN;
> > -    return a + b;
> > +    int64_t s = a+(uint64_t)b;
> > +    if ((int64_t)(a^b | ~s^b) >= 0)
> > +        return b < 0 ? INT64_MIN : INT64_MAX;
> > +    return s;
> >  #endif
> >  }
> >  
> > 
> Returning INT64_MAX ^ (b >> 63) would have even fewer branches

will do that in case no one objects to the patch itself

[...]

thx
Michael Niedermayer Oct. 24, 2020, 5:08 p.m. UTC | #3
On Wed, Oct 21, 2020 at 12:56:55PM +0200, Michael Niedermayer wrote:
> On Wed, Oct 21, 2020 at 12:17:41PM +0200, Andreas Rheinhardt wrote:
> > Michael Niedermayer:
> > > No benchmark because this is not used in any speed-relevant paths, nor is it
> > > used where __builtin_add_overflow is available.
> > > So I do not know how to realistically benchmark it.
> > > 
> > > Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> > > ---
> > >  libavutil/common.h | 9 ++++-----
> > >  1 file changed, 4 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/libavutil/common.h b/libavutil/common.h
> > > index 92b721a59c..a48c0648f8 100644
> > > --- a/libavutil/common.h
> > > +++ b/libavutil/common.h
> > > @@ -303,11 +303,10 @@ static av_always_inline int64_t av_sat_add64_c(int64_t a, int64_t b) {
> > >      int64_t tmp;
> > >      return !__builtin_add_overflow(a, b, &tmp) ? tmp : (tmp < 0 ? INT64_MAX : INT64_MIN);
> > >  #else
> > > -    if (b >= 0 && a >= INT64_MAX - b)
> > > -        return INT64_MAX;
> > > -    if (b <= 0 && a <= INT64_MIN - b)
> > > -        return INT64_MIN;
> > > -    return a + b;
> > > +    int64_t s = a+(uint64_t)b;
> > > +    if ((int64_t)(a^b | ~s^b) >= 0)
> > > +        return b < 0 ? INT64_MIN : INT64_MAX;
> > > +    return s;
> > >  #endif
> > >  }
> > >  
> > > 
> > Returning INT64_MAX ^ (b >> 63) would have even fewer branches
> 
> will do that in case no one objects to the patch itself

will apply

[...]
diff mbox series

Patch

diff --git a/libavutil/common.h b/libavutil/common.h
index 92b721a59c..a48c0648f8 100644
--- a/libavutil/common.h
+++ b/libavutil/common.h
@@ -303,11 +303,10 @@  static av_always_inline int64_t av_sat_add64_c(int64_t a, int64_t b) {
     int64_t tmp;
     return !__builtin_add_overflow(a, b, &tmp) ? tmp : (tmp < 0 ? INT64_MAX : INT64_MIN);
 #else
-    if (b >= 0 && a >= INT64_MAX - b)
-        return INT64_MAX;
-    if (b <= 0 && a <= INT64_MIN - b)
-        return INT64_MIN;
-    return a + b;
+    int64_t s = a+(uint64_t)b;
+    if ((int64_t)(a^b | ~s^b) >= 0)
+        return b < 0 ? INT64_MIN : INT64_MAX;
+    return s;
 #endif
 }