diff mbox series

[FFmpeg-devel,v3,2/2] swscale/input: clip rgbf32 values before lrintf

Message ID 20211115062221.1650-2-mindmark@gmail.com
State New
Headers show
Series [FFmpeg-devel,v3,1/2] libavutil/common: clip nan value to amin | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished

Commit Message

Mark Reid Nov. 15, 2021, 6:22 a.m. UTC
From: Mark Reid <mindmark@gmail.com>

If the float pixel * 65535.0f > 2147483647.0f,
lrintf may overflow and return negative values, depending on the implementation.
The results for nan and +/-inf values may also be implementation-defined.

clip the value first so lrintf always works.

values <     0.0f, -inf, nan = 0.0f
values > 65535.0f, +inf      = 65535.0f

old timings
 195960 decicycles in planar_rgbf32le_to_uv,       1 runs,      0 skips
 186120 decicycles in planar_rgbf32le_to_uv,       2 runs,      0 skips
 188645 decicycles in planar_rgbf32le_to_uv,       4 runs,      0 skips
 183625 decicycles in planar_rgbf32le_to_uv,       8 runs,      0 skips
 181157 decicycles in planar_rgbf32le_to_uv,      16 runs,      0 skips
 177533 decicycles in planar_rgbf32le_to_uv,      32 runs,      0 skips
 175689 decicycles in planar_rgbf32le_to_uv,      64 runs,      0 skips

 232960 decicycles in planar_rgbf32be_to_uv,       1 runs,      0 skips
 221380 decicycles in planar_rgbf32be_to_uv,       2 runs,      0 skips
 216640 decicycles in planar_rgbf32be_to_uv,       4 runs,      0 skips
 213505 decicycles in planar_rgbf32be_to_uv,       8 runs,      0 skips
 211558 decicycles in planar_rgbf32be_to_uv,      16 runs,      0 skips
 210596 decicycles in planar_rgbf32be_to_uv,      32 runs,      0 skips
 210202 decicycles in planar_rgbf32be_to_uv,      64 runs,      0 skips

 161680 decicycles in planar_rgbf32le_to_y,       1 runs,      0 skips
 153540 decicycles in planar_rgbf32le_to_y,       2 runs,      0 skips
 148255 decicycles in planar_rgbf32le_to_y,       4 runs,      0 skips
 140600 decicycles in planar_rgbf32le_to_y,       8 runs,      0 skips
 132935 decicycles in planar_rgbf32le_to_y,      16 runs,      0 skips
 128531 decicycles in planar_rgbf32le_to_y,      32 runs,      0 skips
 140933 decicycles in planar_rgbf32le_to_y,      64 runs,      0 skips

 190980 decicycles in planar_rgbf32be_to_y,       1 runs,      0 skips
 176080 decicycles in planar_rgbf32be_to_y,       2 runs,      0 skips
 167980 decicycles in planar_rgbf32be_to_y,       4 runs,      0 skips
 164685 decicycles in planar_rgbf32be_to_y,       8 runs,      0 skips
 162751 decicycles in planar_rgbf32be_to_y,      16 runs,      0 skips
 162404 decicycles in planar_rgbf32be_to_y,      32 runs,      0 skips
 167849 decicycles in planar_rgbf32be_to_y,      64 runs,      0 skips

new timings
 183320 decicycles in planar_rgbf32le_to_uv,       1 runs,      0 skips
 175700 decicycles in planar_rgbf32le_to_uv,       2 runs,      0 skips
 179570 decicycles in planar_rgbf32le_to_uv,       4 runs,      0 skips
 172932 decicycles in planar_rgbf32le_to_uv,       8 runs,      0 skips
 168707 decicycles in planar_rgbf32le_to_uv,      16 runs,      0 skips
 165224 decicycles in planar_rgbf32le_to_uv,      32 runs,      0 skips
 163423 decicycles in planar_rgbf32le_to_uv,      64 runs,      0 skips

 184940 decicycles in planar_rgbf32be_to_uv,       1 runs,      0 skips
 185150 decicycles in planar_rgbf32be_to_uv,       2 runs,      0 skips
 185790 decicycles in planar_rgbf32be_to_uv,       4 runs,      0 skips
 185472 decicycles in planar_rgbf32be_to_uv,       8 runs,      0 skips
 185277 decicycles in planar_rgbf32be_to_uv,      16 runs,      0 skips
 185813 decicycles in planar_rgbf32be_to_uv,      32 runs,      0 skips
 185332 decicycles in planar_rgbf32be_to_uv,      64 runs,      0 skips

 145400 decicycles in planar_rgbf32le_to_y,       1 runs,      0 skips
 145100 decicycles in planar_rgbf32le_to_y,       2 runs,      0 skips
 143490 decicycles in planar_rgbf32le_to_y,       4 runs,      0 skips
 136687 decicycles in planar_rgbf32le_to_y,       8 runs,      0 skips
 131271 decicycles in planar_rgbf32le_to_y,      16 runs,      0 skips
 128698 decicycles in planar_rgbf32le_to_y,      32 runs,      0 skips
 127170 decicycles in planar_rgbf32le_to_y,      64 runs,      0 skips

 156020 decicycles in planar_rgbf32be_to_y,       1 runs,      0 skips
 146990 decicycles in planar_rgbf32be_to_y,       2 runs,      0 skips
 142020 decicycles in planar_rgbf32be_to_y,       4 runs,      0 skips
 141052 decicycles in planar_rgbf32be_to_y,       8 runs,      0 skips
 138973 decicycles in planar_rgbf32be_to_y,      16 runs,      0 skips
 138027 decicycles in planar_rgbf32be_to_y,      32 runs,      0 skips
 143939 decicycles in planar_rgbf32be_to_y,      64 runs,      0 skips

---
 libswscale/input.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

--
2.31.1.windows.1

Comments

Michael Niedermayer Nov. 15, 2021, 3:29 p.m. UTC | #1
On Sun, Nov 14, 2021 at 10:22:21PM -0800, mindmark@gmail.com wrote:
> From: Mark Reid <mindmark@gmail.com>
> 
> if the float pixel * 65535.0f > 2147483647.0f
> lrintf may overflow and return negative values, depending on implementation.
> nan and +/-inf values may also be implementation defined
> 
> clip the value first so lrintf always works.
> 
> values <     0.0f, -inf, nan = 0.0f
> values > 65535.0f, +inf      = 65535.0f
> 
> old timings
>  195960 decicycles in planar_rgbf32le_to_uv,       1 runs,      0 skips
>  186120 decicycles in planar_rgbf32le_to_uv,       2 runs,      0 skips
>  188645 decicycles in planar_rgbf32le_to_uv,       4 runs,      0 skips
>  183625 decicycles in planar_rgbf32le_to_uv,       8 runs,      0 skips
>  181157 decicycles in planar_rgbf32le_to_uv,      16 runs,      0 skips
>  177533 decicycles in planar_rgbf32le_to_uv,      32 runs,      0 skips
>  175689 decicycles in planar_rgbf32le_to_uv,      64 runs,      0 skips
> 
>  232960 decicycles in planar_rgbf32be_to_uv,       1 runs,      0 skips
>  221380 decicycles in planar_rgbf32be_to_uv,       2 runs,      0 skips
>  216640 decicycles in planar_rgbf32be_to_uv,       4 runs,      0 skips
>  213505 decicycles in planar_rgbf32be_to_uv,       8 runs,      0 skips
>  211558 decicycles in planar_rgbf32be_to_uv,      16 runs,      0 skips
>  210596 decicycles in planar_rgbf32be_to_uv,      32 runs,      0 skips
>  210202 decicycles in planar_rgbf32be_to_uv,      64 runs,      0 skips
> 
>  161680 decicycles in planar_rgbf32le_to_y,       1 runs,      0 skips
>  153540 decicycles in planar_rgbf32le_to_y,       2 runs,      0 skips
>  148255 decicycles in planar_rgbf32le_to_y,       4 runs,      0 skips
>  140600 decicycles in planar_rgbf32le_to_y,       8 runs,      0 skips
>  132935 decicycles in planar_rgbf32le_to_y,      16 runs,      0 skips
>  128531 decicycles in planar_rgbf32le_to_y,      32 runs,      0 skips
>  140933 decicycles in planar_rgbf32le_to_y,      64 runs,      0 skips
> 
>  190980 decicycles in planar_rgbf32be_to_y,       1 runs,      0 skips
>  176080 decicycles in planar_rgbf32be_to_y,       2 runs,      0 skips
>  167980 decicycles in planar_rgbf32be_to_y,       4 runs,      0 skips
>  164685 decicycles in planar_rgbf32be_to_y,       8 runs,      0 skips
>  162751 decicycles in planar_rgbf32be_to_y,      16 runs,      0 skips
>  162404 decicycles in planar_rgbf32be_to_y,      32 runs,      0 skips
>  167849 decicycles in planar_rgbf32be_to_y,      64 runs,      0 skips
> 
> new timings
>  183320 decicycles in planar_rgbf32le_to_uv,       1 runs,      0 skips
>  175700 decicycles in planar_rgbf32le_to_uv,       2 runs,      0 skips
>  179570 decicycles in planar_rgbf32le_to_uv,       4 runs,      0 skips
>  172932 decicycles in planar_rgbf32le_to_uv,       8 runs,      0 skips
>  168707 decicycles in planar_rgbf32le_to_uv,      16 runs,      0 skips
>  165224 decicycles in planar_rgbf32le_to_uv,      32 runs,      0 skips
>  163423 decicycles in planar_rgbf32le_to_uv,      64 runs,      0 skips
> 
>  184940 decicycles in planar_rgbf32be_to_uv,       1 runs,      0 skips
>  185150 decicycles in planar_rgbf32be_to_uv,       2 runs,      0 skips
>  185790 decicycles in planar_rgbf32be_to_uv,       4 runs,      0 skips
>  185472 decicycles in planar_rgbf32be_to_uv,       8 runs,      0 skips
>  185277 decicycles in planar_rgbf32be_to_uv,      16 runs,      0 skips
>  185813 decicycles in planar_rgbf32be_to_uv,      32 runs,      0 skips
>  185332 decicycles in planar_rgbf32be_to_uv,      64 runs,      0 skips
> 
>  145400 decicycles in planar_rgbf32le_to_y,       1 runs,      0 skips
>  145100 decicycles in planar_rgbf32le_to_y,       2 runs,      0 skips
>  143490 decicycles in planar_rgbf32le_to_y,       4 runs,      0 skips
>  136687 decicycles in planar_rgbf32le_to_y,       8 runs,      0 skips
>  131271 decicycles in planar_rgbf32le_to_y,      16 runs,      0 skips
>  128698 decicycles in planar_rgbf32le_to_y,      32 runs,      0 skips
>  127170 decicycles in planar_rgbf32le_to_y,      64 runs,      0 skips
> 
>  156020 decicycles in planar_rgbf32be_to_y,       1 runs,      0 skips
>  146990 decicycles in planar_rgbf32be_to_y,       2 runs,      0 skips
>  142020 decicycles in planar_rgbf32be_to_y,       4 runs,      0 skips
>  141052 decicycles in planar_rgbf32be_to_y,       8 runs,      0 skips
>  138973 decicycles in planar_rgbf32be_to_y,      16 runs,      0 skips
>  138027 decicycles in planar_rgbf32be_to_y,      32 runs,      0 skips
>  143939 decicycles in planar_rgbf32be_to_y,      64 runs,      0 skips

LGTM

thx

[...]
James Almer Nov. 15, 2021, 7:52 p.m. UTC | #2
On 11/15/2021 12:29 PM, Michael Niedermayer wrote:
> On Sun, Nov 14, 2021 at 10:22:21PM -0800, mindmark@gmail.com wrote:
>> From: Mark Reid <mindmark@gmail.com>
>>
>> if the float pixel * 65535.0f > 2147483647.0f
>> lrintf may overflow and return negative values, depending on implementation.
>> nan and +/-inf values may also be implementation defined
>>
>> clip the value first so lrintf always works.
>>
>> values <     0.0f, -inf, nan = 0.0f
>> values > 65535.0f, +inf      = 65535.0f
>>
>> old timings
>>   195960 decicycles in planar_rgbf32le_to_uv,       1 runs,      0 skips
>>   186120 decicycles in planar_rgbf32le_to_uv,       2 runs,      0 skips
>>   188645 decicycles in planar_rgbf32le_to_uv,       4 runs,      0 skips
>>   183625 decicycles in planar_rgbf32le_to_uv,       8 runs,      0 skips
>>   181157 decicycles in planar_rgbf32le_to_uv,      16 runs,      0 skips
>>   177533 decicycles in planar_rgbf32le_to_uv,      32 runs,      0 skips
>>   175689 decicycles in planar_rgbf32le_to_uv,      64 runs,      0 skips
>>
>>   232960 decicycles in planar_rgbf32be_to_uv,       1 runs,      0 skips
>>   221380 decicycles in planar_rgbf32be_to_uv,       2 runs,      0 skips
>>   216640 decicycles in planar_rgbf32be_to_uv,       4 runs,      0 skips
>>   213505 decicycles in planar_rgbf32be_to_uv,       8 runs,      0 skips
>>   211558 decicycles in planar_rgbf32be_to_uv,      16 runs,      0 skips
>>   210596 decicycles in planar_rgbf32be_to_uv,      32 runs,      0 skips
>>   210202 decicycles in planar_rgbf32be_to_uv,      64 runs,      0 skips
>>
>>   161680 decicycles in planar_rgbf32le_to_y,       1 runs,      0 skips
>>   153540 decicycles in planar_rgbf32le_to_y,       2 runs,      0 skips
>>   148255 decicycles in planar_rgbf32le_to_y,       4 runs,      0 skips
>>   140600 decicycles in planar_rgbf32le_to_y,       8 runs,      0 skips
>>   132935 decicycles in planar_rgbf32le_to_y,      16 runs,      0 skips
>>   128531 decicycles in planar_rgbf32le_to_y,      32 runs,      0 skips
>>   140933 decicycles in planar_rgbf32le_to_y,      64 runs,      0 skips
>>
>>   190980 decicycles in planar_rgbf32be_to_y,       1 runs,      0 skips
>>   176080 decicycles in planar_rgbf32be_to_y,       2 runs,      0 skips
>>   167980 decicycles in planar_rgbf32be_to_y,       4 runs,      0 skips
>>   164685 decicycles in planar_rgbf32be_to_y,       8 runs,      0 skips
>>   162751 decicycles in planar_rgbf32be_to_y,      16 runs,      0 skips
>>   162404 decicycles in planar_rgbf32be_to_y,      32 runs,      0 skips
>>   167849 decicycles in planar_rgbf32be_to_y,      64 runs,      0 skips
>>
>> new timings
>>   183320 decicycles in planar_rgbf32le_to_uv,       1 runs,      0 skips
>>   175700 decicycles in planar_rgbf32le_to_uv,       2 runs,      0 skips
>>   179570 decicycles in planar_rgbf32le_to_uv,       4 runs,      0 skips
>>   172932 decicycles in planar_rgbf32le_to_uv,       8 runs,      0 skips
>>   168707 decicycles in planar_rgbf32le_to_uv,      16 runs,      0 skips
>>   165224 decicycles in planar_rgbf32le_to_uv,      32 runs,      0 skips
>>   163423 decicycles in planar_rgbf32le_to_uv,      64 runs,      0 skips
>>
>>   184940 decicycles in planar_rgbf32be_to_uv,       1 runs,      0 skips
>>   185150 decicycles in planar_rgbf32be_to_uv,       2 runs,      0 skips
>>   185790 decicycles in planar_rgbf32be_to_uv,       4 runs,      0 skips
>>   185472 decicycles in planar_rgbf32be_to_uv,       8 runs,      0 skips
>>   185277 decicycles in planar_rgbf32be_to_uv,      16 runs,      0 skips
>>   185813 decicycles in planar_rgbf32be_to_uv,      32 runs,      0 skips
>>   185332 decicycles in planar_rgbf32be_to_uv,      64 runs,      0 skips
>>
>>   145400 decicycles in planar_rgbf32le_to_y,       1 runs,      0 skips
>>   145100 decicycles in planar_rgbf32le_to_y,       2 runs,      0 skips
>>   143490 decicycles in planar_rgbf32le_to_y,       4 runs,      0 skips
>>   136687 decicycles in planar_rgbf32le_to_y,       8 runs,      0 skips
>>   131271 decicycles in planar_rgbf32le_to_y,      16 runs,      0 skips
>>   128698 decicycles in planar_rgbf32le_to_y,      32 runs,      0 skips
>>   127170 decicycles in planar_rgbf32le_to_y,      64 runs,      0 skips
>>
>>   156020 decicycles in planar_rgbf32be_to_y,       1 runs,      0 skips
>>   146990 decicycles in planar_rgbf32be_to_y,       2 runs,      0 skips
>>   142020 decicycles in planar_rgbf32be_to_y,       4 runs,      0 skips
>>   141052 decicycles in planar_rgbf32be_to_y,       8 runs,      0 skips
>>   138973 decicycles in planar_rgbf32be_to_y,      16 runs,      0 skips
>>   138027 decicycles in planar_rgbf32be_to_y,      32 runs,      0 skips
>>   143939 decicycles in planar_rgbf32be_to_y,      64 runs,      0 skips
> 
> LGTM
> 
> thx

Applied.
diff mbox series

Patch

diff --git a/libswscale/input.c b/libswscale/input.c
index 90efdd2ffc..1351ea5bd4 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -973,7 +973,7 @@  static av_always_inline void planar_rgbf32_to_a(uint8_t *_dst, const uint8_t *_s
     uint16_t *dst        = (uint16_t *)_dst;

     for (i = 0; i < width; i++) {
-        dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src[3] + i)));
+        dst[i] = lrintf(av_clipf(65535.0f * rdpx(src[3] + i), 0.0f, 65535.0f));
     }
 }

@@ -987,9 +987,9 @@  static av_always_inline void planar_rgbf32_to_uv(uint8_t *_dstU, uint8_t *_dstV,
     int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];

     for (i = 0; i < width; i++) {
-        int g = av_clip_uint16(lrintf(65535.0f * rdpx(src[0] + i)));
-        int b = av_clip_uint16(lrintf(65535.0f * rdpx(src[1] + i)));
-        int r = av_clip_uint16(lrintf(65535.0f * rdpx(src[2] + i)));
+        int g = lrintf(av_clipf(65535.0f * rdpx(src[0] + i), 0.0f, 65535.0f));
+        int b = lrintf(av_clipf(65535.0f * rdpx(src[1] + i), 0.0f, 65535.0f));
+        int r = lrintf(av_clipf(65535.0f * rdpx(src[2] + i), 0.0f, 65535.0f));

         dstU[i] = (ru*r + gu*g + bu*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
         dstV[i] = (rv*r + gv*g + bv*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
@@ -1005,9 +1005,9 @@  static av_always_inline void planar_rgbf32_to_y(uint8_t *_dst, const uint8_t *_s
     int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];

     for (i = 0; i < width; i++) {
-        int g = av_clip_uint16(lrintf(65535.0f * rdpx(src[0] + i)));
-        int b = av_clip_uint16(lrintf(65535.0f * rdpx(src[1] + i)));
-        int r = av_clip_uint16(lrintf(65535.0f * rdpx(src[2] + i)));
+        int g = lrintf(av_clipf(65535.0f * rdpx(src[0] + i), 0.0f, 65535.0f));
+        int b = lrintf(av_clipf(65535.0f * rdpx(src[1] + i), 0.0f, 65535.0f));
+        int r = lrintf(av_clipf(65535.0f * rdpx(src[2] + i), 0.0f, 65535.0f));

         dst[i] = (ry*r + gy*g + by*b + (0x2001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
     }
@@ -1021,7 +1021,7 @@  static av_always_inline void grayf32ToY16_c(uint8_t *_dst, const uint8_t *_src,
     uint16_t *dst    = (uint16_t *)_dst;

     for (i = 0; i < width; ++i){
-        dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src + i)));
+        dst[i] = lrintf(av_clipf(65535.0f * rdpx(src + i), 0.0f,  65535.0f));
     }
 }