diff mbox series

[FFmpeg-devel,06/11] avutil/half2float: adjust conversion of NaN

Message ID 20220810204712.3123-6-timo@rothenpieler.org
State New
Headers show
Series [FFmpeg-devel,01/11] lavu/pixfmt: add packed RGBA float16 format | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Timo Rothenpieler Aug. 10, 2022, 8:47 p.m. UTC
IEEE-754 differentiates two different kind of NaNs.
Quiet and Signaling ones. They are differentiated by the MSB of the
mantissa.

For whatever reason, actual hardware conversion of half to single always
sets the signaling bit to 1 if the mantissa is != 0, and to 0 if it's 0.
So our code has to follow suite or fate-testing hardware float16 will be
impossible.
---
 libavcodec/exr.c                                    | 2 +-
 libavcodec/pnm.h                                    | 2 +-
 libavutil/half2float.h                              | 5 +++++
 tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF | 2 +-
 4 files changed, 8 insertions(+), 3 deletions(-)

Comments

Andreas Rheinhardt Aug. 10, 2022, 9:24 p.m. UTC | #1
Timo Rothenpieler:
> IEEE-754 differentiates two different kind of NaNs.
> Quiet and Signaling ones. They are differentiated by the MSB of the
> mantissa.
> 
> For whatever reason, actual hardware conversion of half to single always
> sets the signaling bit to 1 if the mantissa is != 0, and to 0 if it's 0.
> So our code has to follow suite or fate-testing hardware float16 will be
> impossible.

What does the exr spec say about quiet and signaling nans?

> ---
>  libavcodec/exr.c                                    | 2 +-
>  libavcodec/pnm.h                                    | 2 +-
>  libavutil/half2float.h                              | 5 +++++
>  tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF | 2 +-
>  4 files changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/libavcodec/exr.c b/libavcodec/exr.c
> index 5c6ca9adbf..47f4786491 100644
> --- a/libavcodec/exr.c
> +++ b/libavcodec/exr.c
> @@ -191,7 +191,7 @@ typedef struct EXRContext {
>      float gamma;
>      union av_intfloat32 gamma_table[65536];
>  
> -    uint32_t mantissatable[2048];
> +    uint32_t mantissatable[3072];
>      uint32_t exponenttable[64];
>      uint16_t offsettable[64];
>  } EXRContext;
> diff --git a/libavcodec/pnm.h b/libavcodec/pnm.h
> index 5bf2eaa4d9..7e5445f529 100644
> --- a/libavcodec/pnm.h
> +++ b/libavcodec/pnm.h
> @@ -34,7 +34,7 @@ typedef struct PNMContext {
>      int half;
>      float scale;
>  
> -    uint32_t mantissatable[2048];
> +    uint32_t mantissatable[3072];
>      uint32_t exponenttable[64];
>      uint16_t offsettable[64];
>  } PNMContext;
> diff --git a/libavutil/half2float.h b/libavutil/half2float.h
> index 1f6deade07..5af4690cfe 100644
> --- a/libavutil/half2float.h
> +++ b/libavutil/half2float.h
> @@ -45,6 +45,9 @@ static void half2float_table(uint32_t *mantissatable, uint32_t *exponenttable,
>          mantissatable[i] = convertmantissa(i);
>      for (int i = 1024; i < 2048; i++)
>          mantissatable[i] = 0x38000000UL + ((i - 1024) << 13UL);
> +    for (int i = 2048; i < 3072; i++)
> +        mantissatable[i] = mantissatable[i - 1024] | 0x400000UL;
> +    mantissatable[2048] = mantissatable[1024];
>  
>      exponenttable[0] = 0;
>      for (int i = 1; i < 31; i++)
> @@ -58,7 +61,9 @@ static void half2float_table(uint32_t *mantissatable, uint32_t *exponenttable,
>      offsettable[0] = 0;
>      for (int i = 1; i < 64; i++)
>          offsettable[i] = 1024;
> +    offsettable[31] = 2048;
>      offsettable[32] = 0;
> +    offsettable[63] = 2048;
>  }
>  
>  static uint32_t half2float(uint16_t h, const uint32_t *mantissatable, const uint32_t *exponenttable,
> diff --git a/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF b/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF
> index b6201116fe..e45a40b498 100644
> --- a/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF
> +++ b/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF
> @@ -3,4 +3,4 @@
>  #codec_id 0: rawvideo
>  #dimensions 0: 256x256
>  #sar 0: 1/1
> -0,          0,          0,        1,   786432, 0x1445e411
> +0,          0,          0,        1,   786432, 0xce9be2be
Timo Rothenpieler Aug. 10, 2022, 9:36 p.m. UTC | #2
On 10.08.2022 23:24, Andreas Rheinhardt wrote:
> Timo Rothenpieler:
>> IEEE-754 differentiates two different kind of NaNs.
>> Quiet and Signaling ones. They are differentiated by the MSB of the
>> mantissa.
>>
>> For whatever reason, actual hardware conversion of half to single always
>> sets the signaling bit to 1 if the mantissa is != 0, and to 0 if it's 0.
>> So our code has to follow suite or fate-testing hardware float16 will be
>> impossible.
> 
> What does the exr spec say about quiet and signaling nans?

Not sure how exr would be involved here.
But I tested this on both aarch64, x86 with sse2 emulation and x86 f16c 
on alderlake and zen2.
They all perfectly agree and match 100% what this changed code produces 
for the entire range of 65k possible values.
Andreas Rheinhardt Aug. 10, 2022, 9:43 p.m. UTC | #3
Timo Rothenpieler:
> On 10.08.2022 23:24, Andreas Rheinhardt wrote:
>> Timo Rothenpieler:
>>> IEEE-754 differentiates two different kind of NaNs.
>>> Quiet and Signaling ones. They are differentiated by the MSB of the
>>> mantissa.
>>>
>>> For whatever reason, actual hardware conversion of half to single always
>>> sets the signaling bit to 1 if the mantissa is != 0, and to 0 if it's 0.
>>> So our code has to follow suite or fate-testing hardware float16 will be
>>> impossible.
>>
>> What does the exr spec say about quiet and signaling nans?
> 
> Not sure how exr would be involved here.

Your patch changes the output of an exr-test. The output of the exr
decoder is presumably determined by the exr spec. There is after all the
possibility that what hardware does in hardware and what this patch does
in software is incompatible with what exr specifies.

> But I tested this on both aarch64, x86 with sse2 emulation and x86 f16c
> on alderlake and zen2.
> They all perfectly agree and match 100% what this changed code produces
> for the entire range of 65k possible values.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Timo Rothenpieler Aug. 10, 2022, 9:53 p.m. UTC | #4
On 10.08.2022 23:43, Andreas Rheinhardt wrote:
> Timo Rothenpieler:
>> On 10.08.2022 23:24, Andreas Rheinhardt wrote:
>>> Timo Rothenpieler:
>>>> IEEE-754 differentiates two different kind of NaNs.
>>>> Quiet and Signaling ones. They are differentiated by the MSB of the
>>>> mantissa.
>>>>
>>>> For whatever reason, actual hardware conversion of half to single always
>>>> sets the signaling bit to 1 if the mantissa is != 0, and to 0 if it's 0.
>>>> So our code has to follow suite or fate-testing hardware float16 will be
>>>> impossible.
>>>
>>> What does the exr spec say about quiet and signaling nans?
>>
>> Not sure how exr would be involved here.
> 
> Your patch changes the output of an exr-test. The output of the exr
> decoder is presumably determined by the exr spec. There is after all the
> possibility that what hardware does in hardware and what this patch does
> in software is incompatible with what exr specifies.

The exr spec just says something along the lines of analogous to 
ieee-754 floats: 
https://openexr.readthedocs.io/en/latest/TechnicalIntroduction.html?highlight=ieee#the-half-data-type
It barely ever mentions NaNs, other than that they exist. Which makes 
sense, given they don't typically appear in images.

The only output changed is that for how NaNs are converted.
And given the cross-validation with multiple hardware implementations, 
I'm confident that it's correct.
Mark Reid Aug. 10, 2022, 10:14 p.m. UTC | #5
On Wed, Aug 10, 2022 at 2:53 PM Timo Rothenpieler <timo@rothenpieler.org>
wrote:

> On 10.08.2022 23:43, Andreas Rheinhardt wrote:
> > Timo Rothenpieler:
> >> On 10.08.2022 23:24, Andreas Rheinhardt wrote:
> >>> Timo Rothenpieler:
> >>>> IEEE-754 differentiates two different kind of NaNs.
> >>>> Quiet and Signaling ones. They are differentiated by the MSB of the
> >>>> mantissa.
> >>>>
> >>>> For whatever reason, actual hardware conversion of half to single
> always
> >>>> sets the signaling bit to 1 if the mantissa is != 0, and to 0 if it's
> 0.
> >>>> So our code has to follow suite or fate-testing hardware float16 will
> be
> >>>> impossible.
> >>>
> >>> What does the exr spec say about quiet and signaling nans?
> >>
> >> Not sure how exr would be involved here.
> >
> > Your patch changes the output of an exr-test. The output of the exr
> > decoder is presumably determined by the exr spec. There is after all the
> > possibility that what hardware does in hardware and what this patch does
> > in software is incompatible with what exr specifies.
>
> The exr spec just says something along the lines of analogous to
> ieee-754 floats:
>
> https://openexr.readthedocs.io/en/latest/TechnicalIntroduction.html?highlight=ieee#the-half-data-type
> It barely ever mentions NaNs, other than that they exist. Which makes
> sense, given they don't typically appear in images.
>
> The only output changed is that for how NaNs are converted.
> And given the cross-validation with multiple hardware implementations,
> I'm confident that it's correct.
>

here is openexr implementation
https://github.com/AcademySoftwareFoundation/Imath/blob/main/src/Imath/toFloat.cpp#L78
It has been a while since I check but I believe the current implementation
matches this.

The fate sample: rgb_scanline_zip_half_float_0x0_to_0xFFFF.exr was created
to test this.
it contains every possible float16 value


> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
James Almer Aug. 10, 2022, 10:18 p.m. UTC | #6
On 8/10/2022 7:14 PM, Mark Reid wrote:
> On Wed, Aug 10, 2022 at 2:53 PM Timo Rothenpieler <timo@rothenpieler.org>
> wrote:
> 
>> On 10.08.2022 23:43, Andreas Rheinhardt wrote:
>>> Timo Rothenpieler:
>>>> On 10.08.2022 23:24, Andreas Rheinhardt wrote:
>>>>> Timo Rothenpieler:
>>>>>> IEEE-754 differentiates two different kind of NaNs.
>>>>>> Quiet and Signaling ones. They are differentiated by the MSB of the
>>>>>> mantissa.
>>>>>>
>>>>>> For whatever reason, actual hardware conversion of half to single
>> always
>>>>>> sets the signaling bit to 1 if the mantissa is != 0, and to 0 if it's
>> 0.
>>>>>> So our code has to follow suite or fate-testing hardware float16 will
>> be
>>>>>> impossible.
>>>>>
>>>>> What does the exr spec say about quiet and signaling nans?
>>>>
>>>> Not sure how exr would be involved here.
>>>
>>> Your patch changes the output of an exr-test. The output of the exr
>>> decoder is presumably determined by the exr spec. There is after all the
>>> possibility that what hardware does in hardware and what this patch does
>>> in software is incompatible with what exr specifies.
>>
>> The exr spec just says something along the lines of analogous to
>> ieee-754 floats:
>>
>> https://openexr.readthedocs.io/en/latest/TechnicalIntroduction.html?highlight=ieee#the-half-data-type
>> It barely ever mentions NaNs, other than that they exist. Which makes
>> sense, given they don't typically appear in images.
>>
>> The only output changed is that for how NaNs are converted.
>> And given the cross-validation with multiple hardware implementations,
>> I'm confident that it's correct.
>>
> 
> here is openexr implementation
> https://github.com/AcademySoftwareFoundation/Imath/blob/main/src/Imath/toFloat.cpp#L78
> It has been a while since I check but I believe the current implementation
> matches this.
> 
> The fate sample: rgb_scanline_zip_half_float_0x0_to_0xFFFF.exr was created
> to test this.
> it contains every possible float16 value

Then maybe the current implementation should be moved back to exr (it 
used to be internal to exr until Paul made it standalone), so this lavu 
module can match the existing hardware implementations of IEEE-734 half 
floats for the purpose of relevant pixel format support.
Timo Rothenpieler Aug. 10, 2022, 10:28 p.m. UTC | #7
On 11.08.2022 00:18, James Almer wrote:
> Then maybe the current implementation should be moved back to exr (it 
> used to be internal to exr until Paul made it standalone), so this lavu 
> module can match the existing hardware implementations of IEEE-734 half 
> floats for the purpose of relevant pixel format support.

That doesn't seem necessary to me.
The values produced before and now are both correct, just different.
But there is no functional difference in the values it produces.

Duplicating the entirety of that code just for that seems extremely 
unnecessary.
Mark Reid Aug. 10, 2022, 10:37 p.m. UTC | #8
On Wed, Aug 10, 2022 at 3:28 PM Timo Rothenpieler <timo@rothenpieler.org>
wrote:

> On 11.08.2022 00:18, James Almer wrote:
> > Then maybe the current implementation should be moved back to exr (it
> > used to be internal to exr until Paul made it standalone), so this lavu
> > module can match the existing hardware implementations of IEEE-734 half
> > floats for the purpose of relevant pixel format support.
>
> That doesn't seem necessary to me.
> The values produced before and now are both correct, just different.
> But there is no functional difference in the values it produces.
>
> Duplicating the entirety of that code just for that seems extremely
> unnecessary.
>

openexr does note the intel implementations difference here
https://github.com/AcademySoftwareFoundation/Imath/blob/main/src/Imath/half.h#L288


> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Timo Rothenpieler Aug. 10, 2022, 10:55 p.m. UTC | #9
On 11.08.2022 00:37, Mark Reid wrote:
> On Wed, Aug 10, 2022 at 3:28 PM Timo Rothenpieler <timo@rothenpieler.org>
> wrote:
> 
>> On 11.08.2022 00:18, James Almer wrote:
>>> Then maybe the current implementation should be moved back to exr (it
>>> used to be internal to exr until Paul made it standalone), so this lavu
>>> module can match the existing hardware implementations of IEEE-734 half
>>> floats for the purpose of relevant pixel format support.
>>
>> That doesn't seem necessary to me.
>> The values produced before and now are both correct, just different.
>> But there is no functional difference in the values it produces.
>>
>> Duplicating the entirety of that code just for that seems extremely
>> unnecessary.
>>
> 
> openexr does note the intel implementations difference here
> https://github.com/AcademySoftwareFoundation/Imath/blob/main/src/Imath/half.h#L288

It's actually quite curious how that came to be.
My natural idea would be that our current and EXRs code does it right.

But all hardware as well as gccs software emulation agrees. Makes me 
wonder if it's fully intentional and according to some spec. But I 
couldn't find anything on the matter.
Mark Reid Aug. 11, 2022, 2:18 a.m. UTC | #10
On Wed, Aug 10, 2022 at 3:56 PM Timo Rothenpieler <timo@rothenpieler.org>
wrote:

> On 11.08.2022 00:37, Mark Reid wrote:
> > On Wed, Aug 10, 2022 at 3:28 PM Timo Rothenpieler <timo@rothenpieler.org
> >
> > wrote:
> >
> >> On 11.08.2022 00:18, James Almer wrote:
> >>> Then maybe the current implementation should be moved back to exr (it
> >>> used to be internal to exr until Paul made it standalone), so this lavu
> >>> module can match the existing hardware implementations of IEEE-734 half
> >>> floats for the purpose of relevant pixel format support.
> >>
> >> That doesn't seem necessary to me.
> >> The values produced before and now are both correct, just different.
> >> But there is no functional difference in the values it produces.
> >>
> >> Duplicating the entirety of that code just for that seems extremely
> >> unnecessary.
> >>
> >
> > openexr does note the intel implementations difference here
> >
> https://github.com/AcademySoftwareFoundation/Imath/blob/main/src/Imath/half.h#L288
>
> It's actually quite curious how that came to be.
> My natural idea would be that our current and EXRs code does it right.
>
> But all hardware as well as gccs software emulation agrees. Makes me
> wonder if it's fully intentional and according to some spec. But I
> couldn't find anything on the matter.
>

Ya I'm curious too now. I might ask the exr folks.
I noticed the difference when I fixed the subnormal bug a couple years ago.
That is why I changed it to match the openexr's halfToFloat() version that
preserves the Nan values instead of changing them.
This new behavior might have been what it was before I changed it
in 8d19b3c4a5.

I looked at the intel architecture developer's manual and sadly it only
describes the float32 to float16 algorithm.
The change back seems pretty benign to me too. The openexr implementation
is relying on the hardware instruction too if it can.
I don't know what one would do with the exact NaN value anyway.
diff mbox series

Patch

diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 5c6ca9adbf..47f4786491 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -191,7 +191,7 @@  typedef struct EXRContext {
     float gamma;
     union av_intfloat32 gamma_table[65536];
 
-    uint32_t mantissatable[2048];
+    uint32_t mantissatable[3072];
     uint32_t exponenttable[64];
     uint16_t offsettable[64];
 } EXRContext;
diff --git a/libavcodec/pnm.h b/libavcodec/pnm.h
index 5bf2eaa4d9..7e5445f529 100644
--- a/libavcodec/pnm.h
+++ b/libavcodec/pnm.h
@@ -34,7 +34,7 @@  typedef struct PNMContext {
     int half;
     float scale;
 
-    uint32_t mantissatable[2048];
+    uint32_t mantissatable[3072];
     uint32_t exponenttable[64];
     uint16_t offsettable[64];
 } PNMContext;
diff --git a/libavutil/half2float.h b/libavutil/half2float.h
index 1f6deade07..5af4690cfe 100644
--- a/libavutil/half2float.h
+++ b/libavutil/half2float.h
@@ -45,6 +45,9 @@  static void half2float_table(uint32_t *mantissatable, uint32_t *exponenttable,
         mantissatable[i] = convertmantissa(i);
     for (int i = 1024; i < 2048; i++)
         mantissatable[i] = 0x38000000UL + ((i - 1024) << 13UL);
+    for (int i = 2048; i < 3072; i++)
+        mantissatable[i] = mantissatable[i - 1024] | 0x400000UL;
+    mantissatable[2048] = mantissatable[1024];
 
     exponenttable[0] = 0;
     for (int i = 1; i < 31; i++)
@@ -58,7 +61,9 @@  static void half2float_table(uint32_t *mantissatable, uint32_t *exponenttable,
     offsettable[0] = 0;
     for (int i = 1; i < 64; i++)
         offsettable[i] = 1024;
+    offsettable[31] = 2048;
     offsettable[32] = 0;
+    offsettable[63] = 2048;
 }
 
 static uint32_t half2float(uint16_t h, const uint32_t *mantissatable, const uint32_t *exponenttable,
diff --git a/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF b/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF
index b6201116fe..e45a40b498 100644
--- a/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF
+++ b/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF
@@ -3,4 +3,4 @@ 
 #codec_id 0: rawvideo
 #dimensions 0: 256x256
 #sar 0: 1/1
-0,          0,          0,        1,   786432, 0x1445e411
+0,          0,          0,        1,   786432, 0xce9be2be