diff mbox series

[FFmpeg-devel,v2,2/7] avutil/half2float: adjust conversion of NaN

Message ID 20220814164833.19965-2-timo@rothenpieler.org
State New
Headers show
Series [FFmpeg-devel,v2,1/7] avutil: move half-precision float helper to avutil | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Timo Rothenpieler Aug. 14, 2022, 4:48 p.m. UTC
IEEE-754 distinguishes two kinds of NaNs: quiet and signaling ones.
They are differentiated by the MSB of the mantissa.

For whatever reason, actual hardware conversion of half to single always
sets that bit to 1 if the mantissa is != 0 (i.e. the value is a NaN), and
leaves it at 0 if the mantissa is 0 (i.e. the value is an infinity).
So our code has to follow suit, or FATE-testing hardware float16 will be
impossible.
---
 libavcodec/exr.c                                    | 2 +-
 libavcodec/pnm.h                                    | 2 +-
 libavutil/half2float.h                              | 5 +++++
 tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF | 2 +-
 4 files changed, 8 insertions(+), 3 deletions(-)

Comments

Tomas Härdin Aug. 18, 2022, 8:51 a.m. UTC | #1
sön 2022-08-14 klockan 18:48 +0200 skrev Timo Rothenpieler:
> IEEE-754 differentiates two different kind of NaNs.
> Quiet and Signaling ones. They are differentiated by the MSB of the
> mantissa.
> 
> For whatever reason, actual hardware conversion of half to single
> always
> sets the signaling bit to 1 if the mantissa is != 0, and to 0 if it's
> 0.
> So our code has to follow suite or fate-testing hardware float16 will
> be
> impossible.

Does IEEE-754 specify this behavior?

/Tomas
diff mbox series

Patch

diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 5c6ca9adbf..47f4786491 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -191,7 +191,7 @@  typedef struct EXRContext {
     float gamma;
     union av_intfloat32 gamma_table[65536];
 
-    uint32_t mantissatable[2048];
+    uint32_t mantissatable[3072];
     uint32_t exponenttable[64];
     uint16_t offsettable[64];
 } EXRContext;
diff --git a/libavcodec/pnm.h b/libavcodec/pnm.h
index 5bf2eaa4d9..7e5445f529 100644
--- a/libavcodec/pnm.h
+++ b/libavcodec/pnm.h
@@ -34,7 +34,7 @@  typedef struct PNMContext {
     int half;
     float scale;
 
-    uint32_t mantissatable[2048];
+    uint32_t mantissatable[3072];
     uint32_t exponenttable[64];
     uint16_t offsettable[64];
 } PNMContext;
diff --git a/libavutil/half2float.h b/libavutil/half2float.h
index 1f6deade07..5af4690cfe 100644
--- a/libavutil/half2float.h
+++ b/libavutil/half2float.h
@@ -45,6 +45,9 @@  static void half2float_table(uint32_t *mantissatable, uint32_t *exponenttable,
         mantissatable[i] = convertmantissa(i);
     for (int i = 1024; i < 2048; i++)
         mantissatable[i] = 0x38000000UL + ((i - 1024) << 13UL);
+    for (int i = 2048; i < 3072; i++)
+        mantissatable[i] = mantissatable[i - 1024] | 0x400000UL;
+    mantissatable[2048] = mantissatable[1024];
 
     exponenttable[0] = 0;
     for (int i = 1; i < 31; i++)
@@ -58,7 +61,9 @@  static void half2float_table(uint32_t *mantissatable, uint32_t *exponenttable,
     offsettable[0] = 0;
     for (int i = 1; i < 64; i++)
         offsettable[i] = 1024;
+    offsettable[31] = 2048;
     offsettable[32] = 0;
+    offsettable[63] = 2048;
 }
 
 static uint32_t half2float(uint16_t h, const uint32_t *mantissatable, const uint32_t *exponenttable,
diff --git a/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF b/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF
index b6201116fe..e45a40b498 100644
--- a/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF
+++ b/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF
@@ -3,4 +3,4 @@ 
 #codec_id 0: rawvideo
 #dimensions 0: 256x256
 #sar 0: 1/1
-0,          0,          0,        1,   786432, 0x1445e411
+0,          0,          0,        1,   786432, 0xce9be2be