diff mbox series

[FFmpeg-devel] avcodec/ffv1enc: Eliminate float/double from find_best_state()

Message ID 20220527185209.11644-1-michael@niedermayer.cc
State Accepted
Commit 366ef56f7f126398c813407c45f91216978224ba
Headers show
Series [FFmpeg-devel] avcodec/ffv1enc: Eliminate float/double from find_best_state() | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_armv7_RPi4 success Make finished
andriy/make_fate_armv7_RPi4 success Make fate finished

Commit Message

Michael Niedermayer May 27, 2022, 6:52 p.m. UTC
log2() remains; it can either be replaced by an integer implementation, or the table
can be hardcoded if needed

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
---
 libavcodec/ffv1enc.c                    | 25 ++++++++++++-------------
 tests/ref/vsynth/vsynth1-ffv1-2pass     |  4 ++--
 tests/ref/vsynth/vsynth2-ffv1-2pass     |  4 ++--
 tests/ref/vsynth/vsynth3-ffv1-2pass     |  4 ++--
 tests/ref/vsynth/vsynth_lena-ffv1-2pass |  4 ++--
 5 files changed, 20 insertions(+), 21 deletions(-)

Comments

Anton Khirnov May 30, 2022, 7:47 a.m. UTC | #1
Quoting Michael Niedermayer (2022-05-27 20:52:09)
> log2() remains, this can either be replaced by a integer implementation or the table
> hardcoded if needed
> 
> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> ---
>  libavcodec/ffv1enc.c                    | 25 ++++++++++++-------------
>  tests/ref/vsynth/vsynth1-ffv1-2pass     |  4 ++--
>  tests/ref/vsynth/vsynth2-ffv1-2pass     |  4 ++--
>  tests/ref/vsynth/vsynth3-ffv1-2pass     |  4 ++--
>  tests/ref/vsynth/vsynth_lena-ffv1-2pass |  4 ++--
>  5 files changed, 20 insertions(+), 21 deletions(-)

Fixes 32bit x86 FATE breakage for me.
Thanks.
Martin Storsjö May 30, 2022, 8:16 a.m. UTC | #2
On Mon, 30 May 2022, Anton Khirnov wrote:

> Quoting Michael Niedermayer (2022-05-27 20:52:09)
>> log2() remains, this can either be replaced by a integer implementation or the table
>> hardcoded if needed
>>
>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
>> ---
>>  libavcodec/ffv1enc.c                    | 25 ++++++++++++-------------
>>  tests/ref/vsynth/vsynth1-ffv1-2pass     |  4 ++--
>>  tests/ref/vsynth/vsynth2-ffv1-2pass     |  4 ++--
>>  tests/ref/vsynth/vsynth3-ffv1-2pass     |  4 ++--
>>  tests/ref/vsynth/vsynth_lena-ffv1-2pass |  4 ++--
>>  5 files changed, 20 insertions(+), 21 deletions(-)
>
> Fixes 32bit x86 FATE breakage for me.

Thanks, this fixes failures on arm and aarch64 for me, too.

// Martin
Michael Niedermayer May 30, 2022, 10:14 a.m. UTC | #3
On Mon, May 30, 2022 at 11:16:41AM +0300, Martin Storsjö wrote:
> On Mon, 30 May 2022, Anton Khirnov wrote:
> 
> > Quoting Michael Niedermayer (2022-05-27 20:52:09)
> > > log2() remains, this can either be replaced by a integer implementation or the table
> > > hardcoded if needed
> > > 
> > > Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> > > ---
> > >  libavcodec/ffv1enc.c                    | 25 ++++++++++++-------------
> > >  tests/ref/vsynth/vsynth1-ffv1-2pass     |  4 ++--
> > >  tests/ref/vsynth/vsynth2-ffv1-2pass     |  4 ++--
> > >  tests/ref/vsynth/vsynth3-ffv1-2pass     |  4 ++--
> > >  tests/ref/vsynth/vsynth_lena-ffv1-2pass |  4 ++--
> > >  5 files changed, 20 insertions(+), 21 deletions(-)
> > 
> > Fixes 32bit x86 FATE breakage for me.
> 
> Thanks, this fixes failures on arm and aarch64 for me, too.

will apply

thanks to both of you for testing

[...]
diff mbox series

Patch

diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index cee2627eed..311f377b1e 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -140,32 +140,31 @@  static void find_best_state(uint8_t best_state[256][256],
                             const uint8_t one_state[256])
 {
     int i, j, k, m;
-    double l2tab[256];
+    uint32_t l2tab[256];
 
     for (i = 1; i < 256; i++)
-        l2tab[i] = log2(i / 256.0);
+        l2tab[i] = log2(i / 256.0) * ((-1<<31) / 8);
 
     for (i = 0; i < 256; i++) {
-        double best_len[256];
-        double p = i / 256.0;
+        uint64_t best_len[256];
 
         for (j = 0; j < 256; j++)
-            best_len[j] = 1 << 30;
+            best_len[j] = UINT64_MAX;
 
         for (j = FFMAX(i - 10, 1); j < FFMIN(i + 11, 256); j++) {
-            double occ[256] = { 0 };
-            double len      = 0;
-            occ[j] = 1.0;
+            uint32_t occ[256] = { 0 };
+            uint64_t len      = 0;
+            occ[j] = UINT32_MAX;
 
             if (!one_state[j])
                 continue;
 
             for (k = 0; k < 256; k++) {
-                double newocc[256] = { 0 };
+                uint32_t newocc[256] = { 0 };
                 for (m = 1; m < 256; m++)
                     if (occ[m]) {
-                        len -=occ[m]*(     p *l2tab[    m]
-                                      + (1-p)*l2tab[256-m]);
+                        len += (occ[m]*((       i *(uint64_t)l2tab[    m]
+                                         + (256-i)*(uint64_t)l2tab[256-m])>>8)) >> 8;
                     }
                 if (len < best_len[k]) {
                     best_len[k]      = len;
@@ -173,8 +172,8 @@  static void find_best_state(uint8_t best_state[256][256],
                 }
                 for (m = 1; m < 256; m++)
                     if (occ[m]) {
-                        newocc[      one_state[      m]] += occ[m] * p;
-                        newocc[256 - one_state[256 - m]] += occ[m] * (1 - p);
+                        newocc[      one_state[      m]] += occ[m] * (uint64_t)       i  >> 8;
+                        newocc[256 - one_state[256 - m]] += occ[m] * (uint64_t)(256 - i) >> 8;
                     }
                 memcpy(occ, newocc, sizeof(occ));
             }
diff --git a/tests/ref/vsynth/vsynth1-ffv1-2pass b/tests/ref/vsynth/vsynth1-ffv1-2pass
index c27c9691d2..477a1bf49e 100644
--- a/tests/ref/vsynth/vsynth1-ffv1-2pass
+++ b/tests/ref/vsynth/vsynth1-ffv1-2pass
@@ -1,4 +1,4 @@ 
-7332cfda96233acc7178b09868c07ad7 *tests/data/fate/vsynth1-ffv1-2pass.avi
-2382244 tests/data/fate/vsynth1-ffv1-2pass.avi
+266ff859dade888a2c0cfddb29260186 *tests/data/fate/vsynth1-ffv1-2pass.avi
+2382240 tests/data/fate/vsynth1-ffv1-2pass.avi
 c5ccac874dbf808e9088bc3107860042 *tests/data/fate/vsynth1-ffv1-2pass.out.rawvideo
 stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-ffv1-2pass b/tests/ref/vsynth/vsynth2-ffv1-2pass
index 26c20db24d..e14eb1a94e 100644
--- a/tests/ref/vsynth/vsynth2-ffv1-2pass
+++ b/tests/ref/vsynth/vsynth2-ffv1-2pass
@@ -1,4 +1,4 @@ 
-2f5af924c6f7de1d4c34ec2dc9fca4ac *tests/data/fate/vsynth2-ffv1-2pass.avi
-3530664 tests/data/fate/vsynth2-ffv1-2pass.avi
+97b5dc666896cbaf98cec3acfbe0f3fc *tests/data/fate/vsynth2-ffv1-2pass.avi
+3530654 tests/data/fate/vsynth2-ffv1-2pass.avi
 36d7ca943916e1743cefa609eba0205c *tests/data/fate/vsynth2-ffv1-2pass.out.rawvideo
 stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth3-ffv1-2pass b/tests/ref/vsynth/vsynth3-ffv1-2pass
index dd0fd615f4..dd7943c9ac 100644
--- a/tests/ref/vsynth/vsynth3-ffv1-2pass
+++ b/tests/ref/vsynth/vsynth3-ffv1-2pass
@@ -1,4 +1,4 @@ 
-5b658e65541539248035c17da5eada3a *tests/data/fate/vsynth3-ffv1-2pass.avi
-53522 tests/data/fate/vsynth3-ffv1-2pass.avi
+96a6700731a71ee2e05c207e2334ade7 *tests/data/fate/vsynth3-ffv1-2pass.avi
+53520 tests/data/fate/vsynth3-ffv1-2pass.avi
 a038ad7c3c09f776304ef7accdea9c74 *tests/data/fate/vsynth3-ffv1-2pass.out.rawvideo
 stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth_lena-ffv1-2pass b/tests/ref/vsynth/vsynth_lena-ffv1-2pass
index 9ece86aaa3..ef49a9f5a6 100644
--- a/tests/ref/vsynth/vsynth_lena-ffv1-2pass
+++ b/tests/ref/vsynth/vsynth_lena-ffv1-2pass
@@ -1,4 +1,4 @@ 
-2e1833cf75da113a6fabbaae07ddd455 *tests/data/fate/vsynth_lena-ffv1-2pass.avi
-3490450 tests/data/fate/vsynth_lena-ffv1-2pass.avi
+c46df7f2b5770564475710f1086cdff6 *tests/data/fate/vsynth_lena-ffv1-2pass.avi
+3490446 tests/data/fate/vsynth_lena-ffv1-2pass.avi
 dde5895817ad9d219f79a52d0bdfb001 *tests/data/fate/vsynth_lena-ffv1-2pass.out.rawvideo
 stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200