diff mbox series

[FFmpeg-devel] avutil/eval: Use even better PRNG

Message ID 20240109015521.26231-1-michael@niedermayer.cc
State New
Headers show
Series [FFmpeg-devel] avutil/eval: Use even better PRNG | expand

Checks

Context Check Description
yinshiyou/configure_loongarch64 warning Failed to apply patch
andriy/configure_x86 warning Failed to apply patch

Commit Message

Michael Niedermayer Jan. 9, 2024, 1:55 a.m. UTC
This is the 64bit version of Chris Doty-Humphrey's SFC64

Compared to the LCGs, this produces much better quality numbers.
Compared to LFGs, this needs less state: our LFG has 224 bytes of state
for its 32bit version, while this has 32 bytes of state.
Also, the initialization of our LFG is slower.
This is also much faster than KISS or PCG.

This could be merged with the change to integer LCG.
Also, a few fate tests need an update. I will update fate if SFC64
is the chosen PRNG.

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
---
 libavutil/eval.c    | 26 ++++++++++++--------
 libavutil/sfc64.h   | 59 +++++++++++++++++++++++++++++++++++++++++++++
 tests/ref/fate/eval |  2 +-
 3 files changed, 76 insertions(+), 11 deletions(-)
 create mode 100644 libavutil/sfc64.h

Comments

Stefano Sabatini Jan. 10, 2024, 10:48 p.m. UTC | #1
On date Tuesday 2024-01-09 02:55:21 +0100, Michael Niedermayer wrote:
> This is the 64bit version of Chris Doty-Humphreys SFC64
> 
> Compared to the LCGs these produce much better quality numbers.
> Compared to LFGs this needs less state. (our LFG has 224 byte
> state for its 32bit version) this has 32byte state
> Also the initialization for our LFG is slower.
> This is also much faster than KISS or PCG.
> 
> This could be merged with the change to integer LCG
> Also a few fate tests need an update. I will update fate if SFC64
> is the chosen PRNG
> 
> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> ---
>  libavutil/eval.c    | 26 ++++++++++++--------
>  libavutil/sfc64.h   | 59 +++++++++++++++++++++++++++++++++++++++++++++
>  tests/ref/fate/eval |  2 +-
>  3 files changed, 76 insertions(+), 11 deletions(-)

cool :-)

>  create mode 100644 libavutil/sfc64.h
> 
> diff --git a/libavutil/eval.c b/libavutil/eval.c
> index 9d41140056c..d15becf9cda 100644
> --- a/libavutil/eval.c
> +++ b/libavutil/eval.c
> @@ -33,6 +33,7 @@
>  #include "eval.h"
>  #include "ffmath.h"
>  #include "internal.h"
> +#include "sfc64.h"

nit: sort order

>  #include "log.h"
>  #include "mathematics.h"
>  #include "time.h"
> @@ -55,7 +56,7 @@ typedef struct Parser {
>      void *log_ctx;
>  #define VARS 10
>      double *var;
> -    uint64_t *var_uint64;
> +    SFC64 *prng_state;
>  } Parser;

this is on top of another patch I guess

>  
>  static const AVClass eval_class = {
> @@ -174,7 +175,7 @@ struct AVExpr {
>      } a;
>      struct AVExpr *param[3];
>      double *var;
> -    uint64_t *var_uint64;
> +    SFC64 *prng_state;
>  };
>  
>  static double etime(double v)
> @@ -233,10 +234,15 @@ static double eval_expr(Parser *p, AVExpr *e)
>  
>  #define COMPUTE_NEXT_RANDOM()                                        \
>              int idx = av_clip(eval_expr(p, e->param[0]), 0, VARS-1); \
> -            uint64_t r = p->var_uint64[idx] ? p->var_uint64[idx] : (isnan(p->var[idx]) ? 0 : p->var[idx]);\
> -            r = r * 1664525 + 1013904223;                            \
> +            SFC64 *s = p->prng_state + idx;                          \
> +            uint64_t r;                                              \
> +                                                                     \
> +            if (!s->counter) {                                       \
> +                r = isnan(p->var[idx]) ? 0 : p->var[idx];            \

> +                sfc64_init(s, r, r, r, 12);                          \

for the record, why 12?

> +            }                                                        \
> +            r = sfc64_get(s);                                        \
>              p->var[idx] = r;                                         \
> -            p->var_uint64[idx]= r;

>  
>          case e_random: {
>              COMPUTE_NEXT_RANDOM();
> @@ -334,7 +340,7 @@ static double eval_expr(Parser *p, AVExpr *e)
>                  case e_last:return e->value * d2;
>                  case e_st :  {
>                      int index = av_clip(d, 0, VARS-1);

> -                    p->var_uint64[index] = 0;
> +                    p->prng_state[index].counter = 0;

I wonder if we should have a dedicated strandom() (or randomst)
function to store the value (and deprecate st for setting the random
seed, now that we are using a separate variable to store the state) -
not blocking though

>                      return e->value * (p->var[index]= d2);
>                  }
>                  case e_hypot:return e->value * hypot(d, d2);
> @@ -356,7 +362,7 @@ void av_expr_free(AVExpr *e)
>      av_expr_free(e->param[1]);
>      av_expr_free(e->param[2]);
>      av_freep(&e->var);
> -    av_freep(&e->var_uint64);
> +    av_freep(&e->prng_state);
>      av_freep(&e);
>  }
>  
> @@ -744,8 +750,8 @@ int av_expr_parse(AVExpr **expr, const char *s,
>          goto end;
>      }
>      e->var= av_mallocz(sizeof(double) *VARS);
> -    e->var_uint64= av_mallocz(sizeof(uint64_t) *VARS);
> -    if (!e->var || !e->var_uint64) {
> +    e->prng_state = av_mallocz(sizeof(*e->prng_state) *VARS);
> +    if (!e->var || !e->prng_state) {
>          ret = AVERROR(ENOMEM);
>          goto end;
>      }
> @@ -787,7 +793,7 @@ double av_expr_eval(AVExpr *e, const double *const_values, void *opaque)
>  {
>      Parser p = { 0 };
>      p.var= e->var;
> -    p.var_uint64= e->var_uint64;
> +    p.prng_state= e->prng_state;
>  
>      p.const_values = const_values;
>      p.opaque     = opaque;
> diff --git a/libavutil/sfc64.h b/libavutil/sfc64.h
> new file mode 100644
> index 00000000000..25bc43abef1
> --- /dev/null
> +++ b/libavutil/sfc64.h
> @@ -0,0 +1,59 @@
> +/*
> + * Copyright (c) 2024 Michael Niedermayer <michael-ffmpeg@niedermayer.cc>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + *

> + * This is a implementation of SFC64 a 64-bit PRNG by Chris Doty-Humphrey.

nit: This is an implementation of SFC64, a 64-bit PRNG by Chris Doty-Humphrey.

> + *
> + * This Generator is much faster (0m1.872s) than 64bit KISS (0m3.823s) and PCG-XSH-RR-64/32 (0m2.700s)

what are these benchmarks against?

> + * And passes testu01 and practrand test suits.
> + *
> + */
> +
> +/**
> + * @file
> + * simple Pseudo Random Number Generator
> + *
> + */
> +
> +#ifndef AVUTIL_SFC64_H
> +#define AVUTIL_SFC64_H
> +
> +#include <inttypes.h>
> +
> +typedef struct SFC64 {
> +    uint64_t a,b,c,counter;
> +} SFC64;
> +

> +static inline uint64_t sfc64_get(SFC64 *s) {
> +    uint64_t tmp = s->a + s->b + s->counter++;
> +    s->a = s->b ^ (s->b >> 11);
> +    s->b = s->c + (s->c << 3); // This is a multiply by 9
> +    s->c = ((s->c << 24) | (s->c >> 40)) + tmp;
> +    return tmp;
> +}
> +
> +static inline void sfc64_init(SFC64 *s, uint64_t seeda, uint64_t seedb, uint64_t seedc, int rounds) {
> +    s->a       = seeda;
> +    s->b       = seedb;
> +    s->c       = seedc;
> +    s->counter = 1;
> +    while (rounds--)
> +        sfc64_get(s);
> +}
> +
> +#endif // AVUTIL_SFC64_H

nit: probably it still makes sense to use ff/FF prefixes even if the
header is not public (and if this is useful, probably it could be made
public as a faster/smaller alternative to lfg).
Michael Niedermayer Jan. 11, 2024, 2:39 a.m. UTC | #2
On Wed, Jan 10, 2024 at 11:48:33PM +0100, Stefano Sabatini wrote:
> On date Tuesday 2024-01-09 02:55:21 +0100, Michael Niedermayer wrote:
[...]

> >  
> >  static const AVClass eval_class = {
> > @@ -174,7 +175,7 @@ struct AVExpr {
> >      } a;
> >      struct AVExpr *param[3];
> >      double *var;
> > -    uint64_t *var_uint64;
> > +    SFC64 *prng_state;
> >  };
> >  
> >  static double etime(double v)
> > @@ -233,10 +234,15 @@ static double eval_expr(Parser *p, AVExpr *e)
> >  
> >  #define COMPUTE_NEXT_RANDOM()                                        \
> >              int idx = av_clip(eval_expr(p, e->param[0]), 0, VARS-1); \
> > -            uint64_t r = p->var_uint64[idx] ? p->var_uint64[idx] : (isnan(p->var[idx]) ? 0 : p->var[idx]);\
> > -            r = r * 1664525 + 1013904223;                            \
> > +            SFC64 *s = p->prng_state + idx;                          \
> > +            uint64_t r;                                              \
> > +                                                                     \
> > +            if (!s->counter) {                                       \
> > +                r = isnan(p->var[idx]) ? 0 : p->var[idx];            \
> 
> > +                sfc64_init(s, r, r, r, 12);                          \
> 
> for the record, why 12?

The reference has 3 init functions:
* one that uses one seed for the 3 parameters; it uses 12 rounds
* one that uses 3 separate seeds; it uses 18 rounds
* one that has "fast" in its name and does 8 rounds with one seed in 3 parameters

I will document this better


[...]
> >                      return e->value * (p->var[index]= d2);
> >                  }
> >                  case e_hypot:return e->value * hypot(d, d2);
> > @@ -356,7 +362,7 @@ void av_expr_free(AVExpr *e)
> >      av_expr_free(e->param[1]);
> >      av_expr_free(e->param[2]);
> >      av_freep(&e->var);
> > -    av_freep(&e->var_uint64);
> > +    av_freep(&e->prng_state);
> >      av_freep(&e);
> >  }
> >  
> > @@ -744,8 +750,8 @@ int av_expr_parse(AVExpr **expr, const char *s,
> >          goto end;
> >      }
> >      e->var= av_mallocz(sizeof(double) *VARS);
> > -    e->var_uint64= av_mallocz(sizeof(uint64_t) *VARS);
> > -    if (!e->var || !e->var_uint64) {
> > +    e->prng_state = av_mallocz(sizeof(*e->prng_state) *VARS);
> > +    if (!e->var || !e->prng_state) {
> >          ret = AVERROR(ENOMEM);
> >          goto end;
> >      }
> > @@ -787,7 +793,7 @@ double av_expr_eval(AVExpr *e, const double *const_values, void *opaque)
> >  {
> >      Parser p = { 0 };
> >      p.var= e->var;
> > -    p.var_uint64= e->var_uint64;
> > +    p.prng_state= e->prng_state;
> >  
> >      p.const_values = const_values;
> >      p.opaque     = opaque;
> > diff --git a/libavutil/sfc64.h b/libavutil/sfc64.h
> > new file mode 100644
> > index 00000000000..25bc43abef1
> > --- /dev/null
> > +++ b/libavutil/sfc64.h
> > @@ -0,0 +1,59 @@
> > +/*
> > + * Copyright (c) 2024 Michael Niedermayer <michael-ffmpeg@niedermayer.cc>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> > + *
> 
> > + * This is a implementation of SFC64 a 64-bit PRNG by Chris Doty-Humphrey.
> 
> nit: This is a implementation of SFC64, a 64-bit PRNG by Chris Doty-Humphrey.
> 

> > + *
> > + * This Generator is much faster (0m1.872s) than 64bit KISS (0m3.823s) and PCG-XSH-RR-64/32 (0m2.700s)
> 
> what are these benchmarks against?

a loop that computes a lot of random numbers and at the end prints their sum.

The behavior was, btw, quite different if the numbers are not summed and printed,
as the compiler can then optimize some things out, but no one would run a PRNG
and not use the values.


[...]
> > +static inline uint64_t sfc64_get(SFC64 *s) {
> > +    uint64_t tmp = s->a + s->b + s->counter++;
> > +    s->a = s->b ^ (s->b >> 11);
> > +    s->b = s->c + (s->c << 3); // This is a multiply by 9
> > +    s->c = ((s->c << 24) | (s->c >> 40)) + tmp;
> > +    return tmp;
> > +}
> > +
> > +static inline void sfc64_init(SFC64 *s, uint64_t seeda, uint64_t seedb, uint64_t seedc, int rounds) {
> > +    s->a       = seeda;
> > +    s->b       = seedb;
> > +    s->c       = seedc;
> > +    s->counter = 1;
> > +    while (rounds--)
> > +        sfc64_get(s);
> > +}
> > +
> > +#endif // AVUTIL_SFC64_H
> 
> nit: probably it still makes sense to use ff/FF prefixes even if the
> header is not public (and if this is useful, probably it could be made
> public as a faster/smaller alternative to lfg).

ok

[...]
Michael Koch Jan. 19, 2024, 8:53 a.m. UTC | #3
There is still a small problem with the random generator, but this has 
nothing to do with the recent changes.
If the random() expression is used in the geq filter, then multiple 
pixels get the same sequence of random numbers.
As can be shown with this command, where the frame has only two pixels:

ffmpeg -loglevel repeat -f lavfi -i nullsrc=size=1x2,format=gray -vf 
"geq=lum='print(random(0));print(random(0));print(random(0))'" -frames 1 
-y out.png

I think it's because the filter is executed in multiple threads.
-filter_threads 1 fixes the problem, but it slows down the whole filter 
thread.

Michael
Michael Niedermayer Jan. 20, 2024, 12:33 a.m. UTC | #4
On Fri, Jan 19, 2024 at 09:53:46AM +0100, Michael Koch wrote:
> There is still a small problem with the random generator, but this has
> nothing to do with the recent changes.
> If the random() expression is used in the geq filter, then multiple pixels
> get the same sequence of random numbers.
> As can be shown with this command, where the frame has only two pixels:
> 
> ffmpeg -loglevel repeat -f lavfi -i nullsrc=size=1x2,format=gray -vf
> "geq=lum='print(random(0));print(random(0));print(random(0))'" -frames 1 -y
> out.png
> 
> I think it's because the filter is executed in multiple threads.
> -filter_threads 1 fixes the problem, but it slows down the whole filter
> thread.

You can avoid this by using
ifnot(X,st(0,Y))

which would reseed the random number generator differently on the first pixel of
each line.
I am not sure this is the best solution; better ideas are welcome.

thx

[...]
diff mbox series

Patch

diff --git a/libavutil/eval.c b/libavutil/eval.c
index 9d41140056c..d15becf9cda 100644
--- a/libavutil/eval.c
+++ b/libavutil/eval.c
@@ -33,6 +33,7 @@ 
 #include "eval.h"
 #include "ffmath.h"
 #include "internal.h"
+#include "sfc64.h"
 #include "log.h"
 #include "mathematics.h"
 #include "time.h"
@@ -55,7 +56,7 @@  typedef struct Parser {
     void *log_ctx;
 #define VARS 10
     double *var;
-    uint64_t *var_uint64;
+    SFC64 *prng_state;
 } Parser;
 
 static const AVClass eval_class = {
@@ -174,7 +175,7 @@  struct AVExpr {
     } a;
     struct AVExpr *param[3];
     double *var;
-    uint64_t *var_uint64;
+    SFC64 *prng_state;
 };
 
 static double etime(double v)
@@ -233,10 +234,15 @@  static double eval_expr(Parser *p, AVExpr *e)
 
 #define COMPUTE_NEXT_RANDOM()                                        \
             int idx = av_clip(eval_expr(p, e->param[0]), 0, VARS-1); \
-            uint64_t r = p->var_uint64[idx] ? p->var_uint64[idx] : (isnan(p->var[idx]) ? 0 : p->var[idx]);\
-            r = r * 1664525 + 1013904223;                            \
+            SFC64 *s = p->prng_state + idx;                          \
+            uint64_t r;                                              \
+                                                                     \
+            if (!s->counter) {                                       \
+                r = isnan(p->var[idx]) ? 0 : p->var[idx];            \
+                sfc64_init(s, r, r, r, 12);                          \
+            }                                                        \
+            r = sfc64_get(s);                                        \
             p->var[idx] = r;                                         \
-            p->var_uint64[idx]= r;
 
         case e_random: {
             COMPUTE_NEXT_RANDOM();
@@ -334,7 +340,7 @@  static double eval_expr(Parser *p, AVExpr *e)
                 case e_last:return e->value * d2;
                 case e_st :  {
                     int index = av_clip(d, 0, VARS-1);
-                    p->var_uint64[index] = 0;
+                    p->prng_state[index].counter = 0;
                     return e->value * (p->var[index]= d2);
                 }
                 case e_hypot:return e->value * hypot(d, d2);
@@ -356,7 +362,7 @@  void av_expr_free(AVExpr *e)
     av_expr_free(e->param[1]);
     av_expr_free(e->param[2]);
     av_freep(&e->var);
-    av_freep(&e->var_uint64);
+    av_freep(&e->prng_state);
     av_freep(&e);
 }
 
@@ -744,8 +750,8 @@  int av_expr_parse(AVExpr **expr, const char *s,
         goto end;
     }
     e->var= av_mallocz(sizeof(double) *VARS);
-    e->var_uint64= av_mallocz(sizeof(uint64_t) *VARS);
-    if (!e->var || !e->var_uint64) {
+    e->prng_state = av_mallocz(sizeof(*e->prng_state) *VARS);
+    if (!e->var || !e->prng_state) {
         ret = AVERROR(ENOMEM);
         goto end;
     }
@@ -787,7 +793,7 @@  double av_expr_eval(AVExpr *e, const double *const_values, void *opaque)
 {
     Parser p = { 0 };
     p.var= e->var;
-    p.var_uint64= e->var_uint64;
+    p.prng_state= e->prng_state;
 
     p.const_values = const_values;
     p.opaque     = opaque;
diff --git a/libavutil/sfc64.h b/libavutil/sfc64.h
new file mode 100644
index 00000000000..25bc43abef1
--- /dev/null
+++ b/libavutil/sfc64.h
@@ -0,0 +1,59 @@ 
+/*
+ * Copyright (c) 2024 Michael Niedermayer <michael-ffmpeg@niedermayer.cc>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * This is an implementation of SFC64, a 64-bit PRNG by Chris Doty-Humphrey.
+ *
+ * This generator is much faster (0m1.872s) than 64bit KISS (0m3.823s) and PCG-XSH-RR-64/32 (0m2.700s)
+ * and passes the TestU01 and PractRand test suites.
+ *
+ */
+
+/**
+ * @file
+ * simple Pseudo Random Number Generator
+ *
+ */
+
+#ifndef AVUTIL_SFC64_H
+#define AVUTIL_SFC64_H
+
+#include <inttypes.h>
+
+typedef struct SFC64 {
+    uint64_t a,b,c,counter;
+} SFC64;
+
+static inline uint64_t sfc64_get(SFC64 *s) {
+    uint64_t tmp = s->a + s->b + s->counter++;
+    s->a = s->b ^ (s->b >> 11);
+    s->b = s->c + (s->c << 3); // This is a multiply by 9
+    s->c = ((s->c << 24) | (s->c >> 40)) + tmp;
+    return tmp;
+}
+
+static inline void sfc64_init(SFC64 *s, uint64_t seeda, uint64_t seedb, uint64_t seedc, int rounds) {
+    s->a       = seeda;
+    s->b       = seedb;
+    s->c       = seedc;
+    s->counter = 1;
+    while (rounds--)
+        sfc64_get(s);
+}
+
+#endif // AVUTIL_SFC64_H
diff --git a/tests/ref/fate/eval b/tests/ref/fate/eval
index 5b4d93f4274..441f9846c46 100644
--- a/tests/ref/fate/eval
+++ b/tests/ref/fate/eval
@@ -257,7 +257,7 @@  Evaluating 'root(sin(ld(0))+6+sin(ld(0)/12)-log(ld(0)), 100)'
 'root(sin(ld(0))+6+sin(ld(0)/12)-log(ld(0)), 100)' -> 60.965601
 
 Evaluating '7000000B*random(0)'
-'7000000B*random(0)' -> 0.003078
+'7000000B*random(0)' -> 12864914.486611
 
 Evaluating 'squish(2)'
 'squish(2)' -> 0.000335