[FFmpeg-devel,v5,2/3] vf_find_rect.c: use the optimized sad function to improve the find performance

Submitted by lance.lmwang@gmail.com on June 12, 2019, 10:57 a.m.

Details

Message ID 20190612105731.43622-2-lance.lmwang@gmail.com
State New
Headers show

Commit Message

lance.lmwang@gmail.com June 12, 2019, 10:57 a.m.
From: Limin Wang <lance.lmwang@gmail.com>

Benchmark on x86_64 with the command below:
./ffmpeg  -i 1920x1080.mp4 -vf find_rect=./find.tif,cover_rect=./cover.jpg:mode=cover -f null -
6.4 fps -> 16 fps

Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
---
 libavfilter/vf_find_rect.c | 53 +++++++++++++++-----------------------
 1 file changed, 21 insertions(+), 32 deletions(-)

Comments

Michael Niedermayer June 14, 2019, 7:02 p.m.
Hi

On Wed, Jun 12, 2019 at 06:57:30PM +0800, lance.lmwang@gmail.com wrote:
> From: Limin Wang <lance.lmwang@gmail.com>
> 
> benchmark on x86_64: 6.4 -> 16 with below command:
> ./ffmpeg  -i 1920x1080.mp4 -vf find_rect=./find.tif,cover_rect=./cover.jpg:mode=cover -f null -
> 6.4 fps -> 16fps
> 
> Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
> ---
>  libavfilter/vf_find_rect.c | 53 +++++++++++++++-----------------------
>  1 file changed, 21 insertions(+), 32 deletions(-)
> 
> diff --git a/libavfilter/vf_find_rect.c b/libavfilter/vf_find_rect.c
> index ee6c3f4b45..ed15885bc2 100644
> --- a/libavfilter/vf_find_rect.c
> +++ b/libavfilter/vf_find_rect.c
> @@ -26,6 +26,7 @@
>  #include "libavutil/imgutils.h"
>  #include "libavutil/opt.h"
>  #include "internal.h"
> +#include "scene_sad.h"
>  
>  #include "lavfutils.h"
>  #include "lswsutils.h"
> @@ -36,6 +37,8 @@ typedef struct FOCContext {
>      AVClass *class;
>      float threshold;
>      int mipmaps;
> +    ff_scene_sad_fn sad;
> +    int bitdepth;
>      int xmin, ymin, xmax, ymax;
>      char *obj_filename;
>      int last_x, last_y;
> @@ -103,54 +106,40 @@ static AVFrame *downscale(AVFrame *in)
>      return frame;
>  }
>  
> -static float compare(const AVFrame *haystack, const AVFrame *obj, int offx, int offy)
> +static float compare_sad(FOCContext *foc, AVFrame *haystack, AVFrame *obj, int offx, int offy)
>  {
> -    int x,y;
> -    int o_sum_v = 0;
> -    int h_sum_v = 0;
> -    int64_t oo_sum_v = 0;
> -    int64_t hh_sum_v = 0;
> -    int64_t oh_sum_v = 0;
> -    float c;
> +    uint64_t sad = 0;
>      int n = obj->height * obj->width;
> -    const uint8_t *odat = obj     ->data[0];
> +    double mafd;
> +    const uint8_t *odat = obj->data[0];
>      const uint8_t *hdat = haystack->data[0] + offx + offy * haystack->linesize[0];
> -    int64_t o_sigma, h_sigma;
> -
> -    for(y = 0; y < obj->height; y++) {
> -        for(x = 0; x < obj->width; x++) {
> -            int o_v = odat[x];
> -            int h_v = hdat[x];
> -            o_sum_v += o_v;
> -            h_sum_v += h_v;
> -            oo_sum_v += o_v * o_v;
> -            hh_sum_v += h_v * h_v;
> -            oh_sum_v += o_v * h_v;
> -        }
> -        odat += obj->linesize[0];
> -        hdat += haystack->linesize[0];
> -    }
> -    o_sigma = n*oo_sum_v - o_sum_v*(int64_t)o_sum_v;
> -    h_sigma = n*hh_sum_v - h_sum_v*(int64_t)h_sum_v;
>  
> -    if (o_sigma == 0 || h_sigma == 0)
> -        return 1.0;
> +    foc->sad(hdat, haystack->linesize[0], odat, obj->linesize[0],
> +            obj->width, obj->height, &sad);
> +    emms_c();
> +    mafd = (double)sad / n / (1ULL << foc->bitdepth);

mixing floating point and MMX in the same function is likely not
safe

Also, SAD won't recognize objects that have a different contrast or brightness.

[...]
lance.lmwang@gmail.com June 14, 2019, 10:39 p.m.
On Sat, Jun 15, 2019 at 3:02 AM Michael Niedermayer <michael@niedermayer.cc>
wrote:

> Hi
>
> On Wed, Jun 12, 2019 at 06:57:30PM +0800, lance.lmwang@gmail.com wrote:
> > From: Limin Wang <lance.lmwang@gmail.com>
> >
> > benchmark on x86_64: 6.4 -> 16 with below command:
> > ./ffmpeg  -i 1920x1080.mp4 -vf
> find_rect=./find.tif,cover_rect=./cover.jpg:mode=cover -f null -
> > 6.4 fps -> 16fps
> >
> > Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
> > ---
> >  libavfilter/vf_find_rect.c | 53 +++++++++++++++-----------------------
> >  1 file changed, 21 insertions(+), 32 deletions(-)
> >
> > diff --git a/libavfilter/vf_find_rect.c b/libavfilter/vf_find_rect.c
> > index ee6c3f4b45..ed15885bc2 100644
> > --- a/libavfilter/vf_find_rect.c
> > +++ b/libavfilter/vf_find_rect.c
> > @@ -26,6 +26,7 @@
> >  #include "libavutil/imgutils.h"
> >  #include "libavutil/opt.h"
> >  #include "internal.h"
> > +#include "scene_sad.h"
> >
> >  #include "lavfutils.h"
> >  #include "lswsutils.h"
> > @@ -36,6 +37,8 @@ typedef struct FOCContext {
> >      AVClass *class;
> >      float threshold;
> >      int mipmaps;
> > +    ff_scene_sad_fn sad;
> > +    int bitdepth;
> >      int xmin, ymin, xmax, ymax;
> >      char *obj_filename;
> >      int last_x, last_y;
> > @@ -103,54 +106,40 @@ static AVFrame *downscale(AVFrame *in)
> >      return frame;
> >  }
> >
> > -static float compare(const AVFrame *haystack, const AVFrame *obj, int
> offx, int offy)
> > +static float compare_sad(FOCContext *foc, AVFrame *haystack, AVFrame
> *obj, int offx, int offy)
> >  {
> > -    int x,y;
> > -    int o_sum_v = 0;
> > -    int h_sum_v = 0;
> > -    int64_t oo_sum_v = 0;
> > -    int64_t hh_sum_v = 0;
> > -    int64_t oh_sum_v = 0;
> > -    float c;
> > +    uint64_t sad = 0;
> >      int n = obj->height * obj->width;
> > -    const uint8_t *odat = obj     ->data[0];
> > +    double mafd;
> > +    const uint8_t *odat = obj->data[0];
> >      const uint8_t *hdat = haystack->data[0] + offx + offy *
> haystack->linesize[0];
> > -    int64_t o_sigma, h_sigma;
> > -
> > -    for(y = 0; y < obj->height; y++) {
> > -        for(x = 0; x < obj->width; x++) {
> > -            int o_v = odat[x];
> > -            int h_v = hdat[x];
> > -            o_sum_v += o_v;
> > -            h_sum_v += h_v;
> > -            oo_sum_v += o_v * o_v;
> > -            hh_sum_v += h_v * h_v;
> > -            oh_sum_v += o_v * h_v;
> > -        }
> > -        odat += obj->linesize[0];
> > -        hdat += haystack->linesize[0];
> > -    }
> > -    o_sigma = n*oo_sum_v - o_sum_v*(int64_t)o_sum_v;
> > -    h_sigma = n*hh_sum_v - h_sum_v*(int64_t)h_sum_v;
> >
> > -    if (o_sigma == 0 || h_sigma == 0)
> > -        return 1.0;
> > +    foc->sad(hdat, haystack->linesize[0], odat, obj->linesize[0],
> > +            obj->width, obj->height, &sad);
> > +    emms_c();
> > +    mafd = (double)sad / n / (1ULL << foc->bitdepth);
>
> mixing floating point and MMX in the same function is likely not
> safe
>

The code is adapted from vf_freezedetect.c, and it works fine on my test system.
That's why we had to use emms_c(): to avoid that problem.


>
> also SAD wont recognize objects that are a different contrast or brightness
>
>
I haven't had a chance to test these conditions; that's the issue.  Please ignore
this patch.




> [...]
>
> --
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> Many that live deserve death. And some that die deserve life. Can you give
> it to them? Then do not be too eager to deal out death in judgement. For
> even the very wise cannot see all ends. -- Gandalf
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Michael Niedermayer June 15, 2019, 10:14 p.m.
On Sat, Jun 15, 2019 at 06:39:38AM +0800, Lance Wang wrote:
> On Sat, Jun 15, 2019 at 3:02 AM Michael Niedermayer <michael@niedermayer.cc>
> wrote:
> 
> > Hi
> >
> > On Wed, Jun 12, 2019 at 06:57:30PM +0800, lance.lmwang@gmail.com wrote:
> > > From: Limin Wang <lance.lmwang@gmail.com>
> > >
> > > benchmark on x86_64: 6.4 -> 16 with below command:
> > > ./ffmpeg  -i 1920x1080.mp4 -vf
> > find_rect=./find.tif,cover_rect=./cover.jpg:mode=cover -f null -
> > > 6.4 fps -> 16fps
> > >
> > > Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
> > > ---
> > >  libavfilter/vf_find_rect.c | 53 +++++++++++++++-----------------------
> > >  1 file changed, 21 insertions(+), 32 deletions(-)
> > >
> > > diff --git a/libavfilter/vf_find_rect.c b/libavfilter/vf_find_rect.c
> > > index ee6c3f4b45..ed15885bc2 100644
> > > --- a/libavfilter/vf_find_rect.c
> > > +++ b/libavfilter/vf_find_rect.c
> > > @@ -26,6 +26,7 @@
> > >  #include "libavutil/imgutils.h"
> > >  #include "libavutil/opt.h"
> > >  #include "internal.h"
> > > +#include "scene_sad.h"
> > >
> > >  #include "lavfutils.h"
> > >  #include "lswsutils.h"
> > > @@ -36,6 +37,8 @@ typedef struct FOCContext {
> > >      AVClass *class;
> > >      float threshold;
> > >      int mipmaps;
> > > +    ff_scene_sad_fn sad;
> > > +    int bitdepth;
> > >      int xmin, ymin, xmax, ymax;
> > >      char *obj_filename;
> > >      int last_x, last_y;
> > > @@ -103,54 +106,40 @@ static AVFrame *downscale(AVFrame *in)
> > >      return frame;
> > >  }
> > >
> > > -static float compare(const AVFrame *haystack, const AVFrame *obj, int
> > offx, int offy)
> > > +static float compare_sad(FOCContext *foc, AVFrame *haystack, AVFrame
> > *obj, int offx, int offy)
> > >  {
> > > -    int x,y;
> > > -    int o_sum_v = 0;
> > > -    int h_sum_v = 0;
> > > -    int64_t oo_sum_v = 0;
> > > -    int64_t hh_sum_v = 0;
> > > -    int64_t oh_sum_v = 0;
> > > -    float c;
> > > +    uint64_t sad = 0;
> > >      int n = obj->height * obj->width;
> > > -    const uint8_t *odat = obj     ->data[0];
> > > +    double mafd;
> > > +    const uint8_t *odat = obj->data[0];
> > >      const uint8_t *hdat = haystack->data[0] + offx + offy *
> > haystack->linesize[0];
> > > -    int64_t o_sigma, h_sigma;
> > > -
> > > -    for(y = 0; y < obj->height; y++) {
> > > -        for(x = 0; x < obj->width; x++) {
> > > -            int o_v = odat[x];
> > > -            int h_v = hdat[x];
> > > -            o_sum_v += o_v;
> > > -            h_sum_v += h_v;
> > > -            oo_sum_v += o_v * o_v;
> > > -            hh_sum_v += h_v * h_v;
> > > -            oh_sum_v += o_v * h_v;
> > > -        }
> > > -        odat += obj->linesize[0];
> > > -        hdat += haystack->linesize[0];
> > > -    }
> > > -    o_sigma = n*oo_sum_v - o_sum_v*(int64_t)o_sum_v;
> > > -    h_sigma = n*hh_sum_v - h_sum_v*(int64_t)h_sum_v;
> > >
> > > -    if (o_sigma == 0 || h_sigma == 0)
> > > -        return 1.0;
> > > +    foc->sad(hdat, haystack->linesize[0], odat, obj->linesize[0],
> > > +            obj->width, obj->height, &sad);
> > > +    emms_c();
> > > +    mafd = (double)sad / n / (1ULL << foc->bitdepth);
> >
> > mixing floating point and MMX in the same function is likely not
> > safe
> >
> 
> The code is changed from vf_freezedetect.c,  it's OK on my testing system.
> That's
> why we had to use emms_c to avoid it.

Nothing stops the compiler from moving floating-point operations,
registers, etc. around.
Putting MMX + emms in a function with floating point is asking for
problems; it's better not to do this.
Worse, such a problem would only show up on 32-bit MMX systems, so if an issue
occurred it would be specific to older systems. That's never good for finding
and fixing issues quickly.

thx

[...]
lance.lmwang@gmail.com June 15, 2019, 11:16 p.m.
On Sun, Jun 16, 2019 at 6:41 AM Michael Niedermayer <michael@niedermayer.cc>
wrote:

> On Sat, Jun 15, 2019 at 06:39:38AM +0800, Lance Wang wrote:
> > On Sat, Jun 15, 2019 at 3:02 AM Michael Niedermayer
> <michael@niedermayer.cc>
> > wrote:
> >
> > > Hi
> > >
> > > On Wed, Jun 12, 2019 at 06:57:30PM +0800, lance.lmwang@gmail.com
> wrote:
> > > > From: Limin Wang <lance.lmwang@gmail.com>
> > > >
> > > > benchmark on x86_64: 6.4 -> 16 with below command:
> > > > ./ffmpeg  -i 1920x1080.mp4 -vf
> > > find_rect=./find.tif,cover_rect=./cover.jpg:mode=cover -f null -
> > > > 6.4 fps -> 16fps
> > > >
> > > > Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
> > > > ---
> > > >  libavfilter/vf_find_rect.c | 53
> +++++++++++++++-----------------------
> > > >  1 file changed, 21 insertions(+), 32 deletions(-)
> > > >
> > > > diff --git a/libavfilter/vf_find_rect.c b/libavfilter/vf_find_rect.c
> > > > index ee6c3f4b45..ed15885bc2 100644
> > > > --- a/libavfilter/vf_find_rect.c
> > > > +++ b/libavfilter/vf_find_rect.c
> > > > @@ -26,6 +26,7 @@
> > > >  #include "libavutil/imgutils.h"
> > > >  #include "libavutil/opt.h"
> > > >  #include "internal.h"
> > > > +#include "scene_sad.h"
> > > >
> > > >  #include "lavfutils.h"
> > > >  #include "lswsutils.h"
> > > > @@ -36,6 +37,8 @@ typedef struct FOCContext {
> > > >      AVClass *class;
> > > >      float threshold;
> > > >      int mipmaps;
> > > > +    ff_scene_sad_fn sad;
> > > > +    int bitdepth;
> > > >      int xmin, ymin, xmax, ymax;
> > > >      char *obj_filename;
> > > >      int last_x, last_y;
> > > > @@ -103,54 +106,40 @@ static AVFrame *downscale(AVFrame *in)
> > > >      return frame;
> > > >  }
> > > >
> > > > -static float compare(const AVFrame *haystack, const AVFrame *obj,
> int
> > > offx, int offy)
> > > > +static float compare_sad(FOCContext *foc, AVFrame *haystack, AVFrame
> > > *obj, int offx, int offy)
> > > >  {
> > > > -    int x,y;
> > > > -    int o_sum_v = 0;
> > > > -    int h_sum_v = 0;
> > > > -    int64_t oo_sum_v = 0;
> > > > -    int64_t hh_sum_v = 0;
> > > > -    int64_t oh_sum_v = 0;
> > > > -    float c;
> > > > +    uint64_t sad = 0;
> > > >      int n = obj->height * obj->width;
> > > > -    const uint8_t *odat = obj     ->data[0];
> > > > +    double mafd;
> > > > +    const uint8_t *odat = obj->data[0];
> > > >      const uint8_t *hdat = haystack->data[0] + offx + offy *
> > > haystack->linesize[0];
> > > > -    int64_t o_sigma, h_sigma;
> > > > -
> > > > -    for(y = 0; y < obj->height; y++) {
> > > > -        for(x = 0; x < obj->width; x++) {
> > > > -            int o_v = odat[x];
> > > > -            int h_v = hdat[x];
> > > > -            o_sum_v += o_v;
> > > > -            h_sum_v += h_v;
> > > > -            oo_sum_v += o_v * o_v;
> > > > -            hh_sum_v += h_v * h_v;
> > > > -            oh_sum_v += o_v * h_v;
> > > > -        }
> > > > -        odat += obj->linesize[0];
> > > > -        hdat += haystack->linesize[0];
> > > > -    }
> > > > -    o_sigma = n*oo_sum_v - o_sum_v*(int64_t)o_sum_v;
> > > > -    h_sigma = n*hh_sum_v - h_sum_v*(int64_t)h_sum_v;
> > > >
> > > > -    if (o_sigma == 0 || h_sigma == 0)
> > > > -        return 1.0;
> > > > +    foc->sad(hdat, haystack->linesize[0], odat, obj->linesize[0],
> > > > +            obj->width, obj->height, &sad);
> > > > +    emms_c();
> > > > +    mafd = (double)sad / n / (1ULL << foc->bitdepth);
> > >
> > > mixing floating point and MMX in the same function is likely not
> > > safe
> > >
> >
> > The code is changed from vf_freezedetect.c,  it's OK on my testing
> system.
> > That's
> > why we had to use emms_c to avoid it.
>
> Nothing stops the compiler from moving any floating
> point operations, registers, ... around
> putting mmx + emms in a function with floating point is asking for
> problems, its better not to do this
> worse such problem would only show up on 32bit mmx systems, so if an issue
> occured it would be specific to older systems, thats never good for finding
> and fixing issues quickly
>
>
OK, I'll update the first patch for review; the other two patches will be
dropped and I will stop updating them.

thx
>
> [...]
> --
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> No great genius has ever existed without some touch of madness. --
> Aristotle
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

Patch hide | download patch | download mbox

diff --git a/libavfilter/vf_find_rect.c b/libavfilter/vf_find_rect.c
index ee6c3f4b45..ed15885bc2 100644
--- a/libavfilter/vf_find_rect.c
+++ b/libavfilter/vf_find_rect.c
@@ -26,6 +26,7 @@ 
 #include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
 #include "internal.h"
+#include "scene_sad.h"
 
 #include "lavfutils.h"
 #include "lswsutils.h"
@@ -36,6 +37,8 @@  typedef struct FOCContext {
     AVClass *class;
     float threshold;
     int mipmaps;
+    ff_scene_sad_fn sad;
+    int bitdepth;
     int xmin, ymin, xmax, ymax;
     char *obj_filename;
     int last_x, last_y;
@@ -103,54 +106,40 @@  static AVFrame *downscale(AVFrame *in)
     return frame;
 }
 
-static float compare(const AVFrame *haystack, const AVFrame *obj, int offx, int offy)
+static float compare_sad(FOCContext *foc, AVFrame *haystack, AVFrame *obj, int offx, int offy)
 {
-    int x,y;
-    int o_sum_v = 0;
-    int h_sum_v = 0;
-    int64_t oo_sum_v = 0;
-    int64_t hh_sum_v = 0;
-    int64_t oh_sum_v = 0;
-    float c;
+    uint64_t sad = 0;
     int n = obj->height * obj->width;
-    const uint8_t *odat = obj     ->data[0];
+    double mafd;
+    const uint8_t *odat = obj->data[0];
     const uint8_t *hdat = haystack->data[0] + offx + offy * haystack->linesize[0];
-    int64_t o_sigma, h_sigma;
-
-    for(y = 0; y < obj->height; y++) {
-        for(x = 0; x < obj->width; x++) {
-            int o_v = odat[x];
-            int h_v = hdat[x];
-            o_sum_v += o_v;
-            h_sum_v += h_v;
-            oo_sum_v += o_v * o_v;
-            hh_sum_v += h_v * h_v;
-            oh_sum_v += o_v * h_v;
-        }
-        odat += obj->linesize[0];
-        hdat += haystack->linesize[0];
-    }
-    o_sigma = n*oo_sum_v - o_sum_v*(int64_t)o_sum_v;
-    h_sigma = n*hh_sum_v - h_sum_v*(int64_t)h_sum_v;
 
-    if (o_sigma == 0 || h_sigma == 0)
-        return 1.0;
+    foc->sad(hdat, haystack->linesize[0], odat, obj->linesize[0],
+            obj->width, obj->height, &sad);
+    emms_c();
+    mafd = (double)sad / n / (1ULL << foc->bitdepth);
 
-    c = (n*oh_sum_v - o_sum_v*(int64_t)h_sum_v) / (sqrt(o_sigma)*sqrt(h_sigma));
-
-    return 1 - fabs(c);
+    return mafd;
 }
 
 static int config_input(AVFilterLink *inlink)
 {
     AVFilterContext *ctx = inlink->dst;
     FOCContext *foc = ctx->priv;
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
 
     if (foc->xmax <= 0)
         foc->xmax = inlink->w - foc->obj_frame->width;
     if (foc->ymax <= 0)
         foc->ymax = inlink->h - foc->obj_frame->height;
 
+    foc->bitdepth = pix_desc->comp[0].depth;
+
+    foc->sad = ff_scene_sad_get_fn(foc->bitdepth == 8 ? 8 : 16);
+    if (!foc->sad)
+        return AVERROR(EINVAL);
+
+
     return 0;
 }
 
@@ -169,7 +158,7 @@  static float search(FOCContext *foc, int pass, int maxpass, int xmin, int xmax,
 
     for (y = ymin; y <= ymax; y++) {
         for (x = xmin; x <= xmax; x++) {
-            float score = compare(foc->haystack_frame[pass], foc->needle_frame[pass], x, y);
+            float score = compare_sad(foc, foc->haystack_frame[pass], foc->needle_frame[pass], x, y);
             av_assert0(score != 0);
             if (score < best_score) {
                 best_score = score;