diff mbox

[FFmpeg-devel] lavf/vf_freezedetect: improve for the freeze frame detection

Message ID 1563030909-17535-1-git-send-email-lance.lmwang@gmail.com
State New
Headers show

Commit Message

Lance Wang July 13, 2019, 3:15 p.m. UTC
From: Limin Wang <lance.lmwang@gmail.com>

I have samples failed to detect the freeze frame with the default -60dB
noise(-40dB is OK to detect),
after apply the patch, it's ok to detect. 

I run the testing with fate-suite sample for your testing:
old: 
no freeze frame detect.

with the patch:
./ffmpeg -i fate-suite/svq3/Vertical400kbit.sorenson3.mov -vf
freezedetect=n=-60dB -an -f null -
[freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_start: 38.7667
[freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_duration: 2.5
[freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_end: 41.2667
[freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_start: 41.2667

Have run make fate testing although haven't find any freezedetect checking for
the fate.

Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
---
 libavfilter/vf_freezedetect.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

Comments

Marton Balint July 13, 2019, 5:24 p.m. UTC | #1
On Sat, 13 Jul 2019, lance.lmwang@gmail.com wrote:

> From: Limin Wang <lance.lmwang@gmail.com>
>
> I have samples failed to detect the freeze frame with the default -60dB
> noise(-40dB is OK to detect),
> after apply the patch, it's ok to detect. 
>
> I run the testing with fate-suite sample for your testing:
> old: 
> no freeze frame detect.
>
> with the patch:
> ./ffmpeg -i fate-suite/svq3/Vertical400kbit.sorenson3.mov -vf
> freezedetect=n=-60dB -an -f null -
> [freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_start: 38.7667
> [freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_duration: 2.5
> [freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_end: 41.2667
> [freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_start: 41.2667
>
> Have run make fate testing although haven't find any freezedetect checking for
> the fate.

You are changing the way the score is calculated and not describing why. 
Of course you will get different results.

>
> Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
> ---
> libavfilter/vf_freezedetect.c | 22 ++++++++++++++++------
> 1 file changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/libavfilter/vf_freezedetect.c b/libavfilter/vf_freezedetect.c
> index cc086af..0288fb0 100644
> --- a/libavfilter/vf_freezedetect.c
> +++ b/libavfilter/vf_freezedetect.c
> @@ -38,7 +38,9 @@ typedef struct FreezeDetectContext {
>     ptrdiff_t height[4];
>     ff_scene_sad_fn sad;
>     int bitdepth;
> +    int nb_planes;
>     AVFrame *reference_frame;
> +    double prev_mafd;
>     int64_t n;
>     int64_t reference_n;
>     int frozen;
> @@ -102,13 +104,15 @@ static int config_input(AVFilterLink *inlink)
>     AVFilterContext *ctx = inlink->dst;
>     FreezeDetectContext *s = ctx->priv;
>     const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
> +    int vsub = pix_desc->log2_chroma_h;
>
>     s->bitdepth = pix_desc->comp[0].depth;
> +    s->nb_planes = av_pix_fmt_count_planes(inlink->format);

This is not needed, for invalid planes we simply get 0 for linesize and 
width.

> 
> -    for (int plane = 0; plane < 4; plane++) {
> +    for (int plane = 0; plane < s->nb_planes; plane++) {
>         ptrdiff_t line_size = av_image_get_linesize(inlink->format, inlink->w, plane);
> -        s->width[plane] = line_size >> (s->bitdepth > 8);
> -        s->height[plane] = inlink->h >> ((plane == 1 || plane == 2) ? pix_desc->log2_chroma_h : 0);
> +        s->width[plane] = line_size;

Why? Width is the width of the plane in pixels, simply setting linesize 
does not seem right.

> +        s->height[plane] = plane == 1 || plane == 2 ? AV_CEIL_RSHIFT(inlink->h, vsub) : inlink->h;

Mix of a cosmetic and functional change. This should do the same with 
existing code and variables:
s->height[plane] = AV_CEIL_RSHIFT(inlink->h, ((plane == 1 || plane == 2) ? pix_desc->log2_chroma_h : 0)

>     }
>
>     s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
> @@ -129,7 +133,9 @@ static int is_frozen(FreezeDetectContext *s, AVFrame *reference, AVFrame *frame)
>     uint64_t sad = 0;
>     uint64_t count = 0;
>     double mafd;
> -    for (int plane = 0; plane < 4; plane++) {
> +    double diff, cmp;
> +
> +    for (int plane = 0; plane < s->nb_planes; plane++) {

Unneeded.

>         if (s->width[plane]) {
>             uint64_t plane_sad;
>             s->sad(frame->data[plane], frame->linesize[plane],
> @@ -140,8 +146,12 @@ static int is_frozen(FreezeDetectContext *s, AVFrame *reference, AVFrame *frame)
>         }
>     }
>     emms_c();
> -    mafd = (double)sad / count / (1ULL << s->bitdepth);
> -    return (mafd <= s->noise);
> +    mafd = (double)sad /(count >> (s->bitdepth > 8));

Why? MAFD should be the mean difference normalized to [0..1].

> +    diff = fabs(mafd - s->prev_mafd);
> +    cmp  = av_clipf(FFMIN(mafd, diff) / 100., 0, 1);
> +    s->prev_mafd = mafd;

Why? This is not scene change detection, MAFD change is not useful for us. 
E.g. two frames alternating will be detected as frozen. Also note that not 
always consecutive frames are compared, so involving MAFD change seems 
even more bogus to me.

Regards,
Marton
Lance Wang July 13, 2019, 10:42 p.m. UTC | #2
On Sat, Jul 13, 2019 at 07:24:59PM +0200, Marton Balint wrote:
> 
> 
> On Sat, 13 Jul 2019, lance.lmwang@gmail.com wrote:
> 
> >From: Limin Wang <lance.lmwang@gmail.com>
> >
> >I have samples failed to detect the freeze frame with the default -60dB
> >noise(-40dB is OK to detect),
> >after apply the patch, it's ok to detect.
> >
> >I run the testing with fate-suite sample for your testing:
> >old: no freeze frame detect.
> >
> >with the patch:
> >./ffmpeg -i fate-suite/svq3/Vertical400kbit.sorenson3.mov -vf
> >freezedetect=n=-60dB -an -f null -
> >[freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_start: 38.7667
> >[freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_duration: 2.5
> >[freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_end: 41.2667
> >[freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_start: 41.2667
> >
> >Have run make fate testing although haven't find any freezedetect checking for
> >the fate.
> 
> You are changing the way the score is calculated and not describing
> why. Of course you will get different results.
> 
> >
> >Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
> >---
> >libavfilter/vf_freezedetect.c | 22 ++++++++++++++++------
> >1 file changed, 16 insertions(+), 6 deletions(-)
> >
> >diff --git a/libavfilter/vf_freezedetect.c b/libavfilter/vf_freezedetect.c
> >index cc086af..0288fb0 100644
> >--- a/libavfilter/vf_freezedetect.c
> >+++ b/libavfilter/vf_freezedetect.c
> >@@ -38,7 +38,9 @@ typedef struct FreezeDetectContext {
> >    ptrdiff_t height[4];
> >    ff_scene_sad_fn sad;
> >    int bitdepth;
> >+    int nb_planes;
> >    AVFrame *reference_frame;
> >+    double prev_mafd;
> >    int64_t n;
> >    int64_t reference_n;
> >    int frozen;
> >@@ -102,13 +104,15 @@ static int config_input(AVFilterLink *inlink)
> >    AVFilterContext *ctx = inlink->dst;
> >    FreezeDetectContext *s = ctx->priv;
> >    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
> >+    int vsub = pix_desc->log2_chroma_h;
> >
> >    s->bitdepth = pix_desc->comp[0].depth;
> >+    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
> 
> This is not needed, for invalid planes we simply get 0 for linesize
> and width.

It's more general to get nb_planes instead of checking the linesize
result. If have one plane, it's unneed to check 4 times.

> 
> >
> >-    for (int plane = 0; plane < 4; plane++) {
> >+    for (int plane = 0; plane < s->nb_planes; plane++) {
> >        ptrdiff_t line_size = av_image_get_linesize(inlink->format, inlink->w, plane);
> >-        s->width[plane] = line_size >> (s->bitdepth > 8);
> >-        s->height[plane] = inlink->h >> ((plane == 1 || plane == 2) ? pix_desc->log2_chroma_h : 0);
> >+        s->width[plane] = line_size;
> 
> Why? Width is the width of the plane in pixels, simply setting
> linesize does not seem right.

the sad init function have set the bitdepth, so I think it's OK to set
the width in bytes. Maybe it's my misunderstand for the code.

> 
> >+        s->height[plane] = plane == 1 || plane == 2 ? AV_CEIL_RSHIFT(inlink->h, vsub) : inlink->h;
> 
> Mix of a cosmetic and functional change. This should do the same
> with existing code and variables:
> s->height[plane] = AV_CEIL_RSHIFT(inlink->h, ((plane == 1 || plane == 2) ? pix_desc->log2_chroma_h : 0)
> 
> >    }
> >
> >    s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
> >@@ -129,7 +133,9 @@ static int is_frozen(FreezeDetectContext *s, AVFrame *reference, AVFrame *frame)
> >    uint64_t sad = 0;
> >    uint64_t count = 0;
> >    double mafd;
> >-    for (int plane = 0; plane < 4; plane++) {
> >+    double diff, cmp;
> >+
> >+    for (int plane = 0; plane < s->nb_planes; plane++) {
> 
> Unneeded.
> 
> >        if (s->width[plane]) {
> >            uint64_t plane_sad;
> >            s->sad(frame->data[plane], frame->linesize[plane],
> >@@ -140,8 +146,12 @@ static int is_frozen(FreezeDetectContext *s, AVFrame *reference, AVFrame *frame)
> >        }
> >    }
> >    emms_c();
> >-    mafd = (double)sad / count / (1ULL << s->bitdepth);
> >-    return (mafd <= s->noise);
> >+    mafd = (double)sad /(count >> (s->bitdepth > 8));
> 
> Why? MAFD should be the mean difference normalized to [0..1].
if the bitdeth is 16, it'll divide by 1UL << 16, it'll cause the mafd is
very small. So I choose the scenecut way in the below. 

> 
> >+    diff = fabs(mafd - s->prev_mafd);
> >+    cmp  = av_clipf(FFMIN(mafd, diff) / 100., 0, 1);
> >+    s->prev_mafd = mafd;
> 
> Why? This is not scene change detection, MAFD change is not useful
> for us. E.g. two frames alternating will be detected as frozen. Also
> note that not always consecutive frames are compared, so involving
> MAFD change seems even more bogus to me.
Isn't possible to add one standard sampel into fate, it's useful to
test the function. We can choose different noise setting to get the same
result, so it's difficult to say whether it's covered.


> 
> Regards,
> Marton
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Marton Balint July 14, 2019, 9:51 a.m. UTC | #3
On Sun, 14 Jul 2019, Limin Wang wrote:

> On Sat, Jul 13, 2019 at 07:24:59PM +0200, Marton Balint wrote:
>> 
>> 
>> On Sat, 13 Jul 2019, lance.lmwang@gmail.com wrote:
>> 
>> >From: Limin Wang <lance.lmwang@gmail.com>
>> >
>> >I have samples failed to detect the freeze frame with the default -60dB
>> >noise(-40dB is OK to detect),
>> >after apply the patch, it's ok to detect.
>> >
>> >I run the testing with fate-suite sample for your testing:
>> >old: no freeze frame detect.
>> >
>> >with the patch:
>> >./ffmpeg -i fate-suite/svq3/Vertical400kbit.sorenson3.mov -vf
>> >freezedetect=n=-60dB -an -f null -
>> >[freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_start: 38.7667
>> >[freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_duration: 2.5
>> >[freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_end: 41.2667
>> >[freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_start: 41.2667
>> >
>> >Have run make fate testing although haven't find any freezedetect checking for
>> >the fate.
>> 
>> You are changing the way the score is calculated and not describing
>> why. Of course you will get different results.
>> 
>> >
>> >Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
>> >---
>> >libavfilter/vf_freezedetect.c | 22 ++++++++++++++++------
>> >1 file changed, 16 insertions(+), 6 deletions(-)
>> >
>> >diff --git a/libavfilter/vf_freezedetect.c b/libavfilter/vf_freezedetect.c
>> >index cc086af..0288fb0 100644
>> >--- a/libavfilter/vf_freezedetect.c
>> >+++ b/libavfilter/vf_freezedetect.c
>> >@@ -38,7 +38,9 @@ typedef struct FreezeDetectContext {
>> >    ptrdiff_t height[4];
>> >    ff_scene_sad_fn sad;
>> >    int bitdepth;
>> >+    int nb_planes;
>> >    AVFrame *reference_frame;
>> >+    double prev_mafd;
>> >    int64_t n;
>> >    int64_t reference_n;
>> >    int frozen;
>> >@@ -102,13 +104,15 @@ static int config_input(AVFilterLink *inlink)
>> >    AVFilterContext *ctx = inlink->dst;
>> >    FreezeDetectContext *s = ctx->priv;
>> >    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
>> >+    int vsub = pix_desc->log2_chroma_h;
>> >
>> >    s->bitdepth = pix_desc->comp[0].depth;
>> >+    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
>> 
>> This is not needed, for invalid planes we simply get 0 for linesize
>> and width.
>
> It's more general to get nb_planes instead of checking the linesize
> result. If have one plane, it's unneed to check 4 times.

Still looks a purely cosmetic change to me.

>
>> 
>> >
>> >-    for (int plane = 0; plane < 4; plane++) {
>> >+    for (int plane = 0; plane < s->nb_planes; plane++) {
>> >        ptrdiff_t line_size = av_image_get_linesize(inlink->format, inlink->w, plane);
>> >-        s->width[plane] = line_size >> (s->bitdepth > 8);
>> >-        s->height[plane] = inlink->h >> ((plane == 1 || plane == 2) ? pix_desc->log2_chroma_h : 0);
>> >+        s->width[plane] = line_size;
>> 
>> Why? Width is the width of the plane in pixels, simply setting
>> linesize does not seem right.
>
> the sad init function have set the bitdepth, so I think it's OK to set
> the width in bytes. Maybe it's my misunderstand for the code.

Later on width*height is used to determine the number of pixels for which 
we have counted the sum of differences.

>
>> 
>> >+        s->height[plane] = plane == 1 || plane == 2 ? AV_CEIL_RSHIFT(inlink->h, vsub) : inlink->h;
>> 
>> Mix of a cosmetic and functional change. This should do the same
>> with existing code and variables:
>> s->height[plane] = AV_CEIL_RSHIFT(inlink->h, ((plane == 1 || plane == 2) ? pix_desc->log2_chroma_h : 0)
>> 
>> >    }
>> >
>> >    s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
>> >@@ -129,7 +133,9 @@ static int is_frozen(FreezeDetectContext *s, AVFrame *reference, AVFrame *frame)
>> >    uint64_t sad = 0;
>> >    uint64_t count = 0;
>> >    double mafd;
>> >-    for (int plane = 0; plane < 4; plane++) {
>> >+    double diff, cmp;
>> >+
>> >+    for (int plane = 0; plane < s->nb_planes; plane++) {
>> 
>> Unneeded.
>> 
>> >        if (s->width[plane]) {
>> >            uint64_t plane_sad;
>> >            s->sad(frame->data[plane], frame->linesize[plane],
>> >@@ -140,8 +146,12 @@ static int is_frozen(FreezeDetectContext *s, AVFrame *reference, AVFrame *frame)
>> >        }
>> >    }
>> >    emms_c();
>> >-    mafd = (double)sad / count / (1ULL << s->bitdepth);
>> >-    return (mafd <= s->noise);
>> >+    mafd = (double)sad /(count >> (s->bitdepth > 8));
>> 
>> Why? MAFD should be the mean difference normalized to [0..1].
> if the bitdeth is 16, it'll divide by 1UL << 16, it'll cause the mafd is
> very small. So I choose the scenecut way in the below.

The metric supposed to be indepentent of the bit depth. If you get 
different mafd for the same source expressed in 8bit, 10bit or 16bit pixel 
format (provided that you use the same subsampling), then you are doing 
something wrong.

And you can't reinvent MAFD according to your needs. It is the mean 
absolute difference of the whole image normalized to 0..1 in this case. If 
you are not calculating that, then it is not MAFD.

>
>> 
>> >+    diff = fabs(mafd - s->prev_mafd);
>> >+    cmp  = av_clipf(FFMIN(mafd, diff) / 100., 0, 1);
>> >+    s->prev_mafd = mafd;
>> 
>> Why? This is not scene change detection, MAFD change is not useful
>> for us. E.g. two frames alternating will be detected as frozen. Also
>> note that not always consecutive frames are compared, so involving
>> MAFD change seems even more bogus to me.
> Isn't possible to add one standard sampel into fate, it's useful to
> test the function. We can choose different noise setting to get the same
> result, so it's difficult to say whether it's covered.

Good idea, although one of the existing samples (maybe the one you tested 
with already) most probably should work as well. Fate would be the perfect 
place to test different bit depths and make sure you get the same results. 
Noise floor should be set up in a way so that changing it slightly make a 
real difference in detection, so score changes in code can be detected.

Regards,
Marton
Lance Wang July 15, 2019, 2:45 a.m. UTC | #4
On Sun, Jul 14, 2019 at 11:51:27AM +0200, Marton Balint wrote:
> 
> 
> On Sun, 14 Jul 2019, Limin Wang wrote:
> 
> >On Sat, Jul 13, 2019 at 07:24:59PM +0200, Marton Balint wrote:
> >>
> >>
> >>On Sat, 13 Jul 2019, lance.lmwang@gmail.com wrote:
> >>
> >>>From: Limin Wang <lance.lmwang@gmail.com>
> >>>
> >>>I have samples failed to detect the freeze frame with the default -60dB
> >>>noise(-40dB is OK to detect),
> >>>after apply the patch, it's ok to detect.
> >>>
> >>>I run the testing with fate-suite sample for your testing:
> >>>old: no freeze frame detect.
> >>>
> >>>with the patch:
> >>>./ffmpeg -i fate-suite/svq3/Vertical400kbit.sorenson3.mov -vf
> >>>freezedetect=n=-60dB -an -f null -
> >>>[freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_start: 38.7667
> >>>[freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_duration: 2.5
> >>>[freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_end: 41.2667
> >>>[freezedetect @ 0x7fe18a604900] lavfi.freezedetect.freeze_start: 41.2667
> >>>
> >>>Have run make fate testing although haven't find any freezedetect checking for
> >>>the fate.
> >>
> >>You are changing the way the score is calculated and not describing
> >>why. Of course you will get different results.
> >>
> >>>
> >>>Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
> >>>---
> >>>libavfilter/vf_freezedetect.c | 22 ++++++++++++++++------
> >>>1 file changed, 16 insertions(+), 6 deletions(-)
> >>>
> >>>diff --git a/libavfilter/vf_freezedetect.c b/libavfilter/vf_freezedetect.c
> >>>index cc086af..0288fb0 100644
> >>>--- a/libavfilter/vf_freezedetect.c
> >>>+++ b/libavfilter/vf_freezedetect.c
> >>>@@ -38,7 +38,9 @@ typedef struct FreezeDetectContext {
> >>>    ptrdiff_t height[4];
> >>>    ff_scene_sad_fn sad;
> >>>    int bitdepth;
> >>>+    int nb_planes;
> >>>    AVFrame *reference_frame;
> >>>+    double prev_mafd;
> >>>    int64_t n;
> >>>    int64_t reference_n;
> >>>    int frozen;
> >>>@@ -102,13 +104,15 @@ static int config_input(AVFilterLink *inlink)
> >>>    AVFilterContext *ctx = inlink->dst;
> >>>    FreezeDetectContext *s = ctx->priv;
> >>>    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
> >>>+    int vsub = pix_desc->log2_chroma_h;
> >>>
> >>>    s->bitdepth = pix_desc->comp[0].depth;
> >>>+    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
> >>
> >>This is not needed, for invalid planes we simply get 0 for linesize
> >>and width.
> >
> >It's more general to get nb_planes instead of checking the linesize
> >result. If have one plane, it's unneed to check 4 times.
> 
> Still looks a purely cosmetic change to me.
> 
> >
> >>
> >>>
> >>>-    for (int plane = 0; plane < 4; plane++) {
> >>>+    for (int plane = 0; plane < s->nb_planes; plane++) {
> >>>        ptrdiff_t line_size = av_image_get_linesize(inlink->format, inlink->w, plane);
> >>>-        s->width[plane] = line_size >> (s->bitdepth > 8);
> >>>-        s->height[plane] = inlink->h >> ((plane == 1 || plane == 2) ? pix_desc->log2_chroma_h : 0);
> >>>+        s->width[plane] = line_size;
> >>
> >>Why? Width is the width of the plane in pixels, simply setting
> >>linesize does not seem right.
> >
> >the sad init function have set the bitdepth, so I think it's OK to set
> >the width in bytes. Maybe it's my misunderstand for the code.
> 
> Later on width*height is used to determine the number of pixels for
> which we have counted the sum of differences.
> 
> >
> >>
> >>>+        s->height[plane] = plane == 1 || plane == 2 ? AV_CEIL_RSHIFT(inlink->h, vsub) : inlink->h;
> >>
> >>Mix of a cosmetic and functional change. This should do the same
> >>with existing code and variables:
> >>s->height[plane] = AV_CEIL_RSHIFT(inlink->h, ((plane == 1 || plane == 2) ? pix_desc->log2_chroma_h : 0)
> >>
> >>>    }
> >>>
> >>>    s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
> >>>@@ -129,7 +133,9 @@ static int is_frozen(FreezeDetectContext *s, AVFrame *reference, AVFrame *frame)
> >>>    uint64_t sad = 0;
> >>>    uint64_t count = 0;
> >>>    double mafd;
> >>>-    for (int plane = 0; plane < 4; plane++) {
> >>>+    double diff, cmp;
> >>>+
> >>>+    for (int plane = 0; plane < s->nb_planes; plane++) {
> >>
> >>Unneeded.
> >>
> >>>        if (s->width[plane]) {
> >>>            uint64_t plane_sad;
> >>>            s->sad(frame->data[plane], frame->linesize[plane],
> >>>@@ -140,8 +146,12 @@ static int is_frozen(FreezeDetectContext *s, AVFrame *reference, AVFrame *frame)
> >>>        }
> >>>    }
> >>>    emms_c();
> >>>-    mafd = (double)sad / count / (1ULL << s->bitdepth);
> >>>-    return (mafd <= s->noise);
> >>>+    mafd = (double)sad /(count >> (s->bitdepth > 8));
> >>
> >>Why? MAFD should be the mean difference normalized to [0..1].
> >if the bitdeth is 16, it'll divide by 1UL << 16, it'll cause the mafd is
> >very small. So I choose the scenecut way in the below.
> 
> The metric supposed to be indepentent of the bit depth. If you get
> different mafd for the same source expressed in 8bit, 10bit or 16bit
> pixel format (provided that you use the same subsampling), then you
> are doing something wrong.
> 
> And you can't reinvent MAFD according to your needs. It is the mean
> absolute difference of the whole image normalized to 0..1 in this
> case. If you are not calculating that, then it is not MAFD.

Sorry, now the scenecut detection support rgb only, so I'm changing it to support 
more format without autoscale, it'll necessary to solved how to calculated mafd for
10bits or 12bits. Below is my understanding for the mafd calculation, correct me if 
I'm misunderstanding. for example, if the bitdepth is 8 bits, it means the pixel 
block is 8x8 pixel, so maybe we should divide 8x8 instead of 256?

mafd = sad / count / (bitdepth * bitdepth)


> 
> >
> >>
> >>>+    diff = fabs(mafd - s->prev_mafd);
> >>>+    cmp  = av_clipf(FFMIN(mafd, diff) / 100., 0, 1);
> >>>+    s->prev_mafd = mafd;
> >>
> >>Why? This is not scene change detection, MAFD change is not useful
> >>for us. E.g. two frames alternating will be detected as frozen. Also
> >>note that not always consecutive frames are compared, so involving
> >>MAFD change seems even more bogus to me.
> >Isn't possible to add one standard sampel into fate, it's useful to
> >test the function. We can choose different noise setting to get the same
> >result, so it's difficult to say whether it's covered.
> 
> Good idea, although one of the existing samples (maybe the one you
> tested with already) most probably should work as well. Fate would
> be the perfect place to test different bit depths and make sure you
> get the same results. Noise floor should be set up in a way so that
> changing it slightly make a real difference in detection, so score
> changes in code can be detected.
> Regards,
> Marton
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Marton Balint July 15, 2019, 9:07 p.m. UTC | #5
On Mon, 15 Jul 2019, Limin Wang wrote:

[...]

>> >>>        if (s->width[plane]) {
>> >>>            uint64_t plane_sad;
>> >>>            s->sad(frame->data[plane], frame->linesize[plane],
>> >>>@@ -140,8 +146,12 @@ static int is_frozen(FreezeDetectContext *s, AVFrame *reference, AVFrame *frame)
>> >>>        }
>> >>>    }
>> >>>    emms_c();
>> >>>-    mafd = (double)sad / count / (1ULL << s->bitdepth);
>> >>>-    return (mafd <= s->noise);
>> >>>+    mafd = (double)sad /(count >> (s->bitdepth > 8));
>> >>
>> >>Why? MAFD should be the mean difference normalized to [0..1].
>> >if the bitdeth is 16, it'll divide by 1UL << 16, it'll cause the mafd is
>> >very small. So I choose the scenecut way in the below.
>> 
>> The metric supposed to be indepentent of the bit depth. If you get
>> different mafd for the same source expressed in 8bit, 10bit or 16bit
>> pixel format (provided that you use the same subsampling), then you
>> are doing something wrong.
>> 
>> And you can't reinvent MAFD according to your needs. It is the mean
>> absolute difference of the whole image normalized to 0..1 in this
>> case. If you are not calculating that, then it is not MAFD.
>
> Sorry, now the scenecut detection support rgb only, so I'm changing it to support 
> more format without autoscale, it'll necessary to solved how to calculated mafd for
> 10bits or 12bits. Below is my understanding for the mafd calculation, correct me if 
> I'm misunderstanding. for example, if the bitdepth is 8 bits, it means the pixel 
> block is 8x8 pixel, so maybe we should divide 8x8 instead of 256?
>
> mafd = sad / count / (bitdepth * bitdepth)

bitdepth is the bitdepth of one pixel. For 8 bit, a color component can be
0..255. For 16 bit, a color component can be 0..65535.

8x8 and 16x16 pixel blocks are totally different and unrelated things from 
bit depth. Yes, SAD is usually calculated for 8x8 or 16x16 blocks for 
motion estimation or compression, not here. Here, SAD is the sum of 
absolute differences throughout the entire image, (width x height). Count 
= width * height, so SAD/count is the average of differences. In order to 
get it to [0..1] you have to divide by 2^bitdepth.

Regards,
Marton
Lance Wang July 15, 2019, 11:02 p.m. UTC | #6
On Mon, Jul 15, 2019 at 11:07:32PM +0200, Marton Balint wrote:
> 
> 
> On Mon, 15 Jul 2019, Limin Wang wrote:
> 
> [...]
> 
> >>>>>        if (s->width[plane]) {
> >>>>>            uint64_t plane_sad;
> >>>>>            s->sad(frame->data[plane], frame->linesize[plane],
> >>>>>@@ -140,8 +146,12 @@ static int is_frozen(FreezeDetectContext *s, AVFrame *reference, AVFrame *frame)
> >>>>>        }
> >>>>>    }
> >>>>>    emms_c();
> >>>>>-    mafd = (double)sad / count / (1ULL << s->bitdepth);
> >>>>>-    return (mafd <= s->noise);
> >>>>>+    mafd = (double)sad /(count >> (s->bitdepth > 8));
> >>>>
> >>>>Why? MAFD should be the mean difference normalized to [0..1].
> >>>if the bitdeth is 16, it'll divide by 1UL << 16, it'll cause the mafd is
> >>>very small. So I choose the scenecut way in the below.
> >>
> >>The metric supposed to be indepentent of the bit depth. If you get
> >>different mafd for the same source expressed in 8bit, 10bit or 16bit
> >>pixel format (provided that you use the same subsampling), then you
> >>are doing something wrong.
> >>
> >>And you can't reinvent MAFD according to your needs. It is the mean
> >>absolute difference of the whole image normalized to 0..1 in this
> >>case. If you are not calculating that, then it is not MAFD.
> >
> >Sorry, now the scenecut detection support rgb only, so I'm
> >changing it to support more format without autoscale, it'll
> >necessary to solved how to calculated mafd for
> >10bits or 12bits. Below is my understanding for the mafd
> >calculation, correct me if I'm misunderstanding. for example, if
> >the bitdepth is 8 bits, it means the pixel block is 8x8 pixel, so
> >maybe we should divide 8x8 instead of 256?
> >
> >mafd = sad / count / (bitdepth * bitdepth)
> 
> bitdepth is the bitdepth of one pixel. For 8 bit, a color component can be
> 0..255. For 16 bit, a color component can be 0..65535.
> 
> 8x8 and 16x16 pixel blocks are totally different and unrelated
> things from bit depth. Yes, SAD is usually calculated for 8x8 or
> 16x16 blocks for motion estimation or compression, not here. Here,
> SAD is the sum of absolute differences throughout the entire image,
> (width x height). Count = width * height, so SAD/count is the
> average of differences. In order to get it to [0..1] you have to
> divide by 2^bitdepth.
Thank you, I think I'm clear for the code, the 2^bitdepth is only for
normalize.

I have submited the scenecut detected change for more format support. 
For the result is little different with rgb format and I haven't good way
to sure it's expected result.  

https://patchwork.ffmpeg.org/patch/13957/

> 
> Regards,
> Marton
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox

Patch

diff --git a/libavfilter/vf_freezedetect.c b/libavfilter/vf_freezedetect.c
index cc086af..0288fb0 100644
--- a/libavfilter/vf_freezedetect.c
+++ b/libavfilter/vf_freezedetect.c
@@ -38,7 +38,9 @@  typedef struct FreezeDetectContext {
     ptrdiff_t height[4];
     ff_scene_sad_fn sad;
     int bitdepth;
+    int nb_planes;
     AVFrame *reference_frame;
+    double prev_mafd;
     int64_t n;
     int64_t reference_n;
     int frozen;
@@ -102,13 +104,15 @@  static int config_input(AVFilterLink *inlink)
     AVFilterContext *ctx = inlink->dst;
     FreezeDetectContext *s = ctx->priv;
     const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
+    int vsub = pix_desc->log2_chroma_h;
 
     s->bitdepth = pix_desc->comp[0].depth;
+    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
 
-    for (int plane = 0; plane < 4; plane++) {
+    for (int plane = 0; plane < s->nb_planes; plane++) {
         ptrdiff_t line_size = av_image_get_linesize(inlink->format, inlink->w, plane);
-        s->width[plane] = line_size >> (s->bitdepth > 8);
-        s->height[plane] = inlink->h >> ((plane == 1 || plane == 2) ? pix_desc->log2_chroma_h : 0);
+        s->width[plane] = line_size;
+        s->height[plane] = plane == 1 || plane == 2 ? AV_CEIL_RSHIFT(inlink->h, vsub) : inlink->h;
     }
 
     s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
@@ -129,7 +133,9 @@  static int is_frozen(FreezeDetectContext *s, AVFrame *reference, AVFrame *frame)
     uint64_t sad = 0;
     uint64_t count = 0;
     double mafd;
-    for (int plane = 0; plane < 4; plane++) {
+    double diff, cmp;
+
+    for (int plane = 0; plane < s->nb_planes; plane++) {
         if (s->width[plane]) {
             uint64_t plane_sad;
             s->sad(frame->data[plane], frame->linesize[plane],
@@ -140,8 +146,12 @@  static int is_frozen(FreezeDetectContext *s, AVFrame *reference, AVFrame *frame)
         }
     }
     emms_c();
-    mafd = (double)sad / count / (1ULL << s->bitdepth);
-    return (mafd <= s->noise);
+    mafd = (double)sad /(count >> (s->bitdepth > 8));
+    diff = fabs(mafd - s->prev_mafd);
+    cmp  = av_clipf(FFMIN(mafd, diff) / 100., 0, 1);
+    s->prev_mafd = mafd;
+
+    return (cmp <= s->noise);
 }
 
 static int set_meta(FreezeDetectContext *s, AVFrame *frame, const char *key, const char *value)