diff mbox series

[FFmpeg-devel] checkasm: vvc_alf: Limit benchmarking to a reasonable subset of functions

Message ID 20240521100429.87090-1-martin@martin.st
State Accepted
Commit a9dc7dd7fde368420dea0673242ab45cf57e7201
Headers show
Series [FFmpeg-devel] checkasm: vvc_alf: Limit benchmarking to a reasonable subset of functions | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished

Commit Message

Martin Storsjö May 21, 2024, 10:04 a.m. UTC
Don't benchmark every single combination of widths and heights;
only benchmark cases which are squares (like in vvc_mc.c).

Contrary to vvc_mc, which increases sizes by doubling dimensions,
vvc_alf tests all sizes in increments of 4. Limit benchmarking to
the cases which are powers of two.

This reduces the number of benchmarked cases from 3072 down to 18.
---
 tests/checkasm/vvc_alf.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

Comments

Martin Storsjö May 21, 2024, 10:15 a.m. UTC | #1
On Tue, 21 May 2024, Martin Storsjö wrote:

> Don't benchmark every single combination of widths and heights;
> only benchmark cases which are squares (like in vvc_mc.c).
>
> Contrary to vvc_mc, which increases sizes by doubling dimensions,
> vvc_alf tests all sizes in increments of 4. Limit benchmarking to
> the cases which are powers of two.
>
> This reduces the number of benchmarked cases from 3072 down to 18.
> ---
> tests/checkasm/vvc_alf.c | 11 ++++++++---
> 1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/tests/checkasm/vvc_alf.c b/tests/checkasm/vvc_alf.c
> index 9526260598..6dd89bfafc 100644
> --- a/tests/checkasm/vvc_alf.c
> +++ b/tests/checkasm/vvc_alf.c
> @@ -103,7 +103,9 @@ static void check_alf_filter(VVCDSPContext *c, const int bit_depth)
>                     if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, w * SIZEOF_PIXEL))
>                         fail();
>                 }
> -                bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
> +                // Bench only square sizes, and ones with dimensions being a power of two.
> +                if (w == h && (w & (w - 1)) == 0)
> +                    bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
>             }
>             if (check_func(c->alf.filter[CHROMA], "vvc_alf_filter_chroma_%dx%d_%d", w, h, bit_depth)) {
>                 const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_CHROMA;
> @@ -115,7 +117,8 @@ static void check_alf_filter(VVCDSPContext *c, const int bit_depth)
>                     if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, w * SIZEOF_PIXEL))
>                         fail();
>                 }
> -                bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
> +                if (w == h && (w & (w - 1)) == 0)
> +                    bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
>             }
>         }
>     }
> @@ -156,7 +159,9 @@ static void check_alf_classify(VVCDSPContext *c, const int bit_depth)
>                     fail();
>                 if (memcmp(transpose_idx0, transpose_idx1, id_size))
>                     fail();
> -                bench_new(class_idx1, transpose_idx1, src1 + offset, stride, w, h, vb_pos, alf_gradient_tmp);
> +                // Bench only square sizes, and ones with dimensions being a power of two.
> +                if (w == h && (w & (w - 1)) == 0)
> +                    bench_new(class_idx1, transpose_idx1, src1 + offset, stride, w, h, vb_pos, alf_gradient_tmp);

Note, these tests (all vvc tests) use check_func("name...%dx%d", w, h) - 
while it's common elsewhere to group them up into slightly bigger bundles, 
e.g. only including the width in the function name, assuming that this is 
the level of granularity of actual assembly implementations - e.g. I don't 
think there would be a different codepath implemented for each block 
height.

And it's possible to convey more information about exactly what failed, 
without needing to encode it into the function name - see the 
checkasm_check functions/macro, and e.g. commit 
8ff4a4a4f4f73c5e276fa0cbe6cd5a148ebdd4ae.

// Martin
Rémi Denis-Courmont May 21, 2024, 11:10 a.m. UTC | #2
Le 21 mai 2024 13:04:29 GMT+03:00, "Martin Storsjö" <martin@martin.st> a écrit :
>Don't benchmark every single combination of widths and heights;
>only benchmark cases which are squares (like in vvc_mc.c).
>
>Contrary to vvc_mc, which increases sizes by doubling dimensions,
>vvc_alf tests all sizes in increments of 4. Limit benchmarking to
>the cases which are powers of two.
>
>This reduces the number of benchmarked cases from 3072 down to 18.
>---
> tests/checkasm/vvc_alf.c | 11 ++++++++---
> 1 file changed, 8 insertions(+), 3 deletions(-)
>
>diff --git a/tests/checkasm/vvc_alf.c b/tests/checkasm/vvc_alf.c
>index 9526260598..6dd89bfafc 100644
>--- a/tests/checkasm/vvc_alf.c
>+++ b/tests/checkasm/vvc_alf.c
>@@ -103,7 +103,9 @@ static void check_alf_filter(VVCDSPContext *c, const int bit_depth)
>                     if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, w * SIZEOF_PIXEL))
>                         fail();
>                 }
>-                bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
>+                // Bench only square sizes, and ones with dimensions being a power of two.
>+                if (w == h && (w & (w - 1)) == 0)
>+                    bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
>             }
>             if (check_func(c->alf.filter[CHROMA], "vvc_alf_filter_chroma_%dx%d_%d", w, h, bit_depth)) {
>                 const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_CHROMA;
>@@ -115,7 +117,8 @@ static void check_alf_filter(VVCDSPContext *c, const int bit_depth)
>                     if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, w * SIZEOF_PIXEL))
>                         fail();
>                 }
>-                bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
>+                if (w == h && (w & (w - 1)) == 0)
>+                    bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
>             }
>         }
>     }
>@@ -156,7 +159,9 @@ static void check_alf_classify(VVCDSPContext *c, const int bit_depth)
>                     fail();
>                 if (memcmp(transpose_idx0, transpose_idx1, id_size))
>                     fail();
>-                bench_new(class_idx1, transpose_idx1, src1 + offset, stride, w, h, vb_pos, alf_gradient_tmp);
>+                // Bench only square sizes, and ones with dimensions being a power of two.
>+                if (w == h && (w & (w - 1)) == 0)
>+                    bench_new(class_idx1, transpose_idx1, src1 + offset, stride, w, h, vb_pos, alf_gradient_tmp);
>             }
>         }
>     }

LGTM.

By the way, does anybody know if we could skip benchmarking C functions for which zero optimisations are available ? We are not printing the benchmark results in that case, so that wouldn't be a loss.
Nuo Mi May 21, 2024, 12:31 p.m. UTC | #3
On Tue, May 21, 2024 at 7:11 PM Rémi Denis-Courmont <remi@remlab.net> wrote:

>
>
> Le 21 mai 2024 13:04:29 GMT+03:00, "Martin Storsjö" <martin@martin.st> a
> écrit :
> >Don't benchmark every single combination of widths and heights;
> >only benchmark cases which are squares (like in vvc_mc.c).
> >
> >Contrary to vvc_mc, which increases sizes by doubling dimensions,
> >vvc_alf tests all sizes in increments of 4. Limit benchmarking to
> >the cases which are powers of two.
> >
> >This reduces the number of benchmarked cases from 3072 down to 18.
> >---
> > tests/checkasm/vvc_alf.c | 11 ++++++++---
> > 1 file changed, 8 insertions(+), 3 deletions(-)
> >
> >diff --git a/tests/checkasm/vvc_alf.c b/tests/checkasm/vvc_alf.c
> >index 9526260598..6dd89bfafc 100644
> >--- a/tests/checkasm/vvc_alf.c
> >+++ b/tests/checkasm/vvc_alf.c
> >@@ -103,7 +103,9 @@ static void check_alf_filter(VVCDSPContext *c, const
> int bit_depth)
> >                     if (memcmp(dst0 + i * dst_stride, dst1 + i *
> dst_stride, w * SIZEOF_PIXEL))
> >                         fail();
> >                 }
> >-                bench_new(dst1, dst_stride, src1 + offset, src_stride,
> w, h, filter, clip, vb_pos);
> >+                // Bench only square sizes, and ones with dimensions
> being a power of two.
> >+                if (w == h && (w & (w - 1)) == 0)
> >+                    bench_new(dst1, dst_stride, src1 + offset,
> src_stride, w, h, filter, clip, vb_pos);
> >             }
> >             if (check_func(c->alf.filter[CHROMA],
> "vvc_alf_filter_chroma_%dx%d_%d", w, h, bit_depth)) {
> >                 const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_CHROMA;
> >@@ -115,7 +117,8 @@ static void check_alf_filter(VVCDSPContext *c, const
> int bit_depth)
> >                     if (memcmp(dst0 + i * dst_stride, dst1 + i *
> dst_stride, w * SIZEOF_PIXEL))
> >                         fail();
> >                 }
> >-                bench_new(dst1, dst_stride, src1 + offset, src_stride,
> w, h, filter, clip, vb_pos);
> >+                if (w == h && (w & (w - 1)) == 0)
> >+                    bench_new(dst1, dst_stride, src1 + offset,
> src_stride, w, h, filter, clip, vb_pos);
> >             }
> >         }
> >     }
> >@@ -156,7 +159,9 @@ static void check_alf_classify(VVCDSPContext *c,
> const int bit_depth)
> >                     fail();
> >                 if (memcmp(transpose_idx0, transpose_idx1, id_size))
> >                     fail();
> >-                bench_new(class_idx1, transpose_idx1, src1 + offset,
> stride, w, h, vb_pos, alf_gradient_tmp);
> >+                // Bench only square sizes, and ones with dimensions
> being a power of two.
> >+                if (w == h && (w & (w - 1)) == 0)
> >+                    bench_new(class_idx1, transpose_idx1, src1 + offset,
> stride, w, h, vb_pos, alf_gradient_tmp);
> >             }
> >         }
> >     }
>
> LGTM.
>
Applied.
Thank you, Martin and Remi.

>
> By the way, does anybody know if we could skip benchmarking C functions
> for which zero optimisations are available ? We are not printing the
> benchmark results in that case, so that wouldn't be a loss.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Nuo Mi May 21, 2024, 12:38 p.m. UTC | #4
On Tue, May 21, 2024 at 6:16 PM Martin Storsjö <martin@martin.st> wrote:

> On Tue, 21 May 2024, Martin Storsjö wrote:
>
> > Don't benchmark every single combination of widths and heights;
> > only benchmark cases which are squares (like in vvc_mc.c).
> >
> > Contrary to vvc_mc, which increases sizes by doubling dimensions,
> > vvc_alf tests all sizes in increments of 4. Limit benchmarking to
> > the cases which are powers of two.
> >
> > This reduces the number of benchmarked cases from 3072 down to 18.
> > ---
> > tests/checkasm/vvc_alf.c | 11 ++++++++---
> > 1 file changed, 8 insertions(+), 3 deletions(-)
> >
> > diff --git a/tests/checkasm/vvc_alf.c b/tests/checkasm/vvc_alf.c
> > index 9526260598..6dd89bfafc 100644
> > --- a/tests/checkasm/vvc_alf.c
> > +++ b/tests/checkasm/vvc_alf.c
> > @@ -103,7 +103,9 @@ static void check_alf_filter(VVCDSPContext *c, const
> int bit_depth)
> >                     if (memcmp(dst0 + i * dst_stride, dst1 + i *
> dst_stride, w * SIZEOF_PIXEL))
> >                         fail();
> >                 }
> > -                bench_new(dst1, dst_stride, src1 + offset, src_stride,
> w, h, filter, clip, vb_pos);
> > +                // Bench only square sizes, and ones with dimensions
> being a power of two.
> > +                if (w == h && (w & (w - 1)) == 0)
> > +                    bench_new(dst1, dst_stride, src1 + offset,
> src_stride, w, h, filter, clip, vb_pos);
> >             }
> >             if (check_func(c->alf.filter[CHROMA],
> "vvc_alf_filter_chroma_%dx%d_%d", w, h, bit_depth)) {
> >                 const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_CHROMA;
> > @@ -115,7 +117,8 @@ static void check_alf_filter(VVCDSPContext *c, const
> int bit_depth)
> >                     if (memcmp(dst0 + i * dst_stride, dst1 + i *
> dst_stride, w * SIZEOF_PIXEL))
> >                         fail();
> >                 }
> > -                bench_new(dst1, dst_stride, src1 + offset, src_stride,
> w, h, filter, clip, vb_pos);
> > +                if (w == h && (w & (w - 1)) == 0)
> > +                    bench_new(dst1, dst_stride, src1 + offset,
> src_stride, w, h, filter, clip, vb_pos);
> >             }
> >         }
> >     }
> > @@ -156,7 +159,9 @@ static void check_alf_classify(VVCDSPContext *c,
> const int bit_depth)
> >                     fail();
> >                 if (memcmp(transpose_idx0, transpose_idx1, id_size))
> >                     fail();
> > -                bench_new(class_idx1, transpose_idx1, src1 + offset,
> stride, w, h, vb_pos, alf_gradient_tmp);
> > +                // Bench only square sizes, and ones with dimensions
> being a power of two.
> > +                if (w == h && (w & (w - 1)) == 0)
> > +                    bench_new(class_idx1, transpose_idx1, src1 +
> offset, stride, w, h, vb_pos, alf_gradient_tmp);
>
> Note, these tests (all vvc tests) use check_func("name...%dx%d", w, h) -
> while it's common elsewhere to group them up into slightly bigger bundles,
> e.g. only including the width in the function name, assuming that this is
> the level of granularity of actual assembly implementations - e.g. I don't
> think there would be a different codepath implemented for each block
> height.
>
> And it's possible to convey more information about exactly what failed,
> without needing to encode it into the function name - see the
> checkasm_check functions/macro, and e.g. commit
> 8ff4a4a4f4f73c5e276fa0cbe6cd5a148ebdd4ae.
>
Hi Martin,
Thank you for the suggestion.
Tracked with https://github.com/ffvvc/FFmpeg/issues/226

>
> // Martin
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
diff mbox series

Patch

diff --git a/tests/checkasm/vvc_alf.c b/tests/checkasm/vvc_alf.c
index 9526260598..6dd89bfafc 100644
--- a/tests/checkasm/vvc_alf.c
+++ b/tests/checkasm/vvc_alf.c
@@ -103,7 +103,9 @@  static void check_alf_filter(VVCDSPContext *c, const int bit_depth)
                     if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, w * SIZEOF_PIXEL))
                         fail();
                 }
-                bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
+                // Bench only square sizes, and ones with dimensions being a power of two.
+                if (w == h && (w & (w - 1)) == 0)
+                    bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
             }
             if (check_func(c->alf.filter[CHROMA], "vvc_alf_filter_chroma_%dx%d_%d", w, h, bit_depth)) {
                 const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_CHROMA;
@@ -115,7 +117,8 @@  static void check_alf_filter(VVCDSPContext *c, const int bit_depth)
                     if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, w * SIZEOF_PIXEL))
                         fail();
                 }
-                bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
+                if (w == h && (w & (w - 1)) == 0)
+                    bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
             }
         }
     }
@@ -156,7 +159,9 @@  static void check_alf_classify(VVCDSPContext *c, const int bit_depth)
                     fail();
                 if (memcmp(transpose_idx0, transpose_idx1, id_size))
                     fail();
-                bench_new(class_idx1, transpose_idx1, src1 + offset, stride, w, h, vb_pos, alf_gradient_tmp);
+                // Bench only square sizes, and ones with dimensions being a power of two.
+                if (w == h && (w & (w - 1)) == 0)
+                    bench_new(class_idx1, transpose_idx1, src1 + offset, stride, w, h, vb_pos, alf_gradient_tmp);
             }
         }
     }