diff mbox series

[FFmpeg-devel,3/3] checkasm: add tests for vvc dmvr

Message ID TYSPR06MB6433A67F821979507FF66FD8AAAB2@TYSPR06MB6433.apcprd06.prod.outlook.com
State New
Headers show
Series [FFmpeg-devel,1/3] avcodec/vvcdec: Use av_image_copy_plane for DMVR 10-bit integer pixels | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Nuo Mi July 25, 2024, 1:35 p.m. UTC
dmvr_8_12x20_c: 186.2
dmvr_8_12x20_avx2: 25.7
dmvr_8_20x12_c: 181.7
dmvr_8_20x12_avx2: 25.2
dmvr_8_20x20_c: 283.2
dmvr_8_20x20_avx2: 32.0
dmvr_10_12x20_c: 90.0
dmvr_10_12x20_avx2: 15.7
dmvr_10_20x12_c: 41.0
dmvr_10_20x12_avx2: 14.7
dmvr_10_20x20_c: 81.5
dmvr_10_20x20_avx2: 26.7
dmvr_12_12x20_c: 190.7
dmvr_12_12x20_avx2: 20.2
dmvr_12_20x12_c: 187.2
dmvr_12_20x12_avx2: 20.2
dmvr_12_20x20_c: 292.7
dmvr_12_20x20_avx2: 27.2
dmvr_h_8_12x20_c: 317.0
dmvr_h_8_12x20_avx2: 37.0
dmvr_h_8_20x12_c: 340.0
dmvr_h_8_20x12_avx2: 41.0
dmvr_h_8_20x20_c: 540.7
dmvr_h_8_20x20_avx2: 64.0
dmvr_h_10_12x20_c: 322.7
dmvr_h_10_12x20_avx2: 30.7
dmvr_h_10_20x12_c: 344.2
dmvr_h_10_20x12_avx2: 34.0
dmvr_h_10_20x20_c: 529.0
dmvr_h_10_20x20_avx2: 51.5
dmvr_h_12_12x20_c: 326.7
dmvr_h_12_12x20_avx2: 33.5
dmvr_h_12_20x12_c: 331.7
dmvr_h_12_20x12_avx2: 51.2
dmvr_h_12_20x20_c: 534.0
dmvr_h_12_20x20_avx2: 62.7
dmvr_hv_8_12x20_c: 650.0
dmvr_hv_8_12x20_avx2: 57.2
dmvr_hv_8_20x12_c: 676.2
dmvr_hv_8_20x12_avx2: 70.0
dmvr_hv_8_20x20_c: 1068.5
dmvr_hv_8_20x20_avx2: 103.2
dmvr_hv_10_12x20_c: 649.0
dmvr_hv_10_12x20_avx2: 48.2
dmvr_hv_10_20x12_c: 677.7
dmvr_hv_10_20x12_avx2: 59.7
dmvr_hv_10_20x20_c: 1093.5
dmvr_hv_10_20x20_avx2: 91.7
dmvr_hv_12_12x20_c: 660.0
dmvr_hv_12_12x20_avx2: 58.7
dmvr_hv_12_20x12_c: 682.7
dmvr_hv_12_20x12_avx2: 72.0
dmvr_hv_12_20x20_c: 1094.0
dmvr_hv_12_20x20_avx2: 113.2
dmvr_v_8_12x20_c: 325.7
dmvr_v_8_12x20_avx2: 31.2
dmvr_v_8_20x12_c: 326.2
dmvr_v_8_20x12_avx2: 38.5
dmvr_v_8_20x20_c: 538.5
dmvr_v_8_20x20_avx2: 54.2
dmvr_v_10_12x20_c: 318.5
dmvr_v_10_12x20_avx2: 23.7
dmvr_v_10_20x12_c: 330.7
dmvr_v_10_20x12_avx2: 40.5
dmvr_v_10_20x20_c: 567.5
dmvr_v_10_20x20_avx2: 48.0
dmvr_v_12_12x20_c: 335.2
dmvr_v_12_12x20_avx2: 30.0
dmvr_v_12_20x12_c: 330.2
dmvr_v_12_20x12_avx2: 39.5
dmvr_v_12_20x20_c: 535.2
dmvr_v_12_20x20_avx2: 60.0
---
 tests/checkasm/vvc_mc.c | 59 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

Comments

James Almer July 26, 2024, 1:20 a.m. UTC | #1
On 7/25/2024 10:35 AM, Nuo Mi wrote:
> dmvr_8_12x20_c: 186.2
> dmvr_8_12x20_avx2: 25.7
> dmvr_8_20x12_c: 181.7
> dmvr_8_20x12_avx2: 25.2
> dmvr_8_20x20_c: 283.2
> dmvr_8_20x20_avx2: 32.0
> dmvr_10_12x20_c: 90.0
> dmvr_10_12x20_avx2: 15.7
> dmvr_10_20x12_c: 41.0
> dmvr_10_20x12_avx2: 14.7
> dmvr_10_20x20_c: 81.5
> dmvr_10_20x20_avx2: 26.7
> dmvr_12_12x20_c: 190.7
> dmvr_12_12x20_avx2: 20.2
> dmvr_12_20x12_c: 187.2
> dmvr_12_20x12_avx2: 20.2
> dmvr_12_20x20_c: 292.7
> dmvr_12_20x20_avx2: 27.2
> dmvr_h_8_12x20_c: 317.0
> dmvr_h_8_12x20_avx2: 37.0
> dmvr_h_8_20x12_c: 340.0
> dmvr_h_8_20x12_avx2: 41.0
> dmvr_h_8_20x20_c: 540.7
> dmvr_h_8_20x20_avx2: 64.0
> dmvr_h_10_12x20_c: 322.7
> dmvr_h_10_12x20_avx2: 30.7
> dmvr_h_10_20x12_c: 344.2
> dmvr_h_10_20x12_avx2: 34.0
> dmvr_h_10_20x20_c: 529.0
> dmvr_h_10_20x20_avx2: 51.5
> dmvr_h_12_12x20_c: 326.7
> dmvr_h_12_12x20_avx2: 33.5
> dmvr_h_12_20x12_c: 331.7
> dmvr_h_12_20x12_avx2: 51.2
> dmvr_h_12_20x20_c: 534.0
> dmvr_h_12_20x20_avx2: 62.7
> dmvr_hv_8_12x20_c: 650.0
> dmvr_hv_8_12x20_avx2: 57.2
> dmvr_hv_8_20x12_c: 676.2
> dmvr_hv_8_20x12_avx2: 70.0
> dmvr_hv_8_20x20_c: 1068.5
> dmvr_hv_8_20x20_avx2: 103.2
> dmvr_hv_10_12x20_c: 649.0
> dmvr_hv_10_12x20_avx2: 48.2
> dmvr_hv_10_20x12_c: 677.7
> dmvr_hv_10_20x12_avx2: 59.7
> dmvr_hv_10_20x20_c: 1093.5
> dmvr_hv_10_20x20_avx2: 91.7
> dmvr_hv_12_12x20_c: 660.0
> dmvr_hv_12_12x20_avx2: 58.7
> dmvr_hv_12_20x12_c: 682.7
> dmvr_hv_12_20x12_avx2: 72.0
> dmvr_hv_12_20x20_c: 1094.0
> dmvr_hv_12_20x20_avx2: 113.2
> dmvr_v_8_12x20_c: 325.7
> dmvr_v_8_12x20_avx2: 31.2
> dmvr_v_8_20x12_c: 326.2
> dmvr_v_8_20x12_avx2: 38.5
> dmvr_v_8_20x20_c: 538.5
> dmvr_v_8_20x20_avx2: 54.2
> dmvr_v_10_12x20_c: 318.5
> dmvr_v_10_12x20_avx2: 23.7
> dmvr_v_10_20x12_c: 330.7
> dmvr_v_10_20x12_avx2: 40.5
> dmvr_v_10_20x20_c: 567.5
> dmvr_v_10_20x20_avx2: 48.0
> dmvr_v_12_12x20_c: 335.2
> dmvr_v_12_12x20_avx2: 30.0
> dmvr_v_12_20x12_c: 330.2
> dmvr_v_12_20x12_avx2: 39.5
> dmvr_v_12_20x20_c: 535.2
> dmvr_v_12_20x20_avx2: 60.0
> ---
>   tests/checkasm/vvc_mc.c | 59 +++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 59 insertions(+)
> 
> diff --git a/tests/checkasm/vvc_mc.c b/tests/checkasm/vvc_mc.c
> index bc6b580f42..62fa6aa7d0 100644
> --- a/tests/checkasm/vvc_mc.c
> +++ b/tests/checkasm/vvc_mc.c
> @@ -324,6 +324,64 @@ static void check_avg(void)
>       report("avg");
>   }
>   
> +#define SR_RANGE 2
> +static void check_dmvr(void)
> +{
> +    LOCAL_ALIGNED_32(uint16_t, dst0, [DST_BUF_SIZE]);
> +    LOCAL_ALIGNED_32(uint16_t, dst1, [DST_BUF_SIZE]);
> +    LOCAL_ALIGNED_32(uint8_t,  src0, [SRC_BUF_SIZE]);
> +    LOCAL_ALIGNED_32(uint8_t,  src1, [SRC_BUF_SIZE]);
> +    const int dst_stride = MAX_PB_SIZE * sizeof(int16_t);
> +
> +    VVCDSPContext c;
> +    declare_func(void, int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int height,
> +        intptr_t mx, intptr_t my, int width);
> +
> +    for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
> +        ff_vvc_dsp_init(&c, bit_depth);
> +        randomize_pixels(src0, src1, SRC_BUF_SIZE);
> +        for (int i = 0; i < 2; i++) {
> +            for (int j = 0; j < 2; j++) {
> +                for (int h = 8; h <= 16; h *= 2) {
> +                    for (int w = 8; w <= 16; w *= 2) {
> +                        const int pred_w = w + 2 * SR_RANGE;
> +                        const int pred_h = h + 2 * SR_RANGE;
> +                        const int mx     = rnd() % VVC_INTER_LUMA_DMVR_FACTS;
> +                        const int my     = rnd() % VVC_INTER_LUMA_DMVR_FACTS;
> +                        const char *type;
> +
> +                        if (w * h < 128)
> +                            continue;

So h == 8 && w == 8 is not tested?

> +
> +                        switch ((j << 1) | i) {
> +                            case 0: type = "dmvr";    break; // 0 0
> +                            case 1: type = "dmvr_h";  break; // 0 1
> +                            case 2: type = "dmvr_v";  break; // 1 0
> +                            case 3: type = "dmvr_hv"; break; // 1 1
> +                        }
> +
> +                        if (check_func(c.inter.dmvr[j][i], "%s_%d_%dx%d", type, bit_depth, pred_w, pred_h)) {
> +                            memset(dst0, 0, DST_BUF_SIZE);
> +                            memset(dst1, 0, DST_BUF_SIZE);
> +                            call_ref(dst0, src0 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w);
> +                            call_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w);
> +                            for (int k = 0; k < pred_h; k++) {
> +                                if (memcmp(dst0 + k * dst_stride, dst1 + k * dst_stride, pred_w * sizeof(int16_t))) {
> +                                    fail();
> +                                    break;
> +                                }
> +                            }
> +
> +                            bench_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w);
> +                        }
> +                    }
> +                }
> +            }
> +        }
> +    }
> +    report("dmvr");
> +}
> +
>   static void check_vvc_sad(void)
>   {
>       const int bit_depth = 10;
> @@ -363,6 +421,7 @@ static void check_vvc_sad(void)
>   
>   void checkasm_check_vvc_mc(void)
>   {
> +    check_dmvr();
>       check_vvc_sad();
>       check_put_vvc_luma();
>       check_put_vvc_luma_uni();
Nuo Mi July 26, 2024, 7:42 a.m. UTC | #2
On Fri, Jul 26, 2024 at 9:36 AM James Almer <jamrial@gmail.com> wrote:

> On 7/25/2024 10:35 AM, Nuo Mi wrote:
> > dmvr_8_12x20_c: 186.2
> > dmvr_8_12x20_avx2: 25.7
> > dmvr_8_20x12_c: 181.7
> > dmvr_8_20x12_avx2: 25.2
> > dmvr_8_20x20_c: 283.2
> > dmvr_8_20x20_avx2: 32.0
> > dmvr_10_12x20_c: 90.0
> > dmvr_10_12x20_avx2: 15.7
> > dmvr_10_20x12_c: 41.0
> > dmvr_10_20x12_avx2: 14.7
> > dmvr_10_20x20_c: 81.5
> > dmvr_10_20x20_avx2: 26.7
> > dmvr_12_12x20_c: 190.7
> > dmvr_12_12x20_avx2: 20.2
> > dmvr_12_20x12_c: 187.2
> > dmvr_12_20x12_avx2: 20.2
> > dmvr_12_20x20_c: 292.7
> > dmvr_12_20x20_avx2: 27.2
> > dmvr_h_8_12x20_c: 317.0
> > dmvr_h_8_12x20_avx2: 37.0
> > dmvr_h_8_20x12_c: 340.0
> > dmvr_h_8_20x12_avx2: 41.0
> > dmvr_h_8_20x20_c: 540.7
> > dmvr_h_8_20x20_avx2: 64.0
> > dmvr_h_10_12x20_c: 322.7
> > dmvr_h_10_12x20_avx2: 30.7
> > dmvr_h_10_20x12_c: 344.2
> > dmvr_h_10_20x12_avx2: 34.0
> > dmvr_h_10_20x20_c: 529.0
> > dmvr_h_10_20x20_avx2: 51.5
> > dmvr_h_12_12x20_c: 326.7
> > dmvr_h_12_12x20_avx2: 33.5
> > dmvr_h_12_20x12_c: 331.7
> > dmvr_h_12_20x12_avx2: 51.2
> > dmvr_h_12_20x20_c: 534.0
> > dmvr_h_12_20x20_avx2: 62.7
> > dmvr_hv_8_12x20_c: 650.0
> > dmvr_hv_8_12x20_avx2: 57.2
> > dmvr_hv_8_20x12_c: 676.2
> > dmvr_hv_8_20x12_avx2: 70.0
> > dmvr_hv_8_20x20_c: 1068.5
> > dmvr_hv_8_20x20_avx2: 103.2
> > dmvr_hv_10_12x20_c: 649.0
> > dmvr_hv_10_12x20_avx2: 48.2
> > dmvr_hv_10_20x12_c: 677.7
> > dmvr_hv_10_20x12_avx2: 59.7
> > dmvr_hv_10_20x20_c: 1093.5
> > dmvr_hv_10_20x20_avx2: 91.7
> > dmvr_hv_12_12x20_c: 660.0
> > dmvr_hv_12_12x20_avx2: 58.7
> > dmvr_hv_12_20x12_c: 682.7
> > dmvr_hv_12_20x12_avx2: 72.0
> > dmvr_hv_12_20x20_c: 1094.0
> > dmvr_hv_12_20x20_avx2: 113.2
> > dmvr_v_8_12x20_c: 325.7
> > dmvr_v_8_12x20_avx2: 31.2
> > dmvr_v_8_20x12_c: 326.2
> > dmvr_v_8_20x12_avx2: 38.5
> > dmvr_v_8_20x20_c: 538.5
> > dmvr_v_8_20x20_avx2: 54.2
> > dmvr_v_10_12x20_c: 318.5
> > dmvr_v_10_12x20_avx2: 23.7
> > dmvr_v_10_20x12_c: 330.7
> > dmvr_v_10_20x12_avx2: 40.5
> > dmvr_v_10_20x20_c: 567.5
> > dmvr_v_10_20x20_avx2: 48.0
> > dmvr_v_12_12x20_c: 335.2
> > dmvr_v_12_12x20_avx2: 30.0
> > dmvr_v_12_20x12_c: 330.2
> > dmvr_v_12_20x12_avx2: 39.5
> > dmvr_v_12_20x20_c: 535.2
> > dmvr_v_12_20x20_avx2: 60.0
> > ---
> >   tests/checkasm/vvc_mc.c | 59 +++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 59 insertions(+)
> >
> > diff --git a/tests/checkasm/vvc_mc.c b/tests/checkasm/vvc_mc.c
> > index bc6b580f42..62fa6aa7d0 100644
> > --- a/tests/checkasm/vvc_mc.c
> > +++ b/tests/checkasm/vvc_mc.c
> > @@ -324,6 +324,64 @@ static void check_avg(void)
> >       report("avg");
> >   }
> >
> > +#define SR_RANGE 2
> > +static void check_dmvr(void)
> > +{
> > +    LOCAL_ALIGNED_32(uint16_t, dst0, [DST_BUF_SIZE]);
> > +    LOCAL_ALIGNED_32(uint16_t, dst1, [DST_BUF_SIZE]);
> > +    LOCAL_ALIGNED_32(uint8_t,  src0, [SRC_BUF_SIZE]);
> > +    LOCAL_ALIGNED_32(uint8_t,  src1, [SRC_BUF_SIZE]);
> > +    const int dst_stride = MAX_PB_SIZE * sizeof(int16_t);
> > +
> > +    VVCDSPContext c;
> > +    declare_func(void, int16_t *dst, const uint8_t *src, ptrdiff_t
> src_stride, int height,
> > +        intptr_t mx, intptr_t my, int width);
> > +
> > +    for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
> > +        ff_vvc_dsp_init(&c, bit_depth);
> > +        randomize_pixels(src0, src1, SRC_BUF_SIZE);
> > +        for (int i = 0; i < 2; i++) {
> > +            for (int j = 0; j < 2; j++) {
> > +                for (int h = 8; h <= 16; h *= 2) {
> > +                    for (int w = 8; w <= 16; w *= 2) {
> > +                        const int pred_w = w + 2 * SR_RANGE;
> > +                        const int pred_h = h + 2 * SR_RANGE;
> > +                        const int mx     = rnd() %
> VVC_INTER_LUMA_DMVR_FACTS;
> > +                        const int my     = rnd() %
> VVC_INTER_LUMA_DMVR_FACTS;
> > +                        const char *type;
> > +
> > +                        if (w * h < 128)
> > +                            continue;
>
> So h == 8 && w == 8 is not tested?
>
Hi James,
thank you for the review.

Yes, DMVR operates on subblocks with a maximum size of 16x16, and it also
requires that the width multiplied by the height be at least 128.
Therefore, only block sizes of 8x16, 16x8, and 16x16 are valid.

See:
Section 8.5.1, "General decoding process for coding units coded in inter
prediction mode", of the VVC specification (ITU-T H.266)
and
https://vicuesoft.com/blog/titles/DMVR_in_VVC/


>
> > +
> > +                        switch ((j << 1) | i) {
> > +                            case 0: type = "dmvr";    break; // 0 0
> > +                            case 1: type = "dmvr_h";  break; // 0 1
> > +                            case 2: type = "dmvr_v";  break; // 1 0
> > +                            case 3: type = "dmvr_hv"; break; // 1 1
> > +                        }
> > +
> > +                        if (check_func(c.inter.dmvr[j][i],
> "%s_%d_%dx%d", type, bit_depth, pred_w, pred_h)) {
> > +                            memset(dst0, 0, DST_BUF_SIZE);
> > +                            memset(dst1, 0, DST_BUF_SIZE);
> > +                            call_ref(dst0, src0 + SRC_OFFSET,
> PIXEL_STRIDE, pred_h, mx, my, pred_w);
> > +                            call_new(dst1, src1 + SRC_OFFSET,
> PIXEL_STRIDE, pred_h, mx, my, pred_w);
> > +                            for (int k = 0; k < pred_h; k++) {
> > +                                if (memcmp(dst0 + k * dst_stride, dst1
> + k * dst_stride, pred_w * sizeof(int16_t))) {
> > +                                    fail();
> > +                                    break;
> > +                                }
> > +                            }
> > +
> > +                            bench_new(dst1, src1 + SRC_OFFSET,
> PIXEL_STRIDE, pred_h, mx, my, pred_w);
> > +                        }
> > +                    }
> > +                }
> > +            }
> > +        }
> > +    }
> > +    report("dmvr");
> > +}
> > +
> >   static void check_vvc_sad(void)
> >   {
> >       const int bit_depth = 10;
> > @@ -363,6 +421,7 @@ static void check_vvc_sad(void)
> >
> >   void checkasm_check_vvc_mc(void)
> >   {
> > +    check_dmvr();
> >       check_vvc_sad();
> >       check_put_vvc_luma();
> >       check_put_vvc_luma_uni();
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Nuo Mi Aug. 11, 2024, 2 p.m. UTC | #3
Will apply next week if there are no objections.

On Fri, Jul 26, 2024 at 3:42 PM Nuo Mi <nuomi2021@gmail.com> wrote:

>
>
> On Fri, Jul 26, 2024 at 9:36 AM James Almer <jamrial@gmail.com> wrote:
>
>> On 7/25/2024 10:35 AM, Nuo Mi wrote:
>> > dmvr_8_12x20_c: 186.2
>> > dmvr_8_12x20_avx2: 25.7
>> > dmvr_8_20x12_c: 181.7
>> > dmvr_8_20x12_avx2: 25.2
>> > dmvr_8_20x20_c: 283.2
>> > dmvr_8_20x20_avx2: 32.0
>> > dmvr_10_12x20_c: 90.0
>> > dmvr_10_12x20_avx2: 15.7
>> > dmvr_10_20x12_c: 41.0
>> > dmvr_10_20x12_avx2: 14.7
>> > dmvr_10_20x20_c: 81.5
>> > dmvr_10_20x20_avx2: 26.7
>> > dmvr_12_12x20_c: 190.7
>> > dmvr_12_12x20_avx2: 20.2
>> > dmvr_12_20x12_c: 187.2
>> > dmvr_12_20x12_avx2: 20.2
>> > dmvr_12_20x20_c: 292.7
>> > dmvr_12_20x20_avx2: 27.2
>> > dmvr_h_8_12x20_c: 317.0
>> > dmvr_h_8_12x20_avx2: 37.0
>> > dmvr_h_8_20x12_c: 340.0
>> > dmvr_h_8_20x12_avx2: 41.0
>> > dmvr_h_8_20x20_c: 540.7
>> > dmvr_h_8_20x20_avx2: 64.0
>> > dmvr_h_10_12x20_c: 322.7
>> > dmvr_h_10_12x20_avx2: 30.7
>> > dmvr_h_10_20x12_c: 344.2
>> > dmvr_h_10_20x12_avx2: 34.0
>> > dmvr_h_10_20x20_c: 529.0
>> > dmvr_h_10_20x20_avx2: 51.5
>> > dmvr_h_12_12x20_c: 326.7
>> > dmvr_h_12_12x20_avx2: 33.5
>> > dmvr_h_12_20x12_c: 331.7
>> > dmvr_h_12_20x12_avx2: 51.2
>> > dmvr_h_12_20x20_c: 534.0
>> > dmvr_h_12_20x20_avx2: 62.7
>> > dmvr_hv_8_12x20_c: 650.0
>> > dmvr_hv_8_12x20_avx2: 57.2
>> > dmvr_hv_8_20x12_c: 676.2
>> > dmvr_hv_8_20x12_avx2: 70.0
>> > dmvr_hv_8_20x20_c: 1068.5
>> > dmvr_hv_8_20x20_avx2: 103.2
>> > dmvr_hv_10_12x20_c: 649.0
>> > dmvr_hv_10_12x20_avx2: 48.2
>> > dmvr_hv_10_20x12_c: 677.7
>> > dmvr_hv_10_20x12_avx2: 59.7
>> > dmvr_hv_10_20x20_c: 1093.5
>> > dmvr_hv_10_20x20_avx2: 91.7
>> > dmvr_hv_12_12x20_c: 660.0
>> > dmvr_hv_12_12x20_avx2: 58.7
>> > dmvr_hv_12_20x12_c: 682.7
>> > dmvr_hv_12_20x12_avx2: 72.0
>> > dmvr_hv_12_20x20_c: 1094.0
>> > dmvr_hv_12_20x20_avx2: 113.2
>> > dmvr_v_8_12x20_c: 325.7
>> > dmvr_v_8_12x20_avx2: 31.2
>> > dmvr_v_8_20x12_c: 326.2
>> > dmvr_v_8_20x12_avx2: 38.5
>> > dmvr_v_8_20x20_c: 538.5
>> > dmvr_v_8_20x20_avx2: 54.2
>> > dmvr_v_10_12x20_c: 318.5
>> > dmvr_v_10_12x20_avx2: 23.7
>> > dmvr_v_10_20x12_c: 330.7
>> > dmvr_v_10_20x12_avx2: 40.5
>> > dmvr_v_10_20x20_c: 567.5
>> > dmvr_v_10_20x20_avx2: 48.0
>> > dmvr_v_12_12x20_c: 335.2
>> > dmvr_v_12_12x20_avx2: 30.0
>> > dmvr_v_12_20x12_c: 330.2
>> > dmvr_v_12_20x12_avx2: 39.5
>> > dmvr_v_12_20x20_c: 535.2
>> > dmvr_v_12_20x20_avx2: 60.0
>> > ---
>> >   tests/checkasm/vvc_mc.c | 59 +++++++++++++++++++++++++++++++++++++++++
>> >   1 file changed, 59 insertions(+)
>> >
>> > diff --git a/tests/checkasm/vvc_mc.c b/tests/checkasm/vvc_mc.c
>> > index bc6b580f42..62fa6aa7d0 100644
>> > --- a/tests/checkasm/vvc_mc.c
>> > +++ b/tests/checkasm/vvc_mc.c
>> > @@ -324,6 +324,64 @@ static void check_avg(void)
>> >       report("avg");
>> >   }
>> >
>> > +#define SR_RANGE 2
>> > +static void check_dmvr(void)
>> > +{
>> > +    LOCAL_ALIGNED_32(uint16_t, dst0, [DST_BUF_SIZE]);
>> > +    LOCAL_ALIGNED_32(uint16_t, dst1, [DST_BUF_SIZE]);
>> > +    LOCAL_ALIGNED_32(uint8_t,  src0, [SRC_BUF_SIZE]);
>> > +    LOCAL_ALIGNED_32(uint8_t,  src1, [SRC_BUF_SIZE]);
>> > +    const int dst_stride = MAX_PB_SIZE * sizeof(int16_t);
>> > +
>> > +    VVCDSPContext c;
>> > +    declare_func(void, int16_t *dst, const uint8_t *src, ptrdiff_t
>> src_stride, int height,
>> > +        intptr_t mx, intptr_t my, int width);
>> > +
>> > +    for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
>> > +        ff_vvc_dsp_init(&c, bit_depth);
>> > +        randomize_pixels(src0, src1, SRC_BUF_SIZE);
>> > +        for (int i = 0; i < 2; i++) {
>> > +            for (int j = 0; j < 2; j++) {
>> > +                for (int h = 8; h <= 16; h *= 2) {
>> > +                    for (int w = 8; w <= 16; w *= 2) {
>> > +                        const int pred_w = w + 2 * SR_RANGE;
>> > +                        const int pred_h = h + 2 * SR_RANGE;
>> > +                        const int mx     = rnd() %
>> VVC_INTER_LUMA_DMVR_FACTS;
>> > +                        const int my     = rnd() %
>> VVC_INTER_LUMA_DMVR_FACTS;
>> > +                        const char *type;
>> > +
>> > +                        if (w * h < 128)
>> > +                            continue;
>>
>> So h == 8 && w == 8 is not tested?
>>
> Hi James,
> thank you for the review.
>
> Yes, DMVR operates on subblocks with a maximum size of 16x16, and it also
> requires that the width multiplied by the height be at least 128.
> Therefore, only block sizes of 8x16, 16x8, and 16x16 are valid.
>
> see:
> 8.5.1 General decoding process for coding units coded in inter prediction
> mode
> and
> https://vicuesoft.com/blog/titles/DMVR_in_VVC/
>
Will apply this next week.
Thank you

>
>
>
>>
>> > +
>> > +                        switch ((j << 1) | i) {
>> > +                            case 0: type = "dmvr";    break; // 0 0
>> > +                            case 1: type = "dmvr_h";  break; // 0 1
>> > +                            case 2: type = "dmvr_v";  break; // 1 0
>> > +                            case 3: type = "dmvr_hv"; break; // 1 1
>> > +                        }
>> > +
>> > +                        if (check_func(c.inter.dmvr[j][i],
>> "%s_%d_%dx%d", type, bit_depth, pred_w, pred_h)) {
>> > +                            memset(dst0, 0, DST_BUF_SIZE);
>> > +                            memset(dst1, 0, DST_BUF_SIZE);
>> > +                            call_ref(dst0, src0 + SRC_OFFSET,
>> PIXEL_STRIDE, pred_h, mx, my, pred_w);
>> > +                            call_new(dst1, src1 + SRC_OFFSET,
>> PIXEL_STRIDE, pred_h, mx, my, pred_w);
>> > +                            for (int k = 0; k < pred_h; k++) {
>> > +                                if (memcmp(dst0 + k * dst_stride, dst1
>> + k * dst_stride, pred_w * sizeof(int16_t))) {
>> > +                                    fail();
>> > +                                    break;
>> > +                                }
>> > +                            }
>> > +
>> > +                            bench_new(dst1, src1 + SRC_OFFSET,
>> PIXEL_STRIDE, pred_h, mx, my, pred_w);
>> > +                        }
>> > +                    }
>> > +                }
>> > +            }
>> > +        }
>> > +    }
>> > +    report("dmvr");
>> > +}
>> > +
>> >   static void check_vvc_sad(void)
>> >   {
>> >       const int bit_depth = 10;
>> > @@ -363,6 +421,7 @@ static void check_vvc_sad(void)
>> >
>> >   void checkasm_check_vvc_mc(void)
>> >   {
>> > +    check_dmvr();
>> >       check_vvc_sad();
>> >       check_put_vvc_luma();
>> >       check_put_vvc_luma_uni();
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
>
Nuo Mi Aug. 15, 2024, 12:45 p.m. UTC | #4
On Sun, Aug 11, 2024 at 10:00 PM Nuo Mi <nuomi2021@gmail.com> wrote:

> will apply next week if there are no objections
>
Done.
Thank you, James, for reviewing.

>
> On Fri, Jul 26, 2024 at 3:42 PM Nuo Mi <nuomi2021@gmail.com> wrote:
>
>>
>>
>> On Fri, Jul 26, 2024 at 9:36 AM James Almer <jamrial@gmail.com> wrote:
>>
>>> On 7/25/2024 10:35 AM, Nuo Mi wrote:
>>> > dmvr_8_12x20_c: 186.2
>>> > dmvr_8_12x20_avx2: 25.7
>>> > dmvr_8_20x12_c: 181.7
>>> > dmvr_8_20x12_avx2: 25.2
>>> > dmvr_8_20x20_c: 283.2
>>> > dmvr_8_20x20_avx2: 32.0
>>> > dmvr_10_12x20_c: 90.0
>>> > dmvr_10_12x20_avx2: 15.7
>>> > dmvr_10_20x12_c: 41.0
>>> > dmvr_10_20x12_avx2: 14.7
>>> > dmvr_10_20x20_c: 81.5
>>> > dmvr_10_20x20_avx2: 26.7
>>> > dmvr_12_12x20_c: 190.7
>>> > dmvr_12_12x20_avx2: 20.2
>>> > dmvr_12_20x12_c: 187.2
>>> > dmvr_12_20x12_avx2: 20.2
>>> > dmvr_12_20x20_c: 292.7
>>> > dmvr_12_20x20_avx2: 27.2
>>> > dmvr_h_8_12x20_c: 317.0
>>> > dmvr_h_8_12x20_avx2: 37.0
>>> > dmvr_h_8_20x12_c: 340.0
>>> > dmvr_h_8_20x12_avx2: 41.0
>>> > dmvr_h_8_20x20_c: 540.7
>>> > dmvr_h_8_20x20_avx2: 64.0
>>> > dmvr_h_10_12x20_c: 322.7
>>> > dmvr_h_10_12x20_avx2: 30.7
>>> > dmvr_h_10_20x12_c: 344.2
>>> > dmvr_h_10_20x12_avx2: 34.0
>>> > dmvr_h_10_20x20_c: 529.0
>>> > dmvr_h_10_20x20_avx2: 51.5
>>> > dmvr_h_12_12x20_c: 326.7
>>> > dmvr_h_12_12x20_avx2: 33.5
>>> > dmvr_h_12_20x12_c: 331.7
>>> > dmvr_h_12_20x12_avx2: 51.2
>>> > dmvr_h_12_20x20_c: 534.0
>>> > dmvr_h_12_20x20_avx2: 62.7
>>> > dmvr_hv_8_12x20_c: 650.0
>>> > dmvr_hv_8_12x20_avx2: 57.2
>>> > dmvr_hv_8_20x12_c: 676.2
>>> > dmvr_hv_8_20x12_avx2: 70.0
>>> > dmvr_hv_8_20x20_c: 1068.5
>>> > dmvr_hv_8_20x20_avx2: 103.2
>>> > dmvr_hv_10_12x20_c: 649.0
>>> > dmvr_hv_10_12x20_avx2: 48.2
>>> > dmvr_hv_10_20x12_c: 677.7
>>> > dmvr_hv_10_20x12_avx2: 59.7
>>> > dmvr_hv_10_20x20_c: 1093.5
>>> > dmvr_hv_10_20x20_avx2: 91.7
>>> > dmvr_hv_12_12x20_c: 660.0
>>> > dmvr_hv_12_12x20_avx2: 58.7
>>> > dmvr_hv_12_20x12_c: 682.7
>>> > dmvr_hv_12_20x12_avx2: 72.0
>>> > dmvr_hv_12_20x20_c: 1094.0
>>> > dmvr_hv_12_20x20_avx2: 113.2
>>> > dmvr_v_8_12x20_c: 325.7
>>> > dmvr_v_8_12x20_avx2: 31.2
>>> > dmvr_v_8_20x12_c: 326.2
>>> > dmvr_v_8_20x12_avx2: 38.5
>>> > dmvr_v_8_20x20_c: 538.5
>>> > dmvr_v_8_20x20_avx2: 54.2
>>> > dmvr_v_10_12x20_c: 318.5
>>> > dmvr_v_10_12x20_avx2: 23.7
>>> > dmvr_v_10_20x12_c: 330.7
>>> > dmvr_v_10_20x12_avx2: 40.5
>>> > dmvr_v_10_20x20_c: 567.5
>>> > dmvr_v_10_20x20_avx2: 48.0
>>> > dmvr_v_12_12x20_c: 335.2
>>> > dmvr_v_12_12x20_avx2: 30.0
>>> > dmvr_v_12_20x12_c: 330.2
>>> > dmvr_v_12_20x12_avx2: 39.5
>>> > dmvr_v_12_20x20_c: 535.2
>>> > dmvr_v_12_20x20_avx2: 60.0
>>> > ---
>>> >   tests/checkasm/vvc_mc.c | 59
>>> +++++++++++++++++++++++++++++++++++++++++
>>> >   1 file changed, 59 insertions(+)
>>> >
>>> > diff --git a/tests/checkasm/vvc_mc.c b/tests/checkasm/vvc_mc.c
>>> > index bc6b580f42..62fa6aa7d0 100644
>>> > --- a/tests/checkasm/vvc_mc.c
>>> > +++ b/tests/checkasm/vvc_mc.c
>>> > @@ -324,6 +324,64 @@ static void check_avg(void)
>>> >       report("avg");
>>> >   }
>>> >
>>> > +#define SR_RANGE 2
>>> > +static void check_dmvr(void)
>>> > +{
>>> > +    LOCAL_ALIGNED_32(uint16_t, dst0, [DST_BUF_SIZE]);
>>> > +    LOCAL_ALIGNED_32(uint16_t, dst1, [DST_BUF_SIZE]);
>>> > +    LOCAL_ALIGNED_32(uint8_t,  src0, [SRC_BUF_SIZE]);
>>> > +    LOCAL_ALIGNED_32(uint8_t,  src1, [SRC_BUF_SIZE]);
>>> > +    const int dst_stride = MAX_PB_SIZE * sizeof(int16_t);
>>> > +
>>> > +    VVCDSPContext c;
>>> > +    declare_func(void, int16_t *dst, const uint8_t *src, ptrdiff_t
>>> src_stride, int height,
>>> > +        intptr_t mx, intptr_t my, int width);
>>> > +
>>> > +    for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
>>> > +        ff_vvc_dsp_init(&c, bit_depth);
>>> > +        randomize_pixels(src0, src1, SRC_BUF_SIZE);
>>> > +        for (int i = 0; i < 2; i++) {
>>> > +            for (int j = 0; j < 2; j++) {
>>> > +                for (int h = 8; h <= 16; h *= 2) {
>>> > +                    for (int w = 8; w <= 16; w *= 2) {
>>> > +                        const int pred_w = w + 2 * SR_RANGE;
>>> > +                        const int pred_h = h + 2 * SR_RANGE;
>>> > +                        const int mx     = rnd() %
>>> VVC_INTER_LUMA_DMVR_FACTS;
>>> > +                        const int my     = rnd() %
>>> VVC_INTER_LUMA_DMVR_FACTS;
>>> > +                        const char *type;
>>> > +
>>> > +                        if (w * h < 128)
>>> > +                            continue;
>>>
>>> So h == 8 && w == 8 is not tested?
>>>
>> Hi James,
>> thank you for the review.
>>
>> Yes, DMVR operates on subblocks with a maximum size of 16x16, and it also
>> requires that the width multiplied by the height be at least 128.
>> Therefore, only block sizes of 8x16, 16x8, and 16x16 are valid.
>>
>> see:
>> 8.5.1 General decoding process for coding units coded in inter prediction
>> mode
>> and
>> https://vicuesoft.com/blog/titles/DMVR_in_VVC/
>>
> Will apply this next week.
> Thank you
>
>>
>>
>>
>>>
>>> > +
>>> > +                        switch ((j << 1) | i) {
>>> > +                            case 0: type = "dmvr";    break; // 0 0
>>> > +                            case 1: type = "dmvr_h";  break; // 0 1
>>> > +                            case 2: type = "dmvr_v";  break; // 1 0
>>> > +                            case 3: type = "dmvr_hv"; break; // 1 1
>>> > +                        }
>>> > +
>>> > +                        if (check_func(c.inter.dmvr[j][i],
>>> "%s_%d_%dx%d", type, bit_depth, pred_w, pred_h)) {
>>> > +                            memset(dst0, 0, DST_BUF_SIZE);
>>> > +                            memset(dst1, 0, DST_BUF_SIZE);
>>> > +                            call_ref(dst0, src0 + SRC_OFFSET,
>>> PIXEL_STRIDE, pred_h, mx, my, pred_w);
>>> > +                            call_new(dst1, src1 + SRC_OFFSET,
>>> PIXEL_STRIDE, pred_h, mx, my, pred_w);
>>> > +                            for (int k = 0; k < pred_h; k++) {
>>> > +                                if (memcmp(dst0 + k * dst_stride,
>>> dst1 + k * dst_stride, pred_w * sizeof(int16_t))) {
>>> > +                                    fail();
>>> > +                                    break;
>>> > +                                }
>>> > +                            }
>>> > +
>>> > +                            bench_new(dst1, src1 + SRC_OFFSET,
>>> PIXEL_STRIDE, pred_h, mx, my, pred_w);
>>> > +                        }
>>> > +                    }
>>> > +                }
>>> > +            }
>>> > +        }
>>> > +    }
>>> > +    report("dmvr");
>>> > +}
>>> > +
>>> >   static void check_vvc_sad(void)
>>> >   {
>>> >       const int bit_depth = 10;
>>> > @@ -363,6 +421,7 @@ static void check_vvc_sad(void)
>>> >
>>> >   void checkasm_check_vvc_mc(void)
>>> >   {
>>> > +    check_dmvr();
>>> >       check_vvc_sad();
>>> >       check_put_vvc_luma();
>>> >       check_put_vvc_luma_uni();
>>> _______________________________________________
>>> ffmpeg-devel mailing list
>>> ffmpeg-devel@ffmpeg.org
>>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>
>>> To unsubscribe, visit link above, or email
>>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>>
>>
diff mbox series

Patch

diff --git a/tests/checkasm/vvc_mc.c b/tests/checkasm/vvc_mc.c
index bc6b580f42..62fa6aa7d0 100644
--- a/tests/checkasm/vvc_mc.c
+++ b/tests/checkasm/vvc_mc.c
@@ -324,6 +324,64 @@  static void check_avg(void)
     report("avg");
 }
 
+#define SR_RANGE 2
+static void check_dmvr(void)
+{
+    LOCAL_ALIGNED_32(uint16_t, dst0, [DST_BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint16_t, dst1, [DST_BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t,  src0, [SRC_BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t,  src1, [SRC_BUF_SIZE]);
+    const int dst_stride = MAX_PB_SIZE * sizeof(int16_t);
+
+    VVCDSPContext c;
+    declare_func(void, int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int height,
+        intptr_t mx, intptr_t my, int width);
+
+    for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
+        ff_vvc_dsp_init(&c, bit_depth);
+        randomize_pixels(src0, src1, SRC_BUF_SIZE);
+        for (int i = 0; i < 2; i++) {
+            for (int j = 0; j < 2; j++) {
+                for (int h = 8; h <= 16; h *= 2) {
+                    for (int w = 8; w <= 16; w *= 2) {
+                        const int pred_w = w + 2 * SR_RANGE;
+                        const int pred_h = h + 2 * SR_RANGE;
+                        const int mx     = rnd() % VVC_INTER_LUMA_DMVR_FACTS;
+                        const int my     = rnd() % VVC_INTER_LUMA_DMVR_FACTS;
+                        const char *type;
+
+                        if (w * h < 128)
+                            continue;
+
+                        switch ((j << 1) | i) {
+                            case 0: type = "dmvr";    break; // 0 0
+                            case 1: type = "dmvr_h";  break; // 0 1
+                            case 2: type = "dmvr_v";  break; // 1 0
+                            case 3: type = "dmvr_hv"; break; // 1 1
+                        }
+
+                        if (check_func(c.inter.dmvr[j][i], "%s_%d_%dx%d", type, bit_depth, pred_w, pred_h)) {
+                            memset(dst0, 0, DST_BUF_SIZE);
+                            memset(dst1, 0, DST_BUF_SIZE);
+                            call_ref(dst0, src0 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w);
+                            call_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w);
+                            for (int k = 0; k < pred_h; k++) {
+                                if (memcmp(dst0 + k * dst_stride, dst1 + k * dst_stride, pred_w * sizeof(int16_t))) {
+                                    fail();
+                                    break;
+                                }
+                            }
+
+                            bench_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w);
+                        }
+                    }
+                }
+            }
+        }
+    }
+    report("dmvr");
+}
+
 static void check_vvc_sad(void)
 {
     const int bit_depth = 10;
@@ -363,6 +421,7 @@  static void check_vvc_sad(void)
 
 void checkasm_check_vvc_mc(void)
 {
+    check_dmvr();
     check_vvc_sad();
     check_put_vvc_luma();
     check_put_vvc_luma_uni();