diff mbox series

[FFmpeg-devel,3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}

Message ID 20240612044723.175502-3-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,PATCHv5,1/4] lavc/h263dsp: add DCT dequantisation functions | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont June 12, 2024, 4:47 a.m. UTC
---
 tests/checkasm/h263dsp.c | 47 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

Comments

James Almer June 12, 2024, 6:39 p.m. UTC | #1
On 6/12/2024 1:47 AM, Rémi Denis-Courmont wrote:
> ---
>   tests/checkasm/h263dsp.c | 47 +++++++++++++++++++++++++++++++++++++++-
>   1 file changed, 46 insertions(+), 1 deletion(-)
> 
> diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
> index 2d0957a90b..8a2cdb34df 100644
> --- a/tests/checkasm/h263dsp.c
> +++ b/tests/checkasm/h263dsp.c
> @@ -18,13 +18,55 @@
>    * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
>    */
>   
> +#include <stdbool.h>
>   #include <string.h>
>   
>   #include "checkasm.h"
>   
> -#include "libavcodec/h263dsp.h"
> +#include "libavutil/avassert.h"
>   #include "libavutil/mem.h"
>   #include "libavutil/mem_internal.h"
> +#include "libavcodec/h263dsp.h"
> +#include "libavcodec/mpegvideodata.h"
> +
> +static uint_fast8_t mpeg_qscale_rnd(void)
> +{
> +    int n = rnd(), q = (n >> 1) & 31;
> +
> +    if (n & 1)
> +        return ff_mpeg2_non_linear_qscale[q];
> +    else
> +        return q << 1;
> +}
> +
> +typedef void (*unquantizer)(int16_t *, size_t, int, int);
> +
> +static void check_dct_unquantize(unquantizer func, const char *name)
> +{
> +#define LEN 64
> +    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
> +    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);

These are not padded, and unless I'm reading this wrong, an asm 
implementation loading say 16 bytes at a time will overread/write in 
dct_unquantize_intra (which offsets block by 1).

> +    size_t len = 1 + (rnd() & (LEN - 1));
> +    const int qscale = mpeg_qscale_rnd();
> +    const int qmul = qscale << 1;
> +    const int qadd = (rnd() & 1) ? (qscale - 1) | 1 : 0;
> +
> +    declare_func(void, int16_t *, size_t, int, int);
> +
> +    for (size_t i = 0; i < LEN; i++)
> +        block1[i] = block0[i] = (rnd() & 1) ? rnd() : 0;
> +
> +    if (check_func(func, "h263dsp.dct_unquantize_%s", name)) {
> +        av_assert0(len <= LEN);
> +        call_ref(block0, len, qmul, qadd);
> +        call_new(block1, len, qmul, qadd);
> +
> +        if (memcmp(block0, block1, len * sizeof (int16_t)))
> +            fail();
> +
> +        bench_new(block1, LEN, qmul, qadd);
> +    }
> +}
>   
>   typedef void (*filter)(uint8_t *src, int stride, int qscale);
>   
> @@ -56,6 +98,9 @@ void checkasm_check_h263dsp(void)
>       H263DSPContext ctx;
>   
>       ff_h263dsp_init(&ctx);
> +    check_dct_unquantize(ctx.h263_dct_unquantize_intra, "intra");
> +    check_dct_unquantize(ctx.h263_dct_unquantize_inter, "inter");
> +    report("dct_unquantize");
>       check_loop_filter('h', ctx.h263_h_loop_filter);
>       check_loop_filter('v', ctx.h263_v_loop_filter);
>       report("loop_filter");
Rémi Denis-Courmont June 12, 2024, 7:15 p.m. UTC | #2
Le keskiviikkona 12. kesäkuuta 2024, 21.39.12 EEST James Almer a écrit :
> On 6/12/2024 1:47 AM, Rémi Denis-Courmont wrote:
> > ---
> > 
> >   tests/checkasm/h263dsp.c | 47 +++++++++++++++++++++++++++++++++++++++-
> >   1 file changed, 46 insertions(+), 1 deletion(-)
> > 
> > diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
> > index 2d0957a90b..8a2cdb34df 100644
> > --- a/tests/checkasm/h263dsp.c
> > +++ b/tests/checkasm/h263dsp.c
> > @@ -18,13 +18,55 @@
> > 
> >    * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> >    */
> > 
> > +#include <stdbool.h>
> > 
> >   #include <string.h>
> >   
> >   #include "checkasm.h"
> > 
> > -#include "libavcodec/h263dsp.h"
> > +#include "libavutil/avassert.h"
> > 
> >   #include "libavutil/mem.h"
> >   #include "libavutil/mem_internal.h"
> > 
> > +#include "libavcodec/h263dsp.h"
> > +#include "libavcodec/mpegvideodata.h"
> > +
> > +static uint_fast8_t mpeg_qscale_rnd(void)
> > +{
> > +    int n = rnd(), q = (n >> 1) & 31;
> > +
> > +    if (n & 1)
> > +        return ff_mpeg2_non_linear_qscale[q];
> > +    else
> > +        return q << 1;
> > +}
> > +
> > +typedef void (*unquantizer)(int16_t *, size_t, int, int);
> > +
> > +static void check_dct_unquantize(unquantizer func, const char *name)
> > +{
> > +#define LEN 64
> > +    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
> > +    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);
> 
> These are not padded, and unless I'm reading this wrong, an asm
> implementation loading say 16 bytes at a time will overread/write in
> dct_unquantize_intra (which offsets block by 1).

AFAIU, there is no padding per se, but the block buffer size is always exactly 
64 elements, regardless of the number of coeffs, hence this code. The old NEON 
intrinsic code seems to assume the block is a multiple of 8 elements, and the 
tail can be overwritten safely (hence not checking in memcmp()).

I have a feeling that I am not grasping the implications of your comment here.
James Almer June 12, 2024, 7:39 p.m. UTC | #3
On 6/12/2024 4:15 PM, Rémi Denis-Courmont wrote:
> Le keskiviikkona 12. kesäkuuta 2024, 21.39.12 EEST James Almer a écrit :
>> On 6/12/2024 1:47 AM, Rémi Denis-Courmont wrote:
>>> ---
>>>
>>>    tests/checkasm/h263dsp.c | 47 +++++++++++++++++++++++++++++++++++++++-
>>>    1 file changed, 46 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
>>> index 2d0957a90b..8a2cdb34df 100644
>>> --- a/tests/checkasm/h263dsp.c
>>> +++ b/tests/checkasm/h263dsp.c
>>> @@ -18,13 +18,55 @@
>>>
>>>     * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
>>>     */
>>>
>>> +#include <stdbool.h>
>>>
>>>    #include <string.h>
>>>    
>>>    #include "checkasm.h"
>>>
>>> -#include "libavcodec/h263dsp.h"
>>> +#include "libavutil/avassert.h"
>>>
>>>    #include "libavutil/mem.h"
>>>    #include "libavutil/mem_internal.h"
>>>
>>> +#include "libavcodec/h263dsp.h"
>>> +#include "libavcodec/mpegvideodata.h"
>>> +
>>> +static uint_fast8_t mpeg_qscale_rnd(void)
>>> +{
>>> +    int n = rnd(), q = (n >> 1) & 31;
>>> +
>>> +    if (n & 1)
>>> +        return ff_mpeg2_non_linear_qscale[q];
>>> +    else
>>> +        return q << 1;
>>> +}
>>> +
>>> +typedef void (*unquantizer)(int16_t *, size_t, int, int);
>>> +
>>> +static void check_dct_unquantize(unquantizer func, const char *name)
>>> +{
>>> +#define LEN 64
>>> +    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
>>> +    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);
>>
>> These are not padded, and unless I'm reading this wrong, an asm
>> implementation loading say 16 bytes at a time will overread/write in
>> dct_unquantize_intra (which offsets block by 1).
> 
> AFAIU, there is no padding per se, but the block buffer size is always exactly
> 64 elements, regardless of the number of coeffs, hence this code. The old NEON
> intrinsic code seems to assume the block is a multiple of 8 elements, and the
> tail can be overwritten safely (hence not checking in memcmp()).
> 
> I have a feeling that I am not grasping the implications of your comment here.

An asm function loading 16 bytes at a time from block[1] onwards for 
intra may end up reading two bytes more than available at the end of the 
128 byte wide buffer.
Rémi Denis-Courmont June 12, 2024, 7:52 p.m. UTC | #4
Le keskiviikkona 12. kesäkuuta 2024, 22.39.49 EEST James Almer a écrit :
> >> These are not padded, and unless I'm reading this wrong, an asm
> >> implementation loading say 16 bytes at a time will overread/write in
> >> dct_unquantize_intra (which offsets block by 1).
> > 
> > AFAIU, there is no padding per se, but the block buffer size is always
> > exactly 64 elements, regardless of the number of coeffs, hence this code.
> > The old NEON intrinsic code seems to assume the block is a multiple of 8
> > elements, and the tail can be overwritten safely (hence not checking in
> > memcmp()).
> > 
> > I have a feeling that I am not grasping the implications of your comment
> > here.
> An asm function loading 16 bytes at a time from block[1] onwards for
> intra may end up reading two bytes more than available at the end of the
> 128 byte wide buffer.

Wouldn't that be a bug in the assembler function? Do you mean that checkasm 
should add padding to check against overwrites?

The whole point of separating inter and intra was to preserve alignment for 
those instruction set extensions that want it (C and RVV couldn't care less).
diff mbox series

Patch

diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
index 2d0957a90b..8a2cdb34df 100644
--- a/tests/checkasm/h263dsp.c
+++ b/tests/checkasm/h263dsp.c
@@ -18,13 +18,55 @@ 
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#include <stdbool.h>
 #include <string.h>
 
 #include "checkasm.h"
 
-#include "libavcodec/h263dsp.h"
+#include "libavutil/avassert.h"
 #include "libavutil/mem.h"
 #include "libavutil/mem_internal.h"
+#include "libavcodec/h263dsp.h"
+#include "libavcodec/mpegvideodata.h"
+
+static uint_fast8_t mpeg_qscale_rnd(void)
+{
+    int n = rnd(), q = (n >> 1) & 31;
+
+    if (n & 1)
+        return ff_mpeg2_non_linear_qscale[q];
+    else
+        return q << 1;
+}
+
+typedef void (*unquantizer)(int16_t *, size_t, int, int);
+
+static void check_dct_unquantize(unquantizer func, const char *name)
+{
+#define LEN 64
+    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
+    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);
+    size_t len = 1 + (rnd() & (LEN - 1));
+    const int qscale = mpeg_qscale_rnd();
+    const int qmul = qscale << 1;
+    const int qadd = (rnd() & 1) ? (qscale - 1) | 1 : 0;
+
+    declare_func(void, int16_t *, size_t, int, int);
+
+    for (size_t i = 0; i < LEN; i++)
+        block1[i] = block0[i] = (rnd() & 1) ? rnd() : 0;
+
+    if (check_func(func, "h263dsp.dct_unquantize_%s", name)) {
+        av_assert0(len <= LEN);
+        call_ref(block0, len, qmul, qadd);
+        call_new(block1, len, qmul, qadd);
+
+        if (memcmp(block0, block1, len * sizeof (int16_t)))
+            fail();
+
+        bench_new(block1, LEN, qmul, qadd);
+    }
+}
 
 typedef void (*filter)(uint8_t *src, int stride, int qscale);
 
@@ -56,6 +98,9 @@  void checkasm_check_h263dsp(void)
     H263DSPContext ctx;
 
     ff_h263dsp_init(&ctx);
+    check_dct_unquantize(ctx.h263_dct_unquantize_intra, "intra");
+    check_dct_unquantize(ctx.h263_dct_unquantize_inter, "inter");
+    report("dct_unquantize");
     check_loop_filter('h', ctx.h263_h_loop_filter);
     check_loop_filter('v', ctx.h263_v_loop_filter);
     report("loop_filter");