Message ID | 20231205114146.7936-1-jdek@itanimul.li |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel] checkasm/hevc_deblock: add luma test | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
On Tue, 5 Dec 2023, J. Dekker wrote: > Signed-off-by: J. Dekker <jdek@itanimul.li> > --- > tests/checkasm/hevc_deblock.c | 110 ++++++++++++++++++++++++++++++++-- > 1 file changed, 106 insertions(+), 4 deletions(-) > > Yes, this only supports 8bit. 10/12bit should be trivial, will add if this > looks reasonable (I checked code paths using gdb, and as far as I can tell it > does test all three). Tested on known good x86 asm. Overall I think this looks mostly good. A few pointers for things that could be improved later, in addition to 10/12 bit support, and a few minor things to fix before pushing: > diff --git a/tests/checkasm/hevc_deblock.c b/tests/checkasm/hevc_deblock.c > index 66fc8d5646..3f970a470a 100644 > --- a/tests/checkasm/hevc_deblock.c > +++ b/tests/checkasm/hevc_deblock.c > @@ -29,8 +29,8 @@ > static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff }; > > #define SIZEOF_PIXEL ((bit_depth + 7) / 8) > -#define BUF_STRIDE (8 * 2) > -#define BUF_LINES (8) > +#define BUF_STRIDE (16 * 2) > +#define BUF_LINES (16) > #define BUF_OFFSET (BUF_STRIDE * BUF_LINES) > #define BUF_SIZE (BUF_STRIDE * BUF_LINES + BUF_OFFSET * 2) For these calculations, make sure to check that things stay within the expected bounds for all reads/writes, if you extend it to 10/12 bits later. > @@ -88,14 +88,116 @@ static void check_deblock_chroma(HEVCDSPContext *h, int bit_depth) > } > } > > +// line zero > +#define P3 buf[-4 * xstride] > +#define P2 buf[-3 * xstride] > +#define P1 buf[-2 * xstride] > +#define P0 buf[-1 * xstride] > +#define Q0 buf[0 * xstride] > +#define Q1 buf[1 * xstride] > +#define Q2 buf[2 * xstride] > +#define Q3 buf[3 * xstride] > + > +// line three. used only for deblocking decision I don't immediately see what these comments about line zero and line three mean here. 
> +#define TP3 buf[-4 * xstride + 3 * ystride] > +#define TP2 buf[-3 * xstride + 3 * ystride] > +#define TP1 buf[-2 * xstride + 3 * ystride] > +#define TP0 buf[-1 * xstride + 3 * ystride] > +#define TQ0 buf[0 * xstride + 3 * ystride] > +#define TQ1 buf[1 * xstride + 3 * ystride] > +#define TQ2 buf[2 * xstride + 3 * ystride] > +#define TQ3 buf[3 * xstride + 3 * ystride] Hmm, this seems to be unused currently? > + > +static void randomize_luma_buffers(int type, uint8_t *buf, ptrdiff_t xstride, ptrdiff_t ystride) > +{ > + int i; > + buf += BUF_OFFSET; > + switch (type) { > + case 0: // strong > + for (i = 0; i < 16; i++) { > + P3 = P2 = P1 = P0 = 64; > + Q0 = Q1 = Q2 = Q3 = 80; > + buf += ystride; > + } > + break; > + case 1: // weak > + for (i = 0; i < 16; i++) { > + P3 = P2 = 60; P1 = P0 = 64; > + Q0 = Q1 = 74; Q2 = Q3 = 80; > + buf += ystride; > + } These two aren't very random right now :-) Can we fit within the same criteria while keeping things somewhat random? E.g. "P3 = P2 + (rand() % 2*range) - range", or something along those lines? > +static void check_deblock_luma(HEVCDSPContext *h, int bit_depth) > +{ > + const char *type; > + const char *types[3] = { "strong", "normal", "skip" }; > + int beta; > + int32_t tc[2] = { 0, 0 }; > + // no_p, no_q can only be { 0,0 } for the simpler assembly (non *_c > + // variant) functions, see deblocking_filter_CTB() in hevc_filter.c Can we extend this to test the _c variant as well? > + uint8_t no_p[2] = { 0, 0 }; > + uint8_t no_q[2] = { 0, 0 }; > + LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]); > + LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]); > + > + declare_func(void, uint8_t *pix, ptrdiff_t stride, int beta, int32_t *tc, uint8_t *no_p, uint8_t *no_q); > + > + for (int j = 0; j < 3; j++) { > + beta = (j == 3) ? 0 : 32; // beta easy way to turn off filtering We never hit j==3 here, did you intend to have a fourth type included here? Can we use a less hardcoded beta value here, while still triggering the right cases? 
> + type = types[j]; > + > + // see betatable[] in hevc_filter.c > + tc[0] = (rnd() & 63) + (rnd() & 1); > + tc[1] = (rnd() & 63) + (rnd() & 1); Hmm, at first glance I don't see how the tc[] values relate to betatable[]. // Martin
diff --git a/tests/checkasm/hevc_deblock.c b/tests/checkasm/hevc_deblock.c index 66fc8d5646..3f970a470a 100644 --- a/tests/checkasm/hevc_deblock.c +++ b/tests/checkasm/hevc_deblock.c @@ -29,8 +29,8 @@ static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff }; #define SIZEOF_PIXEL ((bit_depth + 7) / 8) -#define BUF_STRIDE (8 * 2) -#define BUF_LINES (8) +#define BUF_STRIDE (16 * 2) +#define BUF_LINES (16) #define BUF_OFFSET (BUF_STRIDE * BUF_LINES) #define BUF_SIZE (BUF_STRIDE * BUF_LINES + BUF_OFFSET * 2) @@ -88,14 +88,116 @@ static void check_deblock_chroma(HEVCDSPContext *h, int bit_depth) } } +// line zero +#define P3 buf[-4 * xstride] +#define P2 buf[-3 * xstride] +#define P1 buf[-2 * xstride] +#define P0 buf[-1 * xstride] +#define Q0 buf[0 * xstride] +#define Q1 buf[1 * xstride] +#define Q2 buf[2 * xstride] +#define Q3 buf[3 * xstride] + +// line three. used only for deblocking decision +#define TP3 buf[-4 * xstride + 3 * ystride] +#define TP2 buf[-3 * xstride + 3 * ystride] +#define TP1 buf[-2 * xstride + 3 * ystride] +#define TP0 buf[-1 * xstride + 3 * ystride] +#define TQ0 buf[0 * xstride + 3 * ystride] +#define TQ1 buf[1 * xstride + 3 * ystride] +#define TQ2 buf[2 * xstride + 3 * ystride] +#define TQ3 buf[3 * xstride + 3 * ystride] + +static void randomize_luma_buffers(int type, uint8_t *buf, ptrdiff_t xstride, ptrdiff_t ystride) +{ + int i; + buf += BUF_OFFSET; + switch (type) { + case 0: // strong + for (i = 0; i < 16; i++) { + P3 = P2 = P1 = P0 = 64; + Q0 = Q1 = Q2 = Q3 = 80; + buf += ystride; + } + break; + case 1: // weak + for (i = 0; i < 16; i++) { + P3 = P2 = 60; P1 = P0 = 64; + Q0 = Q1 = 74; Q2 = Q3 = 80; + buf += ystride; + } + break; + case 2: // none + for (i = 0; i < 16; i++) { + for (int j = -8; j < 8; j++) { + buf[j * xstride + i * ystride] = rnd(); + } + } + break; + } +} + +static void check_deblock_luma(HEVCDSPContext *h, int bit_depth) +{ + const char *type; + const char *types[3] = { "strong", "normal", "skip" }; + int 
beta; + int32_t tc[2] = { 0, 0 }; + // no_p, no_q can only be { 0,0 } for the simpler assembly (non *_c + // variant) functions, see deblocking_filter_CTB() in hevc_filter.c + uint8_t no_p[2] = { 0, 0 }; + uint8_t no_q[2] = { 0, 0 }; + LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]); + LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]); + + declare_func(void, uint8_t *pix, ptrdiff_t stride, int beta, int32_t *tc, uint8_t *no_p, uint8_t *no_q); + + for (int j = 0; j < 3; j++) { + beta = (j == 3) ? 0 : 32; // beta easy way to turn off filtering + type = types[j]; + + // see betatable[] in hevc_filter.c + tc[0] = (rnd() & 63) + (rnd() & 1); + tc[1] = (rnd() & 63) + (rnd() & 1); + + if (check_func(h->hevc_h_loop_filter_luma, "hevc_h_loop_filter_luma%d_%s", bit_depth, type)) { + for (int i = 0; i < 4; i++) { + randomize_luma_buffers(j, buf0, 16, 1); + memcpy(buf1, buf0, BUF_SIZE); + + call_ref(buf0 + BUF_OFFSET, 16, beta, tc, no_p, no_q); + call_new(buf1 + BUF_OFFSET, 16, beta, tc, no_p, no_q); + if (memcmp(buf0, buf1, BUF_SIZE)) + fail(); + } + bench_new(buf1 + BUF_OFFSET, 16, beta, tc, no_p, no_q); + } + + if (check_func(h->hevc_v_loop_filter_luma, "hevc_v_loop_filter_luma%d_%s", bit_depth, type)) { + for (int i = 0; i < 4; i++) { + randomize_luma_buffers(j, buf0, 1, 16); + memcpy(buf1, buf0, BUF_SIZE); + + call_ref(buf0 + BUF_OFFSET, 16, beta, tc, no_p, no_q); + call_new(buf1 + BUF_OFFSET, 16, beta, tc, no_p, no_q); + if (memcmp(buf0, buf1, BUF_SIZE)) + fail(); + } + bench_new(buf1 + BUF_OFFSET, 16, beta, tc, no_p, no_q); + } + } +} + void checkasm_check_hevc_deblock(void) { + HEVCDSPContext h; int bit_depth; - for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) { - HEVCDSPContext h; ff_hevc_dsp_init(&h, bit_depth); check_deblock_chroma(&h, bit_depth); } report("chroma"); + ff_hevc_dsp_init(&h, 8); + check_deblock_luma(&h, 8); + report("luma"); }
Signed-off-by: J. Dekker <jdek@itanimul.li> --- tests/checkasm/hevc_deblock.c | 110 ++++++++++++++++++++++++++++++++-- 1 file changed, 106 insertions(+), 4 deletions(-) Yes, this only supports 8-bit. 10/12-bit should be trivial; I will add it if this looks reasonable (I checked the code paths using gdb, and as far as I can tell it does test all three). Tested on known-good x86 asm.