Message ID | 20180303031550.41130-1-yingmingfan@gmail.com |
---|---|
State | Superseded |
Headers | show |
Works for me on MacOs 10.12 (x86_64 and x86_32). Martin
Hi there. We are working on adding some NEON optimizations for the HEVC decoder. But before submitting the NEON code, I'd like to submit some checkasm tests. The first one is the checkasm test for SAO. I tested this patch under macOS 10.13.3 and Debian; all FATE cases passed. Could anyone review this patch? Yingming Fan > On 3 Mar 2018, at 8:38 PM, Martin Vignali <martin.vignali@gmail.com> wrote: > > Works for me on MacOs 10.12 (x86_64 and x86_32). > > Martin > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
On 3/3/2018 12:15 AM, Yingming Fan wrote: > --- > tests/checkasm/Makefile | 2 +- > tests/checkasm/checkasm.c | 1 + > tests/checkasm/checkasm.h | 1 + > tests/checkasm/hevc_sao.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++ > tests/fate/checkasm.mak | 1 + > 5 files changed, 162 insertions(+), 1 deletion(-) > create mode 100644 tests/checkasm/hevc_sao.c > > diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile > index 77bdcf6e65..0520e264e2 100644 > --- a/tests/checkasm/Makefile > +++ b/tests/checkasm/Makefile > @@ -23,7 +23,7 @@ AVCODECOBJS-$(CONFIG_EXR_DECODER) += exrdsp.o > AVCODECOBJS-$(CONFIG_HUFFYUV_DECODER) += huffyuvdsp.o > AVCODECOBJS-$(CONFIG_JPEG2000_DECODER) += jpeg2000dsp.o > AVCODECOBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o > -AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_add_res.o hevc_idct.o > +AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_add_res.o hevc_idct.o hevc_sao.o > AVCODECOBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodsp.o > AVCODECOBJS-$(CONFIG_V210_ENCODER) += v210enc.o > AVCODECOBJS-$(CONFIG_VP9_DECODER) += vp9dsp.o > diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c > index a4b8aff984..fe81d139c6 100644 > --- a/tests/checkasm/checkasm.c > +++ b/tests/checkasm/checkasm.c > @@ -116,6 +116,7 @@ static const struct { > #if CONFIG_HEVC_DECODER > { "hevc_add_res", checkasm_check_hevc_add_res }, > { "hevc_idct", checkasm_check_hevc_idct }, > + { "hevc_sao", checkasm_check_hevc_sao }, > #endif > #if CONFIG_HUFFYUV_DECODER > { "huffyuvdsp", checkasm_check_huffyuvdsp }, > diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h > index 3de38e6717..8b9d96bc15 100644 > --- a/tests/checkasm/checkasm.h > +++ b/tests/checkasm/checkasm.h > @@ -57,6 +57,7 @@ void checkasm_check_h264pred(void); > void checkasm_check_h264qpel(void); > void checkasm_check_hevc_add_res(void); > void checkasm_check_hevc_idct(void); > +void checkasm_check_hevc_sao(void); > void checkasm_check_huffyuvdsp(void); > void 
checkasm_check_jpeg2000dsp(void); > void checkasm_check_llviddsp(void); > diff --git a/tests/checkasm/hevc_sao.c b/tests/checkasm/hevc_sao.c > new file mode 100644 > index 0000000000..e2a0a54e9b > --- /dev/null > +++ b/tests/checkasm/hevc_sao.c > @@ -0,0 +1,158 @@ > +/* > + * Copyright (c) 2018 Yingming Fan <yingmingfan@gmail.com> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License along > + * with FFmpeg; if not, write to the Free Software Foundation, Inc., > + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
> + */ > + > +#include <string.h> > + > +#include "libavutil/intreadwrite.h" > + > +#include "libavcodec/avcodec.h" > + > +#include "libavcodec/hevcdsp.h" > + > +#include "checkasm.h" > + > +static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff }; > +static const uint32_t sao_size[5] = {8, 16, 32, 48, 64}; > + > +#define SIZEOF_PIXEL ((bit_depth + 7) / 8) > +#define PIXEL_STRIDE (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) //same with sao_edge src_stride > +#define BUF_SIZE (PIXEL_STRIDE * (64+2) * 2) //+2 for top and bottom row, *2 for high bit depth > +#define OFFSET_THRESH (1 << (bit_depth - 5)) > +#define OFFSET_LENGTH 5 > + > +#define randomize_buffers(buf0, buf1, size) \ > + do { \ > + uint32_t mask = pixel_mask[bit_depth - 8]; \ > + int i; \ > + if (bit_depth == 8) { \ > + for (i = 0; i < size; i += 4) { \ > + uint32_t r = rnd() & mask; \ > + AV_WN32A(buf0 + i, r); \ > + AV_WN32A(buf1 + i, r); \ > + } \ > + } else { \ > + for (i = 0; i < size; i += 2) { \ > + uint32_t r = rnd() & mask; \ > + AV_WN32A((uint16_t *)buf0 + i, r); \ > + AV_WN32A((uint16_t *)buf1 + i, r); \ > + } \ > + } \ > + } while (0) > + > +#define randomize_buffers2(buf, size) \ > + do { \ > + uint32_t max_offset = OFFSET_THRESH; \ > + int i; \ > + if (bit_depth == 8) { \ > + for (i = 0; i < size; i++) { \ > + uint8_t r = rnd() % max_offset; \ > + buf[i] = r; \ > + } \ > + } else { \ > + for (i = 0; i < size; i++) { \ > + uint16_t r = rnd() % max_offset; \ > + buf[i] = r; \ > + } \ > + } \ > + } while (0) > + > +static void check_sao_band(HEVCDSPContext h, int bit_depth) > +{ > + int i; > + LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]); > + LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]); > + LOCAL_ALIGNED_32(uint8_t, src0, [BUF_SIZE]); > + LOCAL_ALIGNED_32(uint8_t, src1, [BUF_SIZE]); > + int16_t offset_val[OFFSET_LENGTH]; > + int left_class = rnd()%32; > + > + for (i = 0; i <= 4; i++) { > + int block_size = sao_size[i]; > + ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL; > + 
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, > + int16_t *sao_offset_val, int sao_left_class, int width, int height); > + > + randomize_buffers(src0, src1, BUF_SIZE/SIZEOF_PIXEL); > + randomize_buffers2(offset_val, OFFSET_LENGTH); > + memset(dst0, 0, BUF_SIZE); > + memset(dst1, 0, BUF_SIZE); > + > + if (check_func(h.sao_band_filter[i], "hevc_sao_band_%dx%d_%d", block_size, block_size, bit_depth)) { > + call_ref(dst0, src0, stride, stride, offset_val, left_class, block_size, block_size); > + call_new(dst1, src1, stride, stride, offset_val, left_class, block_size, block_size); > + if (memcmp(dst0, dst1, BUF_SIZE)) > + fail(); > + bench_new(dst1, src1, stride, stride, offset_val, left_class, block_size, block_size); > + } > + } > +} > + > +static void check_sao_edge(HEVCDSPContext h, int bit_depth) > +{ > + int i; > + LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]); > + LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]); > + LOCAL_ALIGNED_32(uint8_t, src0, [BUF_SIZE]); > + LOCAL_ALIGNED_32(uint8_t, src1, [BUF_SIZE]); > + int16_t offset_val[OFFSET_LENGTH]; > + int eo = rnd()%4; > + > + for (i = 0; i <= 4; i++) { > + int block_size = sao_size[i]; > + ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL; > + int offset = (AV_INPUT_BUFFER_PADDING_SIZE + PIXEL_STRIDE)*SIZEOF_PIXEL; > + declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst, > + int16_t *sao_offset_val, int eo, int width, int height); > + > + randomize_buffers(src0, src1, BUF_SIZE/SIZEOF_PIXEL); > + randomize_buffers2(offset_val, OFFSET_LENGTH); > + memset(dst0, 0, BUF_SIZE); > + memset(dst1, 0, BUF_SIZE); > + > + if (check_func(h.sao_edge_filter[i], "hevc_sao_edge_%dx%d_%d", block_size, block_size, bit_depth)) { > + call_ref(dst0, src0 + offset, stride, offset_val, eo, block_size, block_size); > + call_new(dst1, src1 + offset, stride, offset_val, eo, block_size, block_size); > + if (memcmp(dst0, dst1, BUF_SIZE)) > + fail(); 
> + bench_new(dst1, src1 + offset, stride, offset_val, eo, block_size, block_size); > + } > + } > +} > + > +void checkasm_check_hevc_sao(void) > +{ > + int bit_depth; > + > + for (bit_depth = 8; bit_depth <= 10; bit_depth++) { I don't think SIMD for 9bit hevc will ever be written, seeing we don't even have conformance samples of such bitstreams. Doesn't hurt testing it anyway just in case, i guess. What we do have however are 12bit samples and SIMD, so it would be ideal if you could test that here as well. > + HEVCDSPContext h; > + > + ff_hevc_dsp_init(&h, bit_depth); > + check_sao_band(h, bit_depth); > + } > + report("sao_band"); > + > + for (bit_depth = 8; bit_depth <= 10; bit_depth++) { > + HEVCDSPContext h; > + > + ff_hevc_dsp_init(&h, bit_depth); > + check_sao_edge(h, bit_depth); > + } > + report("sao_edge"); > +} > diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak > index 826ac16789..cf62f9b119 100644 > --- a/tests/fate/checkasm.mak > +++ b/tests/fate/checkasm.mak > @@ -14,6 +14,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp \ > fate-checkasm-h264qpel \ > fate-checkasm-hevc_add_res \ > fate-checkasm-hevc_idct \ > + fate-checkasm-hevc_sao \ > fate-checkasm-jpeg2000dsp \ > fate-checkasm-llviddsp \ > fate-checkasm-llviddspenc \ >
Hi James, I agree with what you said: we should test 8, 10 and 12 bit. But I noticed that hevc_idct and hevc_add_res also do not test 12 bit. I think we should add 12 bit to those checkasm tests as well. 2018-03-06 21:55 GMT+08:00 James Almer <jamrial@gmail.com>: > On 3/3/2018 12:15 AM, Yingming Fan wrote: > > --- > > tests/checkasm/Makefile | 2 +- > > tests/checkasm/checkasm.c | 1 + > > tests/checkasm/checkasm.h | 1 + > > tests/checkasm/hevc_sao.c | 158 ++++++++++++++++++++++++++++++ > ++++++++++++++++ > > tests/fate/checkasm.mak | 1 + > > 5 files changed, 162 insertions(+), 1 deletion(-) > > create mode 100644 tests/checkasm/hevc_sao.c > > > > diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile > > index 77bdcf6e65..0520e264e2 100644 > > --- a/tests/checkasm/Makefile > > +++ b/tests/checkasm/Makefile > > @@ -23,7 +23,7 @@ AVCODECOBJS-$(CONFIG_EXR_DECODER) += exrdsp.o > > AVCODECOBJS-$(CONFIG_HUFFYUV_DECODER) += huffyuvdsp.o > > AVCODECOBJS-$(CONFIG_JPEG2000_DECODER) += jpeg2000dsp.o > > AVCODECOBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o > > -AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_add_res.o hevc_idct.o > > +AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_add_res.o hevc_idct.o > hevc_sao.o > > AVCODECOBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodsp.o > > AVCODECOBJS-$(CONFIG_V210_ENCODER) += v210enc.o > > AVCODECOBJS-$(CONFIG_VP9_DECODER) += vp9dsp.o > > diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c > > index a4b8aff984..fe81d139c6 100644 > > --- a/tests/checkasm/checkasm.c > > +++ b/tests/checkasm/checkasm.c > > @@ -116,6 +116,7 @@ static const struct { > > #if CONFIG_HEVC_DECODER > > { "hevc_add_res", checkasm_check_hevc_add_res }, > > { "hevc_idct", checkasm_check_hevc_idct }, > > + { "hevc_sao", checkasm_check_hevc_sao }, > > #endif > > #if CONFIG_HUFFYUV_DECODER > > { "huffyuvdsp", checkasm_check_huffyuvdsp }, > > diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h > > index 3de38e6717..8b9d96bc15 100644 > > --- 
a/tests/checkasm/checkasm.h > > +++ b/tests/checkasm/checkasm.h > > @@ -57,6 +57,7 @@ void checkasm_check_h264pred(void); > > void checkasm_check_h264qpel(void); > > void checkasm_check_hevc_add_res(void); > > void checkasm_check_hevc_idct(void); > > +void checkasm_check_hevc_sao(void); > > void checkasm_check_huffyuvdsp(void); > > void checkasm_check_jpeg2000dsp(void); > > void checkasm_check_llviddsp(void); > > diff --git a/tests/checkasm/hevc_sao.c b/tests/checkasm/hevc_sao.c > > new file mode 100644 > > index 0000000000..e2a0a54e9b > > --- /dev/null > > +++ b/tests/checkasm/hevc_sao.c > > @@ -0,0 +1,158 @@ > > +/* > > + * Copyright (c) 2018 Yingming Fan <yingmingfan@gmail.com> > > + * > > + * This file is part of FFmpeg. > > + * > > + * FFmpeg is free software; you can redistribute it and/or modify > > + * it under the terms of the GNU General Public License as published by > > + * the Free Software Foundation; either version 2 of the License, or > > + * (at your option) any later version. > > + * > > + * FFmpeg is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > + * GNU General Public License for more details. > > + * > > + * You should have received a copy of the GNU General Public License > along > > + * with FFmpeg; if not, write to the Free Software Foundation, Inc., > > + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
> > + */ > > + > > +#include <string.h> > > + > > +#include "libavutil/intreadwrite.h" > > + > > +#include "libavcodec/avcodec.h" > > + > > +#include "libavcodec/hevcdsp.h" > > + > > +#include "checkasm.h" > > + > > +static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, > 0x03ff03ff }; > > +static const uint32_t sao_size[5] = {8, 16, 32, 48, 64}; > > + > > +#define SIZEOF_PIXEL ((bit_depth + 7) / 8) > > +#define PIXEL_STRIDE (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) > //same with sao_edge src_stride > > +#define BUF_SIZE (PIXEL_STRIDE * (64+2) * 2) //+2 for top and bottom > row, *2 for high bit depth > > +#define OFFSET_THRESH (1 << (bit_depth - 5)) > > +#define OFFSET_LENGTH 5 > > + > > +#define randomize_buffers(buf0, buf1, size) \ > > + do { \ > > + uint32_t mask = pixel_mask[bit_depth - 8]; \ > > + int i; \ > > + if (bit_depth == 8) { \ > > + for (i = 0; i < size; i += 4) { \ > > + uint32_t r = rnd() & mask; \ > > + AV_WN32A(buf0 + i, r); \ > > + AV_WN32A(buf1 + i, r); \ > > + } \ > > + } else { \ > > + for (i = 0; i < size; i += 2) { \ > > + uint32_t r = rnd() & mask; \ > > + AV_WN32A((uint16_t *)buf0 + i, r); \ > > + AV_WN32A((uint16_t *)buf1 + i, r); \ > > + } \ > > + } \ > > + } while (0) > > + > > +#define randomize_buffers2(buf, size) \ > > + do { \ > > + uint32_t max_offset = OFFSET_THRESH; \ > > + int i; \ > > + if (bit_depth == 8) { \ > > + for (i = 0; i < size; i++) { \ > > + uint8_t r = rnd() % max_offset; \ > > + buf[i] = r; \ > > + } \ > > + } else { \ > > + for (i = 0; i < size; i++) { \ > > + uint16_t r = rnd() % max_offset; \ > > + buf[i] = r; \ > > + } \ > > + } \ > > + } while (0) > > + > > +static void check_sao_band(HEVCDSPContext h, int bit_depth) > > +{ > > + int i; > > + LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]); > > + LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]); > > + LOCAL_ALIGNED_32(uint8_t, src0, [BUF_SIZE]); > > + LOCAL_ALIGNED_32(uint8_t, src1, [BUF_SIZE]); > > + int16_t offset_val[OFFSET_LENGTH]; > > + int 
left_class = rnd()%32; > > + > > + for (i = 0; i <= 4; i++) { > > + int block_size = sao_size[i]; > > + ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL; > > + declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t > *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, > > + int16_t *sao_offset_val, int sao_left_class, > int width, int height); > > + > > + randomize_buffers(src0, src1, BUF_SIZE/SIZEOF_PIXEL); > > + randomize_buffers2(offset_val, OFFSET_LENGTH); > > + memset(dst0, 0, BUF_SIZE); > > + memset(dst1, 0, BUF_SIZE); > > + > > + if (check_func(h.sao_band_filter[i], "hevc_sao_band_%dx%d_%d", > block_size, block_size, bit_depth)) { > > + call_ref(dst0, src0, stride, stride, offset_val, > left_class, block_size, block_size); > > + call_new(dst1, src1, stride, stride, offset_val, > left_class, block_size, block_size); > > + if (memcmp(dst0, dst1, BUF_SIZE)) > > + fail(); > > + bench_new(dst1, src1, stride, stride, offset_val, > left_class, block_size, block_size); > > + } > > + } > > +} > > + > > +static void check_sao_edge(HEVCDSPContext h, int bit_depth) > > +{ > > + int i; > > + LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]); > > + LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]); > > + LOCAL_ALIGNED_32(uint8_t, src0, [BUF_SIZE]); > > + LOCAL_ALIGNED_32(uint8_t, src1, [BUF_SIZE]); > > + int16_t offset_val[OFFSET_LENGTH]; > > + int eo = rnd()%4; > > + > > + for (i = 0; i <= 4; i++) { > > + int block_size = sao_size[i]; > > + ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL; > > + int offset = (AV_INPUT_BUFFER_PADDING_SIZE + > PIXEL_STRIDE)*SIZEOF_PIXEL; > > + declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t > *src, ptrdiff_t stride_dst, > > + int16_t *sao_offset_val, int eo, int width, > int height); > > + > > + randomize_buffers(src0, src1, BUF_SIZE/SIZEOF_PIXEL); > > + randomize_buffers2(offset_val, OFFSET_LENGTH); > > + memset(dst0, 0, BUF_SIZE); > > + memset(dst1, 0, BUF_SIZE); > > + > > + if (check_func(h.sao_edge_filter[i], "hevc_sao_edge_%dx%d_%d", 
> block_size, block_size, bit_depth)) { > > + call_ref(dst0, src0 + offset, stride, offset_val, eo, > block_size, block_size); > > + call_new(dst1, src1 + offset, stride, offset_val, eo, > block_size, block_size); > > + if (memcmp(dst0, dst1, BUF_SIZE)) > > + fail(); > > + bench_new(dst1, src1 + offset, stride, offset_val, eo, > block_size, block_size); > > + } > > + } > > +} > > + > > +void checkasm_check_hevc_sao(void) > > +{ > > + int bit_depth; > > + > > + for (bit_depth = 8; bit_depth <= 10; bit_depth++) { > > I don't think SIMD for 9bit hevc will ever be written, seeing we don't > even have conformance samples of such bitstreams. Doesn't hurt testing > it anyway just in case, i guess. > > What we do have however are 12bit samples and SIMD, so it would be ideal > if you could test that here as well. > + HEVCDSPContext h; > > + > > + ff_hevc_dsp_init(&h, bit_depth); > > + check_sao_band(h, bit_depth); > > + } > > + report("sao_band"); > > + > > + for (bit_depth = 8; bit_depth <= 10; bit_depth++) { > > + HEVCDSPContext h; > > + > > + ff_hevc_dsp_init(&h, bit_depth); > > + check_sao_edge(h, bit_depth); > > + } > > + report("sao_edge"); > > +} > > diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak > > index 826ac16789..cf62f9b119 100644 > > --- a/tests/fate/checkasm.mak > > +++ b/tests/fate/checkasm.mak > > @@ -14,6 +14,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp > \ > > fate-checkasm-h264qpel > \ > > fate-checkasm-hevc_add_res > \ > > fate-checkasm-hevc_idct > \ > > + fate-checkasm-hevc_sao > \ > > fate-checkasm-jpeg2000dsp > \ > > fate-checkasm-llviddsp > \ > > fate-checkasm-llviddspenc > \ > > > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel >
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index 77bdcf6e65..0520e264e2 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -23,7 +23,7 @@ AVCODECOBJS-$(CONFIG_EXR_DECODER) += exrdsp.o AVCODECOBJS-$(CONFIG_HUFFYUV_DECODER) += huffyuvdsp.o AVCODECOBJS-$(CONFIG_JPEG2000_DECODER) += jpeg2000dsp.o AVCODECOBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o -AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_add_res.o hevc_idct.o +AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_add_res.o hevc_idct.o hevc_sao.o AVCODECOBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodsp.o AVCODECOBJS-$(CONFIG_V210_ENCODER) += v210enc.o AVCODECOBJS-$(CONFIG_VP9_DECODER) += vp9dsp.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index a4b8aff984..fe81d139c6 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -116,6 +116,7 @@ static const struct { #if CONFIG_HEVC_DECODER { "hevc_add_res", checkasm_check_hevc_add_res }, { "hevc_idct", checkasm_check_hevc_idct }, + { "hevc_sao", checkasm_check_hevc_sao }, #endif #if CONFIG_HUFFYUV_DECODER { "huffyuvdsp", checkasm_check_huffyuvdsp }, diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index 3de38e6717..8b9d96bc15 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -57,6 +57,7 @@ void checkasm_check_h264pred(void); void checkasm_check_h264qpel(void); void checkasm_check_hevc_add_res(void); void checkasm_check_hevc_idct(void); +void checkasm_check_hevc_sao(void); void checkasm_check_huffyuvdsp(void); void checkasm_check_jpeg2000dsp(void); void checkasm_check_llviddsp(void); diff --git a/tests/checkasm/hevc_sao.c b/tests/checkasm/hevc_sao.c new file mode 100644 index 0000000000..e2a0a54e9b --- /dev/null +++ b/tests/checkasm/hevc_sao.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2018 Yingming Fan <yingmingfan@gmail.com> + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <string.h> + +#include "libavutil/intreadwrite.h" + +#include "libavcodec/avcodec.h" + +#include "libavcodec/hevcdsp.h" + +#include "checkasm.h" + +static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff }; +static const uint32_t sao_size[5] = {8, 16, 32, 48, 64}; + +#define SIZEOF_PIXEL ((bit_depth + 7) / 8) +#define PIXEL_STRIDE (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) //same with sao_edge src_stride +#define BUF_SIZE (PIXEL_STRIDE * (64+2) * 2) //+2 for top and bottom row, *2 for high bit depth +#define OFFSET_THRESH (1 << (bit_depth - 5)) +#define OFFSET_LENGTH 5 + +#define randomize_buffers(buf0, buf1, size) \ + do { \ + uint32_t mask = pixel_mask[bit_depth - 8]; \ + int i; \ + if (bit_depth == 8) { \ + for (i = 0; i < size; i += 4) { \ + uint32_t r = rnd() & mask; \ + AV_WN32A(buf0 + i, r); \ + AV_WN32A(buf1 + i, r); \ + } \ + } else { \ + for (i = 0; i < size; i += 2) { \ + uint32_t r = rnd() & mask; \ + AV_WN32A((uint16_t *)buf0 + i, r); \ + AV_WN32A((uint16_t *)buf1 + i, r); \ + } \ + } \ + } while (0) + +#define randomize_buffers2(buf, size) \ + do { \ + uint32_t max_offset = OFFSET_THRESH; \ + int i; \ + if (bit_depth == 8) { \ + for (i = 0; i < size; i++) { \ + uint8_t r = rnd() % max_offset; \ + buf[i] = r; \ 
+ } \ + } else { \ + for (i = 0; i < size; i++) { \ + uint16_t r = rnd() % max_offset; \ + buf[i] = r; \ + } \ + } \ + } while (0) + +static void check_sao_band(HEVCDSPContext h, int bit_depth) +{ + int i; + LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]); + LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]); + LOCAL_ALIGNED_32(uint8_t, src0, [BUF_SIZE]); + LOCAL_ALIGNED_32(uint8_t, src1, [BUF_SIZE]); + int16_t offset_val[OFFSET_LENGTH]; + int left_class = rnd()%32; + + for (i = 0; i <= 4; i++) { + int block_size = sao_size[i]; + ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL; + declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, + int16_t *sao_offset_val, int sao_left_class, int width, int height); + + randomize_buffers(src0, src1, BUF_SIZE/SIZEOF_PIXEL); + randomize_buffers2(offset_val, OFFSET_LENGTH); + memset(dst0, 0, BUF_SIZE); + memset(dst1, 0, BUF_SIZE); + + if (check_func(h.sao_band_filter[i], "hevc_sao_band_%dx%d_%d", block_size, block_size, bit_depth)) { + call_ref(dst0, src0, stride, stride, offset_val, left_class, block_size, block_size); + call_new(dst1, src1, stride, stride, offset_val, left_class, block_size, block_size); + if (memcmp(dst0, dst1, BUF_SIZE)) + fail(); + bench_new(dst1, src1, stride, stride, offset_val, left_class, block_size, block_size); + } + } +} + +static void check_sao_edge(HEVCDSPContext h, int bit_depth) +{ + int i; + LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]); + LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]); + LOCAL_ALIGNED_32(uint8_t, src0, [BUF_SIZE]); + LOCAL_ALIGNED_32(uint8_t, src1, [BUF_SIZE]); + int16_t offset_val[OFFSET_LENGTH]; + int eo = rnd()%4; + + for (i = 0; i <= 4; i++) { + int block_size = sao_size[i]; + ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL; + int offset = (AV_INPUT_BUFFER_PADDING_SIZE + PIXEL_STRIDE)*SIZEOF_PIXEL; + declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst, + int16_t *sao_offset_val, int eo, int width, int 
height); + + randomize_buffers(src0, src1, BUF_SIZE/SIZEOF_PIXEL); + randomize_buffers2(offset_val, OFFSET_LENGTH); + memset(dst0, 0, BUF_SIZE); + memset(dst1, 0, BUF_SIZE); + + if (check_func(h.sao_edge_filter[i], "hevc_sao_edge_%dx%d_%d", block_size, block_size, bit_depth)) { + call_ref(dst0, src0 + offset, stride, offset_val, eo, block_size, block_size); + call_new(dst1, src1 + offset, stride, offset_val, eo, block_size, block_size); + if (memcmp(dst0, dst1, BUF_SIZE)) + fail(); + bench_new(dst1, src1 + offset, stride, offset_val, eo, block_size, block_size); + } + } +} + +void checkasm_check_hevc_sao(void) +{ + int bit_depth; + + for (bit_depth = 8; bit_depth <= 10; bit_depth++) { + HEVCDSPContext h; + + ff_hevc_dsp_init(&h, bit_depth); + check_sao_band(h, bit_depth); + } + report("sao_band"); + + for (bit_depth = 8; bit_depth <= 10; bit_depth++) { + HEVCDSPContext h; + + ff_hevc_dsp_init(&h, bit_depth); + check_sao_edge(h, bit_depth); + } + report("sao_edge"); +} diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak index 826ac16789..cf62f9b119 100644 --- a/tests/fate/checkasm.mak +++ b/tests/fate/checkasm.mak @@ -14,6 +14,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp \ fate-checkasm-h264qpel \ fate-checkasm-hevc_add_res \ fate-checkasm-hevc_idct \ + fate-checkasm-hevc_sao \ fate-checkasm-jpeg2000dsp \ fate-checkasm-llviddsp \ fate-checkasm-llviddspenc \