diff mbox series

[FFmpeg-devel,DON'T,MERGE,1/2] checkasm/sw_rgb: test rgb24 to yuv

Message ID tencent_90E6136AF5D6E919AEA9254393048855B305@qq.com
State New
Headers show
Series [FFmpeg-devel,DON'T,MERGE,1/2] checkasm/sw_rgb: test rgb24 to yuv | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 fail Make fate failed

Commit Message

Zhao Zhili June 3, 2024, 7:17 a.m. UTC
From: Zhao Zhili <zhilizhao@tencent.com>

---
I need help with this test. It succeeds with the following patch on ARM64,
but fails on x86. I'm not sure whether the issue is in the test itself or
hidden in the x86 asm, and I don't know x86 asm.

 tests/checkasm/sw_rgb.c | 126 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)

Comments

Martin Storsjö June 3, 2024, 8 a.m. UTC | #1
On Mon, 3 Jun 2024, Zhao Zhili wrote:

> From: Zhao Zhili <zhilizhao@tencent.com>
>
> ---
> I need help with this test. It succeeds with the following patch on ARM64,
> but fails on x86. I'm not sure whether the issue is in the test itself or
> hidden in the x86 asm, and I don't know x86 asm.

Note that by default, the output of swscale can differ in rounding from 
the reference C code. If the SWS_ACCURATE_RND flag is set, it shouldn't 
differ, though.

So it makes sense to test twice, once with SWS_ACCURATE_RND set, doing a 
bitexact test of the output, and once without it, allowing the values to 
be off by one (or a few?).

See the existing test in checkasm/sw_scale.c for one example on doing 
this.

// Martin
diff mbox series

Patch

diff --git a/tests/checkasm/sw_rgb.c b/tests/checkasm/sw_rgb.c
index 7cd815e5be..68afee95e3 100644
--- a/tests/checkasm/sw_rgb.c
+++ b/tests/checkasm/sw_rgb.c
@@ -24,6 +24,8 @@ 
 #include "libavutil/mem_internal.h"
 
 #include "libswscale/rgb2rgb.h"
+#include "libswscale/swscale.h"
+#include "libswscale/swscale_internal.h"
 
 #include "checkasm.h"
 
@@ -111,6 +113,124 @@  static void check_uyvy_to_422p(void)
     }
 }
 
+/**
+ * Check the optimised RGB24 -> luma input converter (lumToYV12) against C.
+ * The context is created for RGB24 -> YUV420P so libswscale fills
+ * input_rgb2yuv_table itself (SIMD code may read past the nine C entries);
+ * ACCURATE_RND | BITEXACT are requested as the results are compared exactly.
+ */
+static void check_rgb_to_y(void)
+{
+    struct SwsContext *ctx;
+#define LARGEST_INPUT_SIZE 4096
+    static const int input_sizes[] = {8, 128, 1280, 1080, LARGEST_INPUT_SIZE};
+
+    declare_func(void, uint8_t *dst, const uint8_t *src,
+                 const uint8_t *unused1, const uint8_t *unused2, int width,
+                 uint32_t *rgb2yuv, void *opq);
+
+    LOCAL_ALIGNED_32(uint8_t, src, [LARGEST_INPUT_SIZE * 3]);
+    LOCAL_ALIGNED_32(uint8_t, dst0_y, [LARGEST_INPUT_SIZE * 2]); /* 16-bit samples */
+    LOCAL_ALIGNED_32(uint8_t, dst1_y, [LARGEST_INPUT_SIZE * 2]); /* 16-bit samples */
+
+    randomize_buffers(src, LARGEST_INPUT_SIZE * 3);
+
+    ctx = sws_getContext(LARGEST_INPUT_SIZE, LARGEST_INPUT_SIZE, AV_PIX_FMT_RGB24,
+                         LARGEST_INPUT_SIZE, LARGEST_INPUT_SIZE, AV_PIX_FMT_YUV420P,
+                         SWS_ACCURATE_RND | SWS_BITEXACT, NULL, NULL, NULL);
+    if (!ctx) {
+        fail();
+        return; /* nothing to test, and ctx must not be dereferenced */
+    }
+
+    /* The formats never change here, so one (re)initialisation is enough. */
+    ff_sws_init_scale(ctx);
+
+    for (int i = 0; i < FF_ARRAY_ELEMS(input_sizes); i++) {
+        int w = input_sizes[i];
+
+        if (check_func(ctx->lumToYV12, "rgb24_to_y_%d", w)) {
+            uint32_t *table = (uint32_t *)ctx->input_rgb2yuv_table;
+
+            memset(dst0_y, 0xFF, LARGEST_INPUT_SIZE * 2);
+            memset(dst1_y, 0xFF, LARGEST_INPUT_SIZE * 2);
+
+            call_ref(dst0_y, src, NULL, NULL, w, table, NULL);
+            call_new(dst1_y, src, NULL, NULL, w, table, NULL);
+
+            if (memcmp(dst0_y, dst1_y, w * 2))
+                fail();
+
+            bench_new(dst1_y, src, NULL, NULL, w, table, NULL);
+        }
+    }
+
+    sws_freeContext(ctx);
+}
+
+/**
+ * Check the optimised RGB24 -> chroma converters (chrToYV12) against C, with
+ * and without 2:1 horizontal chroma subsampling ("_half"). libswscale's own
+ * coefficient table is used (SIMD code may read past the nine C entries) and
+ * ACCURATE_RND | BITEXACT are set because the results are compared exactly.
+ */
+static void check_rgb_to_uv(void)
+{
+    struct SwsContext *ctx;
+#define LARGEST_INPUT_SIZE 4096
+    static const int input_sizes[] = {8, 128, 1280, 1080, LARGEST_INPUT_SIZE};
+
+    declare_func(void, uint8_t *dstU, uint8_t *dstV,
+                 const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
+                 int width, uint32_t *pal, void *opq);
+
+    LOCAL_ALIGNED_32(uint8_t, src, [LARGEST_INPUT_SIZE * 3]);
+    LOCAL_ALIGNED_32(uint8_t, dst0_u, [LARGEST_INPUT_SIZE * 2]); /* 16-bit samples */
+    LOCAL_ALIGNED_32(uint8_t, dst0_v, [LARGEST_INPUT_SIZE * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1_u, [LARGEST_INPUT_SIZE * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1_v, [LARGEST_INPUT_SIZE * 2]);
+
+    randomize_buffers(src, LARGEST_INPUT_SIZE * 3);
+
+    ctx = sws_getContext(LARGEST_INPUT_SIZE, LARGEST_INPUT_SIZE, AV_PIX_FMT_RGB24,
+                         LARGEST_INPUT_SIZE, LARGEST_INPUT_SIZE, AV_PIX_FMT_YUV420P,
+                         SWS_ACCURATE_RND | SWS_BITEXACT, NULL, NULL, NULL);
+    if (!ctx) {
+        fail();
+        return; /* nothing to test, and ctx must not be dereferenced */
+    }
+
+    for (int i = 0; i < 2; i++) {
+        ctx->chrSrcHSubSample = i; /* 1 selects the "_half" variant */
+        ctx->srcFormat = AV_PIX_FMT_RGB24;
+        ctx->dstFormat = i ? AV_PIX_FMT_YUV420P : AV_PIX_FMT_YUV444P;
+        ff_sws_init_scale(ctx);
+
+        for (int j = 0; j < FF_ARRAY_ELEMS(input_sizes); j++) {
+            int w = input_sizes[j] >> i;
+
+            if (check_func(ctx->chrToYV12, "rgb24_to_uv%s_%d", i ? "_half" : "", w)) {
+                uint32_t *table = (uint32_t *)ctx->input_rgb2yuv_table;
+
+                memset(dst0_u, 0xFF, LARGEST_INPUT_SIZE * 2);
+                memset(dst0_v, 0xFF, LARGEST_INPUT_SIZE * 2);
+                memset(dst1_u, 0xFF, LARGEST_INPUT_SIZE * 2);
+                memset(dst1_v, 0xFF, LARGEST_INPUT_SIZE * 2);
+
+                call_ref(dst0_u, dst0_v, NULL, src, src, w, table, NULL);
+                call_new(dst1_u, dst1_v, NULL, src, src, w, table, NULL);
+
+                if (memcmp(dst0_u, dst1_u, w * 2) || memcmp(dst0_v, dst1_v, w * 2))
+                    fail();
+
+                bench_new(dst1_u, dst1_v, NULL, src, src, w, table, NULL);
+            }
+        }
+    }
+
+    sws_freeContext(ctx);
+}
+
 static void check_interleave_bytes(void)
 {
     LOCAL_ALIGNED_16(uint8_t, src0_buf, [MAX_STRIDE*MAX_HEIGHT+1]);
@@ -201,6 +321,12 @@  void checkasm_check_sw_rgb(void)
     check_uyvy_to_422p();
     report("uyvytoyuv422");
 
+    check_rgb_to_y();
+    report("rgb_to_y");
+
+    check_rgb_to_uv();
+    report("rgb_to_uv");
+
     check_interleave_bytes();
     report("interleave_bytes");
 }