diff mbox series

[FFmpeg-devel,WIP,1/2] checkasm/sw_rgb: test rgb24 to yuv

Message ID tencent_AF908CE6D9E7EBEA95131D1C438BA6263A06@qq.com
State New
Headers show
Series [FFmpeg-devel,WIP,1/2] checkasm/sw_rgb: test rgb24 to yuv | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 fail Make fate failed

Commit Message

Zhao Zhili June 3, 2024, 1:02 p.m. UTC
From: Zhao Zhili <zhilizhao@tencent.com>

---
The test still fails on x86, but succeeds on arm64 and loongarch.

I tried calling rgb24ToY_c and ff_rgb24ToY_avx directly and comparing
the results; they don't match.

https://github.com/quink-black/FFmpeg/actions/runs/9347753270
https://patchwork.ffmpeg.org/project/ffmpeg/patch/tencent_90E6136AF5D6E919AEA9254393048855B305@qq.com/

 tests/checkasm/sw_rgb.c | 123 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)

Comments

James Almer June 3, 2024, 6:47 p.m. UTC | #1
On 6/3/2024 10:02 AM, Zhao Zhili wrote:
> From: Zhao Zhili <zhilizhao@tencent.com>
> 
> ---
> The test still fails on x86, but succeeds on arm64 and loongarch.
> 
> I tried calling rgb24ToY_c and ff_rgb24ToY_avx directly and comparing
> the results; they don't match.

You're using an incomplete table. See below.

> 
> https://github.com/quink-black/FFmpeg/actions/runs/9347753270
> https://patchwork.ffmpeg.org/project/ffmpeg/patch/tencent_90E6136AF5D6E919AEA9254393048855B305@qq.com/
> 
>   tests/checkasm/sw_rgb.c | 123 ++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 123 insertions(+)
> 
> diff --git a/tests/checkasm/sw_rgb.c b/tests/checkasm/sw_rgb.c
> index 7cd815e5be..18fd4255a6 100644
> --- a/tests/checkasm/sw_rgb.c
> +++ b/tests/checkasm/sw_rgb.c
> @@ -24,6 +24,8 @@
>   #include "libavutil/mem_internal.h"
>   
>   #include "libswscale/rgb2rgb.h"
> +#include "libswscale/swscale.h"
> +#include "libswscale/swscale_internal.h"
>   
>   #include "checkasm.h"
>   
> @@ -41,6 +43,7 @@ static const struct {uint8_t w, h, s;} planes[] = {
>   
>   #define MAX_STRIDE 128
>   #define MAX_HEIGHT 128
> +#define LARGEST_INPUT_SIZE 4096
>   
>   static void check_shuffle_bytes(void * func, const char * report)
>   {
> @@ -111,6 +114,120 @@ static void check_uyvy_to_422p(void)
>       }
>   }
>   
> +static void check_rgb_to_y(void)
> +{
> +    struct SwsContext *ctx;
> +    static const int input_sizes[] = {8, 128, 1280, 1080, LARGEST_INPUT_SIZE};
> +    int32_t rgb2yuv[9] = {0};
> +
> +    declare_func(void, uint8_t *dst, const uint8_t *src,
> +                 const uint8_t *unused1, const uint8_t *unused2, int width,
> +                 uint32_t *rgb2yuv, void *opq);
> +
> +    LOCAL_ALIGNED_32(uint8_t, src, [LARGEST_INPUT_SIZE * 3]);
> +    LOCAL_ALIGNED_32(uint8_t, dst0_y, [LARGEST_INPUT_SIZE * 2]);
> +    LOCAL_ALIGNED_32(uint8_t, dst1_y, [LARGEST_INPUT_SIZE * 2]);
> +
> +    randomize_buffers(src, LARGEST_INPUT_SIZE * 3);
> +    rgb2yuv[BY_IDX] =  ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +    rgb2yuv[BV_IDX] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +    rgb2yuv[BU_IDX] =  ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +    rgb2yuv[GY_IDX] =  ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +    rgb2yuv[GV_IDX] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +    rgb2yuv[GU_IDX] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +    rgb2yuv[RY_IDX] =  ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +    rgb2yuv[RV_IDX] =  ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +    rgb2yuv[RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +
> +    ctx = sws_alloc_context();
> +    if (sws_init_context(ctx, NULL, NULL) < 0)
> +        fail();

Allocate and initialize this once in checkasm_check_sw_rgb() and reuse it.

> +
> +    for (int i = 0; i < FF_ARRAY_ELEMS(input_sizes); i++) {
> +        int w = input_sizes[i];
> +
> +        ctx->srcFormat = AV_PIX_FMT_RGB24;
> +        ctx->dstFormat = AV_PIX_FMT_YUV420P;
> +
> +        ff_sws_init_scale(ctx);
> +        if (check_func(ctx->lumToYV12, "rgb24_to_y_%d", w)) {
> +            memset(dst0_y, 0xFF, LARGEST_INPUT_SIZE * 2);
> +            memset(dst1_y, 0xFF, LARGEST_INPUT_SIZE * 2);
> +
> +            call_ref(dst0_y, src, NULL, NULL, w, rgb2yuv, NULL);

Don't use a custom-filled table, especially one that's smaller than needed.
Use ctx->input_rgb2yuv_table directly here and everywhere else. It's
filled with the values that the C version and any SIMD version may need.

With that, the tests pass on x86.

> +            call_new(dst1_y, src, NULL, NULL, w, rgb2yuv, NULL);
> +
> +            if (memcmp(dst0_y, dst1_y, w * 2))
> +                fail();
> +
> +            bench_new(dst1_y, src, NULL, NULL, w, rgb2yuv, NULL);
> +        }
> +    }
> +
> +    sws_freeContext(ctx);
> +}
> +
> +static void check_rgb_to_uv(void)
> +{
> +    struct SwsContext *ctx;
> +    static const int input_sizes[] = {8, 128, 1280, 1080, LARGEST_INPUT_SIZE};
> +    int32_t rgb2yuv[9] = {0};
> +
> +    declare_func(void, uint8_t *dstU, uint8_t *dstV,
> +                 const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
> +                 int width, uint32_t *pal, void *opq);
> +
> +    LOCAL_ALIGNED_32(uint8_t, src, [LARGEST_INPUT_SIZE * 3]);
> +    LOCAL_ALIGNED_32(uint8_t, dst0_u, [LARGEST_INPUT_SIZE * 2]);
> +    LOCAL_ALIGNED_32(uint8_t, dst0_v, [LARGEST_INPUT_SIZE * 2]);
> +    LOCAL_ALIGNED_32(uint8_t, dst1_u, [LARGEST_INPUT_SIZE * 2]);
> +    LOCAL_ALIGNED_32(uint8_t, dst1_v, [LARGEST_INPUT_SIZE * 2]);
> +
> +    randomize_buffers(src, LARGEST_INPUT_SIZE * 3);
> +    rgb2yuv[BY_IDX] =  ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +    rgb2yuv[BV_IDX] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +    rgb2yuv[BU_IDX] =  ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +    rgb2yuv[GY_IDX] =  ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +    rgb2yuv[GV_IDX] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +    rgb2yuv[GU_IDX] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +    rgb2yuv[RY_IDX] =  ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +    rgb2yuv[RV_IDX] =  ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +    rgb2yuv[RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +
> +    ctx = sws_alloc_context();
> +    if (sws_init_context(ctx, NULL, NULL) < 0)
> +        fail();
> +
> +    for (int i = 0; i < 2; i++) {
> +        for (int j = 0; j < FF_ARRAY_ELEMS(input_sizes); j++) {
> +            int w = input_sizes[j] >> i;
> +
> +            ctx->chrSrcHSubSample = i ? 1 : 0;
> +            ctx->srcFormat = AV_PIX_FMT_RGB24;
> +            ctx->dstFormat = i ? AV_PIX_FMT_YUV420P : AV_PIX_FMT_YUV444P;
> +
> +            ff_sws_init_scale(ctx);
> +
> +            if (check_func(ctx->chrToYV12, "rgb24_to_uv%s_%d", i ? "_half" : "", w)) {
> +                memset(dst0_u, 0xFF, LARGEST_INPUT_SIZE * 2);
> +                memset(dst0_v, 0xFF, LARGEST_INPUT_SIZE * 2);
> +                memset(dst1_u, 0xFF, LARGEST_INPUT_SIZE * 2);
> +                memset(dst1_v, 0xFF, LARGEST_INPUT_SIZE * 2);
> +
> +                call_ref(dst0_u, dst0_v, NULL, src, src, w, rgb2yuv, NULL);
> +                call_new(dst1_u, dst1_v, NULL, src, src, w, rgb2yuv, NULL);
> +
> +                if (memcmp(dst0_u, dst1_u, w * 2) || memcmp(dst0_v, dst1_v, w * 2))
> +                    fail();
> +
> +                bench_new(dst1_u, dst1_v, NULL, src, src, w, rgb2yuv, NULL);
> +            }
> +        }
> +    }
> +
> +    sws_freeContext(ctx);
> +}
> +
>   static void check_interleave_bytes(void)
>   {
>       LOCAL_ALIGNED_16(uint8_t, src0_buf, [MAX_STRIDE*MAX_HEIGHT+1]);
> @@ -201,6 +318,12 @@ void checkasm_check_sw_rgb(void)
>       check_uyvy_to_422p();
>       report("uyvytoyuv422");
>   
> +    check_rgb_to_y();
> +    report("rgb_to_y");
> +
> +    check_rgb_to_uv();
> +    report("rgb_to_uv");
> +
>       check_interleave_bytes();
>       report("interleave_bytes");
>   }
diff mbox series

Patch

diff --git a/tests/checkasm/sw_rgb.c b/tests/checkasm/sw_rgb.c
index 7cd815e5be..18fd4255a6 100644
--- a/tests/checkasm/sw_rgb.c
+++ b/tests/checkasm/sw_rgb.c
@@ -24,6 +24,8 @@ 
 #include "libavutil/mem_internal.h"
 
 #include "libswscale/rgb2rgb.h"
+#include "libswscale/swscale.h"
+#include "libswscale/swscale_internal.h"
 
 #include "checkasm.h"
 
@@ -41,6 +43,7 @@  static const struct {uint8_t w, h, s;} planes[] = {
 
 #define MAX_STRIDE 128
 #define MAX_HEIGHT 128
+#define LARGEST_INPUT_SIZE 4096
 
 static void check_shuffle_bytes(void * func, const char * report)
 {
@@ -111,6 +114,120 @@  static void check_uyvy_to_422p(void)
     }
 }
 
+static void check_rgb_to_y(void)
+{
+    struct SwsContext *ctx;
+    static const int input_sizes[] = {8, 128, 1280, 1080, LARGEST_INPUT_SIZE};
+    int32_t rgb2yuv[9] = {0};
+
+    declare_func(void, uint8_t *dst, const uint8_t *src,
+                 const uint8_t *unused1, const uint8_t *unused2, int width,
+                 uint32_t *rgb2yuv, void *opq);
+
+    LOCAL_ALIGNED_32(uint8_t, src, [LARGEST_INPUT_SIZE * 3]);
+    LOCAL_ALIGNED_32(uint8_t, dst0_y, [LARGEST_INPUT_SIZE * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1_y, [LARGEST_INPUT_SIZE * 2]);
+
+    randomize_buffers(src, LARGEST_INPUT_SIZE * 3);
+    rgb2yuv[BY_IDX] =  ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+    rgb2yuv[BV_IDX] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+    rgb2yuv[BU_IDX] =  ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+    rgb2yuv[GY_IDX] =  ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+    rgb2yuv[GV_IDX] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+    rgb2yuv[GU_IDX] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+    rgb2yuv[RY_IDX] =  ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+    rgb2yuv[RV_IDX] =  ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+    rgb2yuv[RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+
+    ctx = sws_alloc_context();
+    if (sws_init_context(ctx, NULL, NULL) < 0)
+        fail();
+
+    for (int i = 0; i < FF_ARRAY_ELEMS(input_sizes); i++) {
+        int w = input_sizes[i];
+
+        ctx->srcFormat = AV_PIX_FMT_RGB24;
+        ctx->dstFormat = AV_PIX_FMT_YUV420P;
+
+        ff_sws_init_scale(ctx);
+        if (check_func(ctx->lumToYV12, "rgb24_to_y_%d", w)) {
+            memset(dst0_y, 0xFF, LARGEST_INPUT_SIZE * 2);
+            memset(dst1_y, 0xFF, LARGEST_INPUT_SIZE * 2);
+
+            call_ref(dst0_y, src, NULL, NULL, w, rgb2yuv, NULL);
+            call_new(dst1_y, src, NULL, NULL, w, rgb2yuv, NULL);
+
+            if (memcmp(dst0_y, dst1_y, w * 2))
+                fail();
+
+            bench_new(dst1_y, src, NULL, NULL, w, rgb2yuv, NULL);
+        }
+    }
+
+    sws_freeContext(ctx);
+}
+
+static void check_rgb_to_uv(void)
+{
+    struct SwsContext *ctx;
+    static const int input_sizes[] = {8, 128, 1280, 1080, LARGEST_INPUT_SIZE};
+    int32_t rgb2yuv[9] = {0};
+
+    declare_func(void, uint8_t *dstU, uint8_t *dstV,
+                 const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
+                 int width, uint32_t *pal, void *opq);
+
+    LOCAL_ALIGNED_32(uint8_t, src, [LARGEST_INPUT_SIZE * 3]);
+    LOCAL_ALIGNED_32(uint8_t, dst0_u, [LARGEST_INPUT_SIZE * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst0_v, [LARGEST_INPUT_SIZE * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1_u, [LARGEST_INPUT_SIZE * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1_v, [LARGEST_INPUT_SIZE * 2]);
+
+    randomize_buffers(src, LARGEST_INPUT_SIZE * 3);
+    rgb2yuv[BY_IDX] =  ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+    rgb2yuv[BV_IDX] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+    rgb2yuv[BU_IDX] =  ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+    rgb2yuv[GY_IDX] =  ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+    rgb2yuv[GV_IDX] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+    rgb2yuv[GU_IDX] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+    rgb2yuv[RY_IDX] =  ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+    rgb2yuv[RV_IDX] =  ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+    rgb2yuv[RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+
+    ctx = sws_alloc_context();
+    if (sws_init_context(ctx, NULL, NULL) < 0)
+        fail();
+
+    for (int i = 0; i < 2; i++) {
+        for (int j = 0; j < FF_ARRAY_ELEMS(input_sizes); j++) {
+            int w = input_sizes[j] >> i;
+
+            ctx->chrSrcHSubSample = i ? 1 : 0;
+            ctx->srcFormat = AV_PIX_FMT_RGB24;
+            ctx->dstFormat = i ? AV_PIX_FMT_YUV420P : AV_PIX_FMT_YUV444P;
+
+            ff_sws_init_scale(ctx);
+
+            if (check_func(ctx->chrToYV12, "rgb24_to_uv%s_%d", i ? "_half" : "", w)) {
+                memset(dst0_u, 0xFF, LARGEST_INPUT_SIZE * 2);
+                memset(dst0_v, 0xFF, LARGEST_INPUT_SIZE * 2);
+                memset(dst1_u, 0xFF, LARGEST_INPUT_SIZE * 2);
+                memset(dst1_v, 0xFF, LARGEST_INPUT_SIZE * 2);
+
+                call_ref(dst0_u, dst0_v, NULL, src, src, w, rgb2yuv, NULL);
+                call_new(dst1_u, dst1_v, NULL, src, src, w, rgb2yuv, NULL);
+
+                if (memcmp(dst0_u, dst1_u, w * 2) || memcmp(dst0_v, dst1_v, w * 2))
+                    fail();
+
+                bench_new(dst1_u, dst1_v, NULL, src, src, w, rgb2yuv, NULL);
+            }
+        }
+    }
+
+    sws_freeContext(ctx);
+}
+
 static void check_interleave_bytes(void)
 {
     LOCAL_ALIGNED_16(uint8_t, src0_buf, [MAX_STRIDE*MAX_HEIGHT+1]);
@@ -201,6 +318,12 @@  void checkasm_check_sw_rgb(void)
     check_uyvy_to_422p();
     report("uyvytoyuv422");
 
+    check_rgb_to_y();
+    report("rgb_to_y");
+
+    check_rgb_to_uv();
+    report("rgb_to_uv");
+
     check_interleave_bytes();
     report("interleave_bytes");
 }