diff mbox series

[FFmpeg-devel,5/6] checkasm: add tests for yuv2rgb

Message ID 20240616222849.420361-5-ramiro.polla@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/6] swscale/yuv2rgb: fix conversion for widths not aligned to 8 | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 fail Make fate failed

Commit Message

Ramiro Polla June 16, 2024, 10:28 p.m. UTC
---
 tests/checkasm/Makefile     |   2 +-
 tests/checkasm/checkasm.c   |   1 +
 tests/checkasm/checkasm.h   |   1 +
 tests/checkasm/sw_yuv2rgb.c | 205 ++++++++++++++++++++++++++++++++++++
 tests/fate/checkasm.mak     |   1 +
 5 files changed, 209 insertions(+), 1 deletion(-)
 create mode 100644 tests/checkasm/sw_yuv2rgb.c

Comments

Ramiro Polla June 20, 2024, 2:59 p.m. UTC | #1
On Mon, Jun 17, 2024 at 12:28 AM Ramiro Polla <ramiro.polla@gmail.com> wrote:
>
> ---
>  tests/checkasm/Makefile     |   2 +-
>  tests/checkasm/checkasm.c   |   1 +
>  tests/checkasm/checkasm.h   |   1 +
>  tests/checkasm/sw_yuv2rgb.c | 205 ++++++++++++++++++++++++++++++++++++
>  tests/fate/checkasm.mak     |   1 +
>  5 files changed, 209 insertions(+), 1 deletion(-)
>  create mode 100644 tests/checkasm/sw_yuv2rgb.c
>
> diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
> index f20732b37a..3a7670e24b 100644
> --- a/tests/checkasm/Makefile
> +++ b/tests/checkasm/Makefile
> @@ -63,7 +63,7 @@ AVFILTEROBJS-$(CONFIG_SOBEL_FILTER)      += vf_convolution.o
>  CHECKASMOBJS-$(CONFIG_AVFILTER) += $(AVFILTEROBJS-yes)
>
>  # swscale tests
> -SWSCALEOBJS                             += sw_gbrp.o sw_range_convert.o sw_rgb.o sw_scale.o
> +SWSCALEOBJS                             += sw_gbrp.o sw_range_convert.o sw_rgb.o sw_scale.o sw_yuv2rgb.o
>
>  CHECKASMOBJS-$(CONFIG_SWSCALE)  += $(SWSCALEOBJS)
>
> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> index 56232ab1e0..d9ac772a08 100644
> --- a/tests/checkasm/checkasm.c
> +++ b/tests/checkasm/checkasm.c
> @@ -254,6 +254,7 @@ static const struct {
>      { "sw_range_convert", checkasm_check_sw_range_convert },
>      { "sw_rgb", checkasm_check_sw_rgb },
>      { "sw_scale", checkasm_check_sw_scale },
> +    { "sw_yuv2rgb", checkasm_check_sw_yuv2rgb },
>  #endif
>  #if CONFIG_AVUTIL
>          { "fixed_dsp", checkasm_check_fixed_dsp },
> diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
> index e544007b67..4d5f3e387e 100644
> --- a/tests/checkasm/checkasm.h
> +++ b/tests/checkasm/checkasm.h
> @@ -122,6 +122,7 @@ void checkasm_check_sw_gbrp(void);
>  void checkasm_check_sw_range_convert(void);
>  void checkasm_check_sw_rgb(void);
>  void checkasm_check_sw_scale(void);
> +void checkasm_check_sw_yuv2rgb(void);
>  void checkasm_check_takdsp(void);
>  void checkasm_check_utvideodsp(void);
>  void checkasm_check_v210dec(void);
> diff --git a/tests/checkasm/sw_yuv2rgb.c b/tests/checkasm/sw_yuv2rgb.c
> new file mode 100644
> index 0000000000..fbe01a7788
> --- /dev/null
> +++ b/tests/checkasm/sw_yuv2rgb.c
> @@ -0,0 +1,205 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> + */
> +
> +#include <string.h>
> +
> +#include "libavutil/common.h"
> +#include "libavutil/intreadwrite.h"
> +#include "libavutil/mem_internal.h"
> +#include "libavutil/pixdesc.h"
> +
> +#include "libswscale/swscale.h"
> +#include "libswscale/swscale_internal.h"
> +
> +#include "checkasm.h"
> +
> +#define randomize_buffers(buf, size)      \
> +    do {                                  \
> +        for (int j = 0; j < size; j += 4) \
> +            AV_WN32(buf + j, rnd());      \
> +    } while (0)
> +
> +static const int dst_fmts[] = {
> +//     AV_PIX_FMT_BGR48BE,
> +//     AV_PIX_FMT_BGR48LE,
> +//     AV_PIX_FMT_RGB48BE,
> +//     AV_PIX_FMT_RGB48LE,
> +    AV_PIX_FMT_ARGB,
> +    AV_PIX_FMT_ABGR,
> +    AV_PIX_FMT_RGBA,
> +    AV_PIX_FMT_BGRA,
> +    AV_PIX_FMT_RGB24,
> +    AV_PIX_FMT_BGR24,
> +    AV_PIX_FMT_RGB565,
> +    AV_PIX_FMT_BGR565,
> +    AV_PIX_FMT_RGB555,
> +    AV_PIX_FMT_BGR555,
> +//     AV_PIX_FMT_RGB444,
> +//     AV_PIX_FMT_BGR444,
> +//     AV_PIX_FMT_RGB8,
> +//     AV_PIX_FMT_BGR8,
> +//     AV_PIX_FMT_RGB4,
> +//     AV_PIX_FMT_BGR4,
> +//     AV_PIX_FMT_RGB4_BYTE,
> +//     AV_PIX_FMT_BGR4_BYTE,
> +//     AV_PIX_FMT_MONOBLACK,
> +};
> +
> +static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
> +{
> +    for (size_t i = 0; i < n; i++) {
> +        if (abs(ref[i] - test[i]) > accuracy)
> +            return 1;
> +    }
> +    return 0;
> +}
> +
> +static int cmp_555_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
> +{
> +    const uint16_t *ref16  = (const uint16_t *) ref;
> +    const uint16_t *test16 = (const uint16_t *) test;
> +    for (size_t i = 0; i < n; i++) {
> +        if (abs(( ref16[i]        & 0x1f) - ( test16[i]        & 0x1f)) > accuracy)
> +            return 1;
> +        if (abs(((ref16[i] >>  5) & 0x1f) - ((test16[i] >>  5) & 0x1f)) > accuracy)
> +            return 1;
> +        if (abs(((ref16[i] >> 10) & 0x1f) - ((test16[i] >> 10) & 0x1f)) > accuracy)
> +            return 1;
> +    }
> +    return 0;
> +}
> +
> +static int cmp_565_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
> +{
> +    const uint16_t *ref16  = (const uint16_t *) ref;
> +    const uint16_t *test16 = (const uint16_t *) test;
> +    for (size_t i = 0; i < n; i++) {
> +        if (abs(( ref16[i]        & 0x1f) - ( test16[i]        & 0x1f)) > accuracy)
> +            return 1;
> +        if (abs(((ref16[i] >>  5) & 0x3f) - ((test16[i] >>  5) & 0x3f)) > accuracy)
> +            return 1;
> +        if (abs(((ref16[i] >> 11) & 0x1f) - ((test16[i] >> 11) & 0x1f)) > accuracy)
> +            return 1;
> +    }
> +    return 0;
> +}
> +
> +static void check_yuv2rgb(int src_pix_fmt)
> +{
> +    const AVPixFmtDescriptor *src_desc = av_pix_fmt_desc_get(src_pix_fmt);
> +#define MAX_LINE_SIZE 1920
> +    static const int input_sizes[] = {8, 128, 1080, MAX_LINE_SIZE};
> +
> +    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT,
> +                      int, SwsContext *c, const uint8_t *src[],
> +                           int srcStride[], int srcSliceY, int srcSliceH,
> +                           uint8_t *dst[], int dstStride[]);
> +
> +    LOCAL_ALIGNED_8(uint8_t, src_y, [MAX_LINE_SIZE * 2]);
> +    LOCAL_ALIGNED_8(uint8_t, src_u, [MAX_LINE_SIZE]);
> +    LOCAL_ALIGNED_8(uint8_t, src_v, [MAX_LINE_SIZE]);
> +    LOCAL_ALIGNED_8(uint8_t, src_a, [MAX_LINE_SIZE * 2]);
> +    const uint8_t *src[4] = { src_y, src_u, src_v, src_a };
> +
> +    LOCAL_ALIGNED_8(uint8_t, dst0_, [2 * MAX_LINE_SIZE * 6]);
> +    uint8_t *dst0[4] = { dst0_ };
> +    uint8_t *lines0[2] = { dst0_, dst0_ + MAX_LINE_SIZE * 6 };
> +
> +    LOCAL_ALIGNED_8(uint8_t, dst1_, [2 * MAX_LINE_SIZE * 6]);
> +    uint8_t *dst1[4] = { dst1_ };
> +    uint8_t *lines1[2] = { dst1_, dst1_ + MAX_LINE_SIZE * 6 };
> +
> +    randomize_buffers(src_y, MAX_LINE_SIZE * 2);
> +    randomize_buffers(src_u, MAX_LINE_SIZE);
> +    randomize_buffers(src_v, MAX_LINE_SIZE);
> +    randomize_buffers(src_a, MAX_LINE_SIZE * 2);
> +
> +    for (int dfi = 0; dfi < FF_ARRAY_ELEMS(dst_fmts); dfi++) {
> +        int dst_pix_fmt = dst_fmts[dfi];
> +        const AVPixFmtDescriptor *dst_desc = av_pix_fmt_desc_get(dst_pix_fmt);
> +        int sample_size = av_get_padded_bits_per_pixel(dst_desc) >> 3;
> +        for (int isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++) {
> +            struct SwsContext *ctx;
> +            int width = input_sizes[isi];
> +            int srcSliceY = 0;
> +            int srcSliceH = 2;
> +            int srcStride[4] = {
> +                width,
> +                width >> src_desc->log2_chroma_w,
> +                width >> src_desc->log2_chroma_w,
> +                width,
> +            };
> +            int dstStride[4] = { MAX_LINE_SIZE * 6 };
> +
> +            ctx = sws_getContext(width, srcSliceH, src_pix_fmt,
> +                                 width, srcSliceH, dst_pix_fmt,
> +                                 0, NULL, NULL, NULL);

checkasm gets a little spammy with this commit by printing "No
accelerated colorspace conversion found from %s to %s.\n" from
libswscale/yuv2rgb.c multiple times. Any suggestions? Perhaps
downgrading that message to AV_LOG_VERBOSE, or raising loglevel around
the call to sws_getContext()?

> +            if (!ctx)
> +                fail();
> +
> +            if (check_func(ctx->convert_unscaled, "%s_%s_%d", src_desc->name, dst_desc->name, width)) {
> +                memset(dst0_, 0xFF, 2 * MAX_LINE_SIZE * 6);
> +                memset(dst1_, 0xFF, 2 * MAX_LINE_SIZE * 6);
> +
> +                call_ref(ctx, src, srcStride, srcSliceY,
> +                         srcSliceH, dst0, dstStride);
> +                call_new(ctx, src, srcStride, srcSliceY,
> +                         srcSliceH, dst1, dstStride);
> +
> +                if (dst_pix_fmt == AV_PIX_FMT_ARGB  ||
> +                    dst_pix_fmt == AV_PIX_FMT_ABGR  ||
> +                    dst_pix_fmt == AV_PIX_FMT_RGBA  ||
> +                    dst_pix_fmt == AV_PIX_FMT_BGRA  ||
> +                    dst_pix_fmt == AV_PIX_FMT_RGB24 ||
> +                    dst_pix_fmt == AV_PIX_FMT_BGR24) {
> +                    if (cmp_off_by_n(lines0[0], lines1[0], width * sample_size, 3) ||
> +                        cmp_off_by_n(lines0[1], lines1[1], width * sample_size, 3))
> +                        fail();
> +                } else if (dst_pix_fmt == AV_PIX_FMT_RGB565 ||
> +                           dst_pix_fmt == AV_PIX_FMT_BGR565) {
> +                    if (cmp_565_by_n(lines0[0], lines1[0], width, 2) ||
> +                        cmp_565_by_n(lines0[1], lines1[1], width, 2))
> +                        fail();
> +                } else if (dst_pix_fmt == AV_PIX_FMT_RGB555 ||
> +                           dst_pix_fmt == AV_PIX_FMT_BGR555) {
> +                    if (cmp_555_by_n(lines0[0], lines1[0], width, 2) ||
> +                        cmp_555_by_n(lines0[1], lines1[1], width, 2))
> +                        fail();
> +                } else {
> +                    fail();
> +                }
> +
> +                bench_new(ctx, src, srcStride, srcSliceY,
> +                          srcSliceH, dst0, dstStride);
> +            }
> +            sws_freeContext(ctx);
> +        }
> +    }
> +}
> +
> +#undef MAX_LINE_SIZE
> +
> +void checkasm_check_sw_yuv2rgb(void)
> +{
> +    check_yuv2rgb(AV_PIX_FMT_YUV420P);
> +    report("yuv420p");
> +    check_yuv2rgb(AV_PIX_FMT_YUV422P);
> +    report("yuv422p");
> +    check_yuv2rgb(AV_PIX_FMT_YUVA420P);
> +    report("yuva420p");
> +}
> diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
> index 0ed2ea5be6..49832b09bf 100644
> --- a/tests/fate/checkasm.mak
> +++ b/tests/fate/checkasm.mak
> @@ -44,6 +44,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp                                 \
>                  fate-checkasm-sw_range_convert                          \
>                  fate-checkasm-sw_rgb                                    \
>                  fate-checkasm-sw_scale                                  \
> +                fate-checkasm-sw_yuv2rgb                                \
>                  fate-checkasm-takdsp                                    \
>                  fate-checkasm-utvideodsp                                \
>                  fate-checkasm-v210dec                                   \
> --
> 2.30.2
>
Ramiro Polla June 25, 2024, 9:19 p.m. UTC | #2
On Thu, Jun 20, 2024 at 4:59 PM Ramiro Polla <ramiro.polla@gmail.com> wrote:
> checkasm gets a little spammy with this commit by printing "No
> accelerated colorspace conversion found from %s to %s.\n" from
> libswscale/yuv2rgb.c multiple times. Any suggestions? Perhaps
> downgrading that message to AV_LOG_VERBOSE, or raising loglevel around
> the call to sws_getContext()?

New patch attached that overrides the log level to prevent spamming.
Ramiro Polla June 27, 2024, 2:02 p.m. UTC | #3
On Tue, Jun 25, 2024 at 11:19 PM Ramiro Polla <ramiro.polla@gmail.com> wrote:
> On Thu, Jun 20, 2024 at 4:59 PM Ramiro Polla <ramiro.polla@gmail.com> wrote:
> > checkasm gets a little spammy with this commit by printing "No
> > accelerated colorspace conversion found from %s to %s.\n" from
> > libswscale/yuv2rgb.c multiple times. Any suggestions? Perhaps
> > downgrading that message to AV_LOG_VERBOSE, or raising loglevel around
> > the call to sws_getContext()?
>
> New patch attached that overrides the log level to prevent spamming.

I'll apply this tomorrow if there are no comments.
Sean McGovern June 30, 2024, 12:33 a.m. UTC | #4
Hi Ramiro,


On Thu, Jun 27, 2024, 10:37 Ramiro Polla <ramiro.polla@gmail.com> wrote:

> On Tue, Jun 25, 2024 at 11:19 PM Ramiro Polla <ramiro.polla@gmail.com>
> wrote:
> > On Thu, Jun 20, 2024 at 4:59 PM Ramiro Polla <ramiro.polla@gmail.com>
> wrote:
> > > checkasm gets a little spammy with this commit by printing "No
> > > accelerated colorspace conversion found from %s to %s.\n" from
> > > libswscale/yuv2rgb.c multiple times. Any suggestions? Perhaps
> > > downgrading that message to AV_LOG_VERBOSE, or raising loglevel around
> > > the call to sws_getContext()?
> >
> > New patch attached that overrides the log level to prevent spamming.
>
> I'll apply this tomorrow if there are no comments.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>

It looks like this exposed an issue in the AltiVec acceleration in
libswscale.

libswscale/ppc/yuv2rgb.c looks a bit too complex for me to investigate. I'd
be curious to see if this fails on a 64-bit POWER machine. I'll try to test
it later.

-- Sean McGovern

>
diff mbox series

Patch

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index f20732b37a..3a7670e24b 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -63,7 +63,7 @@  AVFILTEROBJS-$(CONFIG_SOBEL_FILTER)      += vf_convolution.o
 CHECKASMOBJS-$(CONFIG_AVFILTER) += $(AVFILTEROBJS-yes)
 
 # swscale tests
-SWSCALEOBJS                             += sw_gbrp.o sw_range_convert.o sw_rgb.o sw_scale.o
+SWSCALEOBJS                             += sw_gbrp.o sw_range_convert.o sw_rgb.o sw_scale.o sw_yuv2rgb.o
 
 CHECKASMOBJS-$(CONFIG_SWSCALE)  += $(SWSCALEOBJS)
 
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 56232ab1e0..d9ac772a08 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -254,6 +254,7 @@  static const struct {
     { "sw_range_convert", checkasm_check_sw_range_convert },
     { "sw_rgb", checkasm_check_sw_rgb },
     { "sw_scale", checkasm_check_sw_scale },
+    { "sw_yuv2rgb", checkasm_check_sw_yuv2rgb },
 #endif
 #if CONFIG_AVUTIL
         { "fixed_dsp", checkasm_check_fixed_dsp },
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index e544007b67..4d5f3e387e 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -122,6 +122,7 @@  void checkasm_check_sw_gbrp(void);
 void checkasm_check_sw_range_convert(void);
 void checkasm_check_sw_rgb(void);
 void checkasm_check_sw_scale(void);
+void checkasm_check_sw_yuv2rgb(void);
 void checkasm_check_takdsp(void);
 void checkasm_check_utvideodsp(void);
 void checkasm_check_v210dec(void);
diff --git a/tests/checkasm/sw_yuv2rgb.c b/tests/checkasm/sw_yuv2rgb.c
new file mode 100644
index 0000000000..fbe01a7788
--- /dev/null
+++ b/tests/checkasm/sw_yuv2rgb.c
@@ -0,0 +1,237 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/log.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/pixdesc.h"
+
+#include "libswscale/swscale.h"
+#include "libswscale/swscale_internal.h"
+
+#include "checkasm.h"
+
+/* Fill buf with random data, writing 32 bits at a time at every
+ * offset j = 0, 4, 8, ... below size. */
+#define randomize_buffers(buf, size)        \
+    do {                                    \
+        for (int j = 0; j < (size); j += 4) \
+            AV_WN32((buf) + j, rnd());      \
+    } while (0)
+
+/* Destination pixel formats exercised by check_yuv2rgb(). The commented-out
+ * formats have no comparison branch there yet. */
+static const int dst_fmts[] = {
+//     AV_PIX_FMT_BGR48BE,
+//     AV_PIX_FMT_BGR48LE,
+//     AV_PIX_FMT_RGB48BE,
+//     AV_PIX_FMT_RGB48LE,
+    AV_PIX_FMT_ARGB,
+    AV_PIX_FMT_ABGR,
+    AV_PIX_FMT_RGBA,
+    AV_PIX_FMT_BGRA,
+    AV_PIX_FMT_RGB24,
+    AV_PIX_FMT_BGR24,
+    AV_PIX_FMT_RGB565,
+    AV_PIX_FMT_BGR565,
+    AV_PIX_FMT_RGB555,
+    AV_PIX_FMT_BGR555,
+//     AV_PIX_FMT_RGB444,
+//     AV_PIX_FMT_BGR444,
+//     AV_PIX_FMT_RGB8,
+//     AV_PIX_FMT_BGR8,
+//     AV_PIX_FMT_RGB4,
+//     AV_PIX_FMT_BGR4,
+//     AV_PIX_FMT_RGB4_BYTE,
+//     AV_PIX_FMT_BGR4_BYTE,
+//     AV_PIX_FMT_MONOBLACK,
+};
+
+/* Compare n bytes; return 1 if any pair differs by more than accuracy,
+ * 0 otherwise. */
+static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
+{
+    for (size_t i = 0; i < n; i++) {
+        if (abs(ref[i] - test[i]) > accuracy)
+            return 1;
+    }
+    return 0;
+}
+
+/* Compare n 16-bit pixels as three 5-bit fields (bits 0-4, 5-9, 10-14);
+ * bit 15 is ignored. Return 1 if any field differs by more than accuracy,
+ * 0 otherwise. */
+static int cmp_555_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
+{
+    const uint16_t *ref16  = (const uint16_t *) ref;
+    const uint16_t *test16 = (const uint16_t *) test;
+    for (size_t i = 0; i < n; i++) {
+        if (abs(( ref16[i]        & 0x1f) - ( test16[i]        & 0x1f)) > accuracy)
+            return 1;
+        if (abs(((ref16[i] >>  5) & 0x1f) - ((test16[i] >>  5) & 0x1f)) > accuracy)
+            return 1;
+        if (abs(((ref16[i] >> 10) & 0x1f) - ((test16[i] >> 10) & 0x1f)) > accuracy)
+            return 1;
+    }
+    return 0;
+}
+
+/* Compare n 16-bit pixels as 5-, 6- and 5-bit fields (bits 0-4, 5-10,
+ * 11-15). Return 1 if any field differs by more than accuracy,
+ * 0 otherwise. */
+static int cmp_565_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
+{
+    const uint16_t *ref16  = (const uint16_t *) ref;
+    const uint16_t *test16 = (const uint16_t *) test;
+    for (size_t i = 0; i < n; i++) {
+        if (abs(( ref16[i]        & 0x1f) - ( test16[i]        & 0x1f)) > accuracy)
+            return 1;
+        if (abs(((ref16[i] >>  5) & 0x3f) - ((test16[i] >>  5) & 0x3f)) > accuracy)
+            return 1;
+        if (abs(((ref16[i] >> 11) & 0x1f) - ((test16[i] >> 11) & 0x1f)) > accuracy)
+            return 1;
+    }
+    return 0;
+}
+
+/*
+ * For every format in dst_fmts and every width in input_sizes, create an
+ * unscaled src_pix_fmt -> dst conversion context and compare two output
+ * lines of the reference and the tested convert_unscaled() implementation
+ * within a per-component tolerance.
+ */
+static void check_yuv2rgb(int src_pix_fmt)
+{
+    const AVPixFmtDescriptor *src_desc = av_pix_fmt_desc_get(src_pix_fmt);
+#define MAX_LINE_SIZE 1920
+    static const int input_sizes[] = {8, 128, 1080, MAX_LINE_SIZE};
+
+    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT,
+                      int, SwsContext *c, const uint8_t *src[],
+                           int srcStride[], int srcSliceY, int srcSliceH,
+                           uint8_t *dst[], int dstStride[]);
+
+    LOCAL_ALIGNED_8(uint8_t, src_y, [MAX_LINE_SIZE * 2]);
+    LOCAL_ALIGNED_8(uint8_t, src_u, [MAX_LINE_SIZE]);
+    LOCAL_ALIGNED_8(uint8_t, src_v, [MAX_LINE_SIZE]);
+    LOCAL_ALIGNED_8(uint8_t, src_a, [MAX_LINE_SIZE * 2]);
+    const uint8_t *src[4] = { src_y, src_u, src_v, src_a };
+
+    /* Output buffers hold two lines of MAX_LINE_SIZE * 6 bytes each. */
+    LOCAL_ALIGNED_8(uint8_t, dst0_, [2 * MAX_LINE_SIZE * 6]);
+    uint8_t *dst0[4] = { dst0_ };
+    uint8_t *lines0[2] = { dst0_, dst0_ + MAX_LINE_SIZE * 6 };
+
+    LOCAL_ALIGNED_8(uint8_t, dst1_, [2 * MAX_LINE_SIZE * 6]);
+    uint8_t *dst1[4] = { dst1_ };
+    uint8_t *lines1[2] = { dst1_, dst1_ + MAX_LINE_SIZE * 6 };
+
+    randomize_buffers(src_y, MAX_LINE_SIZE * 2);
+    randomize_buffers(src_u, MAX_LINE_SIZE);
+    randomize_buffers(src_v, MAX_LINE_SIZE);
+    randomize_buffers(src_a, MAX_LINE_SIZE * 2);
+
+    for (int dfi = 0; dfi < FF_ARRAY_ELEMS(dst_fmts); dfi++) {
+        int dst_pix_fmt = dst_fmts[dfi];
+        const AVPixFmtDescriptor *dst_desc = av_pix_fmt_desc_get(dst_pix_fmt);
+        int sample_size = av_get_padded_bits_per_pixel(dst_desc) >> 3;
+        for (int isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++) {
+            struct SwsContext *ctx;
+            int log_level;
+            int width = input_sizes[isi];
+            int srcSliceY = 0;
+            int srcSliceH = 2;
+            int srcStride[4] = {
+                width,
+                width >> src_desc->log2_chroma_w,
+                width >> src_desc->log2_chroma_w,
+                width,
+            };
+            int dstStride[4] = { MAX_LINE_SIZE * 6 };
+
+            /* Restrict logging to errors around sws_getContext() so that
+             * checkasm is not spammed with "No accelerated colorspace
+             * conversion found from %s to %s." for every combination. */
+            log_level = av_log_get_level();
+            av_log_set_level(AV_LOG_ERROR);
+            ctx = sws_getContext(width, srcSliceH, src_pix_fmt,
+                                 width, srcSliceH, dst_pix_fmt,
+                                 0, NULL, NULL, NULL);
+            av_log_set_level(log_level);
+            if (!ctx) {
+                /* fail() only records the failure and returns; skip this
+                 * combination instead of dereferencing NULL below. */
+                fail();
+                continue;
+            }
+
+            if (check_func(ctx->convert_unscaled, "%s_%s_%d", src_desc->name, dst_desc->name, width)) {
+                memset(dst0_, 0xFF, 2 * MAX_LINE_SIZE * 6);
+                memset(dst1_, 0xFF, 2 * MAX_LINE_SIZE * 6);
+
+                call_ref(ctx, src, srcStride, srcSliceY,
+                         srcSliceH, dst0, dstStride);
+                call_new(ctx, src, srcStride, srcSliceY,
+                         srcSliceH, dst1, dstStride);
+
+                if (dst_pix_fmt == AV_PIX_FMT_ARGB  ||
+                    dst_pix_fmt == AV_PIX_FMT_ABGR  ||
+                    dst_pix_fmt == AV_PIX_FMT_RGBA  ||
+                    dst_pix_fmt == AV_PIX_FMT_BGRA  ||
+                    dst_pix_fmt == AV_PIX_FMT_RGB24 ||
+                    dst_pix_fmt == AV_PIX_FMT_BGR24) {
+                    if (cmp_off_by_n(lines0[0], lines1[0], width * sample_size, 3) ||
+                        cmp_off_by_n(lines0[1], lines1[1], width * sample_size, 3))
+                        fail();
+                } else if (dst_pix_fmt == AV_PIX_FMT_RGB565 ||
+                           dst_pix_fmt == AV_PIX_FMT_BGR565) {
+                    if (cmp_565_by_n(lines0[0], lines1[0], width, 2) ||
+                        cmp_565_by_n(lines0[1], lines1[1], width, 2))
+                        fail();
+                } else if (dst_pix_fmt == AV_PIX_FMT_RGB555 ||
+                           dst_pix_fmt == AV_PIX_FMT_BGR555) {
+                    if (cmp_555_by_n(lines0[0], lines1[0], width, 2) ||
+                        cmp_555_by_n(lines0[1], lines1[1], width, 2))
+                        fail();
+                } else {
+                    fail();
+                }
+
+                bench_new(ctx, src, srcStride, srcSliceY,
+                          srcSliceH, dst0, dstStride);
+            }
+            sws_freeContext(ctx);
+        }
+    }
+}
+
+#undef MAX_LINE_SIZE
+
+/* Run check_yuv2rgb() for each tested source format and report the results. */
+void checkasm_check_sw_yuv2rgb(void)
+{
+    check_yuv2rgb(AV_PIX_FMT_YUV420P);
+    report("yuv420p");
+    check_yuv2rgb(AV_PIX_FMT_YUV422P);
+    report("yuv422p");
+    check_yuv2rgb(AV_PIX_FMT_YUVA420P);
+    report("yuva420p");
+}
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index 0ed2ea5be6..49832b09bf 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -44,6 +44,7 @@  FATE_CHECKASM = fate-checkasm-aacencdsp                                 \
                 fate-checkasm-sw_range_convert                          \
                 fate-checkasm-sw_rgb                                    \
                 fate-checkasm-sw_scale                                  \
+                fate-checkasm-sw_yuv2rgb                                \
                 fate-checkasm-takdsp                                    \
                 fate-checkasm-utvideodsp                                \
                 fate-checkasm-v210dec                                   \