diff mbox series

[FFmpeg-devel,v1,5/6] swscale: Add unscaled XRGB->YUV420P functions

Message ID 20230820151022.2204421-6-jc@kynesim.co.uk
State New
Headers show
Series swscale: Add dedicated RGB->YUV unscaled functions & aarch64 asm | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

John Cox Aug. 20, 2023, 3:10 p.m. UTC
Add simple C functions for converting XRGB to YUV420P. Same logic as the
RGB24 functions but dropping the A channel.

Signed-off-by: John Cox <jc@kynesim.co.uk>
---
 libswscale/rgb2rgb.c          |  20 +++++++
 libswscale/rgb2rgb.h          |  16 +++++
 libswscale/rgb2rgb_template.c | 106 ++++++++++++++++++++++++++++++++++
 libswscale/swscale_unscaled.c |  89 ++++++++++++++++++++++++++++
 4 files changed, 231 insertions(+)
diff mbox series

Patch

diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index de90e5193f..b976341e70 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -88,6 +88,26 @@  void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst,
                        int width, int height,
                        int lumStride, int chromStride, int srcStride,
                        int32_t *rgb2yuv);
+void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst,
+					  uint8_t *udst, uint8_t *vdst,
+					  int width, int height,
+					  int lumStride, int chromStride, int srcStride,
+					  int32_t *rgb2yuv);
+void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst,
+					  uint8_t *udst, uint8_t *vdst,
+					  int width, int height,
+					  int lumStride, int chromStride, int srcStride,
+					  int32_t *rgb2yuv);
+void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst,
+					  uint8_t *udst, uint8_t *vdst,
+					  int width, int height,
+					  int lumStride, int chromStride, int srcStride,
+					  int32_t *rgb2yuv);
+void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst,
+					  uint8_t *udst, uint8_t *vdst,
+					  int width, int height,
+					  int lumStride, int chromStride, int srcStride,
+					  int32_t *rgb2yuv);
 void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
                  int srcStride, int dstStride);
 void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index f7a76a92ba..0015b1568a 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -135,6 +135,22 @@  extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
                               int width, int height,
                               int lumStride, int chromStride, int srcStride,
                               int32_t *rgb2yuv);
+extern void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                             int width, int height,
+                             int lumStride, int chromStride, int srcStride,
+                             int32_t *rgb2yuv);
+extern void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                             int width, int height,
+                             int lumStride, int chromStride, int srcStride,
+                             int32_t *rgb2yuv);
+extern void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                             int width, int height,
+                             int lumStride, int chromStride, int srcStride,
+                             int32_t *rgb2yuv);
+extern void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                             int width, int height,
+                             int lumStride, int chromStride, int srcStride,
+                             int32_t *rgb2yuv);
 extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
                         int srcStride, int dstStride);
 
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index 5503e58a29..22326807c5 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -742,6 +742,108 @@  void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
     rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb);
 }
 
+static void rgbxtoyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
+                   uint8_t *vdst, int width, int height, int lumStride,
+                   int chromStride, int srcStride, int32_t *rgb2yuv,
+                   const uint8_t x[9])
+{
+    int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]];
+    int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]];
+    int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]];
+    int y;
+    const int chromWidth = width >> 1;
+    // Constants with both rounding and offset
+    const int32_t ky = ((16 << 1) + 1) << (RGB2YUV_SHIFT - 1);
+    const int32_t kc = ((128 << 1) + 1) << (RGB2YUV_SHIFT - 1);
+
+    for (y = 0; y < height; y += 2) {
+        int i;
+        for (i = 0; i < chromWidth; i++) {
+            unsigned int b = src[8 * i + 0];
+            unsigned int g = src[8 * i + 1];
+            unsigned int r = src[8 * i + 2];
+
+            unsigned int Y = (ry * r + gy * g + by * b + ky) >> RGB2YUV_SHIFT;
+            unsigned int V = (rv * r + gv * g + bv * b + kc) >> RGB2YUV_SHIFT;
+            unsigned int U = (ru * r + gu * g + bu * b + kc) >> RGB2YUV_SHIFT;
+
+            udst[i]     = U;
+            vdst[i]     = V;
+            ydst[2 * i] = Y;
+
+            b = src[8 * i + 4];
+            g = src[8 * i + 5];
+            r = src[8 * i + 6];
+
+            Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
+            ydst[2 * i + 1] = Y;
+        }
+        if ((width & 1) != 0) {
+            unsigned int b = src[8 * i + 0];
+            unsigned int g = src[8 * i + 1];
+            unsigned int r = src[8 * i + 2];
+
+            unsigned int Y = (ry * r + gy * g + by * b + ky) >> RGB2YUV_SHIFT;
+            unsigned int V = (rv * r + gv * g + bv * b + kc) >> RGB2YUV_SHIFT;
+            unsigned int U = (ru * r + gu * g + bu * b + kc) >> RGB2YUV_SHIFT;
+
+            udst[i]     = U;
+            vdst[i]     = V;
+            ydst[2 * i] = Y;
+        }
+        ydst += lumStride;
+        src  += srcStride;
+
+        if (y+1 == height)
+            break;
+
+        for (i = 0; i < width; i++) {
+            unsigned int b = src[4 * i + 0];
+            unsigned int g = src[4 * i + 1];
+            unsigned int r = src[4 * i + 2];
+
+            unsigned int Y = (ry * r + gy * g + by * b + ky) >> RGB2YUV_SHIFT;
+
+            ydst[i] = Y;
+        }
+        udst += chromStride;
+        vdst += chromStride;
+        ydst += lumStride;
+        src  += srcStride;
+    }
+}
+
+static void ff_rgbxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
+                   uint8_t *vdst, int width, int height, int lumStride,
+                   int chromStride, int srcStride, int32_t *rgb2yuv)
+{
+    rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb);
+}
+
+static void ff_bgrxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
+                   uint8_t *vdst, int width, int height, int lumStride,
+                   int chromStride, int srcStride, int32_t *rgb2yuv)
+{
+    rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr);
+}
+
+// As the general code does no SIMD-like ops simply adding 1 to the src address
+// will fix the ignored alpha position
+static void ff_xrgbtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
+                   uint8_t *vdst, int width, int height, int lumStride,
+                   int chromStride, int srcStride, int32_t *rgb2yuv)
+{
+    rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb);
+}
+
+static void ff_xbgrtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
+                   uint8_t *vdst, int width, int height, int lumStride,
+                   int chromStride, int srcStride, int32_t *rgb2yuv)
+{
+    rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr);
+}
+
+
 static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
                               uint8_t *dest, int width, int height,
                               int src1Stride, int src2Stride, int dstStride)
@@ -1016,6 +1118,10 @@  static av_cold void rgb2rgb_init_c(void)
     planar2x           = planar2x_c;
     ff_rgb24toyv12     = ff_rgb24toyv12_c;
     ff_bgr24toyv12     = ff_bgr24toyv12_c;
+    ff_rgbxtoyv12      = ff_rgbxtoyv12_c;
+    ff_bgrxtoyv12      = ff_bgrxtoyv12_c;
+    ff_xrgbtoyv12      = ff_xrgbtoyv12_c;
+    ff_xbgrtoyv12      = ff_xbgrtoyv12_c;
     interleaveBytes    = interleaveBytes_c;
     deinterleaveBytes  = deinterleaveBytes_c;
     vu9_to_vu12        = vu9_to_vu12_c;
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index e10f967755..ff682d367c 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -1671,6 +1671,74 @@  static int rgb24ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
     return srcSliceH;
 }
 
+static int bgrxToYv12Wrapper(SwsContext *c, const uint8_t *src[],
+                             int srcStride[], int srcSliceY, int srcSliceH,
+                             uint8_t *dst[], int dstStride[])
+{
+    ff_bgrxtoyv12(
+        src[0],
+        dst[0] +  srcSliceY       * dstStride[0],
+        dst[1] + (srcSliceY >> 1) * dstStride[1],
+        dst[2] + (srcSliceY >> 1) * dstStride[2],
+        c->srcW, srcSliceH,
+        dstStride[0], dstStride[1], srcStride[0],
+        c->input_rgb2yuv_table);
+    if (dst[3])
+        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
+    return srcSliceH;
+}
+
+static int rgbxToYv12Wrapper(SwsContext *c, const uint8_t *src[],
+                             int srcStride[], int srcSliceY, int srcSliceH,
+                             uint8_t *dst[], int dstStride[])
+{
+    ff_rgbxtoyv12(
+        src[0],
+        dst[0] +  srcSliceY       * dstStride[0],
+        dst[1] + (srcSliceY >> 1) * dstStride[1],
+        dst[2] + (srcSliceY >> 1) * dstStride[2],
+        c->srcW, srcSliceH,
+        dstStride[0], dstStride[1], srcStride[0],
+        c->input_rgb2yuv_table);
+    if (dst[3])
+        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
+    return srcSliceH;
+}
+
+static int xbgrToYv12Wrapper(SwsContext *c, const uint8_t *src[],
+                             int srcStride[], int srcSliceY, int srcSliceH,
+                             uint8_t *dst[], int dstStride[])
+{
+    ff_xbgrtoyv12(
+        src[0],
+        dst[0] +  srcSliceY       * dstStride[0],
+        dst[1] + (srcSliceY >> 1) * dstStride[1],
+        dst[2] + (srcSliceY >> 1) * dstStride[2],
+        c->srcW, srcSliceH,
+        dstStride[0], dstStride[1], srcStride[0],
+        c->input_rgb2yuv_table);
+    if (dst[3])
+        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
+    return srcSliceH;
+}
+
+static int xrgbToYv12Wrapper(SwsContext *c, const uint8_t *src[],
+                             int srcStride[], int srcSliceY, int srcSliceH,
+                             uint8_t *dst[], int dstStride[])
+{
+    ff_xrgbtoyv12(
+        src[0],
+        dst[0] +  srcSliceY       * dstStride[0],
+        dst[1] + (srcSliceY >> 1) * dstStride[1],
+        dst[2] + (srcSliceY >> 1) * dstStride[2],
+        c->srcW, srcSliceH,
+        dstStride[0], dstStride[1], srcStride[0],
+        c->input_rgb2yuv_table);
+    if (dst[3])
+        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
+    return srcSliceH;
+}
+
 static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
                              int srcStride[], int srcSliceY, int srcSliceH,
                              uint8_t *dst[], int dstStride[])
@@ -2059,6 +2127,27 @@  void ff_get_unscaled_swscale(SwsContext *c)
         !(flags & (SWS_ACCURATE_RND | SWS_BITEXACT)))
         c->convert_unscaled = rgb24ToYv12Wrapper;
 
+    /* bgrxtoYV12 */
+    if (((srcFormat == AV_PIX_FMT_BGRA && dstFormat == AV_PIX_FMT_YUV420P) ||
+         (srcFormat == AV_PIX_FMT_BGR0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
+        !(flags & (SWS_ACCURATE_RND | SWS_BITEXACT)))
+        c->convert_unscaled = bgrxToYv12Wrapper;
+    /* rgbx24toYV12 */
+    if (((srcFormat == AV_PIX_FMT_RGBA && dstFormat == AV_PIX_FMT_YUV420P) ||
+         (srcFormat == AV_PIX_FMT_RGB0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
+        !(flags & (SWS_ACCURATE_RND | SWS_BITEXACT)))
+        c->convert_unscaled = rgbxToYv12Wrapper;
+    /* xbgrtoYV12 */
+    if (((srcFormat == AV_PIX_FMT_ABGR && dstFormat == AV_PIX_FMT_YUV420P) ||
+         (srcFormat == AV_PIX_FMT_0BGR && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
+        !(flags & (SWS_ACCURATE_RND | SWS_BITEXACT)))
+        c->convert_unscaled = xbgrToYv12Wrapper;
+    /* xrgb24toYV12 */
+    if (((srcFormat == AV_PIX_FMT_ARGB && dstFormat == AV_PIX_FMT_YUV420P) ||
+         (srcFormat == AV_PIX_FMT_0RGB && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
+        !(flags & (SWS_ACCURATE_RND | SWS_BITEXACT)))
+        c->convert_unscaled = xrgbToYv12Wrapper;
+
     /* RGB/BGR -> RGB/BGR (no dither needed forms) */
     if (isAnyRGB(srcFormat) && isAnyRGB(dstFormat) && findRgbConvFn(c)
         && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT))))