diff mbox series

[FFmpeg-devel,v4,3/3] swscale/input: add input support for RGBF16

Message ID 20241014083311.1500912-5-ms+git@mur.at
State New
Headers show
Series Float16/32 support impovements (v4) | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished

Commit Message

Martin Schitter Oct. 14, 2024, 8:33 a.m. UTC
---
 libswscale/input.c   | 101 ++++++++++++++++++++++++++++++++++++++++++-
 libswscale/utils.c   |   2 +
 libswscale/version.h |   2 +-
 3 files changed, 103 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/libswscale/input.c b/libswscale/input.c
index e2af1d5..9beb72b 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -1369,6 +1369,57 @@  static av_always_inline void rgbaf16ToA_endian(uint16_t *dst, const uint16_t *sr
     }
 }
 
+static av_always_inline void rgbf16ToUV_half_endian(uint16_t *dstU, uint16_t *dstV, int is_be,
+                                                    const uint16_t *src, int width,
+                                                    int32_t *rgb2yuv, Half2FloatTables *h2f_tbl)
+{
+    int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+    int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+    int i;
+    for (i = 0; i < width; i++) {
+        int r = (lrintf(av_clipf(65535.0f * rdpx(src[i*6+0]), 0.0f, 65535.0f)) +
+                 lrintf(av_clipf(65535.0f * rdpx(src[i*6+3]), 0.0f, 65535.0f))) >> 1;
+        int g = (lrintf(av_clipf(65535.0f * rdpx(src[i*6+1]), 0.0f, 65535.0f)) +
+                 lrintf(av_clipf(65535.0f * rdpx(src[i*6+4]), 0.0f, 65535.0f))) >> 1;
+        int b = (lrintf(av_clipf(65535.0f * rdpx(src[i*6+2]), 0.0f, 65535.0f)) +
+                 lrintf(av_clipf(65535.0f * rdpx(src[i*6+5]), 0.0f, 65535.0f))) >> 1;
+
+        dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+        dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+    }
+}
+
+static av_always_inline void rgbf16ToUV_endian(uint16_t *dstU, uint16_t *dstV, int is_be,
+                                               const uint16_t *src, int width,
+                                               int32_t *rgb2yuv, Half2FloatTables *h2f_tbl)
+{
+    int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+    int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+    int i;
+    for (i = 0; i < width; i++) {
+        int r = lrintf(av_clipf(65535.0f * rdpx(src[i*3+0]), 0.0f, 65535.0f));
+        int g = lrintf(av_clipf(65535.0f * rdpx(src[i*3+1]), 0.0f, 65535.0f));
+        int b = lrintf(av_clipf(65535.0f * rdpx(src[i*3+2]), 0.0f, 65535.0f));
+
+        dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+        dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+    }
+}
+
+static av_always_inline void rgbf16ToY_endian(uint16_t *dst, const uint16_t *src, int is_be,
+                                              int width, int32_t *rgb2yuv, Half2FloatTables *h2f_tbl)
+{
+    int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
+    int i;
+    for (i = 0; i < width; i++) {
+        int r = lrintf(av_clipf(65535.0f * rdpx(src[i*3+0]), 0.0f, 65535.0f));
+        int g = lrintf(av_clipf(65535.0f * rdpx(src[i*3+1]), 0.0f, 65535.0f));
+        int b = lrintf(av_clipf(65535.0f * rdpx(src[i*3+2]), 0.0f, 65535.0f));
+
+        dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+    }
+}
+
 #undef rdpx
 
 #define rgbaf16_funcs_endian(endian_name, endian)                                                         \
@@ -1408,7 +1459,37 @@  static void rgbaf16##endian_name##ToA_c(uint8_t *_dst, const uint8_t *_src, cons
     const uint16_t *src = (const uint16_t*)_src;                                                          \
     uint16_t *dst = (uint16_t*)_dst;                                                                      \
     rgbaf16ToA_endian(dst, src, endian, width, opq);                                                      \
-}
+}                                                                                                         \
+static void rgbf16##endian_name##ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused,       \
+                                              const uint8_t *src1, const uint8_t *src2,                   \
+                                              int width, uint32_t *_rgb2yuv, void *opq)                   \
+{                                                                                                         \
+    const uint16_t *src = (const uint16_t*)src1;                                                          \
+    uint16_t *dstU = (uint16_t*)_dstU;                                                                    \
+    uint16_t *dstV = (uint16_t*)_dstV;                                                                    \
+    int32_t *rgb2yuv = (int32_t*)_rgb2yuv;                                                                \
+    av_assert1(src1==src2);                                                                               \
+    rgbf16ToUV_half_endian(dstU, dstV, endian, src, width, rgb2yuv, opq);                                 \
+}                                                                                                         \
+static void rgbf16##endian_name##ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused,            \
+                                         const uint8_t *src1, const uint8_t *src2,                        \
+                                         int width, uint32_t *_rgb2yuv, void *opq)                        \
+{                                                                                                         \
+    const uint16_t *src = (const uint16_t*)src1;                                                          \
+    uint16_t *dstU = (uint16_t*)_dstU;                                                                    \
+    uint16_t *dstV = (uint16_t*)_dstV;                                                                    \
+    int32_t *rgb2yuv = (int32_t*)_rgb2yuv;                                                                \
+    av_assert1(src1==src2);                                                                               \
+    rgbf16ToUV_endian(dstU, dstV, endian, src, width, rgb2yuv, opq);                                      \
+}                                                                                                         \
+static void rgbf16##endian_name##ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0,        \
+                                        const uint8_t *unused1, int width, uint32_t *_rgb2yuv, void *opq) \
+{                                                                                                         \
+    const uint16_t *src = (const uint16_t*)_src;                                                          \
+    uint16_t *dst = (uint16_t*)_dst;                                                                      \
+    int32_t *rgb2yuv = (int32_t*)_rgb2yuv;                                                                \
+    rgbf16ToY_endian(dst, src, endian, width, rgb2yuv, opq);                                              \
+}                                                                                                         \
 
 rgbaf16_funcs_endian(le, 0)
 rgbaf16_funcs_endian(be, 1)
@@ -1729,6 +1810,12 @@  av_cold void ff_sws_init_input_funcs(SwsContext *c,
         case AV_PIX_FMT_RGBAF16LE:
             *chrToYV12 = rgbaf16leToUV_half_c;
             break;
+        case AV_PIX_FMT_RGBF16BE:
+            *chrToYV12 = rgbf16beToUV_half_c;
+            break;
+        case AV_PIX_FMT_RGBF16LE:
+            *chrToYV12 = rgbf16leToUV_half_c;
+            break;
         }
     } else {
         switch (srcFormat) {
@@ -1822,6 +1909,12 @@  av_cold void ff_sws_init_input_funcs(SwsContext *c,
         case AV_PIX_FMT_RGBAF16LE:
             *chrToYV12 = rgbaf16leToUV_c;
             break;
+        case AV_PIX_FMT_RGBF16BE:
+            *chrToYV12 = rgbf16beToUV_c;
+            break;
+        case AV_PIX_FMT_RGBF16LE:
+            *chrToYV12 = rgbf16leToUV_c;
+            break;
         }
     }
 
@@ -2150,6 +2243,12 @@  av_cold void ff_sws_init_input_funcs(SwsContext *c,
     case AV_PIX_FMT_RGBAF16LE:
         *lumToYV12 = rgbaf16leToY_c;
         break;
+    case AV_PIX_FMT_RGBF16BE:
+        *lumToYV12 = rgbf16beToY_c;
+        break;
+    case AV_PIX_FMT_RGBF16LE:
+        *lumToYV12 = rgbf16leToY_c;
+        break;
     case AV_PIX_FMT_RGBF32LE:
         *lumToYV12 = rgbf32le_to_y_c;
         break;
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 3ed65a1..992d7f0 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -266,6 +266,8 @@  static const FormatEntry format_entries[] = {
     [AV_PIX_FMT_VUYX]        = { 1, 1 },
     [AV_PIX_FMT_RGBAF16BE]   = { 1, 0 },
     [AV_PIX_FMT_RGBAF16LE]   = { 1, 0 },
+    [AV_PIX_FMT_RGBF16BE]    = { 1, 0 },
+    [AV_PIX_FMT_RGBF16LE]    = { 1, 0 },
     [AV_PIX_FMT_RGBF32BE]    = { 1, 0 },
     [AV_PIX_FMT_RGBF32LE]    = { 1, 0 },
     [AV_PIX_FMT_XV30LE]      = { 1, 1 },
diff --git a/libswscale/version.h b/libswscale/version.h
index c13db31..f573bef 100644
--- a/libswscale/version.h
+++ b/libswscale/version.h
@@ -28,7 +28,7 @@ 
 
 #include "version_major.h"
 
-#define LIBSWSCALE_VERSION_MINOR   5
+#define LIBSWSCALE_VERSION_MINOR   6
 #define LIBSWSCALE_VERSION_MICRO 100
 
 #define LIBSWSCALE_VERSION_INT  AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \