diff mbox series

[FFmpeg-devel,2/3] sws/input: R-V V rgb24ToUV and bgr24ToUV

Message ID 20240605163638.10586-2-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,PATCHv3,1/3] sws/input: R-V V rgb24ToY & bgr24ToY | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont June 5, 2024, 4:36 p.m. UTC
T-Head C908:
rgb24_to_uv_8_c:            2.7
rgb24_to_uv_8_rvv_i32:      3.2
rgb24_to_uv_128_c:         41.0
rgb24_to_uv_128_rvv_i32:   12.7
rgb24_to_uv_1080_c:       342.5
rgb24_to_uv_1080_rvv_i32: 105.7
rgb24_to_uv_1280_c:       406.0
rgb24_to_uv_1280_rvv_i32: 124.2
rgb24_to_uv_1920_c:       626.0
rgb24_to_uv_1920_rvv_i32: 186.0

SpacemiT X60:
rgb24_to_uv_8_c:            2.5
rgb24_to_uv_8_rvv_i32:      3.0
rgb24_to_uv_128_c:         36.5
rgb24_to_uv_128_rvv_i32:    5.7
rgb24_to_uv_1080_c:       304.2
rgb24_to_uv_1080_rvv_i32:  49.0
rgb24_to_uv_1280_c:       360.5
rgb24_to_uv_1280_rvv_i32:  57.5
rgb24_to_uv_1920_c:       540.7
rgb24_to_uv_1920_rvv_i32:  86.2
---
 libswscale/riscv/input.S   | 46 ++++++++++++++++++++++++++++++++++++++
 libswscale/riscv/swscale.c |  8 +++++++
 2 files changed, 54 insertions(+)
diff mbox series

Patch

diff --git a/libswscale/riscv/input.S b/libswscale/riscv/input.S
index 323f650bc9..3392f189ca 100644
--- a/libswscale/riscv/input.S
+++ b/libswscale/riscv/input.S
@@ -53,3 +53,49 @@  func ff_rgb24ToY_rvv, zve32x
 
         ret
 endfunc
+
+func ff_bgr24ToUV_rvv, zve32x # same loop as ff_rgb24ToUV_rvv, with the first-byte and third-byte coefficients swapped
+        lw      t1, 20(a6) # BU: U coefficient applied to the first byte of each 3-byte pixel
+        lw      t4, 32(a6) # BV: V coefficient applied to the first byte
+        lw      t3, 12(a6) # RU: U coefficient applied to the third byte
+        lw      t6, 24(a6) # RV: V coefficient applied to the third byte
+        j       1f # continue at local label 1 (presumably the one inside ff_rgb24ToUV_rvv below)
+endfunc
+
+func ff_rgb24ToUV_rvv, zve32x # a0/a1: 16-bit outputs, a3: packed 3-byte input, a5: element count, a6: coefficient words (roles assumed from the C prototype and standard argument registers)
+        lw      t1, 12(a6) # RU: U coefficient applied to the first byte of each 3-byte pixel
+        lw      t4, 24(a6) # RV: V coefficient applied to the first byte
+        lw      t3, 20(a6) # BU: U coefficient applied to the third byte
+        lw      t6, 32(a6) # BV: V coefficient applied to the third byte
+1:
+        lw      t2, 16(a6) # GU: U coefficient applied to the second byte (common to both entry points)
+        lw      t5, 28(a6) # GV: V coefficient applied to the second byte
+        li      a7, (256 << (15 - 1)) + (1 << (15 - 7)) # constant bias added to both accumulators before the final shift; overwrites incoming a7
+2:
+        vsetvli    t0, a5, e32, m8, ta, ma # t0 = elements handled this pass, as 32-bit lanes with LMUL 8
+        vlseg3e8.v v0, (a3) # load t0 3-byte segments, de-interleaved into v0 / v2 / v4 (byte fields at this vtype occupy 2 registers each)
+        sub        a5, a5, t0 # elements still to process after this pass
+        vzext.vf4  v16, v0 # first byte of each pixel, zero-extended to 32 bits
+        sh1add     a6, t0, t0 # a6 = 3 * t0 = bytes consumed; the coefficient pointer is dead since every coefficient is already in a register
+        vzext.vf4  v24, v2 # second byte of each pixel, zero-extended to 32 bits
+        vmul.vx    v8, v16, t1 # U accumulator = first byte * t1
+        add        a3, a6, a3 # advance the input pointer by 3 * t0 bytes
+        vmul.vx    v16, v16, t4 # V accumulator = first byte * t4 (in place, first byte no longer needed)
+        vmacc.vx   v8, t2, v24 # U += GU * second byte
+        vmacc.vx   v16, t5, v24 # V += GV * second byte
+        vzext.vf4  v24, v4 # third byte of each pixel, zero-extended to 32 bits (reuses v24)
+        vadd.vx    v8, v8, a7 # U += bias
+        vadd.vx    v16, v16, a7 # V += bias
+        vmacc.vx   v8, t3, v24 # U += t3 * third byte
+        vmacc.vx   v16, t6, v24 # V += t6 * third byte
+        vsetvli    zero, zero, e16, m4, ta, ma # switch to 16-bit lanes with LMUL 4; same SEW/LMUL ratio, so vl is kept
+        vnsra.wi   v0, v8, 15 - 6 # narrow U to 16 bits with an arithmetic right shift by 9
+        vnsra.wi   v4, v16, 15 - 6 # narrow V to 16 bits with an arithmetic right shift by 9
+        vse16.v    v0, (a0) # store t0 16-bit U values
+        sh1add     a0, t0, a0 # advance the U pointer by 2 * t0 bytes
+        vse16.v    v4, (a1) # store t0 16-bit V values
+        sh1add     a1, t0, a1 # advance the V pointer by 2 * t0 bytes
+        bnez       a5, 2b # loop until no elements remain
+
+        ret
+endfunc
diff --git a/libswscale/riscv/swscale.c b/libswscale/riscv/swscale.c
index 187b3fce58..b3552976c6 100644
--- a/libswscale/riscv/swscale.c
+++ b/libswscale/riscv/swscale.c
@@ -23,8 +23,12 @@ 
 
 void ff_bgr24ToY_rvv(uint8_t *dst, const uint8_t *src, const uint8_t *,
                      const uint8_t *, int width, uint32_t *coeffs, void *);
+void ff_bgr24ToUV_rvv(uint8_t *, uint8_t *, const uint8_t *, const uint8_t *,
+                      const uint8_t *, int width, uint32_t *coeffs, void *);
 void ff_rgb24ToY_rvv(uint8_t *dst, const uint8_t *src, const uint8_t *,
                      const uint8_t *, int width, uint32_t *coeffs, void *);
+void ff_rgb24ToUV_rvv(uint8_t *, uint8_t *, const uint8_t *, const uint8_t *,
+                      const uint8_t *, int width, uint32_t *coeffs, void *);
 
 av_cold void ff_sws_init_swscale_riscv(SwsContext *c)
 {
@@ -35,10 +39,14 @@  av_cold void ff_sws_init_swscale_riscv(SwsContext *c)
         switch (c->srcFormat) {
             case AV_PIX_FMT_BGR24:
                 c->lumToYV12 = ff_bgr24ToY_rvv;
+                if (!c->chrSrcHSubSample)
+                    c->chrToYV12 = ff_bgr24ToUV_rvv;
                 break;
 
             case AV_PIX_FMT_RGB24:
                 c->lumToYV12 = ff_rgb24ToY_rvv;
+                if (!c->chrSrcHSubSample)
+                    c->chrToYV12 = ff_rgb24ToUV_rvv;
                 break;
         }
     }