diff mbox series

[FFmpeg-devel,1/3] sws/rgb2rgb: RISC-V V shuffle_bytes_xxxx functions

Message ID 20220928153001.30025-1-remi@remlab.net
State Accepted
Commit 66a03f405316a0e1a4a60cacd1d32ec540604a01
Headers show
Series RISC-V V swscale pixel format conversions | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 fail Make fate failed

Commit Message

Rémi Denis-Courmont Sept. 28, 2022, 3:29 p.m. UTC
From: Rémi Denis-Courmont <remi@remlab.net>

---
 libswscale/rgb2rgb.c           |  2 +
 libswscale/rgb2rgb.h           |  1 +
 libswscale/riscv/Makefile      |  2 +
 libswscale/riscv/rgb2rgb.c     | 47 ++++++++++++++++++++
 libswscale/riscv/rgb2rgb_rvv.S | 78 ++++++++++++++++++++++++++++++++++
 5 files changed, 130 insertions(+)
 create mode 100644 libswscale/riscv/Makefile
 create mode 100644 libswscale/riscv/rgb2rgb.c
 create mode 100644 libswscale/riscv/rgb2rgb_rvv.S
diff mbox series

Patch

diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index 3af775b389..e98fdac8ea 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -139,6 +139,8 @@  av_cold void ff_sws_rgb2rgb_init(void)
     rgb2rgb_init_c();
 #if ARCH_AARCH64
     rgb2rgb_init_aarch64();
+#elif ARCH_RISCV
+    rgb2rgb_init_riscv();
 #elif ARCH_X86
     rgb2rgb_init_x86();
 #elif ARCH_LOONGARCH64
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index db85bfc42f..f3951d523e 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -167,6 +167,7 @@  extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const u
 void ff_sws_rgb2rgb_init(void);
 
 void rgb2rgb_init_aarch64(void);
+void rgb2rgb_init_riscv(void);
 void rgb2rgb_init_x86(void);
 void rgb2rgb_init_loongarch(void);
 
diff --git a/libswscale/riscv/Makefile b/libswscale/riscv/Makefile
new file mode 100644
index 0000000000..214d877b62
--- /dev/null
+++ b/libswscale/riscv/Makefile
@@ -0,0 +1,2 @@ 
+OBJS += riscv/rgb2rgb.o
+RVV-OBJS += riscv/rgb2rgb_rvv.o
diff --git a/libswscale/riscv/rgb2rgb.c b/libswscale/riscv/rgb2rgb.c
new file mode 100644
index 0000000000..5654154494
--- /dev/null
+++ b/libswscale/riscv/rgb2rgb.c
@@ -0,0 +1,47 @@ 
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libswscale/rgb2rgb.h"
+
+void ff_shuffle_bytes_0321_rvv(const uint8_t *src, uint8_t *dst, int src_len); /* per 4-byte group: dst[0..3] = src[0], src[3], src[2], src[1] */
+void ff_shuffle_bytes_2103_rvv(const uint8_t *src, uint8_t *dst, int src_len); /* per 4-byte group: dst[0..3] = src[2], src[1], src[0], src[3] */
+void ff_shuffle_bytes_1230_rvv(const uint8_t *src, uint8_t *dst, int src_len); /* per 4-byte group: dst[0..3] = src[1], src[2], src[3], src[0] */
+void ff_shuffle_bytes_3012_rvv(const uint8_t *src, uint8_t *dst, int src_len); /* per 4-byte group: dst[0..3] = src[3], src[0], src[1], src[2] */
+void ff_shuffle_bytes_3210_rvv(const uint8_t *src, uint8_t *dst, int src_len); /* per 4-byte group: dst[0..3] = src[3], src[2], src[1], src[0] */
+
+av_cold void rgb2rgb_init_riscv(void) /* hook the RISC-V byte-shuffle routines into the rgb2rgb function pointers */
+{
+#if HAVE_RVV
+    const int cpu_flags = av_get_cpu_flags(); /* capability bits reported at run time */
+
+    if (!(cpu_flags & AV_CPU_FLAG_RVV_I32))
+        return; /* flag absent: leave every shuffle_bytes_* pointer untouched */
+    shuffle_bytes_3210 = ff_shuffle_bytes_3210_rvv; /* flag present: install all five RVV variants */
+    shuffle_bytes_3012 = ff_shuffle_bytes_3012_rvv;
+    shuffle_bytes_1230 = ff_shuffle_bytes_1230_rvv;
+    shuffle_bytes_2103 = ff_shuffle_bytes_2103_rvv;
+    shuffle_bytes_0321 = ff_shuffle_bytes_0321_rvv;
+#endif
+}
diff --git a/libswscale/riscv/rgb2rgb_rvv.S b/libswscale/riscv/rgb2rgb_rvv.S
new file mode 100644
index 0000000000..3eb11262c0
--- /dev/null
+++ b/libswscale/riscv/rgb2rgb_rvv.S
@@ -0,0 +1,78 @@ 
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+func ff_shuffle_bytes_0321_rvv, zve32x
+        addi    t1, a0, 3       /* 0321: a0/a1/a2 = src/dst/src_len (C prototype order); output byte 1 <- src+3 */
+        addi    t2, a0, 2       /* output byte 2 <- src+2 */
+        addi    t3, a0, 1       /* output byte 3 <- src+1; output byte 0 comes straight from a0 (src+0) */
+1:      /* shared loop entry: the other shuffle_bytes entry points preset a0/t1/t2/t3 and jump here */
+        srai    a2, a2, 2       /* a2 = src_len >> 2 = number of whole 4-byte groups */
+        li      t4, 4           /* byte stride between the same byte of consecutive groups */
+2:
+        vsetvli    t0, a2, e8, m1, ta, ma /* t0 = number of groups handled in this pass */
+        sub        a2, a2, t0
+        slli       t5, t0, 2    /* t5 = bytes per pass; base-ISA slli+add replaces sh2add, which needs Zba (not enabled or checked here) */
+        vlse8.v    v8, (a0), t4 /* v8..v11 collect output bytes 0..3 of every group via strided loads */
+        add        a0, a0, t5
+        vlse8.v    v9, (t1), t4
+        add        t1, t1, t5
+        vlse8.v    v10, (t2), t4
+        add        t2, t2, t5
+        vlse8.v    v11, (t3), t4
+        add        t3, t3, t5
+        vsseg4e8.v v8, (a1)     /* segment store interleaves v8..v11 back into 4-byte groups at dst */
+        add        a1, a1, t5
+        bnez       a2, 2b       /* repeat until every group has been processed */
+        ret
+endfunc
+
+func ff_shuffle_bytes_2103_rvv, zve32x
+        addi    t1, a0, 1       /* 2103: output byte 1 is read from src+1 */
+        addi    t2, a0, 0       /* output byte 2 is read from src+0 */
+        addi    t3, a0, 3       /* output byte 3 is read from src+3 */
+        addi    a0, a0, 2       /* output byte 0 is read from src+2; done last because t1-t3 derive from the original a0 */
+        j       1b              /* nearest preceding local label 1: in this file, the loop entry inside ff_shuffle_bytes_0321_rvv */
+endfunc
+
+func ff_shuffle_bytes_1230_rvv, zve32x
+        addi    t1, a0, 2       /* 1230: output byte 1 is read from src+2 */
+        addi    t2, a0, 3       /* output byte 2 is read from src+3 */
+        addi    t3, a0, 0       /* output byte 3 is read from src+0 */
+        addi    a0, a0, 1       /* output byte 0 is read from src+1; done last because t1-t3 derive from the original a0 */
+        j       1b              /* nearest preceding local label 1: in this file, the loop entry inside ff_shuffle_bytes_0321_rvv */
+endfunc
+
+func ff_shuffle_bytes_3012_rvv, zve32x
+        addi    t1, a0, 0       /* 3012: output byte 1 is read from src+0 */
+        addi    t2, a0, 1       /* output byte 2 is read from src+1 */
+        addi    t3, a0, 2       /* output byte 3 is read from src+2 */
+        addi    a0, a0, 3       /* output byte 0 is read from src+3; done last because t1-t3 derive from the original a0 */
+        j       1b              /* nearest preceding local label 1: in this file, the loop entry inside ff_shuffle_bytes_0321_rvv */
+endfunc
+
+func ff_shuffle_bytes_3210_rvv, zve32x
+        addi    t1, a0, 2       /* 3210: output byte 1 is read from src+2 */
+        addi    t2, a0, 1       /* output byte 2 is read from src+1 */
+        addi    t3, a0, 0       /* output byte 3 is read from src+0 */
+        addi    a0, a0, 3       /* output byte 0 is read from src+3; done last because t1-t3 derive from the original a0 */
+        j       1b              /* nearest preceding local label 1: in this file, the loop entry inside ff_shuffle_bytes_0321_rvv */
+endfunc