diff mbox series

[FFmpeg-devel,5/7] lavc/pixblockdsp: RISC-V V 8-bit get_pixels & get_pixels_unaligned

Message ID 20220927200427.11811-5-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,1/7] lavu/riscv: helper to read the vector length | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont Sept. 27, 2022, 8:04 p.m. UTC
From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavcodec/riscv/Makefile           |  1 +
 libavcodec/riscv/pixblockdsp_init.c | 12 ++++++++++
 libavcodec/riscv/pixblockdsp_rvv.S  | 37 +++++++++++++++++++++++++++++
 3 files changed, 50 insertions(+)
 create mode 100644 libavcodec/riscv/pixblockdsp_rvv.S
diff mbox series

Patch

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 96925afdab..0fb2c81c75 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -9,5 +9,6 @@  OBJS-$(CONFIG_IDCTDSP) += riscv/idctdsp_init.o
 RVV-OBJS-$(CONFIG_IDCTDSP) += riscv/idctdsp_rvv.o
 OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_init.o \
                               riscv/pixblockdsp_rvi.o
+RVV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvv.o
 OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
 RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
diff --git a/libavcodec/riscv/pixblockdsp_init.c b/libavcodec/riscv/pixblockdsp_init.c
index 04bf52649f..69dbd18918 100644
--- a/libavcodec/riscv/pixblockdsp_init.c
+++ b/libavcodec/riscv/pixblockdsp_init.c
@@ -20,8 +20,10 @@ 
 
 #include <stdint.h>
 
+#include "config.h"
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
+#include "libavutil/riscv/cpu.h"
 #include "libavcodec/avcodec.h"
 #include "libavcodec/pixblockdsp.h"
 
@@ -30,6 +32,9 @@  void ff_get_pixels_8_rvi(int16_t *block, const uint8_t *pixels,
 void ff_get_pixels_16_rvi(int16_t *block, const uint8_t *pixels,
                           ptrdiff_t stride);
 
+void ff_get_pixels_8_rvv(int16_t *block, const uint8_t *pixels,
+                         ptrdiff_t stride);
+
 av_cold void ff_pixblockdsp_init_riscv(PixblockDSPContext *c,
                                        AVCodecContext *avctx,
                                        unsigned high_bit_depth)
@@ -42,4 +47,11 @@  av_cold void ff_pixblockdsp_init_riscv(PixblockDSPContext *c,
         else
             c->get_pixels = ff_get_pixels_8_rvi;
     }
+
+#if HAVE_RVV
+    if ((cpu_flags & AV_CPU_FLAG_RVV_I32) && ff_get_rv_vlenb() >= 16) {
+        if (!high_bit_depth)
+            c->get_pixels_unaligned = c->get_pixels = ff_get_pixels_8_rvv;
+    }
+#endif
 }
diff --git a/libavcodec/riscv/pixblockdsp_rvv.S b/libavcodec/riscv/pixblockdsp_rvv.S
new file mode 100644
index 0000000000..b7c74b88b5
--- /dev/null
+++ b/libavcodec/riscv/pixblockdsp_rvv.S
@@ -0,0 +1,37 @@ 
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "../libavutil/riscv/asm.S"
+
+func ff_get_pixels_8_rvv, zve32x  /* 8-bit get_pixels: a0 = block (int16_t *), a1 = pixels (const uint8_t *), a2 = stride, per the C prototype */
+        vsetivli     zero, 8, e8, mf2, ta, ma  /* AVL = 8, SEW = 8 bits, LMUL = 1/2, tail- and mask-agnostic; vl = 8 needs VLEN >= 128 (init code checks vlenb >= 16) */
+        vlsseg8e8.v  v16, (a1), a2  /* strided segment load: one 8-byte segment every a2 bytes from a1; byte k of each segment goes to v(16+k) */
+        vwcvtu.x.x.v v8, v16  /* zero-extend each 8-bit element of v16 to 16 bits in v8 */
+        vwcvtu.x.x.v v9, v17  /* same widening for byte 1 of every segment */
+        vwcvtu.x.x.v v10, v18  /* byte 2 */
+        vwcvtu.x.x.v v11, v19  /* byte 3 */
+        vwcvtu.x.x.v v12, v20  /* byte 4 */
+        vwcvtu.x.x.v v13, v21  /* byte 5 */
+        vwcvtu.x.x.v v14, v22  /* byte 6 */
+        vwcvtu.x.x.v v15, v23  /* byte 7 */
+        vsseg8e16.v  v8, (a0)  /* unit-stride segment store: element i of v8..v15 is written as 8 consecutive 16-bit values at a0 */
+        ret
+endfunc