diff mbox series

[FFmpeg-devel,01/10] lavc/vp9dsp: R-V V ipred vert

Message ID tencent_425318010FBD6A9D05C7D05CB1C49C0B1606@qq.com
State New
Headers show
Series [FFmpeg-devel,01/10] lavc/vp9dsp: R-V V ipred vert | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

uk7b@foxmail.com May 4, 2024, 3:03 p.m. UTC
From: sunyuechi <sunyuechi@iscas.ac.cn>

C908:
vp9_vert_8x8_8bpp_c: 22.0
vp9_vert_8x8_8bpp_rvv_i64: 18.5
vp9_vert_16x16_8bpp_c: 71.2
vp9_vert_16x16_8bpp_rvv_i32: 50.7
vp9_vert_32x32_8bpp_c: 300.2
vp9_vert_32x32_8bpp_rvv_i32: 136.7
---
 libavcodec/riscv/vp9_intra_rvv.S | 35 ++++++++++++++++++++++++++++++++
 libavcodec/riscv/vp9dsp.h        |  6 ++++++
 libavcodec/riscv/vp9dsp_init.c   |  3 +++
 3 files changed, 44 insertions(+)

Comments

flow gg May 4, 2024, 3:05 p.m. UTC | #1
the github link: https://github.com/hleft/FFmpeg/tree/vp9

<uk7b@foxmail.com> 于2024年5月4日周六 23:03写道:

> From: sunyuechi <sunyuechi@iscas.ac.cn>
>
> C908:
> vp9_vert_8x8_8bpp_c: 22.0
> vp9_vert_8x8_8bpp_rvv_i64: 18.5
> vp9_vert_16x16_8bpp_c: 71.2
> vp9_vert_16x16_8bpp_rvv_i32: 50.7
> vp9_vert_32x32_8bpp_c: 300.2
> vp9_vert_32x32_8bpp_rvv_i32: 136.7
> ---
>  libavcodec/riscv/vp9_intra_rvv.S | 35 ++++++++++++++++++++++++++++++++
>  libavcodec/riscv/vp9dsp.h        |  6 ++++++
>  libavcodec/riscv/vp9dsp_init.c   |  3 +++
>  3 files changed, 44 insertions(+)
>
> diff --git a/libavcodec/riscv/vp9_intra_rvv.S
> b/libavcodec/riscv/vp9_intra_rvv.S
> index db9774c263..b5f0f9d3c3 100644
> --- a/libavcodec/riscv/vp9_intra_rvv.S
> +++ b/libavcodec/riscv/vp9_intra_rvv.S
> @@ -113,3 +113,38 @@ func_dc dc_left  8   left 3  0  zve64x
>  func_dc dc_top   32  top  5  1  zve32x
>  func_dc dc_top   16  top  4  1  zve32x
>  func_dc dc_top   8   top  3  0  zve64x
> +
> +func ff_v_32x32_rvv, zve32x
> +        vsetivli     zero, 8, e8, mf2, ta, ma
> +        vle32.v      v8, (a3)
> +
> +        .rept 31
> +        vse32.v      v8, (a0)
> +        add          a0, a0, a1
> +        .endr
> +        vse32.v      v8, (a0)
> +
> +        ret
> +endfunc
> +
> +func ff_v_16x16_rvv, zve32x
> +        vsetivli     zero, 4, e8, mf4, ta, ma
> +        vle32.v      v8, (a3)
> +
> +        .rept 15
> +        vse32.v      v8, (a0)
> +        add          a0, a0, a1
> +        .endr
> +        vse32.v      v8, (a0)
> +
> +        ret
> +endfunc
> +
> +func ff_v_8x8_rvv, zve64x
> +        ld           t0, (a3)
> +        vsetivli     zero, 8, e64, m4, ta, ma
> +        vmv.v.x      v8, t0
> +        vsse64.v     v8, (a0), a1
> +
> +        ret
> +endfunc
> diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
> index 25047ed507..113397ce86 100644
> --- a/libavcodec/riscv/vp9dsp.h
> +++ b/libavcodec/riscv/vp9dsp.h
> @@ -60,6 +60,12 @@ void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t
> stride, const uint8_t *l,
>                           const uint8_t *a);
>  void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
>                         const uint8_t *a);
> +void ff_v_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
> +                    const uint8_t *a);
> +void ff_v_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
> +                    const uint8_t *a);
> +void ff_v_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
> +                  const uint8_t *a);
>
>  #define VP9_8TAP_RISCV_RVV_FUNC(SIZE, type, type_idx)
>      \
>  void ff_put_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t
> dststride,   \
> diff --git a/libavcodec/riscv/vp9dsp_init.c
> b/libavcodec/riscv/vp9dsp_init.c
> index 69ab39004c..9c550d40b5 100644
> --- a/libavcodec/riscv/vp9dsp_init.c
> +++ b/libavcodec/riscv/vp9dsp_init.c
> @@ -36,6 +36,7 @@ static av_cold void
> vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
>              dsp->intra_pred[TX_8X8][DC_128_PRED] = ff_dc_128_8x8_rvv;
>              dsp->intra_pred[TX_8X8][DC_129_PRED] = ff_dc_129_8x8_rvv;
>              dsp->intra_pred[TX_8X8][TOP_DC_PRED] = ff_dc_top_8x8_rvv;
> +            dsp->intra_pred[TX_8X8][VERT_PRED] = ff_v_8x8_rvv;
>          }
>
>          if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
> @@ -51,6 +52,8 @@ static av_cold void
> vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
>              dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv;
>              dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv;
>              dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
> +            dsp->intra_pred[TX_32X32][VERT_PRED] = ff_v_32x32_rvv;
> +            dsp->intra_pred[TX_16X16][VERT_PRED] = ff_v_16x16_rvv;
>          }
>      #endif
>  }
> --
> 2.45.0
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Rémi Denis-Courmont May 5, 2024, 8:36 a.m. UTC | #2
Le lauantaina 4. toukokuuta 2024, 18.03.04 EEST uk7b@foxmail.com a écrit :
> From: sunyuechi <sunyuechi@iscas.ac.cn>
> 
> C908:
> vp9_vert_8x8_8bpp_c: 22.0
> vp9_vert_8x8_8bpp_rvv_i64: 18.5
> vp9_vert_16x16_8bpp_c: 71.2
> vp9_vert_16x16_8bpp_rvv_i32: 50.7
> vp9_vert_32x32_8bpp_c: 300.2
> vp9_vert_32x32_8bpp_rvv_i32: 136.7
> ---
>  libavcodec/riscv/vp9_intra_rvv.S | 35 ++++++++++++++++++++++++++++++++
>  libavcodec/riscv/vp9dsp.h        |  6 ++++++
>  libavcodec/riscv/vp9dsp_init.c   |  3 +++
>  3 files changed, 44 insertions(+)
> 
> diff --git a/libavcodec/riscv/vp9_intra_rvv.S b/libavcodec/riscv/vp9_intra_rvv.S
> index db9774c263..b5f0f9d3c3 100644
> --- a/libavcodec/riscv/vp9_intra_rvv.S
> +++ b/libavcodec/riscv/vp9_intra_rvv.S
> @@ -113,3 +113,38 @@ func_dc dc_left  8   left 3  0  zve64x
>  func_dc dc_top   32  top  5  1  zve32x
>  func_dc dc_top   16  top  4  1  zve32x
>  func_dc dc_top   8   top  3  0  zve64x
> +
> +func ff_v_32x32_rvv, zve32x
> +        vsetivli     zero, 8, e8, mf2, ta, ma
> +        vle32.v      v8, (a3)
> +
> +        .rept 31
> +        vse32.v      v8, (a0)
> +        add          a0, a0, a1
> +        .endr
> +        vse32.v      v8, (a0)
> +
> +        ret
> +endfunc

This does not look like it needs vectors. Ditto the next one.

> +
> +func ff_v_16x16_rvv, zve32x
> +        vsetivli     zero, 4, e8, mf4, ta, ma
> +        vle32.v      v8, (a3)
> +
> +        .rept 15
> +        vse32.v      v8, (a0)
> +        add          a0, a0, a1
> +        .endr
> +        vse32.v      v8, (a0)
> +
> +        ret
> +endfunc
> +
> +func ff_v_8x8_rvv, zve64x
> +        ld           t0, (a3)
> +        vsetivli     zero, 8, e64, m4, ta, ma
> +        vmv.v.x      v8, t0
> +        vsse64.v     v8, (a0), a1
> +
> +        ret
> +endfunc

This is missing a guard against RV32.

> diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
> index 25047ed507..113397ce86 100644
> --- a/libavcodec/riscv/vp9dsp.h
> +++ b/libavcodec/riscv/vp9dsp.h
> @@ -60,6 +60,12 @@ void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
>                           const uint8_t *a);
>  void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
>                         const uint8_t *a);
> +void ff_v_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
> +                    const uint8_t *a);
> +void ff_v_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
> +                    const uint8_t *a);
> +void ff_v_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
> +                  const uint8_t *a);
> 
>  #define VP9_8TAP_RISCV_RVV_FUNC(SIZE, type, type_idx)                         \
>  void ff_put_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride,   \
> diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
> index 69ab39004c..9c550d40b5 100644
> --- a/libavcodec/riscv/vp9dsp_init.c
> +++ b/libavcodec/riscv/vp9dsp_init.c
> @@ -36,6 +36,7 @@ static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
>              dsp->intra_pred[TX_8X8][DC_128_PRED] = ff_dc_128_8x8_rvv;
>              dsp->intra_pred[TX_8X8][DC_129_PRED] = ff_dc_129_8x8_rvv;
>              dsp->intra_pred[TX_8X8][TOP_DC_PRED] = ff_dc_top_8x8_rvv;
> +            dsp->intra_pred[TX_8X8][VERT_PRED] = ff_v_8x8_rvv;
>          }
> 
>          if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
> @@ -51,6 +52,8 @@ static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
>              dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv;
>              dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv;
>              dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
> +            dsp->intra_pred[TX_32X32][VERT_PRED] = ff_v_32x32_rvv;
> +            dsp->intra_pred[TX_16X16][VERT_PRED] = ff_v_16x16_rvv;
>          }
>      #endif
>  }
diff mbox series

Patch

diff --git a/libavcodec/riscv/vp9_intra_rvv.S b/libavcodec/riscv/vp9_intra_rvv.S
index db9774c263..b5f0f9d3c3 100644
--- a/libavcodec/riscv/vp9_intra_rvv.S
+++ b/libavcodec/riscv/vp9_intra_rvv.S
@@ -113,3 +113,38 @@  func_dc dc_left  8   left 3  0  zve64x
 func_dc dc_top   32  top  5  1  zve32x
 func_dc dc_top   16  top  4  1  zve32x
 func_dc dc_top   8   top  3  0  zve64x
+
+func ff_v_32x32_rvv, zve32x
+        vsetivli     zero, 8, e8, mf2, ta, ma
+        vle32.v      v8, (a3)
+
+        .rept 31
+        vse32.v      v8, (a0)
+        add          a0, a0, a1
+        .endr
+        vse32.v      v8, (a0)
+
+        ret
+endfunc
+
+func ff_v_16x16_rvv, zve32x
+        vsetivli     zero, 4, e8, mf4, ta, ma
+        vle32.v      v8, (a3)
+
+        .rept 15
+        vse32.v      v8, (a0)
+        add          a0, a0, a1
+        .endr
+        vse32.v      v8, (a0)
+
+        ret
+endfunc
+
+func ff_v_8x8_rvv, zve64x
+        ld           t0, (a3)
+        vsetivli     zero, 8, e64, m4, ta, ma
+        vmv.v.x      v8, t0
+        vsse64.v     v8, (a0), a1
+
+        ret
+endfunc
diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
index 25047ed507..113397ce86 100644
--- a/libavcodec/riscv/vp9dsp.h
+++ b/libavcodec/riscv/vp9dsp.h
@@ -60,6 +60,12 @@  void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                          const uint8_t *a);
 void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                        const uint8_t *a);
+void ff_v_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+                    const uint8_t *a);
+void ff_v_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+                    const uint8_t *a);
+void ff_v_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+                  const uint8_t *a);
 
 #define VP9_8TAP_RISCV_RVV_FUNC(SIZE, type, type_idx)                         \
 void ff_put_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride,   \
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index 69ab39004c..9c550d40b5 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -36,6 +36,7 @@  static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
             dsp->intra_pred[TX_8X8][DC_128_PRED] = ff_dc_128_8x8_rvv;
             dsp->intra_pred[TX_8X8][DC_129_PRED] = ff_dc_129_8x8_rvv;
             dsp->intra_pred[TX_8X8][TOP_DC_PRED] = ff_dc_top_8x8_rvv;
+            dsp->intra_pred[TX_8X8][VERT_PRED] = ff_v_8x8_rvv;
         }
 
         if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
@@ -51,6 +52,8 @@  static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
             dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv;
             dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv;
             dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
+            dsp->intra_pred[TX_32X32][VERT_PRED] = ff_v_32x32_rvv;
+            dsp->intra_pred[TX_16X16][VERT_PRED] = ff_v_16x16_rvv;
         }
     #endif
 }