diff mbox series

[FFmpeg-devel] avcodec/hevcdsp: Offset ff_hevc_.pel_filters to simplify addressing

Message ID AS8P250MB074409EBD6086B8D0879C5798F492@AS8P250MB0744.EURP250.PROD.OUTLOOK.COM
State Accepted
Commit 6106fb2b4c5ebd88a75fadfbbe7cf7e7fb944d18
Headers show
Series [FFmpeg-devel] avcodec/hevcdsp: Offset ff_hevc_.pel_filters to simplify addressing | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Andreas Rheinhardt Feb. 11, 2024, 8:23 a.m. UTC
Besides simplifying address computations (it saves 432B of .text
in hevcdsp.o alone here) it also fixes undefined behaviour that
occurs if mx or my are 0 (happens when the filters are unused)
because they lead to an array index of -1 in the old code.
This happens in the checkasm-hevc_pel FATE-test.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
The loongarch and mips parts of this are untested. Luckily we have a
loongarch patchwork runner...

 libavcodec/hevcdsp.c                    |   6 +-
 libavcodec/hevcdsp.h                    |   5 +-
 libavcodec/hevcdsp_template.c           |  38 ++--
 libavcodec/loongarch/hevc_mc.S          | 224 +++++-------------------
 libavcodec/loongarch/hevc_mc_bi_lsx.c   |   6 +-
 libavcodec/loongarch/hevc_mc_uni_lsx.c  |   6 +-
 libavcodec/loongarch/hevc_mc_uniw_lsx.c |   4 +-
 libavcodec/loongarch/hevcdsp_lsx.c      |   6 +-
 libavcodec/mips/hevc_mc_bi_msa.c        |   6 +-
 libavcodec/mips/hevc_mc_biw_msa.c       |   6 +-
 libavcodec/mips/hevc_mc_uni_msa.c       |   6 +-
 libavcodec/mips/hevc_mc_uniw_msa.c      |   6 +-
 libavcodec/mips/hevcdsp_mmi.c           |  20 +--
 libavcodec/mips/hevcdsp_msa.c           |   6 +-
 libavcodec/x86/hevcdsp_init.c           |   4 +-
 15 files changed, 112 insertions(+), 237 deletions(-)

Comments

Nuo Mi Feb. 11, 2024, 11:36 a.m. UTC | #1
On Sun, Feb 11, 2024 at 4:21 PM Andreas Rheinhardt <
andreas.rheinhardt@outlook.com> wrote:

> Besides simplifying address computations (it saves 432B of .text
> in hevcdsp.o alone here) it also fixes undefined behaviour that
> occurs if mx or my are 0 (happens when the filters are unused)
> because they lead to an array index of -1 in the old code.
> This happens in the checkasm-hevc_pel FATE-test.
>
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
> ---
> The loongarch and mips parts of this are untested. Luckily we have a
> loongarch patchwork runner...
>
>  libavcodec/hevcdsp.c                    |   6 +-
>  libavcodec/hevcdsp.h                    |   5 +-
>  libavcodec/hevcdsp_template.c           |  38 ++--
>  libavcodec/loongarch/hevc_mc.S          | 224 +++++-------------------
>  libavcodec/loongarch/hevc_mc_bi_lsx.c   |   6 +-
>  libavcodec/loongarch/hevc_mc_uni_lsx.c  |   6 +-
>  libavcodec/loongarch/hevc_mc_uniw_lsx.c |   4 +-
>  libavcodec/loongarch/hevcdsp_lsx.c      |   6 +-
>  libavcodec/mips/hevc_mc_bi_msa.c        |   6 +-
>  libavcodec/mips/hevc_mc_biw_msa.c       |   6 +-
>  libavcodec/mips/hevc_mc_uni_msa.c       |   6 +-
>  libavcodec/mips/hevc_mc_uniw_msa.c      |   6 +-
>  libavcodec/mips/hevcdsp_mmi.c           |  20 +--
>  libavcodec/mips/hevcdsp_msa.c           |   6 +-
>  libavcodec/x86/hevcdsp_init.c           |   4 +-
>  15 files changed, 112 insertions(+), 237 deletions(-)
>
> diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
> index 2ca551df1d..630fdc012e 100644
> --- a/libavcodec/hevcdsp.c
> +++ b/libavcodec/hevcdsp.c
> @@ -91,7 +91,8 @@ static const int8_t transform[32][32] = {
>        90, -90,  88, -85,  82, -78,  73, -67,  61, -54,  46, -38,  31,
> -22,  13,  -4 },
>  };
>
> -DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters)[7][4] = {
> +DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters)[8][4] = {
> +    {  0 },
>      { -2, 58, 10, -2},
>      { -4, 54, 16, -2},
>      { -6, 46, 28, -4},
> @@ -101,7 +102,8 @@ DECLARE_ALIGNED(16, const int8_t,
> ff_hevc_epel_filters)[7][4] = {
>      { -2, 10, 58, -2},
>  };
>
> -DECLARE_ALIGNED(16, const int8_t, ff_hevc_qpel_filters)[3][16] = {
> +DECLARE_ALIGNED(16, const int8_t, ff_hevc_qpel_filters)[4][16] = {
>
Do you know why this is [4][16]? [4][8] should suffice.
If some architecture requires 16, we might need to update
VVC_INTER_LUMA_TAPS to 16 in the future.
Thank you

> +    { 0 },
>      { -1,  4,-10, 58, 17, -5,  1,  0, -1,  4,-10, 58, 17, -5,  1,  0},
>      { -1,  4,-11, 40, 40,-11,  4, -1, -1,  4,-11, 40, 40,-11,  4, -1},
>      {  0,  1, -5, 17, 58,-10,  4, -1,  0,  1, -5, 17, 58,-10,  4, -1}
> diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
> index 1b9c5bb6bc..a5933dcac4 100644
>
> --
> 2.34.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Nuo Mi Feb. 11, 2024, 11:47 a.m. UTC | #2
On Sun, Feb 11, 2024 at 4:21 PM Andreas Rheinhardt <
andreas.rheinhardt@outlook.com> wrote:

> Besides simplifying address computations (it saves 432B of .text
> in hevcdsp.o alone here) it also fixes undefined behaviour that
>
We can save more if we change
void (*put_hevc_epel[10][2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t
srcstride, int height, intptr_t mx, intptr_t my, int width);
to
void (*put_hevc_epel[10][2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t
srcstride, int height, const int8_t *hf, const int8_t *vf, int width);
But it may need a lot of work.

> occurs if mx or my are 0 (happens when the filters are unused)
> because they lead to an array index of -1 in the old code.
> This happens in the checkasm-hevc_pel FATE-test.
>
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
> ---
> The loongarch and mips parts of this are untested. Luckily we have a
> loongarch patchwork runner...
>
>
Christophe Gisquet Feb. 11, 2024, 9:48 p.m. UTC | #3
Le dim. 11 févr. 2024 à 12:37, Nuo Mi <nuomi2021@gmail.com> a écrit :
> > -DECLARE_ALIGNED(16, const int8_t, ff_hevc_qpel_filters)[3][16] = {
> > +DECLARE_ALIGNED(16, const int8_t, ff_hevc_qpel_filters)[4][16] = {
> >
> Do you know why this is [4][16]? [4][8] should suffice.

Probably so that all coefficient banks are aligned. Another use for it
is you can directly use the address in some instruction instead of
using/wasting a reg for holding the data.
Nuo Mi Feb. 13, 2024, 2:51 a.m. UTC | #4
On Mon, Feb 12, 2024 at 5:48 AM Christophe Gisquet <
christophe.gisquet@gmail.com> wrote:

> Le dim. 11 févr. 2024 à 12:37, Nuo Mi <nuomi2021@gmail.com> a écrit :
> > > -DECLARE_ALIGNED(16, const int8_t, ff_hevc_qpel_filters)[3][16] = {
> > > +DECLARE_ALIGNED(16, const int8_t, ff_hevc_qpel_filters)[4][16] = {
> > >
> > Do you know why this is [4][16]? [4][8] should suffice.
>
> Probably so that all coefficient banks are aligned. Another use for it
> is you can directly use the address in some instruction instead of
> using/wasting a reg for holding the data.
>

Hi Christophe,
Thank you for the explanation.
However, epel (chroma) did not adhere to this. X86 doesn't require this
either since VVC didn't do it.
It's a bit odd that only some architectures, and only luma, need this.
I'll revisit this once I determine which architecture needs it and leave a
comment in the code accordingly.

>
> --
> Christophe Gisquet
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Nuo Mi Feb. 13, 2024, 2:54 a.m. UTC | #5
On Sun, Feb 11, 2024 at 4:21 PM Andreas Rheinhardt <
andreas.rheinhardt@outlook.com> wrote:

> Besides simplifying address computations (it saves 432B of .text
> in hevcdsp.o alone here) it also fixes undefined behaviour that
> occurs if mx or my are 0 (happens when the filters are unused)
> because they lead to an array index of -1 in the old code.
> This happens in the checkasm-hevc_pel FATE-test.
>
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
>
Thank you, Andreas,

Hi all,
If there are no objections, I'll merge it tomorrow.
James Almer Feb. 13, 2024, 11:26 p.m. UTC | #6
On 2/12/2024 11:54 PM, Nuo Mi wrote:
> On Sun, Feb 11, 2024 at 4:21 PM Andreas Rheinhardt <
> andreas.rheinhardt@outlook.com> wrote:
> 
>> Besides simplifying address computations (it saves 432B of .text
>> in hevcdsp.o alone here) it also fixes undefined behaviour that
>> occurs if mx or my are 0 (happens when the filters are unused)
>> because they lead to an array index of -1 in the old code.
>> This happens in the checkasm-hevc_pel FATE-test.
>>
>> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
>>
>> Thank you, Andreas,
> 
> Hi all,
> If there are no objections, I'll merge it tomorrow.

Just pushed it. Thanks Andreas for fixing this.
diff mbox series

Patch

diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
index 2ca551df1d..630fdc012e 100644
--- a/libavcodec/hevcdsp.c
+++ b/libavcodec/hevcdsp.c
@@ -91,7 +91,8 @@  static const int8_t transform[32][32] = {
       90, -90,  88, -85,  82, -78,  73, -67,  61, -54,  46, -38,  31, -22,  13,  -4 },
 };
 
-DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters)[7][4] = {
+DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters)[8][4] = {
+    {  0 },
     { -2, 58, 10, -2},
     { -4, 54, 16, -2},
     { -6, 46, 28, -4},
@@ -101,7 +102,8 @@  DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters)[7][4] = {
     { -2, 10, 58, -2},
 };
 
-DECLARE_ALIGNED(16, const int8_t, ff_hevc_qpel_filters)[3][16] = {
+DECLARE_ALIGNED(16, const int8_t, ff_hevc_qpel_filters)[4][16] = {
+    { 0 },
     { -1,  4,-10, 58, 17, -5,  1,  0, -1,  4,-10, 58, 17, -5,  1,  0},
     { -1,  4,-11, 40, 40,-11,  4, -1, -1,  4,-11, 40, 40,-11,  4, -1},
     {  0,  1, -5, 17, 58,-10,  4, -1,  0,  1, -5, 17, 58,-10,  4, -1}
diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
index 1b9c5bb6bc..a5933dcac4 100644
--- a/libavcodec/hevcdsp.h
+++ b/libavcodec/hevcdsp.h
@@ -126,8 +126,9 @@  typedef struct HEVCDSPContext {
 
 void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth);
 
-extern const int8_t ff_hevc_epel_filters[7][4];
-extern const int8_t ff_hevc_qpel_filters[3][16];
+/** ff_hevc_.pel_filters[0] are dummies to simplify array addressing */
+extern const int8_t ff_hevc_epel_filters[8][4];
+extern const int8_t ff_hevc_qpel_filters[4][16];
 
 void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth);
 void ff_hevc_dsp_init_arm(HEVCDSPContext *c, const int bit_depth);
diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c
index 9b48bdf08e..121c44c401 100644
--- a/libavcodec/hevcdsp_template.c
+++ b/libavcodec/hevcdsp_template.c
@@ -301,9 +301,9 @@  IDCT_DC(32)
 //
 ////////////////////////////////////////////////////////////////////////////////
 #define ff_hevc_pel_filters ff_hevc_qpel_filters
-#define DECL_HV_FILTER(f)                                  \
-    const uint8_t *hf = ff_hevc_ ## f ## _filters[mx - 1]; \
-    const uint8_t *vf = ff_hevc_ ## f ## _filters[my - 1];
+#define DECL_HV_FILTER(f)                              \
+    const uint8_t *hf = ff_hevc_ ## f ## _filters[mx]; \
+    const uint8_t *vf = ff_hevc_ ## f ## _filters[my];
 
 #define FW_PUT(p, f, t)                                                                                   \
 static void FUNC(put_hevc_## f)(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, int height,        \
@@ -421,7 +421,7 @@  static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, const
     pixel *dst          = (pixel *)_dst;
     ptrdiff_t dststride = _dststride / sizeof(pixel);
 
-    const int8_t *filter    = ff_hevc_qpel_filters[mx - 1];
+    const int8_t *filter    = ff_hevc_qpel_filters[mx];
 
     int shift = 14  + 1 - BIT_DEPTH;
 #if BIT_DEPTH < 14
@@ -449,7 +449,7 @@  static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride,
     pixel *dst          = (pixel *)_dst;
     ptrdiff_t dststride = _dststride / sizeof(pixel);
 
-    const int8_t *filter    = ff_hevc_qpel_filters[my - 1];
+    const int8_t *filter    = ff_hevc_qpel_filters[my];
 
     int shift = 14 + 1 - BIT_DEPTH;
 #if BIT_DEPTH < 14
@@ -487,7 +487,7 @@  static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride,
 #endif
 
     src   -= QPEL_EXTRA_BEFORE * srcstride;
-    filter = ff_hevc_qpel_filters[mx - 1];
+    filter = ff_hevc_qpel_filters[mx];
     for (y = 0; y < height + QPEL_EXTRA; y++) {
         for (x = 0; x < width; x++)
             tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
@@ -496,7 +496,7 @@  static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride,
     }
 
     tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
-    filter = ff_hevc_qpel_filters[my - 1];
+    filter = ff_hevc_qpel_filters[my];
 
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++)
@@ -518,7 +518,7 @@  static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
     pixel *dst          = (pixel *)_dst;
     ptrdiff_t dststride = _dststride / sizeof(pixel);
 
-    const int8_t *filter    = ff_hevc_qpel_filters[mx - 1];
+    const int8_t *filter    = ff_hevc_qpel_filters[mx];
 
     int shift = 14  + 1 - BIT_DEPTH;
     int log2Wd = denom + shift - 1;
@@ -546,7 +546,7 @@  static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
     pixel *dst          = (pixel *)_dst;
     ptrdiff_t dststride = _dststride / sizeof(pixel);
 
-    const int8_t *filter    = ff_hevc_qpel_filters[my - 1];
+    const int8_t *filter    = ff_hevc_qpel_filters[my];
 
     int shift = 14 + 1 - BIT_DEPTH;
     int log2Wd = denom + shift - 1;
@@ -580,7 +580,7 @@  static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
     int log2Wd = denom + shift - 1;
 
     src   -= QPEL_EXTRA_BEFORE * srcstride;
-    filter = ff_hevc_qpel_filters[mx - 1];
+    filter = ff_hevc_qpel_filters[mx];
     for (y = 0; y < height + QPEL_EXTRA; y++) {
         for (x = 0; x < width; x++)
             tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
@@ -589,7 +589,7 @@  static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
     }
 
     tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
-    filter = ff_hevc_qpel_filters[my - 1];
+    filter = ff_hevc_qpel_filters[my];
 
     ox0     = ox0 * (1 << (BIT_DEPTH - 8));
     ox1     = ox1 * (1 << (BIT_DEPTH - 8));
@@ -621,7 +621,7 @@  static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride,
     ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
     pixel *dst          = (pixel *)_dst;
     ptrdiff_t dststride = _dststride / sizeof(pixel);
-    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
+    const int8_t *filter = ff_hevc_epel_filters[mx];
     int shift = 14 + 1 - BIT_DEPTH;
 #if BIT_DEPTH < 14
     int offset = 1 << (shift - 1);
@@ -646,7 +646,7 @@  static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride,
     int x, y;
     const pixel *src = (const pixel *)_src;
     ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
-    const int8_t *filter = ff_hevc_epel_filters[my - 1];
+    const int8_t *filter = ff_hevc_epel_filters[my];
     pixel *dst          = (pixel *)_dst;
     ptrdiff_t dststride = _dststride / sizeof(pixel);
     int shift = 14 + 1 - BIT_DEPTH;
@@ -674,7 +674,7 @@  static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride,
     ptrdiff_t srcstride = _srcstride / sizeof(pixel);
     pixel *dst          = (pixel *)_dst;
     ptrdiff_t dststride = _dststride / sizeof(pixel);
-    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
+    const int8_t *filter = ff_hevc_epel_filters[mx];
     int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
     int16_t *tmp = tmp_array;
     int shift = 14 + 1 - BIT_DEPTH;
@@ -694,7 +694,7 @@  static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride,
     }
 
     tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
-    filter = ff_hevc_epel_filters[my - 1];
+    filter = ff_hevc_epel_filters[my];
 
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++)
@@ -715,7 +715,7 @@  static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
     ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
     pixel *dst          = (pixel *)_dst;
     ptrdiff_t dststride = _dststride / sizeof(pixel);
-    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
+    const int8_t *filter = ff_hevc_epel_filters[mx];
     int shift = 14 + 1 - BIT_DEPTH;
     int log2Wd = denom + shift - 1;
 
@@ -739,7 +739,7 @@  static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
     int x, y;
     const pixel *src = (const pixel *)_src;
     ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
-    const int8_t *filter = ff_hevc_epel_filters[my - 1];
+    const int8_t *filter = ff_hevc_epel_filters[my];
     pixel *dst          = (pixel *)_dst;
     ptrdiff_t dststride = _dststride / sizeof(pixel);
     int shift = 14 + 1 - BIT_DEPTH;
@@ -767,7 +767,7 @@  static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
     ptrdiff_t srcstride = _srcstride / sizeof(pixel);
     pixel *dst          = (pixel *)_dst;
     ptrdiff_t dststride = _dststride / sizeof(pixel);
-    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
+    const int8_t *filter = ff_hevc_epel_filters[mx];
     int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
     int16_t *tmp = tmp_array;
     int shift = 14 + 1 - BIT_DEPTH;
@@ -783,7 +783,7 @@  static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
     }
 
     tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
-    filter = ff_hevc_epel_filters[my - 1];
+    filter = ff_hevc_epel_filters[my];
 
     ox0     = ox0 * (1 << (BIT_DEPTH - 8));
     ox1     = ox1 * (1 << (BIT_DEPTH - 8));
diff --git a/libavcodec/loongarch/hevc_mc.S b/libavcodec/loongarch/hevc_mc.S
index a0e5938fbd..12d92e32e9 100644
--- a/libavcodec/loongarch/hevc_mc.S
+++ b/libavcodec/loongarch/hevc_mc.S
@@ -498,7 +498,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_v4_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0  //filter
@@ -570,7 +569,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_v6_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -699,7 +697,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_v8_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -778,7 +775,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_v8_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -845,7 +841,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_v16_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -940,7 +935,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_v16_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -955,7 +949,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_v12_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -969,7 +962,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_v12_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -984,7 +976,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_v24_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1005,7 +996,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_v24_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1027,7 +1017,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_v32_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1051,7 +1040,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_v32_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1076,7 +1064,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_v48_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1102,7 +1089,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_v48_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1129,7 +1115,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_v64_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1155,7 +1140,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_v64_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1275,7 +1259,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_h4_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1325,7 +1308,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_h4_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1364,7 +1346,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_h6_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1385,7 +1366,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_h6_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1408,7 +1388,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_h8_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1428,7 +1407,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_h8_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1450,7 +1428,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_h12_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1475,7 +1452,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_h12_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1495,7 +1471,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_h16_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1518,7 +1493,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_h16_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1537,7 +1511,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_h24_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1565,7 +1538,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_h24_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1590,7 +1562,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_h32_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1622,7 +1593,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_h32_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1644,7 +1614,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_h48_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1684,7 +1653,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_h48_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1709,7 +1677,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_h64_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1757,7 +1724,6 @@  endfunc
 function ff_hevc_put_hevc_qpel_uni_w_h64_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
@@ -1841,15 +1807,13 @@  endconst
 function ff_hevc_put_hevc_epel_uni_w_hv4_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  // mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
-    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx - 1];
+    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx];
     vreplvei.w     vr5,    vr5,     0
     ld.d           t0,     sp,      8  // my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
-    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my - 1];
+    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my];
     vsllwil.h.b    vr6,    vr6,     0
     vsllwil.w.h    vr6,    vr6,     0
     vreplvei.w     vr16,   vr6,     0
@@ -2068,15 +2032,13 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_hv6_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  // mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
-    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx - 1];
+    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx];
     vreplvei.w     vr5,    vr5,     0
     ld.d           t0,     sp,      8  // my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
-    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my - 1];
+    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my];
     vsllwil.h.b    vr6,    vr6,     0
     vsllwil.w.h    vr6,    vr6,     0
     vreplvei.w     vr16,   vr6,     0
@@ -2094,15 +2056,13 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_hv6_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  // mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
-    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx - 1];
+    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx];
     xvreplve0.w    xr5,    xr5
     ld.d           t0,     sp,      8  // my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
-    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my - 1];
+    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my];
     vsllwil.h.b    vr6,    vr6,     0
     vsllwil.w.h    vr6,    vr6,     0
     xvreplve0.q    xr6,    xr6
@@ -2120,15 +2080,13 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_hv8_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  // mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
-    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx - 1];
+    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx];
     vreplvei.w     vr5,    vr5,     0
     ld.d           t0,     sp,      8  // my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
-    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my - 1];
+    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my];
     vsllwil.h.b    vr6,    vr6,     0
     vsllwil.w.h    vr6,    vr6,     0
     vreplvei.w     vr16,   vr6,     0
@@ -2146,15 +2104,13 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_hv8_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  // mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
-    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx - 1];
+    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx];
     xvreplve0.w    xr5,    xr5
     ld.d           t0,     sp,      8  // my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
-    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my - 1];
+    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my];
     vsllwil.h.b    vr6,    vr6,     0
     vsllwil.w.h    vr6,    vr6,     0
     xvreplve0.q    xr6,    xr6
@@ -2172,15 +2128,13 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_hv12_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  // mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
-    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx - 1];
+    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx];
     vreplvei.w     vr5,    vr5,     0
     ld.d           t0,     sp,      8  // my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
-    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my - 1];
+    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my];
     vsllwil.h.b    vr6,    vr6,     0
     vsllwil.w.h    vr6,    vr6,     0
     vreplvei.w     vr16,   vr6,     0
@@ -2205,15 +2159,13 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_hv12_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  // mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
-    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx - 1];
+    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx];
     xvreplve0.w    xr5,    xr5
     ld.d           t0,     sp,      8  // my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
-    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my - 1];
+    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my];
     vsllwil.h.b    vr6,    vr6,     0
     vsllwil.w.h    vr6,    vr6,     0
     xvreplve0.q    xr6,    xr6
@@ -2231,15 +2183,13 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_hv16_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  // mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
-    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx - 1];
+    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx];
     vreplvei.w     vr5,    vr5,     0
     ld.d           t0,     sp,      8  // my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
-    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my - 1];
+    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my];
     vsllwil.h.b    vr6,    vr6,     0
     vsllwil.w.h    vr6,    vr6,     0
     vreplvei.w     vr16,   vr6,     0
@@ -2267,15 +2217,13 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_hv16_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  // mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
-    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx - 1];
+    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx];
     xvreplve0.w    xr5,    xr5
     ld.d           t0,     sp,      8  // my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
-    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my - 1];
+    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my];
     vsllwil.h.b    vr6,    vr6,     0
     vsllwil.w.h    vr6,    vr6,     0
     xvreplve0.q    xr6,    xr6
@@ -2293,15 +2241,13 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_hv24_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  // mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
-    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx - 1];
+    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx];
     vreplvei.w     vr5,    vr5,     0
     ld.d           t0,     sp,      8  // my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
-    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my - 1];
+    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my];
     vsllwil.h.b    vr6,    vr6,     0
     vsllwil.w.h    vr6,    vr6,     0
     vreplvei.w     vr16,   vr6,     0
@@ -2331,15 +2277,13 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_hv24_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  // mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
-    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx - 1];
+    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx];
     xvreplve0.w    xr5,    xr5
     ld.d           t0,     sp,      8  // my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
-    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my - 1];
+    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my];
     vsllwil.h.b    vr6,    vr6,     0
     vsllwil.w.h    vr6,    vr6,     0
     xvreplve0.q    xr6,    xr6
@@ -2364,15 +2308,13 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_hv32_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  // mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
-    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx - 1];
+    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx];
     vreplvei.w     vr5,    vr5,     0
     ld.d           t0,     sp,      8  // my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
-    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my - 1];
+    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my];
     vsllwil.h.b    vr6,    vr6,     0
     vsllwil.w.h    vr6,    vr6,     0
     vreplvei.w     vr16,   vr6,     0
@@ -2402,15 +2344,13 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_hv32_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  // mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
-    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx - 1];
+    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx];
     xvreplve0.w    xr5,    xr5
     ld.d           t0,     sp,      8  // my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
-    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my - 1];
+    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my];
     vsllwil.h.b    vr6,    vr6,     0
     vsllwil.w.h    vr6,    vr6,     0
     xvreplve0.q    xr6,    xr6
@@ -2440,15 +2380,13 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_hv48_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  // mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
-    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx - 1];
+    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx];
     vreplvei.w     vr5,    vr5,     0
     ld.d           t0,     sp,      8  // my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
-    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my - 1];
+    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my];
     vsllwil.h.b    vr6,    vr6,     0
     vsllwil.w.h    vr6,    vr6,     0
     vreplvei.w     vr16,   vr6,     0
@@ -2478,15 +2416,13 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_hv48_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  // mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
-    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx - 1];
+    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx];
     xvreplve0.w    xr5,    xr5
     ld.d           t0,     sp,      8  // my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
-    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my - 1];
+    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my];
     vsllwil.h.b    vr6,    vr6,     0
     vsllwil.w.h    vr6,    vr6,     0
     xvreplve0.q    xr6,    xr6
@@ -2516,15 +2452,13 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_hv64_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  // mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
-    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx - 1];
+    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx];
     vreplvei.w     vr5,    vr5,     0
     ld.d           t0,     sp,      8  // my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
-    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my - 1];
+    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my];
     vsllwil.h.b    vr6,    vr6,     0
     vsllwil.w.h    vr6,    vr6,     0
     vreplvei.w     vr16,   vr6,     0
@@ -2554,15 +2488,13 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_hv64_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  // mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
-    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx - 1];
+    vldx           vr5,    t1,      t0 // ff_hevc_epel_filters[mx];
     xvreplve0.w    xr5,    xr5
     ld.d           t0,     sp,      8  // my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
-    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my - 1];
+    vldx           vr6,    t1,      t0 // ff_hevc_epel_filters[my];
     vsllwil.h.b    vr6,    vr6,     0
     vsllwil.w.h    vr6,    vr6,     0
     xvreplve0.q    xr6,    xr6
@@ -2596,8 +2528,7 @@  endfunc
  *                                int width)
  */
 function ff_hevc_put_hevc_uni_qpel_h4_8_lsx
-    addi.d         t0,     a5,      -1
-    slli.w         t0,     t0,      4
+    slli.w         t0,     a5,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr5,    t1,      t0 //filter
     addi.d         a2,     a2,      -3 //src -= 3
@@ -2663,8 +2594,7 @@  endfunc
 .endm
 
 function ff_hevc_put_hevc_uni_qpel_h6_8_lsx
-    addi.d         t0,     a5,      -1
-    slli.w         t0,     t0,      4
+    slli.w         t0,     a5,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr0,    t1,      t0 //filter abcdefgh
     vreplvei.h     vr1,    vr0,     1  //cd...
@@ -2692,8 +2622,7 @@  function ff_hevc_put_hevc_uni_qpel_h6_8_lsx
 endfunc
 
 function ff_hevc_put_hevc_uni_qpel_h8_8_lsx
-    addi.d         t0,     a5,      -1
-    slli.w         t0,     t0,      4
+    slli.w         t0,     a5,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr0,    t1,      t0 //filter abcdefgh
     vreplvei.h     vr1,    vr0,     1  //cd...
@@ -2720,8 +2649,7 @@  function ff_hevc_put_hevc_uni_qpel_h8_8_lsx
 endfunc
 
 function ff_hevc_put_hevc_uni_qpel_h12_8_lsx
-    addi.d         t0,     a5,      -1
-    slli.w         t0,     t0,      4
+    slli.w         t0,     a5,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr0,    t1,      t0 //filter abcdefgh
     vreplvei.h     vr1,    vr0,     1  //cd...
@@ -2751,8 +2679,7 @@  function ff_hevc_put_hevc_uni_qpel_h12_8_lsx
 endfunc
 
 function ff_hevc_put_hevc_uni_qpel_h12_8_lasx
-    addi.d         t0,     a5,      -1
-    slli.w         t0,     t0,      4
+    slli.w         t0,     a5,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr0,    t1,      t0 //filter abcdefgh
     xvreplve0.q    xr0,    xr0
@@ -2784,8 +2711,7 @@  function ff_hevc_put_hevc_uni_qpel_h12_8_lasx
 endfunc
 
 function ff_hevc_put_hevc_uni_qpel_h16_8_lsx
-    addi.d         t0,     a5,      -1
-    slli.w         t0,     t0,      4
+    slli.w         t0,     a5,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr0,    t1,      t0 //filter abcdefgh
     vreplvei.h     vr1,    vr0,     1  //cd...
@@ -2814,8 +2740,7 @@  function ff_hevc_put_hevc_uni_qpel_h16_8_lsx
 endfunc
 
 function ff_hevc_put_hevc_uni_qpel_h16_8_lasx
-    addi.d         t0,     a5,      -1
-    slli.w         t0,     t0,      4
+    slli.w         t0,     a5,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr0,    t1,      t0 //filter abcdefgh
     xvreplve0.q    xr0,    xr0
@@ -2846,8 +2771,7 @@  function ff_hevc_put_hevc_uni_qpel_h16_8_lasx
 endfunc
 
 function ff_hevc_put_hevc_uni_qpel_h24_8_lsx
-    addi.d         t0,     a5,      -1
-    slli.w         t0,     t0,      4
+    slli.w         t0,     a5,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr0,    t1,      t0 //filter abcdefgh
     vreplvei.h     vr1,    vr0,     1  //cd...
@@ -2880,8 +2804,7 @@  function ff_hevc_put_hevc_uni_qpel_h24_8_lsx
 endfunc
 
 function ff_hevc_put_hevc_uni_qpel_h24_8_lasx
-    addi.d         t0,     a5,      -1
-    slli.w         t0,     t0,      4
+    slli.w         t0,     a5,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr0,    t1,      t0 //filter abcdefgh
     xvreplve0.q    xr0,    xr0
@@ -2916,8 +2839,7 @@  function ff_hevc_put_hevc_uni_qpel_h24_8_lasx
 endfunc
 
 function ff_hevc_put_hevc_uni_qpel_h32_8_lsx
-    addi.d         t0,     a5,      -1
-    slli.w         t0,     t0,      4
+    slli.w         t0,     a5,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr0,    t1,      t0 //filter abcdefgh
     vreplvei.h     vr1,    vr0,     1  //cd...
@@ -2952,8 +2874,7 @@  function ff_hevc_put_hevc_uni_qpel_h32_8_lsx
 endfunc
 
 function ff_hevc_put_hevc_uni_qpel_h32_8_lasx
-    addi.d         t0,     a5,      -1
-    slli.w         t0,     t0,      4
+    slli.w         t0,     a5,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr0,    t1,      t0 //filter abcdefgh
     xvreplve0.q    xr0,    xr0
@@ -2987,8 +2908,7 @@  function ff_hevc_put_hevc_uni_qpel_h32_8_lasx
 endfunc
 
 function ff_hevc_put_hevc_uni_qpel_h48_8_lsx
-    addi.d         t0,     a5,      -1
-    slli.w         t0,     t0,      4
+    slli.w         t0,     a5,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr0,    t1,      t0 //filter abcdefgh
     vreplvei.h     vr1,    vr0,     1  //cd...
@@ -3029,8 +2949,7 @@  function ff_hevc_put_hevc_uni_qpel_h48_8_lsx
 endfunc
 
 function ff_hevc_put_hevc_uni_qpel_h48_8_lasx
-    addi.d         t0,     a5,      -1
-    slli.w         t0,     t0,      4
+    slli.w         t0,     a5,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr0,    t1,      t0 //filter abcdefgh
     xvreplve0.q    xr0,    xr0
@@ -3070,8 +2989,7 @@  function ff_hevc_put_hevc_uni_qpel_h48_8_lasx
 endfunc
 
 function ff_hevc_put_hevc_uni_qpel_h64_8_lasx
-    addi.d         t0,     a5,      -1
-    slli.w         t0,     t0,      4
+    slli.w         t0,     a5,      4
     la.local       t1,     ff_hevc_qpel_filters
     vldx           vr0,    t1,      t0 //filter abcdefgh
     xvreplve0.q    xr0,    xr0
@@ -3122,7 +3040,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_v4_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3260,7 +3177,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_v6_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3275,7 +3191,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_v6_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3289,7 +3204,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_v8_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3304,7 +3218,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_v8_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3394,7 +3307,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_v12_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3409,7 +3321,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_v12_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3425,7 +3336,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_v16_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3440,7 +3350,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_v16_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3456,7 +3365,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_v24_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3478,7 +3386,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_v24_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3503,7 +3410,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_v32_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3525,7 +3431,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_v32_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3548,7 +3453,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_v48_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3574,7 +3478,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_v48_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3601,7 +3504,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_v64_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3631,7 +3533,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_v64_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      8  //my
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3668,7 +3569,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_h4_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3699,7 +3599,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_h6_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3730,7 +3629,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_h6_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3759,7 +3657,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_h8_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3789,7 +3686,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_h8_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3853,7 +3749,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_h12_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3889,7 +3784,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_h12_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3910,7 +3804,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_h16_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3934,7 +3827,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_h16_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3955,7 +3847,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_h24_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -3986,7 +3877,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_h24_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -4015,7 +3905,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_h32_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -4040,7 +3929,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_h32_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -4062,7 +3950,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_h48_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -4088,7 +3975,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_h48_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -4111,7 +3997,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_h64_8_lsx
     LOAD_VAR 128
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -4138,7 +4023,6 @@  endfunc
 function ff_hevc_put_hevc_epel_uni_w_h64_8_lasx
     LOAD_VAR 256
     ld.d           t0,     sp,      0  //mx
-    addi.d         t0,     t0,      -1
     slli.w         t0,     t0,      2
     la.local       t1,     ff_hevc_epel_filters
     vldx           vr0,    t1,      t0 //filter
@@ -4166,7 +4050,6 @@  endfunc
  *                               intptr_t my, int width)
  */
 function ff_hevc_put_hevc_bi_epel_h4_8_lsx
-   addi.d          a6,     a6,     -1
    slli.w          a6,     a6,      2
    la.local        t0,     ff_hevc_epel_filters
    vldx            vr0,    t0,      a6 // filter
@@ -4209,7 +4092,6 @@  endfunc
 .endm
 
 function ff_hevc_put_hevc_bi_epel_h6_8_lsx
-   addi.d          a6,     a6,     -1
    slli.w          a6,     a6,      2
    la.local        t0,     ff_hevc_epel_filters
    vldx            vr0,    t0,      a6 // filter
@@ -4234,7 +4116,6 @@  function ff_hevc_put_hevc_bi_epel_h6_8_lsx
 endfunc
 
 function ff_hevc_put_hevc_bi_epel_h8_8_lsx
-   addi.d          a6,     a6,     -1
    slli.w          a6,     a6,      2
    la.local        t0,     ff_hevc_epel_filters
    vldx            vr0,    t0,      a6 // filter
@@ -4258,7 +4139,6 @@  function ff_hevc_put_hevc_bi_epel_h8_8_lsx
 endfunc
 
 function ff_hevc_put_hevc_bi_epel_h12_8_lsx
-   addi.d          a6,     a6,     -1
    slli.w          a6,     a6,      2
    la.local        t0,     ff_hevc_epel_filters
    vldx            vr0,    t0,      a6 // filter
@@ -4286,7 +4166,6 @@  function ff_hevc_put_hevc_bi_epel_h12_8_lsx
 endfunc
 
 function ff_hevc_put_hevc_bi_epel_h12_8_lasx
-   addi.d          a6,     a6,     -1
    slli.w          a6,     a6,      2
    la.local        t0,     ff_hevc_epel_filters
    vldx            vr0,    t0,      a6 // filter
@@ -4314,7 +4193,6 @@  function ff_hevc_put_hevc_bi_epel_h12_8_lasx
 endfunc
 
 function ff_hevc_put_hevc_bi_epel_h16_8_lsx
-   addi.d          a6,     a6,     -1
    slli.w          a6,     a6,      2
    la.local        t0,     ff_hevc_epel_filters
    vldx            vr0,    t0,      a6 // filter
@@ -4341,7 +4219,6 @@  function ff_hevc_put_hevc_bi_epel_h16_8_lsx
 endfunc
 
 function ff_hevc_put_hevc_bi_epel_h16_8_lasx
-   addi.d          a6,     a6,     -1
    slli.w          a6,     a6,      2
    la.local        t0,     ff_hevc_epel_filters
    vldx            vr0,    t0,      a6 // filter
@@ -4368,7 +4245,6 @@  function ff_hevc_put_hevc_bi_epel_h16_8_lasx
 endfunc
 
 function ff_hevc_put_hevc_bi_epel_h32_8_lasx
-   addi.d          a6,     a6,     -1
    slli.w          a6,     a6,      2
    la.local        t0,     ff_hevc_epel_filters
    vldx            vr0,    t0,      a6 // filter
@@ -4400,7 +4276,6 @@  function ff_hevc_put_hevc_bi_epel_h32_8_lasx
 endfunc
 
 function ff_hevc_put_hevc_bi_epel_h48_8_lsx
-   addi.d          a6,     a6,     -1
    slli.w          a6,     a6,      2
    la.local        t0,     ff_hevc_epel_filters
    vldx            vr0,    t0,      a6// filter
@@ -4443,7 +4318,6 @@  function ff_hevc_put_hevc_bi_epel_h48_8_lsx
 endfunc
 
 function ff_hevc_put_hevc_bi_epel_h48_8_lasx
-   addi.d          a6,     a6,     -1
    slli.w          a6,     a6,      2
    la.local        t0,     ff_hevc_epel_filters
    vldx            vr0,    t0,      a6 // filter
@@ -4481,7 +4355,6 @@  function ff_hevc_put_hevc_bi_epel_h48_8_lasx
 endfunc
 
 function ff_hevc_put_hevc_bi_epel_h64_8_lsx
-   addi.d          a6,     a6,     -1
    slli.w          a6,     a6,      2
    la.local        t0,     ff_hevc_epel_filters
    vldx            vr0,    t0,      a6// filter
@@ -4531,7 +4404,6 @@  function ff_hevc_put_hevc_bi_epel_h64_8_lsx
 endfunc
 
 function ff_hevc_put_hevc_bi_epel_h64_8_lasx
-   addi.d          a6,     a6,     -1
    slli.w          a6,     a6,      2
    la.local        t0,     ff_hevc_epel_filters
    vldx            vr0,    t0,      a6 // filter
diff --git a/libavcodec/loongarch/hevc_mc_bi_lsx.c b/libavcodec/loongarch/hevc_mc_bi_lsx.c
index 48441c107b..d7ddd1c246 100644
--- a/libavcodec/loongarch/hevc_mc_bi_lsx.c
+++ b/libavcodec/loongarch/hevc_mc_bi_lsx.c
@@ -2224,7 +2224,7 @@  void ff_hevc_put_hevc_bi_##PEL##_##DIR##WIDTH##_8_lsx(uint8_t *dst,          \
                                                       intptr_t my,           \
                                                       int width)             \
 {                                                                            \
-    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR - 1];            \
+    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR];                \
                                                                              \
     hevc_##DIR1##_##TAP##t_##WIDTH##w_lsx(src, src_stride, src_16bit,        \
                                           MAX_PB_SIZE, dst, dst_stride,      \
@@ -2265,8 +2265,8 @@  void ff_hevc_put_hevc_bi_##PEL##_hv##WIDTH##_8_lsx(uint8_t *dst,          \
                                                    intptr_t my,           \
                                                    int width)             \
 {                                                                         \
-    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];             \
-    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];             \
+    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx];                 \
+    const int8_t *filter_y = ff_hevc_##PEL##_filters[my];                 \
                                                                           \
     hevc_hv_##TAP##t_##WIDTH##w_lsx(src, src_stride, src_16bit,           \
                                     MAX_PB_SIZE, dst, dst_stride,         \
diff --git a/libavcodec/loongarch/hevc_mc_uni_lsx.c b/libavcodec/loongarch/hevc_mc_uni_lsx.c
index 5437dce0e0..6bdc27a824 100644
--- a/libavcodec/loongarch/hevc_mc_uni_lsx.c
+++ b/libavcodec/loongarch/hevc_mc_uni_lsx.c
@@ -1373,7 +1373,7 @@  void ff_hevc_put_hevc_uni_##PEL##_##DIR##WIDTH##_8_lsx(uint8_t *dst,           \
                                                        intptr_t my,            \
                                                        int width)              \
 {                                                                              \
-    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR - 1];              \
+    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR];                  \
                                                                                \
     common_##DIR1##_##TAP##t_##WIDTH##w_lsx(src, src_stride, dst, dst_stride,  \
                                             filter, height);                   \
@@ -1401,8 +1401,8 @@  void ff_hevc_put_hevc_uni_##PEL##_hv##WIDTH##_8_lsx(uint8_t *dst,          \
                                                     intptr_t my,           \
                                                     int width)             \
 {                                                                          \
-    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];              \
-    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];              \
+    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx];                  \
+    const int8_t *filter_y = ff_hevc_##PEL##_filters[my];                  \
                                                                            \
     hevc_hv_##TAP##t_##WIDTH##w_lsx(src, src_stride, dst, dst_stride,  \
                                     filter_x, filter_y, height);       \
diff --git a/libavcodec/loongarch/hevc_mc_uniw_lsx.c b/libavcodec/loongarch/hevc_mc_uniw_lsx.c
index c4e79225d3..0ced8d4920 100644
--- a/libavcodec/loongarch/hevc_mc_uniw_lsx.c
+++ b/libavcodec/loongarch/hevc_mc_uniw_lsx.c
@@ -280,8 +280,8 @@  void ff_hevc_put_hevc_uni_w_##PEL##_hv##WIDTH##_8_lsx(uint8_t *dst,            \
                                                       intptr_t my,             \
                                                       int width)               \
 {                                                                              \
-    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];                  \
-    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];                  \
+    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx];                      \
+    const int8_t *filter_y = ff_hevc_##PEL##_filters[my];                      \
     int shift = denom + 14 - 8;                                                \
                                                                                \
     hevc_hv_##TAP##t_##WIDTH##w_lsx(src, src_stride, dst, dst_stride, filter_x,\
diff --git a/libavcodec/loongarch/hevcdsp_lsx.c b/libavcodec/loongarch/hevcdsp_lsx.c
index 85843dd111..5747925525 100644
--- a/libavcodec/loongarch/hevcdsp_lsx.c
+++ b/libavcodec/loongarch/hevcdsp_lsx.c
@@ -3233,7 +3233,7 @@  void ff_hevc_put_hevc_##PEL##_##DIR##WIDTH##_8_lsx(int16_t *dst,          \
                                                    intptr_t my,           \
                                                    int width)             \
 {                                                                         \
-    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR - 1];         \
+    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR];             \
                                                                           \
     hevc_##DIR1##_##TAP##t_##WIDTH##w_lsx(src, src_stride, dst,           \
                                           MAX_PB_SIZE, filter, height);   \
@@ -3274,8 +3274,8 @@  void ff_hevc_put_hevc_##PEL##_hv##WIDTH##_8_lsx(int16_t *dst,           \
                                                 intptr_t my,            \
                                                 int width)              \
 {                                                                       \
-    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];           \
-    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];           \
+    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx];               \
+    const int8_t *filter_y = ff_hevc_##PEL##_filters[my];               \
                                                                         \
     hevc_hv_##TAP##t_##WIDTH##w_lsx(src, src_stride, dst, MAX_PB_SIZE,  \
                                           filter_x, filter_y, height);  \
diff --git a/libavcodec/mips/hevc_mc_bi_msa.c b/libavcodec/mips/hevc_mc_bi_msa.c
index dac6a32ab4..309ba5025d 100644
--- a/libavcodec/mips/hevc_mc_bi_msa.c
+++ b/libavcodec/mips/hevc_mc_bi_msa.c
@@ -5096,7 +5096,7 @@  void ff_hevc_put_hevc_bi_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,          \
                                                       intptr_t my,           \
                                                       int width)             \
 {                                                                            \
-    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR - 1];            \
+    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR];                \
                                                                              \
     hevc_##DIR1##_bi_##TAP##t_##WIDTH##w_msa(src, src_stride, src_16bit,     \
                                              MAX_PB_SIZE, dst, dst_stride,   \
@@ -5150,8 +5150,8 @@  void ff_hevc_put_hevc_bi_##PEL##_hv##WIDTH##_8_msa(uint8_t *dst,          \
                                                    intptr_t my,           \
                                                    int width)             \
 {                                                                         \
-    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];             \
-    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];             \
+    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx];                 \
+    const int8_t *filter_y = ff_hevc_##PEL##_filters[my];                 \
                                                                           \
     hevc_hv_bi_##TAP##t_##WIDTH##w_msa(src, src_stride, src_16bit,        \
                                        MAX_PB_SIZE, dst, dst_stride,      \
diff --git a/libavcodec/mips/hevc_mc_biw_msa.c b/libavcodec/mips/hevc_mc_biw_msa.c
index 260ec84496..34be61c0dc 100644
--- a/libavcodec/mips/hevc_mc_biw_msa.c
+++ b/libavcodec/mips/hevc_mc_biw_msa.c
@@ -6060,7 +6060,7 @@  void ff_hevc_put_hevc_bi_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,         \
                                                         intptr_t my,          \
                                                         int width)            \
 {                                                                             \
-    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR - 1];             \
+    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR];                 \
     int log2Wd = denom + 14 - 8;                                              \
                                                                               \
     hevc_##DIR1##_biwgt_##TAP##t_##WIDTH##w_msa(src, src_stride, src_16bit,   \
@@ -6122,8 +6122,8 @@  void ff_hevc_put_hevc_bi_w_##PEL##_hv##WIDTH##_8_msa(uint8_t *dst,          \
                                                      intptr_t my,           \
                                                      int width)             \
 {                                                                           \
-    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];               \
-    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];               \
+    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx];                   \
+    const int8_t *filter_y = ff_hevc_##PEL##_filters[my];                   \
     int log2Wd = denom + 14 - 8;                                            \
                                                                             \
     hevc_hv_biwgt_##TAP##t_##WIDTH##w_msa(src, src_stride, src_16bit,       \
diff --git a/libavcodec/mips/hevc_mc_uni_msa.c b/libavcodec/mips/hevc_mc_uni_msa.c
index e221df7d53..602b7c614a 100644
--- a/libavcodec/mips/hevc_mc_uni_msa.c
+++ b/libavcodec/mips/hevc_mc_uni_msa.c
@@ -4155,7 +4155,7 @@  void ff_hevc_put_hevc_uni_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,           \
                                                        intptr_t my,            \
                                                        int width)              \
 {                                                                              \
-    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR - 1];              \
+    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR];                  \
                                                                                \
     common_##DIR1##_##TAP##t_##WIDTH##w_msa(src, src_stride, dst, dst_stride,  \
                                             filter, height);                   \
@@ -4207,8 +4207,8 @@  void ff_hevc_put_hevc_uni_##PEL##_hv##WIDTH##_8_msa(uint8_t *dst,          \
                                                     intptr_t my,           \
                                                     int width)             \
 {                                                                          \
-    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];              \
-    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];              \
+    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx];                  \
+    const int8_t *filter_y = ff_hevc_##PEL##_filters[my];                  \
                                                                            \
     hevc_hv_uni_##TAP##t_##WIDTH##w_msa(src, src_stride, dst, dst_stride,  \
                                         filter_x, filter_y, height);       \
diff --git a/libavcodec/mips/hevc_mc_uniw_msa.c b/libavcodec/mips/hevc_mc_uniw_msa.c
index caf40c34da..502a502037 100644
--- a/libavcodec/mips/hevc_mc_uniw_msa.c
+++ b/libavcodec/mips/hevc_mc_uniw_msa.c
@@ -6263,7 +6263,7 @@  void ff_hevc_put_hevc_uni_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,        \
                                                          intptr_t my,         \
                                                          int width)           \
 {                                                                             \
-    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR - 1];             \
+    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR];                 \
     int shift = denom + 14 - 8;                                               \
                                                                               \
     hevc_##DIR1##_uniwgt_##TAP##t_##WIDTH##w_msa(src, src_stride, dst,        \
@@ -6320,8 +6320,8 @@  void ff_hevc_put_hevc_uni_w_##PEL##_hv##WIDTH##_8_msa(uint8_t *dst,           \
                                                       intptr_t my,            \
                                                       int width)              \
 {                                                                             \
-    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];                 \
-    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];                 \
+    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx];                     \
+    const int8_t *filter_y = ff_hevc_##PEL##_filters[my];                     \
     int shift = denom + 14 - 8;                                               \
                                                                               \
     hevc_hv_uniwgt_##TAP##t_##WIDTH##w_msa(src, src_stride, dst, dst_stride,  \
diff --git a/libavcodec/mips/hevcdsp_mmi.c b/libavcodec/mips/hevcdsp_mmi.c
index 1da56d3d87..7ece7b9be0 100644
--- a/libavcodec/mips/hevcdsp_mmi.c
+++ b/libavcodec/mips/hevcdsp_mmi.c
@@ -34,7 +34,7 @@  void ff_hevc_put_hevc_qpel_h##w##_8_mmi(int16_t *dst, const uint8_t *_src, \
     ptrdiff_t srcstride = _srcstride / sizeof(pixel);                    \
     double ftmp[15];                                                     \
     uint64_t rtmp[1];                                                    \
-    const int8_t *filter = ff_hevc_qpel_filters[mx - 1];                 \
+    const int8_t *filter = ff_hevc_qpel_filters[mx];                     \
     DECLARE_VAR_ALL64;                                                   \
                                                                          \
     x = x_step;                                                          \
@@ -134,7 +134,7 @@  void ff_hevc_put_hevc_qpel_hv##w##_8_mmi(int16_t *dst, const uint8_t *_src,\
     DECLARE_VAR_ALL64;                                                   \
                                                                          \
     src   -= (QPEL_EXTRA_BEFORE * srcstride + 3);                        \
-    filter = ff_hevc_qpel_filters[mx - 1];                               \
+    filter = ff_hevc_qpel_filters[mx];                                   \
     x = x_step;                                                          \
     y = height + QPEL_EXTRA;                                             \
     __asm__ volatile(                                                    \
@@ -206,7 +206,7 @@  void ff_hevc_put_hevc_qpel_hv##w##_8_mmi(int16_t *dst, const uint8_t *_src,\
     );                                                                   \
                                                                          \
     tmp    = tmp_array + QPEL_EXTRA_BEFORE * 4 -12;                      \
-    filter = ff_hevc_qpel_filters[my - 1];                               \
+    filter = ff_hevc_qpel_filters[my];                                   \
     x = x_step;                                                          \
     y = height;                                                          \
     __asm__ volatile(                                                    \
@@ -314,7 +314,7 @@  void ff_hevc_put_hevc_qpel_bi_h##w##_8_mmi(uint8_t *_dst,               \
     ptrdiff_t     srcstride = _srcstride / sizeof(pixel);               \
     pixel *dst          = (pixel *)_dst;                                \
     ptrdiff_t dststride = _dststride / sizeof(pixel);                   \
-    const int8_t *filter    = ff_hevc_qpel_filters[mx - 1];             \
+    const int8_t *filter    = ff_hevc_qpel_filters[mx];                 \
     double ftmp[20];                                                    \
     uint64_t rtmp[1];                                                   \
     union av_intfloat64 shift;                                          \
@@ -458,7 +458,7 @@  void ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
     offset.i = 64;                                                      \
                                                                         \
     src   -= (QPEL_EXTRA_BEFORE * srcstride + 3);                       \
-    filter = ff_hevc_qpel_filters[mx - 1];                              \
+    filter = ff_hevc_qpel_filters[mx];                                  \
     x = width >> 2;                                                     \
     y = height + QPEL_EXTRA;                                            \
     __asm__ volatile(                                                   \
@@ -530,7 +530,7 @@  void ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
     );                                                                  \
                                                                         \
     tmp    = tmp_array;                                                 \
-    filter = ff_hevc_qpel_filters[my - 1];                              \
+    filter = ff_hevc_qpel_filters[my];                                  \
     x = width >> 2;                                                     \
     y = height;                                                         \
     __asm__ volatile(                                                   \
@@ -665,7 +665,7 @@  void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
     ptrdiff_t srcstride = _srcstride / sizeof(pixel);                   \
     pixel *dst          = (pixel *)_dst;                                \
     ptrdiff_t dststride = _dststride / sizeof(pixel);                   \
-    const int8_t *filter = ff_hevc_epel_filters[mx - 1];                \
+    const int8_t *filter = ff_hevc_epel_filters[mx];                    \
     int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];        \
     int16_t *tmp = tmp_array;                                           \
     double  ftmp[12];                                                   \
@@ -735,7 +735,7 @@  void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
     );                                                                  \
                                                                         \
     tmp      = tmp_array;                                               \
-    filter = ff_hevc_epel_filters[my - 1];                              \
+    filter = ff_hevc_epel_filters[my];                                  \
     x = width >> 2;                                                     \
     y = height;                                                         \
     __asm__ volatile(                                                   \
@@ -969,7 +969,7 @@  void ff_hevc_put_hevc_qpel_uni_hv##w##_8_mmi(uint8_t *_dst,             \
     offset.i = 32;                                                      \
                                                                         \
     src   -= (QPEL_EXTRA_BEFORE * srcstride + 3);                       \
-    filter = ff_hevc_qpel_filters[mx - 1];                              \
+    filter = ff_hevc_qpel_filters[mx];                                  \
     x = width >> 2;                                                     \
     y = height + QPEL_EXTRA;                                            \
     __asm__ volatile(                                                   \
@@ -1041,7 +1041,7 @@  void ff_hevc_put_hevc_qpel_uni_hv##w##_8_mmi(uint8_t *_dst,             \
     );                                                                  \
                                                                         \
     tmp    = tmp_array;                                                 \
-    filter = ff_hevc_qpel_filters[my - 1];                              \
+    filter = ff_hevc_qpel_filters[my];                                  \
     x = width >> 2;                                                     \
     y = height;                                                         \
     __asm__ volatile(                                                   \
diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c
index 9c12029c1f..2b8d2d70ad 100644
--- a/libavcodec/mips/hevcdsp_msa.c
+++ b/libavcodec/mips/hevcdsp_msa.c
@@ -4322,7 +4322,7 @@  void ff_hevc_put_hevc_##PEL##_##DIR##WIDTH##_8_msa(int16_t *dst,          \
                                                    intptr_t my,           \
                                                    int width)             \
 {                                                                         \
-    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR - 1];         \
+    const int8_t *filter = ff_hevc_##PEL##_filters[FILT_DIR];             \
                                                                           \
     hevc_##DIR1##_##TAP##t_##WIDTH##w_msa(src, src_stride, dst,           \
                                           MAX_PB_SIZE, filter, height);   \
@@ -4373,8 +4373,8 @@  void ff_hevc_put_hevc_##PEL##_hv##WIDTH##_8_msa(int16_t *dst,           \
                                                 intptr_t my,            \
                                                 int width)              \
 {                                                                       \
-    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx - 1];           \
-    const int8_t *filter_y = ff_hevc_##PEL##_filters[my - 1];           \
+    const int8_t *filter_x = ff_hevc_##PEL##_filters[mx];               \
+    const int8_t *filter_y = ff_hevc_##PEL##_filters[my];               \
                                                                         \
     hevc_hv_##TAP##t_##WIDTH##w_msa(src, src_stride, dst, MAX_PB_SIZE,  \
                                           filter_x, filter_y, height);  \
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index 31e81eb11f..3d4e41754d 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -88,8 +88,8 @@  IDCT_FUNCS(avx)
 
 #define ff_hevc_pel_filters ff_hevc_qpel_filters
 #define DECL_HV_FILTER(f)                                  \
-    const uint8_t *hf = ff_hevc_ ## f ## _filters[mx - 1]; \
-    const uint8_t *vf = ff_hevc_ ## f ## _filters[my - 1];
+    const uint8_t *hf = ff_hevc_ ## f ## _filters[mx];     \
+    const uint8_t *vf = ff_hevc_ ## f ## _filters[my];
 
 #define FW_PUT(p, a, b, depth, opt) \
 void ff_hevc_put_hevc_ ## a ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride,   \