@@ -2183,7 +2183,7 @@ static void hevc_hz_bi_4t_4x2_msa(uint8_t *src0_ptr,
v8i16 filt0, filt1;
v16i8 src0, src1, dst0, vec0, vec1;
v8i16 in0, in1;
- v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
+ v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[16]);
v16i8 mask1;
v8i16 tmp0;
v8i16 filter_vec, const_vec;
@@ -2226,7 +2226,8 @@ static void hevc_hz_bi_4t_4x4_msa(uint8_t *src0_ptr,
v8i16 filt0, filt1;
v16i8 src0, src1, src2, src3, dst0, vec0, vec1;
v8i16 in0, in1, in2, in3;
- v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
+ v16i8 vec2, vec3;
+ v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[16]);
v16i8 mask1;
v8i16 tmp0, tmp1;
v8i16 filter_vec, const_vec;
@@ -2247,12 +2248,12 @@ static void hevc_hz_bi_4t_4x4_msa(uint8_t *src0_ptr,
ILVR_D2_SH(in1, in0, in3, in2, in0, in1);
XORI_B4_128_SB(src0, src1, src2, src3);
- VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0, vec1);
tmp0 = const_vec;
- DPADD_SB2_SH(vec0, vec1, filt0, filt1, tmp0, tmp0);
- VSHF_B2_SB(src2, src3, src2, src3, mask0, mask1, vec0, vec1);
tmp1 = const_vec;
- DPADD_SB2_SH(vec0, vec1, filt0, filt1, tmp1, tmp1);
+ VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0, vec1);
+ VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2, vec3);
+ DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt1, filt1, tmp0, tmp1,
+ tmp0, tmp1);
HEVC_BI_RND_CLIP2(in0, in1, tmp0, tmp1, 7, tmp0, tmp1);
dst0 = __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
@@ -2273,8 +2274,8 @@ static void hevc_hz_bi_4t_4x8multiple_msa(uint8_t *src0_ptr,
v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
v16i8 dst0, dst1;
v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
- v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
- v16i8 mask1, vec0, vec1;
+ v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[16]);
+ v16i8 mask1, vec0, vec1, vec2, vec3;
v8i16 tmp0, tmp1, tmp2, tmp3;
v8i16 filter_vec, const_vec;
@@ -2300,18 +2301,18 @@ static void hevc_hz_bi_4t_4x8multiple_msa(uint8_t *src0_ptr,
ILVR_D2_SH(in5, in4, in7, in6, in2, in3);
XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
- VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0, vec1);
tmp0 = const_vec;
- DPADD_SB2_SH(vec0, vec1, filt0, filt1, tmp0, tmp0);
- VSHF_B2_SB(src2, src3, src2, src3, mask0, mask1, vec0, vec1);
tmp1 = const_vec;
- DPADD_SB2_SH(vec0, vec1, filt0, filt1, tmp1, tmp1);
- VSHF_B2_SB(src4, src5, src4, src5, mask0, mask1, vec0, vec1);
tmp2 = const_vec;
- DPADD_SB2_SH(vec0, vec1, filt0, filt1, tmp2, tmp2);
- VSHF_B2_SB(src6, src7, src6, src7, mask0, mask1, vec0, vec1);
tmp3 = const_vec;
- DPADD_SB2_SH(vec0, vec1, filt0, filt1, tmp3, tmp3);
+ VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0, vec1);
+ VSHF_B2_SB(src4, src5, src6, src7, mask0, mask0, vec2, vec3);
+ DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0,
+ tmp1, tmp2, tmp3);
+ VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec0, vec1);
+ VSHF_B2_SB(src4, src5, src6, src7, mask1, mask1, vec2, vec3);
+ DPADD_SB4_SH(vec0, vec1, vec2, vec3, filt1, filt1, filt1, filt1, tmp0,
+ tmp1, tmp2, tmp3);
HEVC_BI_RND_CLIP4(in0, in1, in2, in3,
tmp0, tmp1, tmp2, tmp3, 7, tmp0, tmp1, tmp2, tmp3);
@@ -2357,9 +2358,9 @@ static void hevc_hz_bi_4t_6w_msa(uint8_t *src0_ptr,