@@ -148,6 +148,17 @@ static const uint8_t luma_mask_arr[16 * 8] = {
hz_out_m; \
} )
+#define AVC_DOT_SH3_SH(in0, in1, in2, coeff0, coeff1, coeff2)              \
+( {  /* GNU statement expression: the macro evaluates to out0_m (v8i16) */  \
+    v8i16 out0_m;                                                           \
+    /* args are parenthesized so each (v16i8) cast covers the whole arg */  \
+    out0_m = __msa_dotp_s_h((v16i8) (in0), (v16i8) (coeff0));               \
+    out0_m = __msa_dpadd_s_h(out0_m, (v16i8) (in1), (v16i8) (coeff1));      \
+    out0_m = __msa_dpadd_s_h(out0_m, (v16i8) (in2), (v16i8) (coeff2));      \
+    /* result: dot(in0,coeff0) + dot(in1,coeff1) + dot(in2,coeff2) */       \
+    out0_m;                                                                 \
+} )
+
static void avc_luma_hz_4w_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int32_t height) @@ -3373,55 +3384,467 @@ void ff_put_h264_qpel4_mc30_msa(uint8_t *dst, const uint8_t *src, void ff_put_h264_qpel16_mc20_msa(uint8_t *dst, const uint8_t *src,