@@ -19,50 +19,49 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "config.h"
-#include "libavutil/mem_internal.h"
+#include "libavutil/intfloat.h"
#include "constants.h"
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_1) = {0x0001000100010001ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_2) = {0x0002000200020002ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_3) = {0x0003000300030003ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_4) = {0x0004000400040004ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_5) = {0x0005000500050005ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_6) = {0x0006000600060006ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_8) = {0x0008000800080008ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_9) = {0x0009000900090009ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_10) = {0x000A000A000A000AULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_12) = {0x000C000C000C000CULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_15) = {0x000F000F000F000FULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_16) = {0x0010001000100010ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_17) = {0x0011001100110011ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_18) = {0x0012001200120012ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_20) = {0x0014001400140014ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_22) = {0x0016001600160016ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_28) = {0x001C001C001C001CULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_32) = {0x0020002000200020ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_53) = {0x0035003500350035ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_64) = {0x0040004000400040ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = {0x0080008000800080ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_512) = {0x0200020002000200ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_m8tom5) = {0xFFFBFFFAFFF9FFF8ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_m4tom1) = {0xFFFFFFFEFFFDFFFCULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_1to4) = {0x0004000300020001ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_5to8) = {0x0008000700060005ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_0to3) = {0x0003000200010000ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_4to7) = {0x0007000600050004ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_8tob) = {0x000b000a00090008ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_ctof) = {0x000f000e000d000cULL};
-
-DECLARE_ALIGNED(8, const uint64_t, ff_pb_1) = {0x0101010101010101ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pb_3) = {0x0303030303030303ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pb_80) = {0x8080808080808080ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1) = {0xA1A1A1A1A1A1A1A1ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pb_FE) = {0xFEFEFEFEFEFEFEFEULL};
-
-DECLARE_ALIGNED(8, const uint64_t, ff_rnd) = {0x0004000400040004ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_rnd2) = {0x0040004000400040ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_rnd3) = {0x0020002000200020ULL};
-
-DECLARE_ALIGNED(8, const uint64_t, ff_wm1010) = {0xFFFF0000FFFF0000ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_d40000) = {0x0000000000040000ULL};
+union av_intfloat64 ff_pw_1 = {0x0001000100010001ULL};
+union av_intfloat64 ff_pw_2 = {0x0002000200020002ULL};
+union av_intfloat64 ff_pw_3 = {0x0003000300030003ULL};
+union av_intfloat64 ff_pw_4 = {0x0004000400040004ULL};
+union av_intfloat64 ff_pw_5 = {0x0005000500050005ULL};
+union av_intfloat64 ff_pw_6 = {0x0006000600060006ULL};
+union av_intfloat64 ff_pw_8 = {0x0008000800080008ULL};
+union av_intfloat64 ff_pw_9 = {0x0009000900090009ULL};
+union av_intfloat64 ff_pw_10 = {0x000A000A000A000AULL};
+union av_intfloat64 ff_pw_12 = {0x000C000C000C000CULL};
+union av_intfloat64 ff_pw_15 = {0x000F000F000F000FULL};
+union av_intfloat64 ff_pw_16 = {0x0010001000100010ULL};
+union av_intfloat64 ff_pw_17 = {0x0011001100110011ULL};
+union av_intfloat64 ff_pw_18 = {0x0012001200120012ULL};
+union av_intfloat64 ff_pw_20 = {0x0014001400140014ULL};
+union av_intfloat64 ff_pw_22 = {0x0016001600160016ULL};
+union av_intfloat64 ff_pw_28 = {0x001C001C001C001CULL};
+union av_intfloat64 ff_pw_32 = {0x0020002000200020ULL};
+union av_intfloat64 ff_pw_53 = {0x0035003500350035ULL};
+union av_intfloat64 ff_pw_64 = {0x0040004000400040ULL};
+union av_intfloat64 ff_pw_128 = {0x0080008000800080ULL};
+union av_intfloat64 ff_pw_512 = {0x0200020002000200ULL};
+union av_intfloat64 ff_pw_m8tom5 = {0xFFFBFFFAFFF9FFF8ULL};
+union av_intfloat64 ff_pw_m4tom1 = {0xFFFFFFFEFFFDFFFCULL};
+union av_intfloat64 ff_pw_1to4 = {0x0004000300020001ULL};
+union av_intfloat64 ff_pw_5to8 = {0x0008000700060005ULL};
+union av_intfloat64 ff_pw_0to3 = {0x0003000200010000ULL};
+union av_intfloat64 ff_pw_4to7 = {0x0007000600050004ULL};
+union av_intfloat64 ff_pw_8tob = {0x000b000a00090008ULL};
+union av_intfloat64 ff_pw_ctof = {0x000f000e000d000cULL};
+union av_intfloat64 ff_pw_32_1 = {0x0000000100000001ULL};
+union av_intfloat64 ff_pw_32_4 = {0x0000000400000004ULL};
+union av_intfloat64 ff_pw_32_64 = {0x0000004000000040ULL};
+union av_intfloat64 ff_pb_1 = {0x0101010101010101ULL};
+union av_intfloat64 ff_pb_3 = {0x0303030303030303ULL};
+union av_intfloat64 ff_pb_80 = {0x8080808080808080ULL};
+union av_intfloat64 ff_pb_A1 = {0xA1A1A1A1A1A1A1A1ULL};
+union av_intfloat64 ff_pb_FE = {0xFEFEFEFEFEFEFEFEULL};
+union av_intfloat64 ff_rnd = {0x0004000400040004ULL};
+union av_intfloat64 ff_rnd2 = {0x0040004000400040ULL};
+union av_intfloat64 ff_rnd3 = {0x0020002000200020ULL};
+union av_intfloat64 ff_wm1010 = {0xFFFF0000FFFF0000ULL}; /* 16-bit lanes, high to low: FFFF 0000 FFFF 0000 */
+union av_intfloat64 ff_d40000 = {0x0000000000040000ULL};
@@ -22,50 +22,48 @@
#ifndef AVCODEC_MIPS_CONSTANTS_H
#define AVCODEC_MIPS_CONSTANTS_H
-#include <stdint.h>
-
-extern const uint64_t ff_pw_1;
-extern const uint64_t ff_pw_2;
-extern const uint64_t ff_pw_3;
-extern const uint64_t ff_pw_4;
-extern const uint64_t ff_pw_5;
-extern const uint64_t ff_pw_6;
-extern const uint64_t ff_pw_8;
-extern const uint64_t ff_pw_9;
-extern const uint64_t ff_pw_10;
-extern const uint64_t ff_pw_12;
-extern const uint64_t ff_pw_15;
-extern const uint64_t ff_pw_16;
-extern const uint64_t ff_pw_17;
-extern const uint64_t ff_pw_18;
-extern const uint64_t ff_pw_20;
-extern const uint64_t ff_pw_22;
-extern const uint64_t ff_pw_28;
-extern const uint64_t ff_pw_32;
-extern const uint64_t ff_pw_53;
-extern const uint64_t ff_pw_64;
-extern const uint64_t ff_pw_128;
-extern const uint64_t ff_pw_512;
-extern const uint64_t ff_pw_m8tom5;
-extern const uint64_t ff_pw_m4tom1;
-extern const uint64_t ff_pw_1to4;
-extern const uint64_t ff_pw_5to8;
-extern const uint64_t ff_pw_0to3;
-extern const uint64_t ff_pw_4to7;
-extern const uint64_t ff_pw_8tob;
-extern const uint64_t ff_pw_ctof;
-
-extern const uint64_t ff_pb_1;
-extern const uint64_t ff_pb_3;
-extern const uint64_t ff_pb_80;
-extern const uint64_t ff_pb_A1;
-extern const uint64_t ff_pb_FE;
-
-extern const uint64_t ff_rnd;
-extern const uint64_t ff_rnd2;
-extern const uint64_t ff_rnd3;
-
-extern const uint64_t ff_wm1010;
-extern const uint64_t ff_d40000;
+extern union av_intfloat64 ff_pw_1;
+extern union av_intfloat64 ff_pw_2;
+extern union av_intfloat64 ff_pw_3;
+extern union av_intfloat64 ff_pw_4;
+extern union av_intfloat64 ff_pw_5;
+extern union av_intfloat64 ff_pw_6;
+extern union av_intfloat64 ff_pw_8;
+extern union av_intfloat64 ff_pw_9;
+extern union av_intfloat64 ff_pw_10;
+extern union av_intfloat64 ff_pw_12;
+extern union av_intfloat64 ff_pw_15;
+extern union av_intfloat64 ff_pw_16;
+extern union av_intfloat64 ff_pw_17;
+extern union av_intfloat64 ff_pw_18;
+extern union av_intfloat64 ff_pw_20;
+extern union av_intfloat64 ff_pw_22;
+extern union av_intfloat64 ff_pw_28;
+extern union av_intfloat64 ff_pw_32;
+extern union av_intfloat64 ff_pw_53;
+extern union av_intfloat64 ff_pw_64;
+extern union av_intfloat64 ff_pw_128;
+extern union av_intfloat64 ff_pw_512;
+extern union av_intfloat64 ff_pw_m8tom5;
+extern union av_intfloat64 ff_pw_m4tom1;
+extern union av_intfloat64 ff_pw_1to4;
+extern union av_intfloat64 ff_pw_5to8;
+extern union av_intfloat64 ff_pw_0to3;
+extern union av_intfloat64 ff_pw_4to7;
+extern union av_intfloat64 ff_pw_8tob;
+extern union av_intfloat64 ff_pw_ctof;
+extern union av_intfloat64 ff_pw_32_1;
+extern union av_intfloat64 ff_pw_32_4;
+extern union av_intfloat64 ff_pw_32_64;
+extern union av_intfloat64 ff_pb_1;
+extern union av_intfloat64 ff_pb_3;
+extern union av_intfloat64 ff_pb_80;
+extern union av_intfloat64 ff_pb_A1;
+extern union av_intfloat64 ff_pb_FE;
+extern union av_intfloat64 ff_rnd;
+extern union av_intfloat64 ff_rnd2;
+extern union av_intfloat64 ff_rnd3;
+extern union av_intfloat64 ff_wm1010;
+extern union av_intfloat64 ff_d40000;
#endif /* AVCODEC_MIPS_CONSTANTS_H */
@@ -29,12 +29,12 @@
void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
int h, int x, int y)
{
- int A = 64, B, C, D, E;
double ftmp[12];
- uint64_t tmp[1];
+ union mmi_intfloat64 A, B, C, D, E;
+ A.i = 64;
if (!(x || y)) {
- /* x=0, y=0, A=64 */
+ /* x=0, y=0, A.i=64 */
__asm__ volatile (
"1: \n\t"
MMI_ULDC1(%[ftmp0], %[src], 0x00)
@@ -66,14 +66,13 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
);
} else if (x && y) {
/* x!=0, y!=0 */
- D = x * y;
- B = (x << 3) - D;
- C = (y << 3) - D;
- A = 64 - D - B - C;
+ D.i = x * y;
+ B.i = (x << 3) - D.i;
+ C.i = (y << 3) - D.i;
+ A.i = 64 - D.i - B.i - C.i;
__asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
- "dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[B], %[B], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp9] \n\t"
@@ -158,22 +157,21 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
- [tmp0]"=&r"(tmp[0]),
[dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h)
- : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
- [A]"f"(A), [B]"f"(B),
- [C]"f"(C), [D]"f"(D)
+ : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
+ [A]"f"(A.f), [B]"f"(B.f),
+ [C]"f"(C.f), [D]"f"(D.f),
+ [tmp0]"r"(0x06)
: "memory"
);
} else if (x) {
/* x!=0, y==0 */
- E = x << 3;
- A = 64 - E;
+ E.i = x << 3;
+ A.i = 64 - E.i;
__asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
- "dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[E], %[E], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t"
@@ -207,22 +205,20 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
- [tmp0]"=&r"(tmp[0]),
[dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h)
: [stride]"r"((mips_reg)stride),
- [ff_pw_32]"f"(ff_pw_32),
- [A]"f"(A), [E]"f"(E)
+ [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06),
+ [A]"f"(A.f), [E]"f"(E.f)
: "memory"
);
} else {
/* x==0, y!=0 */
- E = y << 3;
- A = 64 - E;
+ E.i = y << 3;
+ A.i = 64 - E.i;
__asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
- "dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[E], %[E], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t"
@@ -276,12 +272,12 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
- [ftmp8]"=&f"(ftmp[8]), [tmp0]"=&r"(tmp[0]),
+ [ftmp8]"=&f"(ftmp[8]),
[dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h)
: [stride]"r"((mips_reg)stride),
- [ff_pw_32]"f"(ff_pw_32),
- [A]"f"(A), [E]"f"(E)
+ [ff_pw_32]"f"(ff_pw_32.f), [A]"f"(A.f),
+ [E]"f"(E.f), [tmp0]"r"(0x06)
: "memory"
);
}
@@ -290,12 +286,12 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
int h, int x, int y)
{
- int A = 64, B, C, D, E;
double ftmp[10];
- uint64_t tmp[1];
+ union mmi_intfloat64 A, B, C, D, E;
+ A.i = 64;
if(!(x || y)){
- /* x=0, y=0, A=64 */
+ /* x=0, y=0, A.i=64 */
__asm__ volatile (
"1: \n\t"
MMI_ULDC1(%[ftmp0], %[src], 0x00)
@@ -323,13 +319,12 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
);
} else if (x && y) {
/* x!=0, y!=0 */
- D = x * y;
- B = (x << 3) - D;
- C = (y << 3) - D;
- A = 64 - D - B - C;
+ D.i = x * y;
+ B.i = (x << 3) - D.i;
+ C.i = (y << 3) - D.i;
+ A.i = 64 - D.i - B.i - C.i;
__asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
- "dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[B], %[B], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp9] \n\t"
@@ -383,21 +378,20 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
- [tmp0]"=&r"(tmp[0]),
[dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h)
- : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
- [A]"f"(A), [B]"f"(B),
- [C]"f"(C), [D]"f"(D)
+ : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
+ [A]"f"(A.f), [B]"f"(B.f),
+ [C]"f"(C.f), [D]"f"(D.f),
+ [tmp0]"r"(0x06)
: "memory"
);
} else if (x) {
/* x!=0, y==0 */
- E = x << 3;
- A = 64 - E;
+ E.i = x << 3;
+ A.i = 64 - E.i;
__asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
- "dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[E], %[E], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t"
@@ -433,21 +427,19 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
- [tmp0]"=&r"(tmp[0]),
[dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h)
: [stride]"r"((mips_reg)stride),
- [ff_pw_32]"f"(ff_pw_32),
- [A]"f"(A), [E]"f"(E)
+ [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06),
+ [A]"f"(A.f), [E]"f"(E.f)
: "memory"
);
} else {
/* x==0, y!=0 */
- E = y << 3;
- A = 64 - E;
+ E.i = y << 3;
+ A.i = 64 - E.i;
__asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
- "dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[E], %[E], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t"
@@ -469,8 +461,8 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
"pmullh %[ftmp6], %[ftmp6], %[E] \n\t"
"paddh %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
- "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t"
- "paddh %[ftmp2], %[ftmp2], %[ff_pw_32] \n\t"
+ "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t"
+ "paddh %[ftmp2], %[ftmp2], %[ff_pw_32] \n\t"
"psrlh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
"psrlh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
"packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
@@ -483,12 +475,11 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
- [tmp0]"=&r"(tmp[0]),
[dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h)
: [stride]"r"((mips_reg)stride),
- [ff_pw_32]"f"(ff_pw_32),
- [A]"f"(A), [E]"f"(E)
+ [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06),
+ [A]"f"(A.f), [E]"f"(E.f)
: "memory"
);
}
@@ -497,20 +488,19 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
int h, int x, int y)
{
- const int A = (8 - x) * (8 - y);
- const int B = x * (8 - y);
- const int C = (8 - x) * y;
- const int D = x * y;
- const int E = B + C;
double ftmp[8];
- uint64_t tmp[1];
mips_reg addr[1];
+ union mmi_intfloat64 A, B, C, D, E;
DECLARE_VAR_LOW32;
+ A.i = (8 - x) * (8 - y);
+ B.i = x * (8 - y);
+ C.i = (8 - x) * y;
+ D.i = x * y;
+ E.i = B.i + C.i;
- if (D) {
+ if (D.i) {
__asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
- "dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[B], %[B], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t"
@@ -547,20 +537,19 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
- [tmp0]"=&r"(tmp[0]),
RESTRICT_ASM_LOW32
[dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h)
- : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
- [A]"f"(A), [B]"f"(B),
- [C]"f"(C), [D]"f"(D)
+ : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
+ [A]"f"(A.f), [B]"f"(B.f),
+ [C]"f"(C.f), [D]"f"(D.f),
+ [tmp0]"r"(0x06)
: "memory"
);
- } else if (E) {
- const int step = C ? stride : 1;
+ } else if (E.i) {
+ const int step = C.i ? stride : 1;
__asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
- "dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[E], %[E], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp5] \n\t"
@@ -585,14 +574,13 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
- [tmp0]"=&r"(tmp[0]),
RESTRICT_ASM_LOW32
[addr0]"=&r"(addr[0]),
[dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h)
: [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
- [ff_pw_32]"f"(ff_pw_32),
- [A]"f"(A), [E]"f"(E)
+ [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06),
+ [A]"f"(A.f), [E]"f"(E.f)
: "memory"
);
} else {
@@ -621,20 +609,19 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
int h, int x, int y)
{
- const int A = (8 - x) *(8 - y);
- const int B = x * (8 - y);
- const int C = (8 - x) * y;
- const int D = x * y;
- const int E = B + C;
double ftmp[8];
- uint64_t tmp[1];
mips_reg addr[1];
+ union mmi_intfloat64 A, B, C, D, E;
DECLARE_VAR_LOW32;
+ A.i = (8 - x) *(8 - y);
+ B.i = x * (8 - y);
+ C.i = (8 - x) * y;
+ D.i = x * y;
+ E.i = B.i + C.i;
- if (D) {
+ if (D.i) {
__asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
- "dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[B], %[B], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t"
@@ -673,20 +660,19 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
- [tmp0]"=&r"(tmp[0]),
RESTRICT_ASM_LOW32
[dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h)
- : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
- [A]"f"(A), [B]"f"(B),
- [C]"f"(C), [D]"f"(D)
+ : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
+ [A]"f"(A.f), [B]"f"(B.f),
+ [C]"f"(C.f), [D]"f"(D.f),
+ [tmp0]"r"(0x06)
: "memory"
);
- } else if (E) {
- const int step = C ? stride : 1;
+ } else if (E.i) {
+ const int step = C.i ? stride : 1;
__asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
- "dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[E], %[E], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp5] \n\t"
@@ -713,14 +699,13 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
- [tmp0]"=&r"(tmp[0]),
RESTRICT_ASM_LOW32
[addr0]"=&r"(addr[0]),
[dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h)
: [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
- [ff_pw_32]"f"(ff_pw_32),
- [A]"f"(A), [E]"f"(E)
+ [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06),
+ [A]"f"(A.f), [E]"f"(E.f)
: "memory"
);
} else {
@@ -162,7 +162,7 @@ void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
RESTRICT_ASM_ADDRT
[tmp0]"=&r"(tmp[0])
: [dst]"r"(dst), [block]"r"(block),
- [stride]"r"((mips_reg)stride), [ff_pw_32]"f"(ff_pw_32)
+ [stride]"r"((mips_reg)stride), [ff_pw_32]"f"(ff_pw_32.f)
: "memory"
);
@@ -1078,7 +1078,7 @@ void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input,
RESTRICT_ASM_ALL64
[output]"+&r"(output), [input]"+&r"(input),
[qmul]"+&r"(qmul)
- : [ff_pw_1]"f"(ff_pw_1)
+ : [ff_pw_1]"f"(ff_pw_1.f)
: "memory"
);
}
@@ -1556,8 +1556,8 @@ void ff_deblock_v8_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int bet
[addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
: [pix]"r"(pix), [stride]"r"((mips_reg)stride),
[alpha]"r"((mips_reg)alpha), [beta]"r"((mips_reg)beta),
- [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1),
- [ff_pb_3]"f"(ff_pb_3), [ff_pb_A1]"f"(ff_pb_A1)
+ [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1.f),
+ [ff_pb_3]"f"(ff_pb_3.f), [ff_pb_A1]"f"(ff_pb_A1.f)
: "memory"
);
}
@@ -1866,8 +1866,8 @@ void ff_deblock_v_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
[addr0]"=&r"(addr[0])
: [pix]"r"(pix), [stride]"r"((mips_reg)stride),
[alpha]"r"(alpha), [beta]"r"(beta),
- [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1),
- [ff_pb_3]"f"(ff_pb_3), [ff_pb_A1]"f"(ff_pb_A1)
+ [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1.f),
+ [ff_pb_3]"f"(ff_pb_3.f), [ff_pb_A1]"f"(ff_pb_A1.f)
: "memory"
);
}
@@ -1945,7 +1945,7 @@ void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
[addr0]"=&r"(addr[0])
: [pix]"r"(pix), [stride]"r"((mips_reg)stride),
[alpha]"r"(alpha), [beta]"r"(beta),
- [ff_pb_1]"f"(ff_pb_1)
+ [ff_pb_1]"f"(ff_pb_1.f)
: "memory"
);
}
@@ -2084,8 +2084,8 @@ void ff_deblock_h_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int be
[pix]"+&r"(pix)
: [alpha]"r"(alpha), [beta]"r"(beta),
[stride]"r"((mips_reg)stride), [tc0]"r"(tc0),
- [ff_pb_1]"f"(ff_pb_1), [ff_pb_3]"f"(ff_pb_3),
- [ff_pb_A1]"f"(ff_pb_A1)
+ [ff_pb_1]"f"(ff_pb_1.f), [ff_pb_3]"f"(ff_pb_3.f),
+ [ff_pb_A1]"f"(ff_pb_A1.f)
: "memory"
);
}
@@ -2218,7 +2218,7 @@ void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
[addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
[pix]"+&r"(pix)
: [alpha]"r"(alpha), [beta]"r"(beta),
- [stride]"r"((mips_reg)stride), [ff_pb_1]"f"(ff_pb_1)
+ [stride]"r"((mips_reg)stride), [ff_pb_1]"f"(ff_pb_1.f)
: "memory"
);
}
@@ -155,9 +155,9 @@ void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
int has_topright, ptrdiff_t stride)
{
- uint32_t dc;
double ftmp[11];
mips_reg tmp[3];
+ union av_intfloat64 dc;
DECLARE_VAR_ALL64;
DECLARE_VAR_ADDRT;
@@ -209,12 +209,12 @@ void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
[ftmp10]"=&f"(ftmp[10]),
[tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
RESTRICT_ASM_ALL64
- [dc]"=r"(dc)
+ [dc]"=r"(dc.i)
: [srcA]"r"((mips_reg)(src-stride-1)),
[src0]"r"((mips_reg)(src-stride)),
[src1]"r"((mips_reg)(src-stride+1)),
[has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright),
- [ff_pb_1]"r"(ff_pb_1), [ff_pw_2]"f"(ff_pw_2)
+ [ff_pb_1]"r"(ff_pb_1.i), [ff_pw_2]"f"(ff_pw_2.f)
: "memory"
);
@@ -238,7 +238,7 @@ void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
RESTRICT_ASM_ALL64
RESTRICT_ASM_ADDRT
[src]"+&r"(src)
- : [dc]"f"(dc), [stride]"r"((mips_reg)stride)
+ : [dc]"f"(dc.f), [stride]"r"((mips_reg)stride)
: "memory"
);
}
@@ -246,9 +246,10 @@ void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright,
ptrdiff_t stride)
{
- uint32_t dc, dc1, dc2;
+ uint32_t dc1, dc2;
double ftmp[14];
mips_reg tmp[1];
+ union av_intfloat64 dc;
const int l0 = ((has_topleft ? src[-1+-1*stride] : src[-1+0*stride]) + 2*src[-1+0*stride] + src[-1+1*stride] + 2) >> 2;
const int l1 = (src[-1+0*stride] + 2*src[-1+1*stride] + src[-1+2*stride] + 2) >> 2;
@@ -322,7 +323,7 @@ void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright,
);
dc1 = l0+l1+l2+l3+l4+l5+l6+l7;
- dc = ((dc1+dc2+8)>>4)*0x01010101U;
+ dc.i = ((dc1+dc2+8)>>4)*0x01010101U;
__asm__ volatile (
"dli %[tmp0], 0x02 \n\t"
@@ -344,7 +345,7 @@ void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright,
RESTRICT_ASM_ALL64
RESTRICT_ASM_ADDRT
[src]"+&r"(src)
- : [dc]"f"(dc), [stride]"r"((mips_reg)stride)
+ : [dc]"f"(dc.f), [stride]"r"((mips_reg)stride)
: "memory"
);
}
@@ -965,10 +966,10 @@ static inline void pred16x16_plane_compat_mmi(uint8_t *src, int stride,
[addr0]"=&r"(addr[0])
: [src]"r"(src), [stride]"r"((mips_reg)stride),
[svq3]"r"(svq3), [rv40]"r"(rv40),
- [ff_pw_m8tom5]"f"(ff_pw_m8tom5), [ff_pw_m4tom1]"f"(ff_pw_m4tom1),
- [ff_pw_1to4]"f"(ff_pw_1to4), [ff_pw_5to8]"f"(ff_pw_5to8),
- [ff_pw_0to3]"f"(ff_pw_0to3), [ff_pw_4to7]"r"(ff_pw_4to7),
- [ff_pw_8tob]"r"(ff_pw_8tob), [ff_pw_ctof]"r"(ff_pw_ctof)
+ [ff_pw_m8tom5]"f"(ff_pw_m8tom5.f),[ff_pw_m4tom1]"f"(ff_pw_m4tom1.f),
+ [ff_pw_1to4]"f"(ff_pw_1to4.f), [ff_pw_5to8]"f"(ff_pw_5to8.f),
+ [ff_pw_0to3]"f"(ff_pw_0to3.f), [ff_pw_4to7]"r"(ff_pw_4to7.i),
+ [ff_pw_8tob]"r"(ff_pw_8tob.i), [ff_pw_ctof]"r"(ff_pw_ctof.i)
: "memory"
);
}
@@ -155,8 +155,8 @@ static void put_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[dst]"+&r"(dst), [src]"+&r"(src)
: [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride),
- [ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5),
- [ff_pw_16]"f"(ff_pw_16)
+ [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
+ [ff_pw_16]"f"(ff_pw_16.f)
: "memory"
);
}
@@ -225,8 +225,8 @@ static void put_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[dst]"+&r"(dst), [src]"+&r"(src)
: [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride),
- [ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5),
- [ff_pw_16]"f"(ff_pw_16)
+ [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
+ [ff_pw_16]"f"(ff_pw_16.f)
: "memory"
);
}
@@ -293,8 +293,8 @@ static void avg_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[dst]"+&r"(dst), [src]"+&r"(src)
: [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride),
- [ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5),
- [ff_pw_16]"f"(ff_pw_16)
+ [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
+ [ff_pw_16]"f"(ff_pw_16.f)
: "memory"
);
}
@@ -365,8 +365,8 @@ static void avg_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[dst]"+&r"(dst), [src]"+&r"(src)
: [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride),
- [ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5),
- [ff_pw_16]"f"(ff_pw_16)
+ [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
+ [ff_pw_16]"f"(ff_pw_16.f)
: "memory"
);
}
@@ -486,7 +486,7 @@ static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[dst]"+&r"(dst), [src]"+&r"(src)
: [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride),
- [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
+ [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
: "memory"
);
}
@@ -780,7 +780,7 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[h]"+&r"(h)
: [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride),
- [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
+ [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
: "memory"
);
@@ -909,7 +909,7 @@ static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[src]"+&r"(src), [dst]"+&r"(dst)
: [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride),
- [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
+ [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
: "memory"
);
}
@@ -1235,7 +1235,7 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[h]"+&r"(h)
: [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride),
- [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
+ [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
: "memory"
);
@@ -1306,7 +1306,7 @@ static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[tmp]"+&r"(tmp), [src]"+&r"(src)
: [tmpStride]"r"(8),
[srcStride]"r"((mips_reg)srcStride),
- [ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5)
+ [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f)
: "memory"
);
@@ -1567,7 +1567,7 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
[src]"+&r"(src)
: [tmp]"r"(tmp), [size]"r"(size),
[srcStride]"r"((mips_reg)srcStride),
- [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
+ [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
: "memory"
);
@@ -1742,7 +1742,7 @@ static void put_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
[src2]"+&r"(src2), [h]"+&r"(h)
: [src2Stride]"r"((mips_reg)src2Stride),
[dstStride]"r"((mips_reg)dstStride),
- [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
+ [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
: "memory"
);
}
@@ -1870,7 +1870,7 @@ static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[tmp]"+&r"(tmp), [src]"+&r"(src)
: [tmpStride]"r"(8),
[srcStride]"r"((mips_reg)srcStride),
- [ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5)
+ [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f)
: "memory"
);
@@ -2065,7 +2065,7 @@ static void avg_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
[src2]"+&r"(src2)
: [dstStride]"r"((mips_reg)dstStride),
[src2Stride]"r"((mips_reg)src2Stride),
- [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
+ [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
: "memory"
);
}
@@ -32,7 +32,7 @@ void ff_hevc_put_hevc_qpel_h##w##_8_mmi(int16_t *dst, uint8_t *_src, \
int x, y; \
pixel *src = (pixel*)_src - 3; \
ptrdiff_t srcstride = _srcstride / sizeof(pixel); \
- uint64_t ftmp[15]; \
+ double ftmp[15]; \
uint64_t rtmp[1]; \
const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; \
\
@@ -132,7 +132,7 @@ void ff_hevc_put_hevc_qpel_hv##w##_8_mmi(int16_t *dst, uint8_t *_src, \
ptrdiff_t srcstride = _srcstride / sizeof(pixel); \
int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; \
int16_t *tmp = tmp_array; \
- uint64_t ftmp[15]; \
+ double ftmp[15]; \
uint64_t rtmp[1]; \
\
src -= (QPEL_EXTRA_BEFORE * srcstride + 3); \
@@ -329,10 +329,12 @@ void ff_hevc_put_hevc_qpel_bi_h##w##_8_mmi(uint8_t *_dst, \
pixel *dst = (pixel *)_dst; \
ptrdiff_t dststride = _dststride / sizeof(pixel); \
const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; \
- uint64_t ftmp[20]; \
+ double ftmp[20]; \
uint64_t rtmp[1]; \
- int shift = 7; \
- int offset = 64; \
+ union av_intfloat64 shift; \
+ union av_intfloat64 offset; \
+ shift.i = 7; \
+ offset.i = 64; \
\
x = width >> 2; \
y = height; \
@@ -430,9 +432,9 @@ void ff_hevc_put_hevc_qpel_bi_h##w##_8_mmi(uint8_t *_dst, \
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), \
[ftmp12]"=&f"(ftmp[12]), [src2]"+&r"(src2), \
[dst]"+&r"(dst), [src]"+&r"(src), [y]"+&r"(y), [x]"=&r"(x), \
- [offset]"+&f"(offset), [rtmp0]"=&r"(rtmp[0]) \
+ [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0]) \
: [src_stride]"r"(srcstride), [dst_stride]"r"(dststride), \
- [filter]"r"(filter), [shift]"f"(shift) \
+ [filter]"r"(filter), [shift]"f"(shift.f) \
: "memory" \
); \
}
@@ -463,10 +465,12 @@ void ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst, \
ptrdiff_t dststride = _dststride / sizeof(pixel); \
int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; \
int16_t *tmp = tmp_array; \
- uint64_t ftmp[20]; \
+ double ftmp[20]; \
uint64_t rtmp[1]; \
- int shift = 7; \
- int offset = 64; \
+ union av_intfloat64 shift; \
+ union av_intfloat64 offset; \
+ shift.i = 7; \
+ offset.i = 64; \
\
src -= (QPEL_EXTRA_BEFORE * srcstride + 3); \
filter = ff_hevc_qpel_filters[mx - 1]; \
@@ -659,9 +663,9 @@ void ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst, \
[ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), \
[ftmp14]"=&f"(ftmp[14]), [src2]"+&r"(src2), \
[dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x), \
- [offset]"+&f"(offset), [rtmp0]"=&r"(rtmp[0]) \
+ [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0]) \
: [filter]"r"(filter), [stride]"r"(dststride), \
- [shift]"f"(shift) \
+ [shift]"f"(shift.f) \
: "memory" \
); \
}
@@ -692,10 +696,12 @@ void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst, \
const int8_t *filter = ff_hevc_epel_filters[mx - 1]; \
int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; \
int16_t *tmp = tmp_array; \
- uint64_t ftmp[12]; \
+ double ftmp[12]; \
uint64_t rtmp[1]; \
- int shift = 7; \
- int offset = 64; \
+ union av_intfloat64 shift; \
+ union av_intfloat64 offset; \
+ shift.i = 7; \
+ offset.i = 64; \
\
src -= (EPEL_EXTRA_BEFORE * srcstride + 1); \
x = width >> 2; \
@@ -847,9 +853,9 @@ void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst, \
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), \
[ftmp10]"=&f"(ftmp[10]), [src2]"+&r"(src2), \
[dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x), \
- [offset]"+&f"(offset), [rtmp0]"=&r"(rtmp[0]) \
+ [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0]) \
: [filter]"r"(filter), [stride]"r"(dststride), \
- [shift]"f"(shift) \
+ [shift]"f"(shift.f) \
: "memory" \
); \
}
@@ -875,9 +881,10 @@ void ff_hevc_put_hevc_pel_bi_pixels##w##_8_mmi(uint8_t *_dst, \
ptrdiff_t srcstride = _srcstride / sizeof(pixel); \
pixel *dst = (pixel *)_dst; \
ptrdiff_t dststride = _dststride / sizeof(pixel); \
- uint64_t ftmp[12]; \
+ double ftmp[12]; \
uint64_t rtmp[1]; \
- int shift = 7; \
+ union av_intfloat64 shift; \
+ shift.i = 7; \
\
y = height; \
x = width >> 3; \
@@ -959,7 +966,7 @@ void ff_hevc_put_hevc_pel_bi_pixels##w##_8_mmi(uint8_t *_dst, \
[ftmp10]"=&f"(ftmp[10]), [offset]"=&f"(ftmp[11]), \
[src2]"+&r"(src2), [dst]"+&r"(dst), [src]"+&r"(src), \
[x]"+&r"(x), [y]"+&r"(y), [rtmp0]"=&r"(rtmp[0]) \
- : [dststride]"r"(dststride), [shift]"f"(shift), \
+ : [dststride]"r"(dststride), [shift]"f"(shift.f), \
[srcstride]"r"(srcstride) \
: "memory" \
); \
@@ -989,10 +996,12 @@ void ff_hevc_put_hevc_qpel_uni_hv##w##_8_mmi(uint8_t *_dst, \
ptrdiff_t dststride = _dststride / sizeof(pixel); \
int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; \
int16_t *tmp = tmp_array; \
- uint64_t ftmp[20]; \
+ double ftmp[20]; \
uint64_t rtmp[1]; \
- int shift = 6; \
- int offset = 32; \
+ union av_intfloat64 shift; \
+ union av_intfloat64 offset; \
+ shift.i = 6; \
+ offset.i = 32; \
\
src -= (QPEL_EXTRA_BEFORE * srcstride + 3); \
filter = ff_hevc_qpel_filters[mx - 1]; \
@@ -1166,9 +1175,9 @@ void ff_hevc_put_hevc_qpel_uni_hv##w##_8_mmi(uint8_t *_dst, \
[ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), \
[ftmp14]"=&f"(ftmp[14]), \
[dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x), \
- [offset]"+&f"(offset), [rtmp0]"=&r"(rtmp[0]) \
+ [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0]) \
: [filter]"r"(filter), [stride]"r"(dststride), \
- [shift]"f"(shift) \
+ [shift]"f"(shift.f) \
: "memory" \
); \
}
@@ -142,7 +142,7 @@ void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
[pixels]"+&r"(pixels)
: [block]"r"(block),
[line_size]"r"((mips_reg)line_size),
- [ff_pb_80]"f"(ff_pb_80)
+ [ff_pb_80]"f"(ff_pb_80.f)
: "memory"
);
}
@@ -28,12 +28,13 @@
void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
int n, int qscale)
{
- int64_t level, qmul, qadd, nCoeffs;
+ int64_t level, nCoeffs;
double ftmp[6];
mips_reg addr[1];
+ union mmi_intfloat64 qmul_u, qadd_u;
DECLARE_VAR_ALL64;
- qmul = qscale << 1;
+ qmul_u.i = qscale << 1;
av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
if (!s->h263_aic) {
@@ -41,9 +42,9 @@ void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
level = block[0] * s->y_dc_scale;
else
level = block[0] * s->c_dc_scale;
- qadd = (qscale-1) | 1;
+ qadd_u.i = (qscale-1) | 1;
} else {
- qadd = 0;
+ qadd_u.i = 0;
level = block[0];
}
@@ -93,7 +94,7 @@ void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
[addr0]"=&r"(addr[0])
: [block]"r"((mips_reg)(block+nCoeffs)),
[nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
- [qmul]"f"(qmul), [qadd]"f"(qadd)
+ [qmul]"f"(qmul_u.f), [qadd]"f"(qadd_u.f)
: "memory"
);
@@ -103,13 +104,14 @@ void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block,
int n, int qscale)
{
- int64_t qmul, qadd, nCoeffs;
+ int64_t nCoeffs;
double ftmp[6];
mips_reg addr[1];
+ union mmi_intfloat64 qmul_u, qadd_u;
DECLARE_VAR_ALL64;
- qmul = qscale << 1;
- qadd = (qscale - 1) | 1;
+ qmul_u.i = qscale << 1;
+ qadd_u.i = (qscale - 1) | 1;
av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
@@ -153,7 +155,7 @@ void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block,
[addr0]"=&r"(addr[0])
: [block]"r"((mips_reg)(block+nCoeffs)),
[nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
- [qmul]"f"(qmul), [qadd]"f"(qadd)
+ [qmul]"f"(qmul_u.f), [qadd]"f"(qadd_u.f)
: "memory"
);
}
@@ -129,9 +129,11 @@ void ff_vc1_inv_trans_8x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
double ftmp[9];
mips_reg addr[1];
int count;
+ union mmi_intfloat64 dc_u;
dc = (3 * dc + 1) >> 1;
dc = (3 * dc + 16) >> 5;
+ dc_u.i = dc;
__asm__ volatile(
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
@@ -189,7 +191,7 @@ void ff_vc1_inv_trans_8x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
[addr0]"=&r"(addr[0]),
[count]"=&r"(count), [dest]"+&r"(dest)
: [linesize]"r"((mips_reg)linesize),
- [dc]"f"(dc)
+ [dc]"f"(dc_u.f)
: "memory"
);
}
@@ -198,9 +200,6 @@ void ff_vc1_inv_trans_8x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
void ff_vc1_inv_trans_8x8_mmi(int16_t block[64])
{
DECLARE_ALIGNED(16, int16_t, temp[64]);
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_1_local) = {0x0000000100000001ULL};
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
double ftmp[23];
uint64_t tmp[1];
@@ -407,8 +406,8 @@ void ff_vc1_inv_trans_8x8_mmi(int16_t block[64])
[ftmp20]"=&f"(ftmp[20]), [ftmp21]"=&f"(ftmp[21]),
[ftmp22]"=&f"(ftmp[22]),
[tmp0]"=&r"(tmp[0])
- : [ff_pw_1]"f"(ff_pw_1_local), [ff_pw_64]"f"(ff_pw_64_local),
- [ff_pw_4]"f"(ff_pw_4_local), [block]"r"(block),
+ : [ff_pw_1]"f"(ff_pw_32_1.f), [ff_pw_64]"f"(ff_pw_32_64.f),
+ [ff_pw_4]"f"(ff_pw_32_4.f), [block]"r"(block),
[temp]"r"(temp)
: "memory"
);
@@ -420,9 +419,11 @@ void ff_vc1_inv_trans_8x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
{
int dc = block[0];
double ftmp[9];
+ union mmi_intfloat64 dc_u;
dc = ( 3 * dc + 1) >> 1;
dc = (17 * dc + 64) >> 7;
+ dc_u.i = dc;
__asm__ volatile(
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
@@ -467,7 +468,7 @@ void ff_vc1_inv_trans_8x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
[ftmp8]"=&f"(ftmp[8])
: [dest0]"r"(dest+0*linesize), [dest1]"r"(dest+1*linesize),
[dest2]"r"(dest+2*linesize), [dest3]"r"(dest+3*linesize),
- [dc]"f"(dc)
+ [dc]"f"(dc_u.f)
: "memory"
);
}
@@ -480,8 +481,6 @@ void ff_vc1_inv_trans_8x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
double ftmp[16];
uint32_t tmp[1];
int16_t count = 4;
- DECLARE_ALIGNED(16, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
- DECLARE_ALIGNED(16, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
int16_t coeff[64] = {12, 16, 16, 15, 12, 9, 6, 4,
12, 15, 6, -4, -12, -16, -16, -9,
12, 9, -6, -16, -12, 4, 16, 15,
@@ -591,7 +590,7 @@ void ff_vc1_inv_trans_8x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
[ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
[ftmp14]"=&f"(ftmp[14]), [tmp0]"=&r"(tmp[0]),
[src]"+&r"(src), [dst]"+&r"(dst), [count]"+&r"(count)
- : [ff_pw_4]"f"(ff_pw_4_local), [coeff]"r"(coeff)
+ : [ff_pw_4]"f"(ff_pw_32_4.f), [coeff]"r"(coeff)
: "memory"
);
@@ -859,7 +858,7 @@ void ff_vc1_inv_trans_8x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
[ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
[ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
[tmp0]"=&r"(tmp[0])
- : [ff_pw_64]"f"(ff_pw_64_local),
+ : [ff_pw_64]"f"(ff_pw_32_64.f),
[src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
:"memory"
);
@@ -871,10 +870,12 @@ void ff_vc1_inv_trans_4x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
{
int dc = block[0];
double ftmp[9];
+ union mmi_intfloat64 dc_u;
DECLARE_VAR_LOW32;
dc = (17 * dc + 4) >> 3;
dc = (12 * dc + 64) >> 7;
+ dc_u.i = dc;
__asm__ volatile(
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
@@ -934,7 +935,7 @@ void ff_vc1_inv_trans_4x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
[dest2]"r"(dest+2*linesize), [dest3]"r"(dest+3*linesize),
[dest4]"r"(dest+4*linesize), [dest5]"r"(dest+5*linesize),
[dest6]"r"(dest+6*linesize), [dest7]"r"(dest+7*linesize),
- [dc]"f"(dc)
+ [dc]"f"(dc_u.f)
: "memory"
);
}
@@ -945,14 +946,11 @@ void ff_vc1_inv_trans_4x8_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
int16_t *src = block;
int16_t *dst = block;
double ftmp[23];
- uint32_t count = 8, tmp[1];
+ uint64_t count = 8, tmp[1];
int16_t coeff[16] = {17, 22, 17, 10,
17, 10,-17,-22,
17,-10,-17, 22,
17,-22, 17,-10};
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_1_local) = {0x0000000100000001ULL};
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
// 1st loop
__asm__ volatile (
@@ -998,7 +996,7 @@ void ff_vc1_inv_trans_4x8_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[tmp0]"=&r"(tmp[0]), [count]"+&r"(count),
[src]"+&r"(src), [dst]"+&r"(dst)
- : [ff_pw_4]"f"(ff_pw_4_local), [coeff]"r"(coeff)
+ : [ff_pw_4]"f"(ff_pw_32_4.f), [coeff]"r"(coeff)
: "memory"
);
@@ -1115,7 +1113,7 @@ void ff_vc1_inv_trans_4x8_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
[ftmp20]"=&f"(ftmp[20]), [ftmp21]"=&f"(ftmp[21]),
[ftmp22]"=&f"(ftmp[22]),
[tmp0]"=&r"(tmp[0])
- : [ff_pw_1]"f"(ff_pw_1_local), [ff_pw_64]"f"(ff_pw_64_local),
+ : [ff_pw_1]"f"(ff_pw_32_1.f), [ff_pw_64]"f"(ff_pw_32_64.f),
[src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
: "memory"
);
@@ -1127,10 +1125,12 @@ void ff_vc1_inv_trans_4x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
{
int dc = block[0];
double ftmp[5];
+ union mmi_intfloat64 dc_u;
DECLARE_VAR_LOW32;
dc = (17 * dc + 4) >> 3;
dc = (17 * dc + 64) >> 7;
+ dc_u.i = dc;
__asm__ volatile(
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
@@ -1166,7 +1166,7 @@ void ff_vc1_inv_trans_4x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
[ftmp4]"=&f"(ftmp[4])
: [dest0]"r"(dest+0*linesize), [dest1]"r"(dest+1*linesize),
[dest2]"r"(dest+2*linesize), [dest3]"r"(dest+3*linesize),
- [dc]"f"(dc)
+ [dc]"f"(dc_u.f)
: "memory"
);
}
@@ -1181,8 +1181,6 @@ void ff_vc1_inv_trans_4x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
17, 10,-17,-22,
17,-10,-17, 22,
17,-22, 17,-10};
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
- DECLARE_ALIGNED(8, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
// 1st loop
__asm__ volatile (
@@ -1226,7 +1224,7 @@ void ff_vc1_inv_trans_4x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[tmp0]"=&r"(tmp[0]), [count]"+&r"(count),
[src]"+&r"(src), [dst]"+&r"(dst)
- : [ff_pw_4]"f"(ff_pw_4_local), [coeff]"r"(coeff)
+ : [ff_pw_4]"f"(ff_pw_32_4.f), [coeff]"r"(coeff)
: "memory"
);
@@ -1370,7 +1368,7 @@ void ff_vc1_inv_trans_4x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
[ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
[ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
[tmp0]"=&r"(tmp[0])
- : [ff_pw_64]"f"(ff_pw_64_local),
+ : [ff_pw_64]"f"(ff_pw_32_64.f),
[src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
:"memory"
);
@@ -1660,14 +1658,15 @@ static void vc1_put_ver_16b_shift2_mmi(int16_t *dst,
const uint8_t *src, mips_reg stride,
int rnd, int64_t shift)
{
+ union mmi_intfloat64 shift_u;
DECLARE_VAR_LOW32;
DECLARE_VAR_ADDRT;
+ shift_u.i = shift;
__asm__ volatile(
"pxor $f0, $f0, $f0 \n\t"
"li $8, 0x03 \n\t"
LOAD_ROUNDER_MMI("%[rnd]")
- "ldc1 $f12, %[ff_pw_9] \n\t"
"1: \n\t"
MMI_ULWC1($f4, %[src], 0x00)
PTR_ADDU "%[src], %[src], %[stride] \n\t"
@@ -1689,9 +1688,9 @@ static void vc1_put_ver_16b_shift2_mmi(int16_t *dst,
: RESTRICT_ASM_LOW32 RESTRICT_ASM_ADDRT
[src]"+r"(src), [dst]"+r"(dst)
: [stride]"r"(stride), [stride1]"r"(-2*stride),
- [shift]"f"(shift), [rnd]"m"(rnd),
- [stride2]"r"(9*stride-4), [ff_pw_9]"m"(ff_pw_9)
- : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12",
+ [shift]"f"(shift_u.f), [rnd]"m"(rnd),
+ [stride2]"r"(9*stride-4)
+ : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10",
"$f14", "$f16", "memory"
);
}
@@ -1713,8 +1712,6 @@ static void OPNAME ## vc1_hor_16b_shift2_mmi(uint8_t *dst, mips_reg stride, \
\
__asm__ volatile( \
LOAD_ROUNDER_MMI("%[rnd]") \
- "ldc1 $f12, %[ff_pw_128] \n\t" \
- "ldc1 $f10, %[ff_pw_9] \n\t" \
"1: \n\t" \
MMI_ULDC1($f2, %[src], 0x00) \
MMI_ULDC1($f4, %[src], 0x08) \
@@ -1728,16 +1725,16 @@ static void OPNAME ## vc1_hor_16b_shift2_mmi(uint8_t *dst, mips_reg stride, \
"paddh $f6, $f6, $f0 \n\t" \
MMI_ULDC1($f0, %[src], 0x0b) \
"paddh $f8, $f8, $f0 \n\t" \
- "pmullh $f6, $f6, $f10 \n\t" \
- "pmullh $f8, $f8, $f10 \n\t" \
+ "pmullh $f6, $f6, %[ff_pw_9] \n\t" \
+ "pmullh $f8, $f8, %[ff_pw_9] \n\t" \
"psubh $f6, $f6, $f2 \n\t" \
"psubh $f8, $f8, $f4 \n\t" \
"li $8, 0x07 \n\t" \
"mtc1 $8, $f16 \n\t" \
NORMALIZE_MMI("$f16") \
/* Remove bias */ \
- "paddh $f6, $f6, $f12 \n\t" \
- "paddh $f8, $f8, $f12 \n\t" \
+ "paddh $f6, $f6, %[ff_pw_128] \n\t" \
+ "paddh $f8, $f8, %[ff_pw_128] \n\t" \
TRANSFER_DO_PACK(OP) \
"addiu %[h], %[h], -0x01 \n\t" \
PTR_ADDIU "%[src], %[src], 0x18 \n\t" \
@@ -1747,8 +1744,8 @@ static void OPNAME ## vc1_hor_16b_shift2_mmi(uint8_t *dst, mips_reg stride, \
[h]"+r"(h), \
[src]"+r"(src), [dst]"+r"(dst) \
: [stride]"r"(stride), [rnd]"m"(rnd), \
- [ff_pw_9]"m"(ff_pw_9), [ff_pw_128]"m"(ff_pw_128) \
- : "$8", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", "$f14", \
+ [ff_pw_9]"f"(ff_pw_9.f), [ff_pw_128]"f"(ff_pw_128.f) \
+ : "$8", "$f0", "$f2", "$f4", "$f6", "$f8", "$f14", \
"$f16", "memory" \
); \
}
@@ -1774,7 +1771,6 @@ static void OPNAME ## vc1_shift2_mmi(uint8_t *dst, const uint8_t *src, \
"pxor $f0, $f0, $f0 \n\t" \
"li $10, 0x08 \n\t" \
LOAD_ROUNDER_MMI("%[rnd]") \
- "ldc1 $f12, %[ff_pw_9] \n\t" \
"1: \n\t" \
MMI_ULWC1($f6, %[src], 0x00) \
MMI_ULWC1($f8, %[src], 0x04) \
@@ -1791,8 +1787,8 @@ static void OPNAME ## vc1_shift2_mmi(uint8_t *dst, const uint8_t *src, \
PTR_ADDU "$9, %[src], %[offset_x2n] \n\t" \
MMI_ULWC1($f2, $9, 0x00) \
MMI_ULWC1($f4, $9, 0x04) \
- "pmullh $f6, $f6, $f12 \n\t" /* 0,9,9,0*/ \
- "pmullh $f8, $f8, $f12 \n\t" /* 0,9,9,0*/ \
+ "pmullh $f6, $f6, %[ff_pw_9] \n\t" /* 0,9,9,0*/ \
+ "pmullh $f8, $f8, %[ff_pw_9] \n\t" /* 0,9,9,0*/ \
"punpcklbh $f2, $f2, $f0 \n\t" \
"punpcklbh $f4, $f4, $f0 \n\t" \
"psubh $f6, $f6, $f2 \n\t" /*-1,9,9,0*/ \
@@ -1819,9 +1815,9 @@ static void OPNAME ## vc1_shift2_mmi(uint8_t *dst, const uint8_t *src, \
: [offset]"r"(offset), [offset_x2n]"r"(-2*offset), \
[stride]"r"(stride), [rnd]"m"(rnd), \
[stride1]"r"(stride-offset), \
- [ff_pw_9]"m"(ff_pw_9) \
+ [ff_pw_9]"f"(ff_pw_9.f) \
: "$8", "$9", "$10", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", \
- "$f12", "$f14", "$f16", "memory" \
+ "$f14", "$f16", "memory" \
); \
}
@@ -1852,8 +1848,8 @@ VC1_SHIFT2(OP_AVG, avg_)
LOAD($f8, $9, M*4) \
UNPACK("$f6") \
UNPACK("$f8") \
- "pmullh $f6, $f6, $f12 \n\t" /* *18 */ \
- "pmullh $f8, $f8, $f12 \n\t" /* *18 */ \
+ "pmullh $f6, $f6, %[ff_pw_18] \n\t" /* *18 */ \
+ "pmullh $f8, $f8, %[ff_pw_18] \n\t" /* *18 */ \
"psubh $f6, $f6, $f2 \n\t" /* *18, -3 */ \
"psubh $f8, $f8, $f4 \n\t" /* *18, -3 */ \
PTR_ADDU "$9, %[src], "#A4" \n\t" \
@@ -1872,8 +1868,8 @@ VC1_SHIFT2(OP_AVG, avg_)
LOAD($f4, $9, M*4) \
UNPACK("$f2") \
UNPACK("$f4") \
- "pmullh $f2, $f2, $f10 \n\t" /* *53 */ \
- "pmullh $f4, $f4, $f10 \n\t" /* *53 */ \
+ "pmullh $f2, $f2, %[ff_pw_53] \n\t" /* *53 */ \
+ "pmullh $f4, $f4, %[ff_pw_53] \n\t" /* *53 */ \
"paddh $f6, $f6, $f2 \n\t" /* 4,53,18,-3 */ \
"paddh $f8, $f8, $f4 \n\t" /* 4,53,18,-3 */
@@ -1892,16 +1888,16 @@ vc1_put_ver_16b_ ## NAME ## _mmi(int16_t *dst, const uint8_t *src, \
int rnd, int64_t shift) \
{ \
int h = 8; \
+ union mmi_intfloat64 shift_u; \
DECLARE_VAR_LOW32; \
DECLARE_VAR_ADDRT; \
+ shift_u.i = shift; \
\
src -= src_stride; \
\
__asm__ volatile( \
"pxor $f0, $f0, $f0 \n\t" \
LOAD_ROUNDER_MMI("%[rnd]") \
- "ldc1 $f10, %[ff_pw_53] \n\t" \
- "ldc1 $f12, %[ff_pw_18] \n\t" \
".p2align 3 \n\t" \
"1: \n\t" \
MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4) \
@@ -1917,12 +1913,12 @@ vc1_put_ver_16b_ ## NAME ## _mmi(int16_t *dst, const uint8_t *src, \
PTR_ADDU "$9, %[src], "#A2" \n\t" \
MMI_ULWC1($f6, $9, 0x08) \
DO_UNPACK("$f6") \
- "pmullh $f6, $f6, $f12 \n\t" /* *18 */ \
+ "pmullh $f6, $f6, %[ff_pw_18] \n\t" /* *18 */ \
"psubh $f6, $f6, $f2 \n\t" /* *18,-3 */ \
PTR_ADDU "$9, %[src], "#A3" \n\t" \
MMI_ULWC1($f2, $9, 0x08) \
DO_UNPACK("$f2") \
- "pmullh $f2, $f2, $f10 \n\t" /* *53 */ \
+ "pmullh $f2, $f2, %[ff_pw_53] \n\t" /* *53 */ \
"paddh $f6, $f6, $f2 \n\t" /* *53,18,-3 */ \
PTR_ADDU "$9, %[src], "#A4" \n\t" \
MMI_ULWC1($f2, $9, 0x08) \
@@ -1945,10 +1941,10 @@ vc1_put_ver_16b_ ## NAME ## _mmi(int16_t *dst, const uint8_t *src, \
[src]"+r"(src), [dst]"+r"(dst) \
: [stride_x1]"r"(src_stride), [stride_x2]"r"(2*src_stride), \
[stride_x3]"r"(3*src_stride), \
- [rnd]"m"(rnd), [shift]"f"(shift), \
- [ff_pw_53]"m"(ff_pw_53), [ff_pw_18]"m"(ff_pw_18), \
- [ff_pw_3]"f"(ff_pw_3) \
- : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", \
+ [rnd]"m"(rnd), [shift]"f"(shift_u.f), \
+ [ff_pw_53]"f"(ff_pw_53.f), [ff_pw_18]"f"(ff_pw_18.f), \
+ [ff_pw_3]"f"(ff_pw_3.f) \
+ : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", \
"$f14", "$f16", "memory" \
); \
}
@@ -1975,8 +1971,6 @@ OPNAME ## vc1_hor_16b_ ## NAME ## _mmi(uint8_t *dst, mips_reg stride, \
__asm__ volatile( \
"pxor $f0, $f0, $f0 \n\t" \
LOAD_ROUNDER_MMI("%[rnd]") \
- "ldc1 $f10, %[ff_pw_53] \n\t" \
- "ldc1 $f12, %[ff_pw_18] \n\t" \
".p2align 3 \n\t" \
"1: \n\t" \
MSPEL_FILTER13_CORE(DONT_UNPACK, MMI_ULDC1, 2, A1, A2, A3, A4) \
@@ -1995,9 +1989,9 @@ OPNAME ## vc1_hor_16b_ ## NAME ## _mmi(uint8_t *dst, mips_reg stride, \
[h]"+r"(h), \
[src]"+r"(src), [dst]"+r"(dst) \
: [stride]"r"(stride), [rnd]"m"(rnd), \
- [ff_pw_53]"m"(ff_pw_53), [ff_pw_18]"m"(ff_pw_18), \
- [ff_pw_3]"f"(ff_pw_3), [ff_pw_128]"f"(ff_pw_128) \
- : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", \
+ [ff_pw_53]"f"(ff_pw_53.f), [ff_pw_18]"f"(ff_pw_18.f), \
+ [ff_pw_3]"f"(ff_pw_3.f), [ff_pw_128]"f"(ff_pw_128.f) \
+ : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", \
"$f14", "$f16", "memory" \
); \
}
@@ -2025,8 +2019,6 @@ OPNAME ## vc1_## NAME ## _mmi(uint8_t *dst, const uint8_t *src, \
__asm__ volatile ( \
"pxor $f0, $f0, $f0 \n\t" \
LOAD_ROUNDER_MMI("%[rnd]") \
- "ldc1 $f10, %[ff_pw_53] \n\t" \
- "ldc1 $f12, %[ff_pw_18] \n\t" \
".p2align 3 \n\t" \
"1: \n\t" \
MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4) \
@@ -2044,9 +2036,9 @@ OPNAME ## vc1_## NAME ## _mmi(uint8_t *dst, const uint8_t *src, \
: [offset_x1]"r"(offset), [offset_x2]"r"(2*offset), \
[offset_x3]"r"(3*offset), [stride]"r"(stride), \
[rnd]"m"(rnd), \
- [ff_pw_53]"m"(ff_pw_53), [ff_pw_18]"m"(ff_pw_18), \
- [ff_pw_3]"f"(ff_pw_3) \
- : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", \
+ [ff_pw_53]"f"(ff_pw_53.f), [ff_pw_18]"f"(ff_pw_18.f), \
+ [ff_pw_3]"f"(ff_pw_3.f) \
+ : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", \
"$f14", "$f16", "memory" \
); \
}
@@ -2246,14 +2238,15 @@ void ff_put_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
uint8_t *src /* align 1 */,
ptrdiff_t stride, int h, int x, int y)
{
- const int A = (8 - x) * (8 - y);
- const int B = (x) * (8 - y);
- const int C = (8 - x) * (y);
- const int D = (x) * (y);
+ union mmi_intfloat64 A, B, C, D;
double ftmp[10];
uint32_t tmp[1];
DECLARE_VAR_ALL64;
DECLARE_VAR_ADDRT;
+ A.i = (8 - x) * (8 - y);
+ B.i = (x) * (8 - y);
+ C.i = (8 - x) * (y);
+ D.i = (x) * (y);
av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
@@ -2290,9 +2283,9 @@ void ff_put_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
[src]"+&r"(src), [dst]"+&r"(dst),
[h]"+&r"(h)
: [stride]"r"((mips_reg)stride),
- [A]"f"(A), [B]"f"(B),
- [C]"f"(C), [D]"f"(D),
- [ff_pw_28]"f"(ff_pw_28)
+ [A]"f"(A.f), [B]"f"(B.f),
+ [C]"f"(C.f), [D]"f"(D.f),
+ [ff_pw_28]"f"(ff_pw_28.f)
: "memory"
);
}
@@ -2301,14 +2294,15 @@ void ff_put_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
uint8_t *src /* align 1 */,
ptrdiff_t stride, int h, int x, int y)
{
- const int A = (8 - x) * (8 - y);
- const int B = (x) * (8 - y);
- const int C = (8 - x) * (y);
- const int D = (x) * (y);
+ union mmi_intfloat64 A, B, C, D;
double ftmp[6];
uint32_t tmp[1];
DECLARE_VAR_LOW32;
DECLARE_VAR_ADDRT;
+ A.i = (8 - x) * (8 - y);
+ B.i = (x) * (8 - y);
+ C.i = (8 - x) * (y);
+ D.i = (x) * (y);
av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
@@ -2343,9 +2337,9 @@ void ff_put_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
[src]"+&r"(src), [dst]"+&r"(dst),
[h]"+&r"(h)
: [stride]"r"((mips_reg)stride),
- [A]"f"(A), [B]"f"(B),
- [C]"f"(C), [D]"f"(D),
- [ff_pw_28]"f"(ff_pw_28)
+ [A]"f"(A.f), [B]"f"(B.f),
+ [C]"f"(C.f), [D]"f"(D.f),
+ [ff_pw_28]"f"(ff_pw_28.f)
: "memory"
);
}
@@ -2354,14 +2348,15 @@ void ff_avg_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
uint8_t *src /* align 1 */,
ptrdiff_t stride, int h, int x, int y)
{
- const int A = (8 - x) * (8 - y);
- const int B = (x) * (8 - y);
- const int C = (8 - x) * (y);
- const int D = (x) * (y);
+ union mmi_intfloat64 A, B, C, D;
double ftmp[10];
uint32_t tmp[1];
DECLARE_VAR_ALL64;
DECLARE_VAR_ADDRT;
+ A.i = (8 - x) * (8 - y);
+ B.i = (x) * (8 - y);
+ C.i = (8 - x) * (y);
+ D.i = (x) * (y);
av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
@@ -2401,9 +2396,9 @@ void ff_avg_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
[src]"+&r"(src), [dst]"+&r"(dst),
[h]"+&r"(h)
: [stride]"r"((mips_reg)stride),
- [A]"f"(A), [B]"f"(B),
- [C]"f"(C), [D]"f"(D),
- [ff_pw_28]"f"(ff_pw_28)
+ [A]"f"(A.f), [B]"f"(B.f),
+ [C]"f"(C.f), [D]"f"(D.f),
+ [ff_pw_28]"f"(ff_pw_28.f)
: "memory"
);
}
@@ -2412,14 +2407,15 @@ void ff_avg_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
uint8_t *src /* align 1 */,
ptrdiff_t stride, int h, int x, int y)
{
- const int A = (8 - x) * (8 - y);
- const int B = ( x) * (8 - y);
- const int C = (8 - x) * ( y);
- const int D = ( x) * ( y);
+ union mmi_intfloat64 A, B, C, D;
double ftmp[6];
uint32_t tmp[1];
DECLARE_VAR_LOW32;
DECLARE_VAR_ADDRT;
+ A.i = (8 - x) * (8 - y);
+ B.i = (x) * (8 - y);
+ C.i = (8 - x) * (y);
+ D.i = (x) * (y);
av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
@@ -2457,9 +2453,9 @@ void ff_avg_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
[src]"+&r"(src), [dst]"+&r"(dst),
[h]"+&r"(h)
: [stride]"r"((mips_reg)stride),
- [A]"f"(A), [B]"f"(B),
- [C]"f"(C), [D]"f"(D),
- [ff_pw_28]"f"(ff_pw_28)
+ [A]"f"(A.f), [B]"f"(B.f),
+ [C]"f"(C.f), [D]"f"(D.f),
+ [ff_pw_28]"f"(ff_pw_28.f)
: "memory"
);
}
@@ -1128,12 +1128,14 @@ void ff_vp8_luma_dc_wht_dc_mmi(int16_t block[4][4][16], int16_t dc[16])
void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
{
#if 1
- DECLARE_ALIGNED(8, const uint64_t, ff_ph_4e7b) = {0x4e7b4e7b4e7b4e7bULL};
- DECLARE_ALIGNED(8, const uint64_t, ff_ph_22a3) = {0x22a322a322a322a3ULL};
double ftmp[12];
uint32_t tmp[1];
+ union av_intfloat64 ff_ph_4e7b_u;
+ union av_intfloat64 ff_ph_22a3_u;
DECLARE_VAR_LOW32;
DECLARE_VAR_ALL64;
+ ff_ph_4e7b_u.i = 0x4e7b4e7b4e7b4e7bULL;
+ ff_ph_22a3_u.i = 0x22a322a322a322a3ULL;
__asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
@@ -1253,8 +1255,8 @@ void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
[tmp0]"=&r"(tmp[0])
: [dst0]"r"(dst), [dst1]"r"(dst+stride),
[dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
- [block]"r"(block), [ff_pw_4]"f"(ff_pw_4),
- [ff_ph_4e7b]"f"(ff_ph_4e7b), [ff_ph_22a3]"f"(ff_ph_22a3)
+ [block]"r"(block), [ff_pw_4]"f"(ff_pw_4.f),
+ [ff_ph_4e7b]"f"(ff_ph_4e7b_u.f), [ff_ph_22a3]"f"(ff_ph_22a3_u.f)
: "memory"
);
#else
@@ -1595,8 +1597,16 @@ void ff_put_vp8_epel16_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
const uint64_t *filter = fourtap_subpel_filters[mx - 1];
double ftmp[9];
uint32_t tmp[1];
+ union av_intfloat64 filter1;
+ union av_intfloat64 filter2;
+ union av_intfloat64 filter3;
+ union av_intfloat64 filter4;
mips_reg src1, dst1;
DECLARE_VAR_ALL64;
+ filter1.i = filter[1];
+ filter2.i = filter[2];
+ filter3.i = filter[3];
+ filter4.i = filter[4];
/*
dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7];
@@ -1644,11 +1654,11 @@ void ff_put_vp8_epel16_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
[dst1]"=&r"(dst1), [src1]"=&r"(src1),
[h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src)
- : [ff_pw_64]"f"(ff_pw_64),
+ : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride),
- [filter1]"f"(filter[1]), [filter2]"f"(filter[2]),
- [filter3]"f"(filter[3]), [filter4]"f"(filter[4])
+ [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
+ [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
: "memory"
);
#else
@@ -1672,7 +1682,16 @@ void ff_put_vp8_epel8_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
const uint64_t *filter = fourtap_subpel_filters[mx - 1];
double ftmp[9];
uint32_t tmp[1];
+ union av_intfloat64 filter1;
+ union av_intfloat64 filter2;
+ union av_intfloat64 filter3;
+ union av_intfloat64 filter4;
DECLARE_VAR_ALL64;
+ filter1.i = filter[1];
+ filter2.i = filter[2];
+ filter3.i = filter[3];
+ filter4.i = filter[4];
+
/*
dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7];
@@ -1705,11 +1724,11 @@ void ff_put_vp8_epel8_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
RESTRICT_ASM_ALL64
[h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src)
- : [ff_pw_64]"f"(ff_pw_64),
+ : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride),
- [filter1]"f"(filter[1]), [filter2]"f"(filter[2]),
- [filter3]"f"(filter[3]), [filter4]"f"(filter[4])
+ [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
+ [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
: "memory"
);
#else
@@ -1733,7 +1752,15 @@ void ff_put_vp8_epel4_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
const uint64_t *filter = fourtap_subpel_filters[mx - 1];
double ftmp[6];
uint32_t tmp[1];
+ union av_intfloat64 filter1;
+ union av_intfloat64 filter2;
+ union av_intfloat64 filter3;
+ union av_intfloat64 filter4;
DECLARE_VAR_LOW32;
+ filter1.i = filter[1];
+ filter2.i = filter[2];
+ filter3.i = filter[3];
+ filter4.i = filter[4];
/*
dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7];
@@ -1760,11 +1787,11 @@ void ff_put_vp8_epel4_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
RESTRICT_ASM_LOW32
[h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src)
- : [ff_pw_64]"f"(ff_pw_64),
+ : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride),
- [filter1]"f"(filter[1]), [filter2]"f"(filter[2]),
- [filter3]"f"(filter[3]), [filter4]"f"(filter[4])
+ [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
+ [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
: "memory"
);
#else
@@ -1789,7 +1816,19 @@ void ff_put_vp8_epel16_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
double ftmp[9];
uint32_t tmp[1];
mips_reg src1, dst1;
+ union av_intfloat64 filter0;
+ union av_intfloat64 filter1;
+ union av_intfloat64 filter2;
+ union av_intfloat64 filter3;
+ union av_intfloat64 filter4;
+ union av_intfloat64 filter5;
DECLARE_VAR_ALL64;
+ filter0.i = filter[0];
+ filter1.i = filter[1];
+ filter2.i = filter[2];
+ filter3.i = filter[3];
+ filter4.i = filter[4];
+ filter5.i = filter[5];
/*
dst[ 0] = cm[(filter[2]*src[ 0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[ 1] - filter[4]*src[ 2] + filter[5]*src[ 3] + 64) >> 7];
@@ -1837,12 +1876,12 @@ void ff_put_vp8_epel16_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
[dst1]"=&r"(dst1), [src1]"=&r"(src1),
[h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src)
- : [ff_pw_64]"f"(ff_pw_64),
+ : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride),
- [filter0]"f"(filter[0]), [filter1]"f"(filter[1]),
- [filter2]"f"(filter[2]), [filter3]"f"(filter[3]),
- [filter4]"f"(filter[4]), [filter5]"f"(filter[5])
+ [filter0]"f"(filter0.f), [filter1]"f"(filter1.f),
+ [filter2]"f"(filter2.f), [filter3]"f"(filter3.f),
+ [filter4]"f"(filter4.f), [filter5]"f"(filter5.f)
: "memory"
);
#else
@@ -1866,7 +1905,19 @@ void ff_put_vp8_epel8_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
const uint64_t *filter = fourtap_subpel_filters[mx - 1];
double ftmp[9];
uint32_t tmp[1];
+ union av_intfloat64 filter0;
+ union av_intfloat64 filter1;
+ union av_intfloat64 filter2;
+ union av_intfloat64 filter3;
+ union av_intfloat64 filter4;
+ union av_intfloat64 filter5;
DECLARE_VAR_ALL64;
+ filter0.i = filter[0];
+ filter1.i = filter[1];
+ filter2.i = filter[2];
+ filter3.i = filter[3];
+ filter4.i = filter[4];
+ filter5.i = filter[5];
/*
dst[0] = cm[(filter[2]*src[0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[1] - filter[4]*src[2] + filter[5]*src[ 3] + 64) >> 7];
@@ -1899,12 +1950,12 @@ void ff_put_vp8_epel8_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
RESTRICT_ASM_ALL64
[h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src)
- : [ff_pw_64]"f"(ff_pw_64),
+ : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride),
- [filter0]"f"(filter[0]), [filter1]"f"(filter[1]),
- [filter2]"f"(filter[2]), [filter3]"f"(filter[3]),
- [filter4]"f"(filter[4]), [filter5]"f"(filter[5])
+ [filter0]"f"(filter0.f), [filter1]"f"(filter1.f),
+ [filter2]"f"(filter2.f), [filter3]"f"(filter3.f),
+ [filter4]"f"(filter4.f), [filter5]"f"(filter5.f)
: "memory"
);
#else
@@ -1928,7 +1979,19 @@ void ff_put_vp8_epel4_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
const uint64_t *filter = fourtap_subpel_filters[mx - 1];
double ftmp[6];
uint32_t tmp[1];
+ union av_intfloat64 filter0;
+ union av_intfloat64 filter1;
+ union av_intfloat64 filter2;
+ union av_intfloat64 filter3;
+ union av_intfloat64 filter4;
+ union av_intfloat64 filter5;
DECLARE_VAR_LOW32;
+ filter0.i = filter[0];
+ filter1.i = filter[1];
+ filter2.i = filter[2];
+ filter3.i = filter[3];
+ filter4.i = filter[4];
+ filter5.i = filter[5];
/*
dst[0] = cm[(filter[2]*src[0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[1] - filter[4]*src[2] + filter[5]*src[ 3] + 64) >> 7];
@@ -1955,12 +2018,12 @@ void ff_put_vp8_epel4_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
RESTRICT_ASM_LOW32
[h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src)
- : [ff_pw_64]"f"(ff_pw_64),
+ : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride),
- [filter0]"f"(filter[0]), [filter1]"f"(filter[1]),
- [filter2]"f"(filter[2]), [filter3]"f"(filter[3]),
- [filter4]"f"(filter[4]), [filter5]"f"(filter[5])
+ [filter0]"f"(filter0.f), [filter1]"f"(filter1.f),
+ [filter2]"f"(filter2.f), [filter3]"f"(filter3.f),
+ [filter4]"f"(filter4.f), [filter5]"f"(filter5.f)
: "memory"
);
#else
@@ -1985,7 +2048,15 @@ void ff_put_vp8_epel16_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
double ftmp[9];
uint32_t tmp[1];
mips_reg src0, src1, dst0;
+ union av_intfloat64 filter1;
+ union av_intfloat64 filter2;
+ union av_intfloat64 filter3;
+ union av_intfloat64 filter4;
DECLARE_VAR_ALL64;
+ filter1.i = filter[1];
+ filter2.i = filter[2];
+ filter3.i = filter[3];
+ filter4.i = filter[4];
/*
dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[ srcstride] - filter[4] * src[ 2*srcstride] + 64) >> 7];
@@ -2034,11 +2105,11 @@ void ff_put_vp8_epel16_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
[src1]"=&r"(src1),
[h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src)
- : [ff_pw_64]"f"(ff_pw_64),
+ : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride),
- [filter1]"f"(filter[1]), [filter2]"f"(filter[2]),
- [filter3]"f"(filter[3]), [filter4]"f"(filter[4])
+ [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
+ [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
: "memory"
);
#else
@@ -2063,7 +2134,15 @@ void ff_put_vp8_epel8_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
double ftmp[9];
uint32_t tmp[1];
mips_reg src1;
+ union av_intfloat64 filter1;
+ union av_intfloat64 filter2;
+ union av_intfloat64 filter3;
+ union av_intfloat64 filter4;
DECLARE_VAR_ALL64;
+ filter1.i = filter[1];
+ filter2.i = filter[2];
+ filter3.i = filter[3];
+ filter4.i = filter[4];
/*
dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[ srcstride] - filter[4] * src[ 2*srcstride] + 64) >> 7];
@@ -2097,11 +2176,11 @@ void ff_put_vp8_epel8_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
[src1]"=&r"(src1),
[h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src)
- : [ff_pw_64]"f"(ff_pw_64),
+ : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride),
- [filter1]"f"(filter[1]), [filter2]"f"(filter[2]),
- [filter3]"f"(filter[3]), [filter4]"f"(filter[4])
+ [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
+ [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
: "memory"
);
#else
@@ -2126,7 +2205,15 @@ void ff_put_vp8_epel4_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
double ftmp[6];
uint32_t tmp[1];
mips_reg src1;
+ union av_intfloat64 filter1;
+ union av_intfloat64 filter2;
+ union av_intfloat64 filter3;
+ union av_intfloat64 filter4;
DECLARE_VAR_LOW32;
+ filter1.i = filter[1];
+ filter2.i = filter[2];
+ filter3.i = filter[3];
+ filter4.i = filter[4];
/*
dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[ srcstride] - filter[4] * src[ 2*srcstride] + 64) >> 7];
@@ -2154,11 +2241,11 @@ void ff_put_vp8_epel4_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
[src1]"=&r"(src1),
[h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src)
- : [ff_pw_64]"f"(ff_pw_64),
+ : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride),
- [filter1]"f"(filter[1]), [filter2]"f"(filter[2]),
- [filter3]"f"(filter[3]), [filter4]"f"(filter[4])
+ [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
+ [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
: "memory"
);
#else
@@ -2183,7 +2270,19 @@ void ff_put_vp8_epel16_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
double ftmp[9];
uint32_t tmp[1];
mips_reg src0, src1, dst0;
+ union av_intfloat64 filter0;
+ union av_intfloat64 filter1;
+ union av_intfloat64 filter2;
+ union av_intfloat64 filter3;
+ union av_intfloat64 filter4;
+ union av_intfloat64 filter5;
DECLARE_VAR_ALL64;
+ filter0.i = filter[0];
+ filter1.i = filter[1];
+ filter2.i = filter[2];
+ filter3.i = filter[3];
+ filter4.i = filter[4];
+ filter5.i = filter[5];
/*
dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7];
@@ -2232,12 +2331,12 @@ void ff_put_vp8_epel16_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
[src1]"=&r"(src1),
[h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src)
- : [ff_pw_64]"f"(ff_pw_64),
+ : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride),
- [filter0]"f"(filter[0]), [filter1]"f"(filter[1]),
- [filter2]"f"(filter[2]), [filter3]"f"(filter[3]),
- [filter4]"f"(filter[4]), [filter5]"f"(filter[5])
+ [filter0]"f"(filter0.f), [filter1]"f"(filter1.f),
+ [filter2]"f"(filter2.f), [filter3]"f"(filter3.f),
+ [filter4]"f"(filter4.f), [filter5]"f"(filter5.f)
: "memory"
);
#else
@@ -2262,7 +2361,19 @@ void ff_put_vp8_epel8_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
double ftmp[9];
uint32_t tmp[1];
mips_reg src1;
+ union av_intfloat64 filter0;
+ union av_intfloat64 filter1;
+ union av_intfloat64 filter2;
+ union av_intfloat64 filter3;
+ union av_intfloat64 filter4;
+ union av_intfloat64 filter5;
DECLARE_VAR_ALL64;
+ filter0.i = filter[0];
+ filter1.i = filter[1];
+ filter2.i = filter[2];
+ filter3.i = filter[3];
+ filter4.i = filter[4];
+ filter5.i = filter[5];
/*
dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7];
@@ -2296,12 +2407,12 @@ void ff_put_vp8_epel8_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
[src1]"=&r"(src1),
[h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src)
- : [ff_pw_64]"f"(ff_pw_64),
+ : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride),
- [filter0]"f"(filter[0]), [filter1]"f"(filter[1]),
- [filter2]"f"(filter[2]), [filter3]"f"(filter[3]),
- [filter4]"f"(filter[4]), [filter5]"f"(filter[5])
+ [filter0]"f"(filter0.f), [filter1]"f"(filter1.f),
+ [filter2]"f"(filter2.f), [filter3]"f"(filter3.f),
+ [filter4]"f"(filter4.f), [filter5]"f"(filter5.f)
: "memory"
);
#else
@@ -2326,7 +2437,19 @@ void ff_put_vp8_epel4_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
double ftmp[6];
uint32_t tmp[1];
mips_reg src1;
+ union av_intfloat64 filter0;
+ union av_intfloat64 filter1;
+ union av_intfloat64 filter2;
+ union av_intfloat64 filter3;
+ union av_intfloat64 filter4;
+ union av_intfloat64 filter5;
DECLARE_VAR_LOW32;
+ filter0.i = filter[0];
+ filter1.i = filter[1];
+ filter2.i = filter[2];
+ filter3.i = filter[3];
+ filter4.i = filter[4];
+ filter5.i = filter[5];
/*
dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7];
@@ -2354,12 +2477,12 @@ void ff_put_vp8_epel4_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
[src1]"=&r"(src1),
[h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src)
- : [ff_pw_64]"f"(ff_pw_64),
+ : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride),
- [filter0]"f"(filter[0]), [filter1]"f"(filter[1]),
- [filter2]"f"(filter[2]), [filter3]"f"(filter[3]),
- [filter4]"f"(filter[4]), [filter5]"f"(filter[5])
+ [filter0]"f"(filter0.f), [filter1]"f"(filter1.f),
+ [filter2]"f"(filter2.f), [filter3]"f"(filter3.f),
+ [filter4]"f"(filter4.f), [filter5]"f"(filter5.f)
: "memory"
);
#else
@@ -2847,11 +2970,13 @@ void ff_put_vp8_bilinear16_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
ptrdiff_t sstride, int h, int mx, int my)
{
#if 1
- int a = 8 - mx, b = mx;
+ union mmi_intfloat64 a, b;
double ftmp[7];
uint32_t tmp[1];
mips_reg dst0, src0;
DECLARE_VAR_ALL64;
+ a.i = 8 - mx;
+ b.i = mx;
/*
dst[0] = (a * src[0] + b * src[1] + 4) >> 3;
@@ -2900,10 +3025,10 @@ void ff_put_vp8_bilinear16_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
[dst0]"=&r"(dst0), [src0]"=&r"(src0),
[h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src),
- [a]"+&f"(a), [b]"+&f"(b)
+ [a]"+&f"(a.f), [b]"+&f"(b.f)
: [sstride]"r"((mips_reg)sstride),
[dstride]"r"((mips_reg)dstride),
- [ff_pw_4]"f"(ff_pw_4)
+ [ff_pw_4]"f"(ff_pw_4.f)
: "memory"
);
#else
@@ -2923,11 +3048,13 @@ void ff_put_vp8_bilinear16_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
ptrdiff_t sstride, int h, int mx, int my)
{
#if 1
- int c = 8 - my, d = my;
+ union mmi_intfloat64 c, d;
double ftmp[7];
uint32_t tmp[1];
mips_reg src0, src1, dst0;
DECLARE_VAR_ALL64;
+ c.i = 8 - my;
+ d.i = my;
/*
dst[0] = (c * src[0] + d * src[ sstride] + 4) >> 3;
@@ -2968,10 +3095,10 @@ void ff_put_vp8_bilinear16_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
[src1]"=&r"(src1),
[h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src),
- [c]"+&f"(c), [d]"+&f"(d)
+ [c]"+&f"(c.f), [d]"+&f"(d.f)
: [sstride]"r"((mips_reg)sstride),
[dstride]"r"((mips_reg)dstride),
- [ff_pw_4]"f"(ff_pw_4)
+ [ff_pw_4]"f"(ff_pw_4.f)
: "memory"
);
#else
@@ -3025,10 +3152,12 @@ void ff_put_vp8_bilinear8_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
ptrdiff_t sstride, int h, int mx, int my)
{
#if 1
- int a = 8 - mx, b = mx;
+ union mmi_intfloat64 a, b;
double ftmp[7];
uint32_t tmp[1];
DECLARE_VAR_ALL64;
+ a.i = 8 - mx;
+ b.i = mx;
/*
dst[0] = (a * src[0] + b * src[1] + 4) >> 3;
@@ -3062,10 +3191,10 @@ void ff_put_vp8_bilinear8_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
RESTRICT_ASM_ALL64
[h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src),
- [a]"+&f"(a), [b]"+&f"(b)
+ [a]"+&f"(a.f), [b]"+&f"(b.f)
: [sstride]"r"((mips_reg)sstride),
[dstride]"r"((mips_reg)dstride),
- [ff_pw_4]"f"(ff_pw_4)
+ [ff_pw_4]"f"(ff_pw_4.f)
: "memory"
);
#else
@@ -3085,11 +3214,13 @@ void ff_put_vp8_bilinear8_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
ptrdiff_t sstride, int h, int mx, int my)
{
#if 1
- int c = 8 - my, d = my;
+ union mmi_intfloat64 c, d;
double ftmp[7];
uint32_t tmp[1];
mips_reg src1;
DECLARE_VAR_ALL64;
+ c.i = 8 - my;
+ d.i = my;
/*
dst[0] = (c * src[0] + d * src[ sstride] + 4) >> 3;
@@ -3124,10 +3255,10 @@ void ff_put_vp8_bilinear8_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
[src1]"=&r"(src1),
[h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src),
- [c]"+&f"(c), [d]"+&f"(d)
+ [c]"+&f"(c.f), [d]"+&f"(d.f)
: [sstride]"r"((mips_reg)sstride),
[dstride]"r"((mips_reg)dstride),
- [ff_pw_4]"f"(ff_pw_4)
+ [ff_pw_4]"f"(ff_pw_4.f)
: "memory"
);
#else
@@ -3181,11 +3312,13 @@ void ff_put_vp8_bilinear4_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
ptrdiff_t sstride, int h, int mx, int my)
{
#if 1
- int a = 8 - mx, b = mx;
+ union mmi_intfloat64 a, b;
double ftmp[5];
uint32_t tmp[1];
DECLARE_VAR_LOW32;
DECLARE_VAR_ALL64;
+ a.i = 8 - mx;
+ b.i = mx;
/*
dst[0] = (a * src[0] + b * src[1] + 4) >> 3;
@@ -3215,10 +3348,10 @@ void ff_put_vp8_bilinear4_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
RESTRICT_ASM_ALL64
[h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src),
- [a]"+&f"(a), [b]"+&f"(b)
+ [a]"+&f"(a.f), [b]"+&f"(b.f)
: [sstride]"r"((mips_reg)sstride),
[dstride]"r"((mips_reg)dstride),
- [ff_pw_4]"f"(ff_pw_4)
+ [ff_pw_4]"f"(ff_pw_4.f)
: "memory"
);
#else
@@ -3238,12 +3371,14 @@ void ff_put_vp8_bilinear4_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
ptrdiff_t sstride, int h, int mx, int my)
{
#if 1
- int c = 8 - my, d = my;
+ union mmi_intfloat64 c, d;
double ftmp[7];
uint32_t tmp[1];
mips_reg src1;
DECLARE_VAR_LOW32;
DECLARE_VAR_ALL64;
+ c.i = 8 - my;
+ d.i = my;
/*
dst[0] = (c * src[0] + d * src[ sstride] + 4) >> 3;
@@ -3274,10 +3409,10 @@ void ff_put_vp8_bilinear4_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
[src1]"=&r"(src1),
[h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src),
- [c]"+&f"(c), [d]"+&f"(d)
+ [c]"+&f"(c.f), [d]"+&f"(d.f)
: [sstride]"r"((mips_reg)sstride),
[dstride]"r"((mips_reg)dstride),
- [ff_pw_4]"f"(ff_pw_4)
+ [ff_pw_4]"f"(ff_pw_4.f)
: "memory"
);
#else
@@ -27,6 +27,8 @@
#ifndef AVUTIL_MIPS_ASMDEFS_H
#define AVUTIL_MIPS_ASMDEFS_H
+#include <stdint.h>
+
#if defined(_ABI64) && _MIPS_SIM == _ABI64
# define mips_reg int64_t
# define PTRSIZE " 8 "
@@ -97,4 +99,10 @@ __asm__(".macro parse_r var r\n\t"
".endif\n\t"
".endm");
+/* Union overlaying an int64_t (.i) and a double (.f): store an integer via .i, then pass .f as an "f"-constraint asm operand, which clang requires to be a floating-point type */
+union mmi_intfloat64 {
+ int64_t i;
+ double f;
+};
+
#endif /* AVCODEC_MIPS_ASMDEFS_H */
Clang is stricter about the types of asm operands: float or double variables must use constraint 'f', and integer variables must use constraint 'r'. Signed-off-by: Jin Bo <jinbo@loongson.cn> --- libavcodec/mips/constants.c | 89 +++++++------ libavcodec/mips/constants.h | 88 +++++++------ libavcodec/mips/h264chroma_mmi.c | 157 +++++++++++------------ libavcodec/mips/h264dsp_mmi.c | 20 +-- libavcodec/mips/h264pred_mmi.c | 23 ++-- libavcodec/mips/h264qpel_mmi.c | 34 ++--- libavcodec/mips/hevcdsp_mmi.c | 59 +++++---- libavcodec/mips/idctdsp_mmi.c | 2 +- libavcodec/mips/mpegvideo_mmi.c | 20 +-- libavcodec/mips/vc1dsp_mmi.c | 176 +++++++++++++------------- libavcodec/mips/vp8dsp_mmi.c | 263 +++++++++++++++++++++++++++++---------- libavutil/mips/asmdefs.h | 8 ++ 12 files changed, 536 insertions(+), 403 deletions(-)