diff mbox series

[FFmpeg-devel,08/14,inline,assembly] add mmx clobbers to mpegvideoenc

Message ID 20200422174918.7290-8-frederic.recoules@univ-grenoble-alpes.fr
State New
Headers show
Series [FFmpeg-devel,1/5,inline,assembly] prepares for contiguous assembly statements merging | expand

Checks

Context Check Description
andriy/default pending
andriy/make success Make finished
andriy/make_fate success Make fate finished

Commit Message

FRÉDÉRIC RECOULES April 26, 2020, 7:44 p.m. UTC
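From: Frédéric Recoules <frederic.recoules@orange.fr>

Add MMX_CLOBBERS_ONLY clobber lists to the inline assembly statements of
try_8x8basis, add_8x8basis and draw_edges_mmx.

In the qns template, the SET_RND(mm6) statement that preceded the asm
block is replaced by SET_RND_TPL(mm6) placed at the start of the asm
template. "mm6" is appended to the clobber list through SET_RND_CLOBBER,
which expands to its arguments only for the MMX variant; the 3DNow! and
SSSE3 variants define SET_RND_TPL and SET_RND_CLOBBER as empty.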
---
 libavcodec/x86/mpegvideoenc_qns_template.c | 12 +++++---
 libavcodec/x86/mpegvideoencdsp_init.c      | 32 ++++++++++++++++++----
 2 files changed, 35 insertions(+), 9 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/x86/mpegvideoenc_qns_template.c b/libavcodec/x86/mpegvideoenc_qns_template.c
index 882d486205..96325fd8f8 100644
--- a/libavcodec/x86/mpegvideoenc_qns_template.c
+++ b/libavcodec/x86/mpegvideoenc_qns_template.c
@@ -39,8 +39,8 @@  static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[
     av_assert2(FFABS(scale) < MAX_ABS);
     scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
 
-    SET_RND(mm6);
     __asm__ volatile(
+	SET_RND_TPL(mm6)
         "pxor %%mm7, %%mm7              \n\t"
         "movd  %4, %%mm5                \n\t"
         "punpcklwd %%mm5, %%mm5         \n\t"
@@ -69,7 +69,9 @@  static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[
         "movd %%mm7, %0                 \n\t"
 
         : "+r" (i)
-        : "r"(basis), "r"(rem), "r"(weight), "g"(scale)
+        : "r"(basis), "r"(rem), "r"(weight), "g"(scale) COMMA_SET_RND_IN
+	  MMX_CLOBBERS_ONLY("mm0", "mm1", "mm5", "mm7"
+			    SET_RND_CLOBBER(, "mm6"))
     );
     return i;
 }
@@ -80,8 +82,8 @@  static void DEF(add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale)
 
     if(FFABS(scale) < MAX_ABS){
         scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
-        SET_RND(mm6);
         __asm__ volatile(
+		SET_RND_TPL(mm6)
                 "movd  %3, %%mm5        \n\t"
                 "punpcklwd %%mm5, %%mm5 \n\t"
                 "punpcklwd %%mm5, %%mm5 \n\t"
@@ -99,7 +101,9 @@  static void DEF(add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale)
                 " jb 1b                 \n\t"
 
                 : "+r" (i)
-                : "r"(basis), "r"(rem), "g"(scale)
+                : "r"(basis), "r"(rem), "g"(scale) COMMA_SET_RND_IN
+		  MMX_CLOBBERS_ONLY("mm0", "mm1", "mm5"
+				    SET_RND_CLOBBER(, "mm6"))
         );
     }else{
         for(i=0; i<8*8; i++){
diff --git a/libavcodec/x86/mpegvideoencdsp_init.c b/libavcodec/x86/mpegvideoencdsp_init.c
index 532836cec9..8430ec62ea 100644
--- a/libavcodec/x86/mpegvideoencdsp_init.c
+++ b/libavcodec/x86/mpegvideoencdsp_init.c
@@ -51,17 +51,26 @@  int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
     "psraw      $1, " #y "              \n\t"
 #define DEF(x) x ## _mmx
 #define SET_RND MOVQ_WONE
+#define SET_RND_TPL MOVQ_WONE_TPL
+#define COMMA_SET_RND_IN
+#define SET_RND_CLOBBER(...) __VA_ARGS__
 #define SCALE_OFFSET 1
 
 #include "mpegvideoenc_qns_template.c"
 
 #undef DEF
 #undef SET_RND
+#undef SET_RND_TPL
+#undef COMMA_SET_RND_IN
+#undef SET_RND_CLOBBER
 #undef SCALE_OFFSET
 #undef PMULHRW
 
 #define DEF(x) x ## _3dnow
 #define SET_RND(x)
+#define SET_RND_TPL(x)
+#define COMMA_SET_RND_IN
+#define SET_RND_CLOBBER(...)
 #define SCALE_OFFSET 0
 #define PMULHRW(x, y, s, o)                     \
     "pmulhrw " #s ", " #x "             \n\t"   \
@@ -71,6 +80,9 @@  int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
 
 #undef DEF
 #undef SET_RND
+#undef SET_RND_TPL
+#undef COMMA_SET_RND_IN
+#undef SET_RND_CLOBBER
 #undef SCALE_OFFSET
 #undef PMULHRW
 
@@ -78,6 +90,9 @@  int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
 #undef PHADDD
 #define DEF(x) x ## _ssse3
 #define SET_RND(x)
+#define SET_RND_TPL(x)
+#define COMMA_SET_RND_IN
+#define SET_RND_CLOBBER(...)
 #define SCALE_OFFSET -1
 
 #define PHADDD(a, t)                            \
@@ -93,6 +108,9 @@  int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
 
 #undef DEF
 #undef SET_RND
+#undef SET_RND_TPL
+#undef COMMA_SET_RND_IN
+#undef SET_RND_CLOBBER
 #undef SCALE_OFFSET
 #undef PMULHRW
 #undef PHADDD
@@ -127,7 +145,8 @@  static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
             "jb                1b           \n\t"
             : "+r" (ptr)
             : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
-              "r" (ptr + wrap * height));
+              "r" (ptr + wrap * height)
+	      MMX_CLOBBERS_ONLY("mm0", "mm1") );
     } else if (w == 16) {
         __asm__ volatile (
             "1:                                 \n\t"
@@ -148,7 +167,7 @@  static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
             "jb                1b               \n\t"
             : "+r"(ptr)
             : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height)
-            );
+	      MMX_CLOBBERS_ONLY("mm0", "mm1") );
     } else {
         av_assert1(w == 4);
         __asm__ volatile (
@@ -167,7 +186,8 @@  static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
             "jb                1b           \n\t"
             : "+r" (ptr)
             : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
-              "r" (ptr + wrap * height));
+              "r" (ptr + wrap * height)
+	      MMX_CLOBBERS_ONLY("mm0", "mm1") );
     }
 
     /* top and bottom (and hopefully also the corners) */
@@ -187,7 +207,8 @@  static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
                 : "+r" (ptr)
                 : "r" ((x86_reg) buf - (x86_reg) ptr - w),
                   "r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3),
-                  "r" (ptr + width + 2 * w));
+                  "r" (ptr + width + 2 * w)
+		  MMX_CLOBBERS_ONLY("mm0") );
         }
     }
 
@@ -207,7 +228,8 @@  static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
                 : "+r" (ptr)
                 : "r" ((x86_reg) last_line - (x86_reg) ptr - w),
                   "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3),
-                  "r" (ptr + width + 2 * w));
+                  "r" (ptr + width + 2 * w)
+		  MMX_CLOBBERS_ONLY("mm0") );
         }
     }
 }