diff mbox

[FFmpeg-devel] avcodec/mips: [loongson] fix bug of svq3-watermark failed in fate test.

Message ID 1536807845-8254-1-git-send-email-yinshiyou-hf@loongson.cn
State Accepted
Commit a55adf24b91af70566cf0967df336b700e0aa025
Headers show

Commit Message

Shiyou Yin Sept. 13, 2018, 3:04 a.m. UTC
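Failed case: svq3-watermark
When the minimum loop count of the following functions is greater than the parameter h passed to them, svq3-watermark fails. Each loop previously processed four rows per iteration (decrementing h by 4); this patch reduces that to two rows per iteration (decrementing h by 2).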
1. ff_put_pixels4_8_mmi
2. ff_avg_pixels4_8_mmi
3. ff_put_pixels4_l2_8_mmi
4. ff_avg_pixels4_l2_8_mmi
---
 libavcodec/mips/hpeldsp_mmi.c | 112 +++++++++---------------------------------
 1 file changed, 23 insertions(+), 89 deletions(-)

Comments

Michael Niedermayer Sept. 14, 2018, 1 a.m. UTC | #1
On Thu, Sep 13, 2018 at 11:04:05AM +0800, Shiyou Yin wrote:
> Failed case: svq3-watermark
> When minimum loop count of following functions are greater than parameter h passed to them, svq3-watermark failed.
> 1. ff_put_pixels4_8_mmi
> 2. ff_avg_pixels4_8_mmi
> 3. ff_put_pixels4_l2_8_mmi
> 4. ff_avg_pixels4_l2_8_mmi
> ---
>  libavcodec/mips/hpeldsp_mmi.c | 112 +++++++++---------------------------------
>  1 file changed, 23 insertions(+), 89 deletions(-)

will apply

thx

[...]
diff mbox

Patch

diff --git a/libavcodec/mips/hpeldsp_mmi.c b/libavcodec/mips/hpeldsp_mmi.c
index db2fa10..e69b2bd 100644
--- a/libavcodec/mips/hpeldsp_mmi.c
+++ b/libavcodec/mips/hpeldsp_mmi.c
@@ -38,21 +38,13 @@  void ff_put_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels,
         PTR_ADDU   "%[pixels],   %[pixels],      %[line_size]           \n\t"
         MMI_ULWC1(%[ftmp1], %[pixels], 0x00)
         PTR_ADDU   "%[pixels],   %[pixels],      %[line_size]           \n\t"
-        MMI_ULWC1(%[ftmp2], %[pixels], 0x00)
-        PTR_ADDU   "%[pixels],   %[pixels],      %[line_size]           \n\t"
-        MMI_ULWC1(%[ftmp3], %[pixels], 0x00)
-        PTR_ADDU   "%[pixels],   %[pixels],      %[line_size]           \n\t"
 
-        PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
+        PTR_ADDI   "%[h],       %[h],           -0x02                   \n\t"
 
         MMI_SWC1(%[ftmp0], %[block], 0x00)
         PTR_ADDU   "%[block],   %[block],       %[line_size]            \n\t"
         MMI_SWC1(%[ftmp1], %[block], 0x00)
         PTR_ADDU   "%[block],   %[block],       %[line_size]            \n\t"
-        MMI_SWC1(%[ftmp2], %[block], 0x00)
-        PTR_ADDU   "%[block],   %[block],       %[line_size]            \n\t"
-        MMI_SWC1(%[ftmp3], %[block], 0x00)
-        PTR_ADDU   "%[block],   %[block],       %[line_size]            \n\t"
 
         "bnez       %[h],       1b                                      \n\t"
         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
@@ -157,12 +149,10 @@  void ff_avg_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels,
     ptrdiff_t line_size, int h)
 {
     double ftmp[4];
-    mips_reg addr[3];
+    mips_reg addr[2];
     DECLARE_VAR_LOW32;
-    DECLARE_VAR_ADDRT;
 
     __asm__ volatile (
-        PTR_ADDU   "%[addr2],   %[line_size],   %[line_size]            \n\t"
         "1:                                                             \n\t"
         PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
         MMI_ULWC1(%[ftmp0], %[pixels], 0x00)
@@ -170,34 +160,21 @@  void ff_avg_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels,
         PTR_ADDU   "%[addr1],   %[block],       %[line_size]            \n\t"
         MMI_ULWC1(%[ftmp2], %[block], 0x00)
         MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
-        "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
-        "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
-        MMI_SWC1(%[ftmp0], %[block], 0x00)
-        MMI_SWXC1(%[ftmp1], %[block], %[line_size], 0x00)
-        PTR_ADDU   "%[pixels],  %[pixels],      %[addr2]                \n\t"
-        PTR_ADDU   "%[block],   %[block],       %[addr2]                \n\t"
 
-        PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
-        MMI_ULWC1(%[ftmp0], %[pixels], 0x00)
-        MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
-        PTR_ADDU   "%[addr1],   %[block],       %[line_size]            \n\t"
-        MMI_ULWC1(%[ftmp2], %[block], 0x00)
-        MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
+        PTR_ADDI   "%[h],       %[h],           -0x02                   \n\t"
+
         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
         MMI_SWC1(%[ftmp0], %[block], 0x00)
-        MMI_SWXC1(%[ftmp1], %[block], %[line_size], 0x00)
-        PTR_ADDU   "%[pixels],  %[pixels],      %[addr2]                \n\t"
-        PTR_ADDU   "%[block],   %[block],       %[addr2]                \n\t"
+        MMI_SWC1(%[ftmp1], %[addr1], 0x00)
+        PTR_ADDU   "%[pixels],  %[addr0],       %[line_size]            \n\t"
+        PTR_ADDU   "%[block],   %[addr1],       %[line_size]            \n\t"
 
-        PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
         "bnez       %[h],       1b                                      \n\t"
         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
           RESTRICT_ASM_LOW32
-          RESTRICT_ASM_ADDRT
           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
-          [addr2]"=&r"(addr[2]),
           [block]"+&r"(block),              [pixels]"+&r"(pixels),
           [h]"+&r"(h)
         : [line_size]"r"((mips_reg)line_size)
@@ -330,50 +307,33 @@  inline void ff_put_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
     double ftmp[4];
     mips_reg addr[5];
     DECLARE_VAR_LOW32;
-    DECLARE_VAR_ADDRT;
 
     __asm__ volatile (
-        PTR_ADDU   "%[addr2],   %[src_stride1], %[src_stride1]          \n\t"
-        PTR_ADDU   "%[addr3],   %[src_stride2], %[src_stride2]          \n\t"
-        PTR_ADDU   "%[addr4],   %[dst_stride],  %[dst_stride]           \n\t"
         "1:                                                             \n\t"
         PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
         MMI_ULWC1(%[ftmp0], %[src1], 0x00)
         MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
-        MMI_ULWC1(%[ftmp2], %[src2], 0x00)
         PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
-        MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
-        PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
-        "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
-        "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
-        MMI_SWC1(%[ftmp0], %[dst], 0x00)
-        MMI_SWXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
-        PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
-        PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
-
-        PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
-        MMI_ULWC1(%[ftmp0], %[src1], 0x00)
-        MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
         MMI_ULWC1(%[ftmp2], %[src2], 0x00)
-        PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
         MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
-        PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
+        PTR_ADDU   "%[src1],    %[addr0],       %[src_stride1]          \n\t"
+        PTR_ADDU   "%[src2],    %[addr1],       %[src_stride2]          \n\t"
+
+        PTR_ADDI   "%[h],       %[h],           -0x02                   \n\t"
+
         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
         MMI_SWC1(%[ftmp0], %[dst], 0x00)
-        MMI_SWXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
-        PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
-        PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
+        PTR_ADDU   "%[dst],     %[dst],         %[dst_stride]           \n\t"
+        MMI_SWC1(%[ftmp1], %[dst], 0x00)
+        PTR_ADDU   "%[dst],     %[dst],         %[dst_stride]           \n\t"
 
-        PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
         "bnez       %[h],       1b                                      \n\t"
         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
           RESTRICT_ASM_LOW32
           RESTRICT_ASM_ADDRT
           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
-          [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
-          [addr4]"=&r"(addr[4]),
           [dst]"+&r"(dst),                  [src1]"+&r"(src1),
           [src2]"+&r"(src2),                [h]"+&r"(h)
         : [dst_stride]"r"((mips_reg)dst_stride),
@@ -530,62 +490,36 @@  inline void ff_avg_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
     double ftmp[6];
     mips_reg addr[6];
     DECLARE_VAR_LOW32;
-    DECLARE_VAR_ADDRT;
 
     __asm__ volatile (
-        PTR_ADDU   "%[addr2],   %[src_stride1], %[src_stride1]          \n\t"
-        PTR_ADDU   "%[addr3],   %[src_stride2], %[src_stride2]          \n\t"
-        PTR_ADDU   "%[addr4],   %[dst_stride],  %[dst_stride]           \n\t"
-
         "1:                                                             \n\t"
         PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
         MMI_ULWC1(%[ftmp0], %[src1], 0x00)
         MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
-        MMI_ULWC1(%[ftmp2], %[src2], 0x00)
         PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
-        MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
-        PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
-        "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
-        "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
-        PTR_ADDU   "%[addr5],   %[dst],         %[dst_stride]           \n\t"
-        MMI_ULWC1(%[ftmp4], %[dst], 0x00)
-        MMI_ULWC1(%[ftmp5], %[addr5], 0x00)
-        "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
-        "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
-        MMI_SWC1(%[ftmp0], %[dst], 0x00)
-        MMI_SWXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
-        PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
-        PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
-
-        PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
-        MMI_ULWC1(%[ftmp0], %[src1], 0x00)
-        MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
         MMI_ULWC1(%[ftmp2], %[src2], 0x00)
-        PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
         MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
-        PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
+        PTR_ADDU   "%[src1],    %[addr0],       %[src_stride1]          \n\t"
+        PTR_ADDU   "%[src2],    %[addr1],       %[src_stride2]          \n\t"
         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
-        PTR_ADDU   "%[addr5],   %[dst],         %[dst_stride]           \n\t"
+        PTR_ADDU   "%[addr2],   %[dst],         %[dst_stride]           \n\t"
         MMI_ULWC1(%[ftmp4], %[dst], 0x00)
-        MMI_ULWC1(%[ftmp5], %[addr5], 0x00)
+        MMI_ULWC1(%[ftmp5], %[addr2], 0x00)
+        PTR_ADDI   "%[h],       %[h],           -0x02                   \n\t"
         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
         MMI_SWC1(%[ftmp0], %[dst], 0x00)
-        MMI_SWXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
-        PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
-        PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
+        MMI_SWC1(%[ftmp1], %[addr2], 0x00)
+        PTR_ADDU   "%[dst],     %[addr2],       %[dst_stride]           \n\t"
 
-        PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
         "bnez       %[h],       1b                                      \n\t"
         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
           RESTRICT_ASM_LOW32
-          RESTRICT_ASM_ADDRT
           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
-          [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
-          [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
+          [addr2]"=&r"(addr[2]),
           [dst]"+&r"(dst),                  [src1]"+&r"(src1),
           [src2]"+&r"(src2),                [h]"+&r"(h)
         : [dst_stride]"r"((mips_reg)dst_stride),