diff mbox

[FFmpeg-devel] Ticket #7124: Fixes compiler bug - replace vec_lvsl/vec_perm with vec_xl

Message ID e18da82c990970bdfbd6414521dcdc52@linux.vnet.ibm.com
State New
Headers show

Commit Message

ckerchne July 23, 2019, 12:51 p.m. UTC
A bug exist with the gcc compilers for Power in versions 6.x and 7.x 
(verified with 6.3 and 7.4). It was fixed in version 8.x (verified with 
8.3). I was using a Power 9 ppc64le machine for building and testing.

It appears the compiler is generating the wrong code for little endian 
machines for the vec_lvsl/vec_perm instruction combos in some cases. If 
these instructions are replaced with vec_xl, the problem goes away for 
all versions of the compilers.

---
  libswscale/ppc/yuv2rgb_altivec.c | 24 ++++--------------------
  1 file changed, 4 insertions(+), 20 deletions(-)

-    vector unsigned char align_perm;                                    
       \
-                                                                        
       \
      vector signed short lCY       = c->CY;                              
       \
      vector signed short lOY       = c->OY;                              
       \
      vector signed short lCRV      = c->CRV;                             
       \
@@ -338,26 +335,13 @@ static int altivec_ ## name(SwsContext *c, const 
unsigned char **in,          \
          vec_dstst(oute, (0x02000002 | (((w * 3 + 32) / 32) << 16)), 1); 
       \
                                                                          
       \
          for (j = 0; j < w / 16; j++) {                                  
       \
-            y1ivP = (const vector unsigned char *) y1i;                 
       \
-            y2ivP = (const vector unsigned char *) y2i;                 
       \
-            uivP  = (const vector unsigned char *) ui;                  
       \
-            vivP  = (const vector unsigned char *) vi;                  
       \
-                                                                        
       \
-            align_perm = vec_lvsl(0, y1i);                              
       \
-            y0 = (vector unsigned char)                                 
       \
-                     vec_perm(y1ivP[0], y1ivP[1], align_perm);          
       \
+            y0 = vec_xl(0, y1i);                                        
       \
                                                                          
       \
-            align_perm = vec_lvsl(0, y2i);                              
       \
-            y1 = (vector unsigned char)                                 
       \
-                     vec_perm(y2ivP[0], y2ivP[1], align_perm);          
       \
+            y1 = vec_xl(0, y2i);                                        
       \
                                                                          
       \
-            align_perm = vec_lvsl(0, ui);                               
       \
-            u = (vector signed char)                                    
       \
-                    vec_perm(uivP[0], uivP[1], align_perm);             
       \
+            u = (vector signed char) vec_xl(0, ui);                     
       \
                                                                          
       \
-            align_perm = vec_lvsl(0, vi);                               
       \
-            v = (vector signed char)                                    
       \
-                    vec_perm(vivP[0], vivP[1], align_perm);             
       \
+            v = (vector signed char) vec_xl(0, vi);                     
       \
                                                                          
       \
              u = (vector signed char)                                    
       \
                      vec_sub(u,                                          
       \
--
2.17.1

Comments

ckerchne Aug. 1, 2019, 11:42 a.m. UTC | #1
On 2019-07-23 08:51, ckerchne wrote:
> A bug exist with the gcc compilers for Power in versions 6.x and 7.x
> (verified with 6.3 and 7.4). It was fixed in version 8.x (verified
> with 8.3). I was using a Power 9 ppc64le machine for building and
> testing.
> 
> It appears the compiler is generating the wrong code for little endian
> machines for the vec_lvsl/vec_perm instruction combos in some cases.
> If these instructions are replaced with vec_xl, the problem goes away
> for all versions of the compilers.
> 
> ---
>  libswscale/ppc/yuv2rgb_altivec.c | 24 ++++--------------------
>  1 file changed, 4 insertions(+), 20 deletions(-)
> 
> diff --git a/libswscale/ppc/yuv2rgb_altivec.c 
> b/libswscale/ppc/yuv2rgb_altivec.c
> index c1e2852adb..536545293d 100644
> --- a/libswscale/ppc/yuv2rgb_altivec.c
> +++ b/libswscale/ppc/yuv2rgb_altivec.c
> @@ -305,9 +305,6 @@ static int altivec_ ## name(SwsContext *c, const
> unsigned char **in,          \
>      vector signed short R1, G1, B1;                                    
>        \
>      vector unsigned char R, G, B;                                      
>        \
>                                                                         
>        \
> -    const vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP;           
>        \
> -    vector unsigned char align_perm;                                   
>        \
> -                                                                       
>        \
>      vector signed short lCY       = c->CY;                             
>        \
>      vector signed short lOY       = c->OY;                             
>        \
>      vector signed short lCRV      = c->CRV;                            
>        \
> @@ -338,26 +335,13 @@ static int altivec_ ## name(SwsContext *c, const
> unsigned char **in,          \
>          vec_dstst(oute, (0x02000002 | (((w * 3 + 32) / 32) << 16)), 
> 1);       \
>                                                                         
>        \
>          for (j = 0; j < w / 16; j++) {                                 
>        \
> -            y1ivP = (const vector unsigned char *) y1i;                
>        \
> -            y2ivP = (const vector unsigned char *) y2i;                
>        \
> -            uivP  = (const vector unsigned char *) ui;                 
>        \
> -            vivP  = (const vector unsigned char *) vi;                 
>        \
> -                                                                       
>        \
> -            align_perm = vec_lvsl(0, y1i);                             
>        \
> -            y0 = (vector unsigned char)                                
>        \
> -                     vec_perm(y1ivP[0], y1ivP[1], align_perm);         
>        \
> +            y0 = vec_xl(0, y1i);                                       
>        \
>                                                                         
>        \
> -            align_perm = vec_lvsl(0, y2i);                             
>        \
> -            y1 = (vector unsigned char)                                
>        \
> -                     vec_perm(y2ivP[0], y2ivP[1], align_perm);         
>        \
> +            y1 = vec_xl(0, y2i);                                       
>        \
>                                                                         
>        \
> -            align_perm = vec_lvsl(0, ui);                              
>        \
> -            u = (vector signed char)                                   
>        \
> -                    vec_perm(uivP[0], uivP[1], align_perm);            
>        \
> +            u = (vector signed char) vec_xl(0, ui);                    
>        \
>                                                                         
>        \
> -            align_perm = vec_lvsl(0, vi);                              
>        \
> -            v = (vector signed char)                                   
>        \
> -                    vec_perm(vivP[0], vivP[1], align_perm);            
>        \
> +            v = (vector signed char) vec_xl(0, vi);                    
>        \
>                                                                         
>        \
>              u = (vector signed char)                                   
>        \
>                      vec_sub(u,                                         
>        \
> --
> 2.17.1
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Could someone please engage on this issue?  It's been almost 10 days.

Chip Kerchner
Moritz Barsnick Aug. 1, 2019, 11:52 a.m. UTC | #2
Hi,

On Thu, Aug 01, 2019 at 07:42:28 -0400, ckerchne wrote:

Just a small hint: You patch was corrupted by newlines inserted by your
email client.

Try attaching the patch file, or using git send-email.

> > --- a/libswscale/ppc/yuv2rgb_altivec.c
> > +++ b/libswscale/ppc/yuv2rgb_altivec.c
> > @@ -305,9 +305,6 @@ static int altivec_ ## name(SwsContext *c, const
> > unsigned char **in,          \
> >      vector signed short R1, G1, B1;
> >        \
> >      vector unsigned char R, G, B;
> >        \
> >
> >        \
> > -    const vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP;
> >        \
> > -    vector unsigned char align_perm;
> >        \
> > -
> >        \

See also here how the formatting ended up:
https://patchwork.ffmpeg.org/patch/14045/

Also please prefix the first line of the commit message with
"swscale/ppc/yuv2rgb_altivec:". The "Fixes #xxxx" part can go lower
into the commit message body (like to the last line, as a separate
parapgraph, for example).

Cheers,
Moritz

P.S.: I can't review this anyway. ;-)
diff mbox

Patch

diff --git a/libswscale/ppc/yuv2rgb_altivec.c 
b/libswscale/ppc/yuv2rgb_altivec.c
index c1e2852adb..536545293d 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -305,9 +305,6 @@  static int altivec_ ## name(SwsContext *c, const 
unsigned char **in,          \
      vector signed short R1, G1, B1;                                     
       \
      vector unsigned char R, G, B;                                       
       \
                                                                          
       \
-    const vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP;            
       \