diff mbox series

[FFmpeg-devel,4/7] postproc/postprocess_template: Fix alignment

Message ID GV1P250MB0737E96BC05905599CB1B4728F3D9@GV1P250MB0737.EURP250.PROD.OUTLOOK.COM
State Accepted
Commit f0ee38790b9fad7bef69f30fc5292be9f1a6fcc3
Headers show
Series [FFmpeg-devel,1/7] postproc/postprocess: Remove obsolete MMX(EXT)/3Dnow functions | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Andreas Rheinhardt Nov. 6, 2022, 4:30 p.m. UTC
postProcess in postprocess_template.c copies a PPContext
to the stack, works with this copy and then copies
it back again. Said local copy uses a hardcoded alignment
of eight, although PPContext has alignment 32 since
cbe27006cee0099076d1d68af646f3ef914167d8
(this commit was in anticipation of AVX2 code that never landed).
This leads to misalignment in the filter-(pp|pp1|pp2|pp3|qp)
FATE-tests which UBSan complains about. So avoid the local copy.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libpostproc/postprocess_template.c | 156 ++++++++++++++---------------
 1 file changed, 77 insertions(+), 79 deletions(-)
diff mbox series

Patch

diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index bcf7bdad66..ade1d6ce2b 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -2860,14 +2860,13 @@  static inline void RENAME(prefetcht2)(const void *p)
  * Filter array of bytes (Y or U or V values)
  */
 static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
-                                const int8_t QPs[], int QPStride, int isColor, PPContext *c2)
+                                const int8_t QPs[], int QPStride, int isColor, PPContext *c)
 {
-    DECLARE_ALIGNED(8, PPContext, c)= *c2; //copy to stack for faster access
     int x,y;
 #ifdef TEMPLATE_PP_TIME_MODE
     const int mode= TEMPLATE_PP_TIME_MODE;
 #else
-    const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode;
+    const int mode = isColor ? c->ppMode.chromMode : c->ppMode.lumMode;
 #endif
     int black=0, white=255; // blackest black and whitest white in the picture
     int QPCorrecture= 256*256;
@@ -2877,29 +2876,29 @@  static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
     int i;
 #endif
 
-    const int qpHShift= isColor ? 4-c.hChromaSubSample : 4;
-    const int qpVShift= isColor ? 4-c.vChromaSubSample : 4;
+    const int qpHShift = isColor ? 4 - c->hChromaSubSample : 4;
+    const int qpVShift = isColor ? 4 - c->vChromaSubSample : 4;
 
     //FIXME remove
-    uint64_t * const yHistogram= c.yHistogram;
-    uint8_t * const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride;
-    uint8_t * const tempDst= (dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride) + 32;
+    uint64_t * const yHistogram= c->yHistogram;
+    uint8_t * const tempSrc =  srcStride > 0 ? c->tempSrc : c->tempSrc - 23*srcStride;
+    uint8_t * const tempDst = (dstStride > 0 ? c->tempDst : c->tempDst - 23*dstStride) + 32;
     //const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4;
 
     if (mode & VISUALIZE){
         if(!(mode & (V_A_DEBLOCK | H_A_DEBLOCK)) || TEMPLATE_PP_MMX) {
-            av_log(c2, AV_LOG_WARNING, "Visualization is currently only supported with the accurate deblock filter without SIMD\n");
+            av_log(c, AV_LOG_WARNING, "Visualization is currently only supported with the accurate deblock filter without SIMD\n");
         }
     }
 
 #if TEMPLATE_PP_MMX
     for(i=0; i<57; i++){
-        int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
+        int offset = ((i * c->ppMode.baseDcDiff) >> 8) + 1;
         int threshold= offset*2 + 1;
-        c.mmxDcOffset[i]= 0x7F - offset;
-        c.mmxDcThreshold[i]= 0x7F - threshold;
-        c.mmxDcOffset[i]*= 0x0101010101010101LL;
-        c.mmxDcThreshold[i]*= 0x0101010101010101LL;
+        c->mmxDcOffset[i]     = 0x7F - offset;
+        c->mmxDcThreshold[i]  = 0x7F - threshold;
+        c->mmxDcOffset[i]    *= 0x0101010101010101LL;
+        c->mmxDcThreshold[i] *= 0x0101010101010101LL;
     }
 #endif
 
@@ -2925,16 +2924,18 @@  static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
         uint64_t clipped;
         AVRational scale;
 
-        c.frameNum++;
+        c->frameNum++;
         // first frame is fscked so we ignore it
-        if(c.frameNum == 1) yHistogram[0]= width*(uint64_t)height/64*15/256;
+        if (c->frameNum == 1)
+            yHistogram[0] = width * (uint64_t)height/64*15/256;
 
         for(i=0; i<256; i++){
             sum+= yHistogram[i];
         }
 
         /* We always get a completely black picture first. */
-        maxClipped= av_rescale(sum, c.ppMode.maxClippedThreshold.num, c.ppMode.maxClippedThreshold.den);
+        maxClipped = av_rescale(sum, c->ppMode.maxClippedThreshold.num,
+                                     c->ppMode.maxClippedThreshold.den);
 
         clipped= sum;
         for(black=255; black>0; black--){
@@ -2948,27 +2949,27 @@  static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
             clipped-= yHistogram[white];
         }
 
-        scale = (AVRational){c.ppMode.maxAllowedY - c.ppMode.minAllowedY, white - black};
+        scale = (AVRational){c->ppMode.maxAllowedY - c->ppMode.minAllowedY, white - black};
 
 #if TEMPLATE_PP_MMXEXT
-        c.packedYScale = (uint16_t)av_rescale(scale.num, 256, scale.den);
-        c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
+        c->packedYScale  = (uint16_t)av_rescale(scale.num, 256, scale.den);
+        c->packedYOffset = (((black*c->packedYScale)>>8) - c->ppMode.minAllowedY) & 0xFFFF;
 #else
-        c.packedYScale = (uint16_t)av_rescale(scale.num, 1024, scale.den);
-        c.packedYOffset= (black - c.ppMode.minAllowedY) & 0xFFFF;
+        c->packedYScale  = (uint16_t)av_rescale(scale.num, 1024, scale.den);
+        c->packedYOffset = (black - c->ppMode.minAllowedY) & 0xFFFF;
 #endif
 
-        c.packedYOffset|= c.packedYOffset<<32;
-        c.packedYOffset|= c.packedYOffset<<16;
+        c->packedYOffset |= c->packedYOffset<<32;
+        c->packedYOffset |= c->packedYOffset<<16;
 
-        c.packedYScale|= c.packedYScale<<32;
-        c.packedYScale|= c.packedYScale<<16;
+        c->packedYScale |= c->packedYScale<<32;
+        c->packedYScale |= c->packedYScale<<16;
 
         if(mode & LEVEL_FIX)        QPCorrecture= (int)av_rescale(scale.num, 256*256, scale.den);
         else                        QPCorrecture= 256*256;
     }else{
-        c.packedYScale= 0x0100010001000100LL;
-        c.packedYOffset= 0;
+        c->packedYScale  = 0x0100010001000100LL;
+        c->packedYOffset = 0;
         QPCorrecture= 256*256;
     }
 
@@ -2988,22 +2989,22 @@  static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
             RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
 
             RENAME(blockCopy)(dstBlock + dstStride*8, dstStride,
-                              srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
+                              srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c->packedYOffset);
 
             RENAME(duplicate)(dstBlock + dstStride*8, dstStride);
 
             if(mode & LINEAR_IPOL_DEINT_FILTER)
                 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
             else if(mode & LINEAR_BLEND_DEINT_FILTER)
-                RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
+                RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c->deintTemp + x);
             else if(mode & MEDIAN_DEINT_FILTER)
                 RENAME(deInterlaceMedian)(dstBlock, dstStride);
             else if(mode & CUBIC_IPOL_DEINT_FILTER)
                 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
             else if(mode & FFMPEG_DEINT_FILTER)
-                RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
+                RENAME(deInterlaceFF)(dstBlock, dstStride, c->deintTemp + x);
             else if(mode & LOWPASS5_DEINT_FILTER)
-                RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
+                RENAME(deInterlaceL5)(dstBlock, dstStride, c->deintTemp + x, c->deintTemp + width + x);
 /*          else if(mode & CUBIC_BLEND_DEINT_FILTER)
                 RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
 */
@@ -3025,11 +3026,11 @@  static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
         const uint8_t *srcBlock= &(src[y*srcStride]);
         uint8_t *dstBlock= &(dst[y*dstStride]);
 #if TEMPLATE_PP_MMX
-        uint8_t *tempBlock1= c.tempBlocks;
-        uint8_t *tempBlock2= c.tempBlocks + 8;
+        uint8_t *tempBlock1 = c->tempBlocks;
+        uint8_t *tempBlock2 = c->tempBlocks + 8;
 #endif
         const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
-        int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)];
+        int8_t *nonBQPptr = &c->nonBQPTable[(y>>qpVShift)*FFABS(QPStride)];
         int QP=0, nonBQP=0;
         /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards
            if not than use a temporary buffer */
@@ -3072,8 +3073,8 @@  static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
                 nonBQP= (nonBQP* QPCorrecture + 256*128)>>16;
                 yHistogram[(srcBlock+qp_index*8)[srcStride*12 + 4]]++;
             }
-            c.QP_block[qp_index] = QP;
-            c.nonBQP_block[qp_index] = nonBQP;
+            c->QP_block[qp_index] = QP;
+            c->nonBQP_block[qp_index] = nonBQP;
 #if TEMPLATE_PP_MMX
             __asm__ volatile(
                 "movd %1, %%mm7         \n\t"
@@ -3081,7 +3082,7 @@  static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
                 "packuswb %%mm7, %%mm7  \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
                 "packuswb %%mm7, %%mm7  \n\t" // QP,..., QP
                 "movq %%mm7, %0         \n\t"
-                : "=m" (c.pQPb_block[qp_index])
+                : "=m" (c->pQPb_block[qp_index])
                 : "r" (QP)
             );
 #endif
@@ -3093,20 +3094,20 @@  static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
             RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
 
             RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
-                              srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
+                              srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c->packedYOffset);
 
             if(mode & LINEAR_IPOL_DEINT_FILTER)
                 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
             else if(mode & LINEAR_BLEND_DEINT_FILTER)
-                RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
+                RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c->deintTemp + x);
             else if(mode & MEDIAN_DEINT_FILTER)
                 RENAME(deInterlaceMedian)(dstBlock, dstStride);
             else if(mode & CUBIC_IPOL_DEINT_FILTER)
                 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
             else if(mode & FFMPEG_DEINT_FILTER)
-                RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
+                RENAME(deInterlaceFF)(dstBlock, dstStride, c->deintTemp + x);
             else if(mode & LOWPASS5_DEINT_FILTER)
-                RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
+                RENAME(deInterlaceL5)(dstBlock, dstStride, c->deintTemp + x, c->deintTemp + width + x);
 /*          else if(mode & CUBIC_BLEND_DEINT_FILTER)
                 RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
 */
@@ -3121,24 +3122,24 @@  static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
             const int stride= dstStride;
             //temporary while changing QP stuff to make things continue to work
             //eventually QP,nonBQP,etc will be arrays and this will be unnecessary
-            c.QP = c.QP_block[qp_index];
-            c.nonBQP = c.nonBQP_block[qp_index];
-            c.pQPb = c.pQPb_block[qp_index];
-            c.pQPb2 = c.pQPb2_block[qp_index];
+            c->QP     = c->QP_block[qp_index];
+            c->nonBQP = c->nonBQP_block[qp_index];
+            c->pQPb   = c->pQPb_block[qp_index];
+            c->pQPb2  = c->pQPb2_block[qp_index];
 
             /* only deblock if we have 2 blocks */
             if(y + 8 < height){
                 if(mode & V_X1_FILTER)
-                    RENAME(vertX1Filter)(dstBlock, stride, &c);
+                    RENAME(vertX1Filter)(dstBlock, stride, c);
                 else if(mode & V_DEBLOCK){
-                    const int t= RENAME(vertClassify)(dstBlock, stride, &c);
+                    const int t = RENAME(vertClassify)(dstBlock, stride, c);
 
                     if(t==1)
-                        RENAME(doVertLowPass)(dstBlock, stride, &c);
+                        RENAME(doVertLowPass)(dstBlock, stride, c);
                     else if(t==2)
-                        RENAME(doVertDefFilter)(dstBlock, stride, &c);
+                        RENAME(doVertDefFilter)(dstBlock, stride, c);
                 }else if(mode & V_A_DEBLOCK){
-                    RENAME(do_a_deblock)(dstBlock, stride, 1, &c, mode);
+                    RENAME(do_a_deblock)(dstBlock, stride, 1, c, mode);
                 }
             }
 
@@ -3151,10 +3152,10 @@  static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
 
           for(x = startx, qp_index=0; x < endx; x+=BLOCK_SIZE, qp_index++){
             const int stride= dstStride;
-            c.QP = c.QP_block[qp_index];
-            c.nonBQP = c.nonBQP_block[qp_index];
-            c.pQPb = c.pQPb_block[qp_index];
-            c.pQPb2 = c.pQPb2_block[qp_index];
+            c->QP     = c->QP_block[qp_index];
+            c->nonBQP = c->nonBQP_block[qp_index];
+            c->pQPb   = c->pQPb_block[qp_index];
+            c->pQPb2  = c->pQPb2_block[qp_index];
 #if TEMPLATE_PP_MMX
             RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
 #endif
@@ -3162,60 +3163,60 @@  static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
             if(x - 8 >= 0){
 #if TEMPLATE_PP_MMX
                 if(mode & H_X1_FILTER)
-                        RENAME(vertX1Filter)(tempBlock1, 16, &c);
+                        RENAME(vertX1Filter)(tempBlock1, 16, c);
                 else if(mode & H_DEBLOCK){
-                    const int t= RENAME(vertClassify)(tempBlock1, 16, &c);
+                    const int t= RENAME(vertClassify)(tempBlock1, 16, c);
                     if(t==1)
-                        RENAME(doVertLowPass)(tempBlock1, 16, &c);
+                        RENAME(doVertLowPass)(tempBlock1, 16, c);
                     else if(t==2)
-                        RENAME(doVertDefFilter)(tempBlock1, 16, &c);
+                        RENAME(doVertDefFilter)(tempBlock1, 16, c);
                 }else if(mode & H_A_DEBLOCK){
-                        RENAME(do_a_deblock)(tempBlock1, 16, 1, &c, mode);
+                        RENAME(do_a_deblock)(tempBlock1, 16, 1, c, mode);
                 }
 
                 RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16);
 
 #else
                 if(mode & H_X1_FILTER)
-                    horizX1Filter(dstBlock-4, stride, c.QP);
+                    horizX1Filter(dstBlock-4, stride, c->QP);
                 else if(mode & H_DEBLOCK){
 #if TEMPLATE_PP_ALTIVEC
                     DECLARE_ALIGNED(16, unsigned char, tempBlock)[272];
                     int t;
                     transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);
 
-                    t = vertClassify_altivec(tempBlock-48, 16, &c);
+                    t = vertClassify_altivec(tempBlock-48, 16, c);
                     if(t==1) {
-                        doVertLowPass_altivec(tempBlock-48, 16, &c);
+                        doVertLowPass_altivec(tempBlock-48, 16, c);
                         transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
                     }
                     else if(t==2) {
-                        doVertDefFilter_altivec(tempBlock-48, 16, &c);
+                        doVertDefFilter_altivec(tempBlock-48, 16, c);
                         transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
                     }
 #else
-                    const int t= RENAME(horizClassify)(dstBlock-4, stride, &c);
+                    const int t= RENAME(horizClassify)(dstBlock-4, stride, c);
 
                     if(t==1)
-                        RENAME(doHorizLowPass)(dstBlock-4, stride, &c);
+                        RENAME(doHorizLowPass)(dstBlock-4, stride, c);
                     else if(t==2)
-                        RENAME(doHorizDefFilter)(dstBlock-4, stride, &c);
+                        RENAME(doHorizDefFilter)(dstBlock-4, stride, c);
 #endif
                 }else if(mode & H_A_DEBLOCK){
-                    RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c, mode);
+                    RENAME(do_a_deblock)(dstBlock-8, 1, stride, c, mode);
                 }
 #endif //TEMPLATE_PP_MMX
                 if(mode & DERING){
                 //FIXME filter first line
-                    if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c);
+                    if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, c);
                 }
 
                 if(mode & TEMP_NOISE_FILTER)
                 {
                     RENAME(tempNoiseReducer)(dstBlock-8, stride,
-                            c.tempBlurred[isColor] + y*dstStride + x,
-                            c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
-                            c.ppMode.maxTmpNoise);
+                            c->tempBlurred[isColor] + y*dstStride + x,
+                            c->tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
+                            c->ppMode.maxTmpNoise);
                 }
             }
 
@@ -3229,14 +3230,14 @@  static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
         }
 
         if(mode & DERING){
-            if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, &c);
+            if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, c);
         }
 
         if((mode & TEMP_NOISE_FILTER)){
             RENAME(tempNoiseReducer)(dstBlock-8, dstStride,
-                    c.tempBlurred[isColor] + y*dstStride + x,
-                    c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
-                    c.ppMode.maxTmpNoise);
+                    c->tempBlurred[isColor] + y*dstStride + x,
+                    c->tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
+                    c->ppMode.maxTmpNoise);
         }
 
         /* did we use a tmp buffer for the last lines*/
@@ -3278,9 +3279,6 @@  static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
         }
     }
 #endif
-
-    *c2= c; //copy local context back
-
 }
 
 #undef RENAME