diff mbox

[FFmpeg-devel] avcodec/ppc/hevcdsp: Fix build failures with powerpc-linux-gnu-gcc-4.8 with --disable-optimizations

Message ID 20181204152940.25829-1-michael@niedermayer.cc
State Accepted
Commit 2c64a6bcd280c64997e6c4799bc89c0a9393bbf3
Headers show

Commit Message

Michael Niedermayer Dec. 4, 2018, 3:29 p.m. UTC
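vec_splat_u32() requires a literal argument, and without optimizations
"shift" is not folded into a constant, so the build fails.
The affected functions could also be changed into macros, but this is the
smaller change to fix it, and it avoids (probably) less readable macros.
The extra code should be optimized out when optimizations are enabled, as all
values are known at build time after inlining.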

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
---
 libavcodec/ppc/hevcdsp.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

Comments

Carl Eugen Hoyos Dec. 4, 2018, 3:33 p.m. UTC | #1
2018-12-04 16:29 GMT+01:00, Michael Niedermayer <michael@niedermayer.cc>:
> The affected functions could also be changed into macros, this is the
> smaller change to fix it though. And avoids (probably) less readable macros

> The extra code should be optimized out when optimizations are done as all
> values are known at build after inlining.

Shouldn't this be verified?
This is speed-critical code, no?

Carl Eugen
diff mbox

Patch

diff --git a/libavcodec/ppc/hevcdsp.c b/libavcodec/ppc/hevcdsp.c
index dcae43305a..c1d562a409 100644
--- a/libavcodec/ppc/hevcdsp.c
+++ b/libavcodec/ppc/hevcdsp.c
@@ -58,7 +58,13 @@  static av_always_inline void transform4x4(vec_s16 src_01, vec_s16 src_23,
     e1 = vec_msums(src_02, trans4[2], zero);
     o1 = vec_msums(src_13, trans4[3], zero);
 
-    add = vec_sl(vec_splat_s32(1), vec_splat_u32(shift - 1));
+    switch(shift) {
+    case  7: add = vec_sl(vec_splat_s32(1), vec_splat_u32( 7 - 1)); break;
+    case 10: add = vec_sl(vec_splat_s32(1), vec_splat_u32(10 - 1)); break;
+    case 12: add = vec_sl(vec_splat_s32(1), vec_splat_u32(12 - 1)); break;
+    default: abort();
+    }
+
     e0 = vec_add(e0, add);
     e1 = vec_add(e1, add);
 
@@ -72,7 +78,14 @@  static av_always_inline void scale(vec_s32 res[4], vec_s16 res_packed[2],
                                    const int shift)
 {
     int i;
-    vec_u32 v_shift = vec_splat_u32(shift);
+    vec_u32 v_shift;
+
+    switch(shift) {
+    case  7: v_shift = vec_splat_u32(7) ; break;
+    case 10: v_shift = vec_splat_u32(10); break;
+    case 12: v_shift = vec_splat_u32(12); break;
+    default: abort();
+    }
 
     for (i = 0; i < 4; i++)
         res[i] = vec_sra(res[i], v_shift);