Message ID | GV1P250MB073779BE2727AE926A3E08AA8F3C2@GV1P250MB0737.EURP250.PROD.OUTLOOK.COM |
---|---|
State | Accepted |
Commit | 428ff7bd8c76c329f280bcaf88f8a9947ed8bbe0 |
Headers | show |
Series | [FFmpeg-devel,1/2] swscale/ppc/swscale_ppc_template: Remove code not passing checkasm | expand |
Context | Check | Description |
---|---|---|
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
Apr 4, 2024, 04:58 by andreas.rheinhardt@outlook.com: > Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> > --- > libswscale/ppc/swscale_ppc_template.c | 107 +++++++++++++------------- > 1 file changed, 53 insertions(+), 54 deletions(-) > > diff --git a/libswscale/ppc/swscale_ppc_template.c b/libswscale/ppc/swscale_ppc_template.c > index e9abd33cbf..3c2addd4a4 100644 > --- a/libswscale/ppc/swscale_ppc_template.c > +++ b/libswscale/ppc/swscale_ppc_template.c > @@ -101,70 +101,69 @@ static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW, > const uint8_t *src, const int16_t *filter, > const int32_t *filterPos, int filterSize) > { > - register int i; > LOCAL_ALIGNED(16, int, tempo, [4]); > > - switch (filterSize) { > - case 4: > - for (i = 0; i < dstW; i++) { > - register int srcPos = filterPos[i]; > - > - vector unsigned char src_vF = unaligned_load(srcPos, src); > - vector signed short src_v, filter_v; > - vector signed int val_vEven, val_s; > - src_v = // vec_unpackh sign-extends... > - (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); > - // now put our elements in the even slots > - src_v = vec_mergeh(src_v, (vector signed short)vzero); > - GET_VF4(i, filter_v, filter); > - val_vEven = vec_mule(src_v, filter_v); > - val_s = vec_sums(val_vEven, vzero); > - vec_st(val_s, 0, tempo); > - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); > - } > + switch (filterSize) { > + case 4: > + for (register int i = 0; i < dstW; i++) { > + register int srcPos = filterPos[i]; > + > + vector unsigned char src_vF = unaligned_load(srcPos, src); > + vector signed short src_v, filter_v; > + vector signed int val_vEven, val_s; > + src_v = // vec_unpackh sign-extends... 
> + (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); > + // now put our elements in the even slots > + src_v = vec_mergeh(src_v, (vector signed short)vzero); > + GET_VF4(i, filter_v, filter); > + val_vEven = vec_mule(src_v, filter_v); > + val_s = vec_sums(val_vEven, vzero); > + vec_st(val_s, 0, tempo); > + dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); > + } > break; > - case 8: > - for (i = 0; i < dstW; i++) { > - register int srcPos = filterPos[i]; > - vector unsigned char src_vF, av_unused src_v0, av_unused src_v1; > - vector unsigned char av_unused permS; > - vector signed short src_v, filter_v; > - vector signed int val_v, val_s; > - FIRST_LOAD(src_v0, srcPos, src, permS); > - LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF); > - src_v = // vec_unpackh sign-extends... > - (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); > - filter_v = vec_ld(i << 4, filter); > - val_v = vec_msums(src_v, filter_v, (vector signed int)vzero); > - val_s = vec_sums(val_v, vzero); > - vec_st(val_s, 0, tempo); > - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); > - } > + case 8: > + for (register int i = 0; i < dstW; i++) { > + register int srcPos = filterPos[i]; > + vector unsigned char src_vF, av_unused src_v0, av_unused src_v1; > + vector unsigned char av_unused permS; > + vector signed short src_v, filter_v; > + vector signed int val_v, val_s; > + FIRST_LOAD(src_v0, srcPos, src, permS); > + LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF); > + src_v = // vec_unpackh sign-extends... 
> + (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); > + filter_v = vec_ld(i << 4, filter); > + val_v = vec_msums(src_v, filter_v, (vector signed int)vzero); > + val_s = vec_sums(val_v, vzero); > + vec_st(val_s, 0, tempo); > + dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); > + } > break; > > - case 16: > - for (i = 0; i < dstW; i++) { > - register int srcPos = filterPos[i]; > + case 16: > + for (register int i = 0; i < dstW; i++) { > + register int srcPos = filterPos[i]; > > - vector unsigned char src_vF = unaligned_load(srcPos, src); > - vector signed short src_vA = // vec_unpackh sign-extends... > - (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); > - vector signed short src_vB = // vec_unpackh sign-extends... > - (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF)); > - vector signed short filter_v0 = vec_ld(i << 5, filter); > - vector signed short filter_v1 = vec_ld((i << 5) + 16, filter); > + vector unsigned char src_vF = unaligned_load(srcPos, src); > + vector signed short src_vA = // vec_unpackh sign-extends... > + (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); > + vector signed short src_vB = // vec_unpackh sign-extends... 
> + (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF)); > + vector signed short filter_v0 = vec_ld(i << 5, filter); > + vector signed short filter_v1 = vec_ld((i << 5) + 16, filter); > > - vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero); > - vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc); > + vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero); > + vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc); > > - vector signed int val_s = vec_sums(val_v, vzero); > + vector signed int val_s = vec_sums(val_v, vzero); > > - VEC_ST(val_s, 0, tempo); > - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); > - } > + VEC_ST(val_s, 0, tempo); > + dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); > + } > break; > > - default: > + default: > for (register int i = 0; i < dstW; i++) { > register int j; > register int srcPos = filterPos[i]; > @@ -174,5 +173,5 @@ static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW, > dst[i] = FFMIN(val >> 7, (1 << 15) - 1); > } > break; > - } > + } > } > Patchset LGTM. I missed those in the previous broken PPC code removal I did.
diff --git a/libswscale/ppc/swscale_ppc_template.c b/libswscale/ppc/swscale_ppc_template.c index e9abd33cbf..3c2addd4a4 100644 --- a/libswscale/ppc/swscale_ppc_template.c +++ b/libswscale/ppc/swscale_ppc_template.c @@ -101,70 +101,69 @@ static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) { - register int i; LOCAL_ALIGNED(16, int, tempo, [4]); - switch (filterSize) { - case 4: - for (i = 0; i < dstW; i++) { - register int srcPos = filterPos[i]; - - vector unsigned char src_vF = unaligned_load(srcPos, src); - vector signed short src_v, filter_v; - vector signed int val_vEven, val_s; - src_v = // vec_unpackh sign-extends... - (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); - // now put our elements in the even slots - src_v = vec_mergeh(src_v, (vector signed short)vzero); - GET_VF4(i, filter_v, filter); - val_vEven = vec_mule(src_v, filter_v); - val_s = vec_sums(val_vEven, vzero); - vec_st(val_s, 0, tempo); - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); - } + switch (filterSize) { + case 4: + for (register int i = 0; i < dstW; i++) { + register int srcPos = filterPos[i]; + + vector unsigned char src_vF = unaligned_load(srcPos, src); + vector signed short src_v, filter_v; + vector signed int val_vEven, val_s; + src_v = // vec_unpackh sign-extends... 
+ (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); + // now put our elements in the even slots + src_v = vec_mergeh(src_v, (vector signed short)vzero); + GET_VF4(i, filter_v, filter); + val_vEven = vec_mule(src_v, filter_v); + val_s = vec_sums(val_vEven, vzero); + vec_st(val_s, 0, tempo); + dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); + } break; - case 8: - for (i = 0; i < dstW; i++) { - register int srcPos = filterPos[i]; - vector unsigned char src_vF, av_unused src_v0, av_unused src_v1; - vector unsigned char av_unused permS; - vector signed short src_v, filter_v; - vector signed int val_v, val_s; - FIRST_LOAD(src_v0, srcPos, src, permS); - LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF); - src_v = // vec_unpackh sign-extends... - (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); - filter_v = vec_ld(i << 4, filter); - val_v = vec_msums(src_v, filter_v, (vector signed int)vzero); - val_s = vec_sums(val_v, vzero); - vec_st(val_s, 0, tempo); - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); - } + case 8: + for (register int i = 0; i < dstW; i++) { + register int srcPos = filterPos[i]; + vector unsigned char src_vF, av_unused src_v0, av_unused src_v1; + vector unsigned char av_unused permS; + vector signed short src_v, filter_v; + vector signed int val_v, val_s; + FIRST_LOAD(src_v0, srcPos, src, permS); + LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF); + src_v = // vec_unpackh sign-extends... 
+ (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); + filter_v = vec_ld(i << 4, filter); + val_v = vec_msums(src_v, filter_v, (vector signed int)vzero); + val_s = vec_sums(val_v, vzero); + vec_st(val_s, 0, tempo); + dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); + } break; - case 16: - for (i = 0; i < dstW; i++) { - register int srcPos = filterPos[i]; + case 16: + for (register int i = 0; i < dstW; i++) { + register int srcPos = filterPos[i]; - vector unsigned char src_vF = unaligned_load(srcPos, src); - vector signed short src_vA = // vec_unpackh sign-extends... - (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); - vector signed short src_vB = // vec_unpackh sign-extends... - (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF)); - vector signed short filter_v0 = vec_ld(i << 5, filter); - vector signed short filter_v1 = vec_ld((i << 5) + 16, filter); + vector unsigned char src_vF = unaligned_load(srcPos, src); + vector signed short src_vA = // vec_unpackh sign-extends... + (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); + vector signed short src_vB = // vec_unpackh sign-extends... 
+ (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF)); + vector signed short filter_v0 = vec_ld(i << 5, filter); + vector signed short filter_v1 = vec_ld((i << 5) + 16, filter); - vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero); - vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc); + vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero); + vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc); - vector signed int val_s = vec_sums(val_v, vzero); + vector signed int val_s = vec_sums(val_v, vzero); - VEC_ST(val_s, 0, tempo); - dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); - } + VEC_ST(val_s, 0, tempo); + dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); + } break; - default: + default: for (register int i = 0; i < dstW; i++) { register int j; register int srcPos = filterPos[i]; @@ -174,5 +173,5 @@ static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW, dst[i] = FFMIN(val >> 7, (1 << 15) - 1); } break; - } + } }
```
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libswscale/ppc/swscale_ppc_template.c | 107 +++++++++++++-------------
 1 file changed, 53 insertions(+), 54 deletions(-)
```