diff mbox series

[FFmpeg-devel] swscale/ppc: remove hScale8To19_vsx

Message ID NViuta7--B-9@lynne.ee
State New
Headers show
Series [FFmpeg-devel] swscale/ppc: remove hScale8To19_vsx | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished

Commit Message

Lynne May 18, 2023, 12:52 p.m. UTC
Fails checkasm on a Power9 DD2.2 02CY771 system.
The assembly doesn't seem to have been independently tested at all.

https://paste.sr.ht/~ky0ko/fe255ff73fab49b0c6d335437d894c1db626289e

Patch attached.

Comments

Lynne May 20, 2023, 6:38 p.m. UTC | #1
May 18, 2023, 14:53 by dev@lynne.ee:

> Fails checkasm on a Power9 DD2.2 02CY771 system.
> The assembly doesn't seem to have been independently tested at all.
>
> https://paste.sr.ht/~ky0ko/fe255ff73fab49b0c6d335437d894c1db626289e
>
> Patch attached.
>

Pushed.
Maybe PPC will get better testing one day.
Martin Storsjö May 22, 2023, 7:45 a.m. UTC | #2
On Thu, 18 May 2023, Lynne wrote:

> Fails checkasm on a Power9 DD2.2 02CY771 system.
> The assembly doesn't seem to have been independently tested at all.
>
> https://paste.sr.ht/~ky0ko/fe255ff73fab49b0c6d335437d894c1db626289e
>
> Patch attached.

FWIW, I don't know about the PPC functions, but... swscale in general is 
quite complex on the inside, and the checkasm tests are quite rough 
approximations (and are added much later than most of the functions); it's 
possible that the tests are overly strict or otherwise wrong and check 
things that don't matter in real use cases.

It's also possible that this function is appropriate for cases where
SWS_ACCURATE_RND isn't set (when the function is expected to round
differently from the C implementation).

// Martin
diff mbox series

Patch

From 0ba39b07e85d866ef43c38e1bcf352af2bedacb9 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 18 May 2023 14:42:14 +0200
Subject: [PATCH] swscale/ppc: remove hScale8To19_vsx

Fails checkasm on a Power9 system.
---
 libswscale/ppc/swscale_vsx.c | 60 ------------------------------------
 1 file changed, 60 deletions(-)

diff --git a/libswscale/ppc/swscale_vsx.c b/libswscale/ppc/swscale_vsx.c
index 8152ce7f10..7080a16aee 100644
--- a/libswscale/ppc/swscale_vsx.c
+++ b/libswscale/ppc/swscale_vsx.c
@@ -1858,64 +1858,6 @@  static void hcscale_fast_vsx(SwsContext *c, int16_t *dst1, int16_t *dst2,
 
 #undef HCSCALE
 
-static void hScale8To19_vsx(SwsContext *c, int16_t *_dst, int dstW,
-                            const uint8_t *src, const int16_t *filter,
-                            const int32_t *filterPos, int filterSize)
-{
-    int i, j;
-    int32_t *dst = (int32_t *) _dst;
-    vec_s16 vfilter, vin;
-    vec_u8 vin8;
-    vec_s32 vout;
-    const vec_u8 vzero = vec_splat_u8(0);
-    const vec_u8 vunusedtab[8] = {
-        (vec_u8) {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
-                  0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf},
-        (vec_u8) {0x0, 0x1, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-                  0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10},
-        (vec_u8) {0x0, 0x1, 0x2, 0x3, 0x10, 0x10, 0x10, 0x10,
-                  0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10},
-        (vec_u8) {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x10, 0x10,
-                  0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10},
-        (vec_u8) {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
-                  0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10},
-        (vec_u8) {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
-                  0x8, 0x9, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10},
-        (vec_u8) {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
-                  0x8, 0x9, 0xa, 0xb, 0x10, 0x10, 0x10, 0x10},
-        (vec_u8) {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
-                  0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0x10, 0x10},
-    };
-    const vec_u8 vunused = vunusedtab[filterSize % 8];
-
-    if (filterSize == 1) {
-        for (i = 0; i < dstW; i++) {
-            int srcPos = filterPos[i];
-            int val    = 0;
-            for (j = 0; j < filterSize; j++) {
-                val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
-            }
-            dst[i] = FFMIN(val >> 3, (1 << 19) - 1); // the cubic equation does overflow ...
-        }
-    } else {
-        for (i = 0; i < dstW; i++) {
-            const int srcPos = filterPos[i];
-            vout = vec_splat_s32(0);
-            for (j = 0; j < filterSize; j += 8) {
-                vin8 = vec_vsx_ld(0, &src[srcPos + j]);
-                vin = (vec_s16) vec_mergeh(vin8, vzero);
-                if (j + 8 > filterSize) // Remove the unused elements on the last round
-                    vin = vec_perm(vin, (vec_s16) vzero, vunused);
-
-                vfilter = vec_vsx_ld(0, &filter[filterSize * i + j]);
-                vout = vec_msums(vin, vfilter, vout);
-            }
-            vout = vec_sums(vout, (vec_s32) vzero);
-            dst[i] = FFMIN(vout[3] >> 3, (1 << 19) - 1);
-        }
-    }
-}
-
 static void hScale16To19_vsx(SwsContext *c, int16_t *_dst, int dstW,
                              const uint8_t *_src, const int16_t *filter,
                              const int32_t *filterPos, int filterSize)
@@ -2092,8 +2034,6 @@  av_cold void ff_sws_init_swscale_vsx(SwsContext *c)
                 c->hyscale_fast = hyscale_fast_vsx;
                 c->hcscale_fast = hcscale_fast_vsx;
             }
-        } else {
-            c->hyScale = c->hcScale = hScale8To19_vsx;
         }
     } else {
         if (power8) {
-- 
2.40.0