diff mbox series

[FFmpeg-devel,v2,1/2] swscale/input: unify grayf32 funcs with rgbf32 funcs

Message ID 20211114025653.654-1-mindmark@gmail.com
State New
Headers show
Series [FFmpeg-devel,v2,1/2] swscale/input: unify grayf32 funcs with rgbf32 funcs | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished

Commit Message

Mark Reid Nov. 14, 2021, 2:56 a.m. UTC
From: Mark Reid <mindmark@gmail.com>

This is meant to be a cosmetic change.

old timings:
  42780 UNITS in grayf32le,       1 runs,      0 skips
  56720 UNITS in grayf32le,       2 runs,      0 skips
  67265 UNITS in grayf32le,       4 runs,      0 skips
  58082 UNITS in grayf32le,       8 runs,      0 skips
  63512 UNITS in grayf32le,      16 runs,      0 skips
  52720 UNITS in grayf32le,      32 runs,      0 skips
  46491 UNITS in grayf32le,      64 runs,      0 skips

  68500 UNITS in grayf32be,       1 runs,      0 skips
  66930 UNITS in grayf32be,       2 runs,      0 skips
  62305 UNITS in grayf32be,       4 runs,      0 skips
  55510 UNITS in grayf32be,       8 runs,      0 skips
  50216 UNITS in grayf32be,      16 runs,      0 skips
  44480 UNITS in grayf32be,      32 runs,      0 skips
  42394 UNITS in grayf32be,      64 runs,      0 skips

new timings:
  46660 UNITS in grayf32le,       1 runs,      0 skips
  51830 UNITS in grayf32le,       2 runs,      0 skips
  53390 UNITS in grayf32le,       4 runs,      0 skips
  50910 UNITS in grayf32le,       8 runs,      0 skips
  44968 UNITS in grayf32le,      16 runs,      0 skips
  40349 UNITS in grayf32le,      32 runs,      0 skips
  38330 UNITS in grayf32le,      64 runs,      0 skips

  39980 UNITS in grayf32be,       1 runs,      0 skips
  49630 UNITS in grayf32be,       2 runs,      0 skips
  53540 UNITS in grayf32be,       4 runs,      0 skips
  59767 UNITS in grayf32be,       8 runs,      0 skips
  51206 UNITS in grayf32be,      16 runs,      0 skips
  44743 UNITS in grayf32be,      32 runs,      0 skips
  41468 UNITS in grayf32be,      64 runs,      0 skips
---
 libswscale/input.c | 36 +++++++++++-------------------------
 1 file changed, 11 insertions(+), 25 deletions(-)

--
2.31.1.windows.1

Comments

Michael Niedermayer Nov. 14, 2021, 4:14 p.m. UTC | #1
On Sat, Nov 13, 2021 at 06:56:52PM -0800, mindmark@gmail.com wrote:
> From: Mark Reid <mindmark@gmail.com>
> 
> This is ment to be a cosmetic change
> 
> old timings:
>   42780 UNITS in grayf32le,       1 runs,      0 skips
>   56720 UNITS in grayf32le,       2 runs,      0 skips
>   67265 UNITS in grayf32le,       4 runs,      0 skips
>   58082 UNITS in grayf32le,       8 runs,      0 skips
>   63512 UNITS in grayf32le,      16 runs,      0 skips
>   52720 UNITS in grayf32le,      32 runs,      0 skips
>   46491 UNITS in grayf32le,      64 runs,      0 skips
> 
>   68500 UNITS in grayf32be,       1 runs,      0 skips
>   66930 UNITS in grayf32be,       2 runs,      0 skips
>   62305 UNITS in grayf32be,       4 runs,      0 skips
>   55510 UNITS in grayf32be,       8 runs,      0 skips
>   50216 UNITS in grayf32be,      16 runs,      0 skips
>   44480 UNITS in grayf32be,      32 runs,      0 skips
>   42394 UNITS in grayf32be,      64 runs,      0 skips
> 
> new timings:
>   46660 UNITS in grayf32le,       1 runs,      0 skips
>   51830 UNITS in grayf32le,       2 runs,      0 skips
>   53390 UNITS in grayf32le,       4 runs,      0 skips
>   50910 UNITS in grayf32le,       8 runs,      0 skips
>   44968 UNITS in grayf32le,      16 runs,      0 skips
>   40349 UNITS in grayf32le,      32 runs,      0 skips
>   38330 UNITS in grayf32le,      64 runs,      0 skips
> 
>   39980 UNITS in grayf32be,       1 runs,      0 skips
>   49630 UNITS in grayf32be,       2 runs,      0 skips
>   53540 UNITS in grayf32be,       4 runs,      0 skips
>   59767 UNITS in grayf32be,       8 runs,      0 skips
>   51206 UNITS in grayf32be,      16 runs,      0 skips
>   44743 UNITS in grayf32be,      32 runs,      0 skips
>   41468 UNITS in grayf32be,      64 runs,      0 skips

will apply



thx
[...]
diff mbox series

Patch

diff --git a/libswscale/input.c b/libswscale/input.c
index 336f957c8c..90efdd2ffc 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -1013,31 +1013,19 @@  static av_always_inline void planar_rgbf32_to_y(uint8_t *_dst, const uint8_t *_s
     }
 }

-#undef rdpx
-
 static av_always_inline void grayf32ToY16_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1,
-                                            const uint8_t *unused2, int width, uint32_t *unused)
+                                            const uint8_t *unused2, int width, int is_be, uint32_t *unused)
 {
     int i;
     const float *src = (const float *)_src;
     uint16_t *dst    = (uint16_t *)_dst;

     for (i = 0; i < width; ++i){
-        dst[i] = av_clip_uint16(lrintf(65535.0f * src[i]));
+        dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src + i)));
     }
 }

-static av_always_inline void grayf32ToY16_bswap_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1,
-                                                  const uint8_t *unused2, int width, uint32_t *unused)
-{
-    int i;
-    const uint32_t *src = (const uint32_t *)_src;
-    uint16_t *dst    = (uint16_t *)_dst;
-
-    for (i = 0; i < width; ++i){
-        dst[i] = av_clip_uint16(lrintf(65535.0f * av_int2float(av_bswap32(src[i]))));
-    }
-}
+#undef rdpx

 #define rgb9plus_planar_funcs_endian(nbits, endian_name, endian)                                    \
 static void planar_rgb##nbits##endian_name##_to_y(uint8_t *dst, const uint8_t *src[4],              \
@@ -1092,6 +1080,12 @@  static void planar_rgbf32##endian_name##_to_a(uint8_t *dst, const uint8_t *src[4
                                               int w, int32_t *rgb2yuv)                              \
 {                                                                                                   \
     planar_rgbf32_to_a(dst, src, w, endian, rgb2yuv);                                               \
+}                                                                                                   \
+static void grayf32##endian_name##ToY16_c(uint8_t *dst, const uint8_t *src,                         \
+                                          const uint8_t *unused1, const uint8_t *unused2,           \
+                                          int width, uint32_t *unused)                              \
+{                                                                                                   \
+    grayf32ToY16_c(dst, src, unused1, unused2, width, endian, unused);                              \
 }

 rgbf32_planar_funcs_endian(le, 0)
@@ -1699,18 +1693,10 @@  av_cold void ff_sws_init_input_funcs(SwsContext *c)
         c->lumToYV12 = p010BEToY_c;
         break;
     case AV_PIX_FMT_GRAYF32LE:
-#if HAVE_BIGENDIAN
-        c->lumToYV12 = grayf32ToY16_bswap_c;
-#else
-        c->lumToYV12 = grayf32ToY16_c;
-#endif
+        c->lumToYV12 = grayf32leToY16_c;
         break;
     case AV_PIX_FMT_GRAYF32BE:
-#if HAVE_BIGENDIAN
-        c->lumToYV12 = grayf32ToY16_c;
-#else
-        c->lumToYV12 = grayf32ToY16_bswap_c;
-#endif
+        c->lumToYV12 = grayf32beToY16_c;
         break;
     case AV_PIX_FMT_Y210LE:
         c->lumToYV12 = y210le_Y_c;