diff mbox series

[FFmpeg-devel] swresample: misc improvements

Message ID CAPYw7P6p1DP01GCk4kE4VT95R8_L9e9JS_dC5b-BtxiKGR-Tig@mail.gmail.com
State New
Headers show
Series [FFmpeg-devel] swresample: misc improvements | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Paul B Mahol May 11, 2023, 5:13 p.m. UTC
Attached.

Comments

Michael Niedermayer May 12, 2023, 11:36 p.m. UTC | #1
On Thu, May 11, 2023 at 07:13:19PM +0200, Paul B Mahol wrote:
> Attached.
[...]
> @@ -33,64 +33,86 @@
>  
>  
>  #define CONV_FUNC_NAME(dst_fmt, src_fmt) conv_ ## src_fmt ## _to_ ## dst_fmt
> +#define CONVP_FUNC_NAME(dst_fmt, src_fmt) convp_ ## src_fmt ## _to_ ## dst_fmt
>  
>  //FIXME rounding ?
> -#define CONV_FUNC(ofmt, otype, ifmt, expr)\
> +#define CONV_FUNC(ofmt, otype, ifmt, itype, expr)\
> +    \
>  static void CONV_FUNC_NAME(ofmt, ifmt)(uint8_t *po, const uint8_t *pi, int is, int os, uint8_t *end)\
>  {\
>      uint8_t *end2 = end - 3*os;\
>      while(po < end2){\
> +        itype x = *(itype*)pi;\
>          *(otype*)po = expr; pi += is; po += os;\
> +        x = *(itype*)pi;\
>          *(otype*)po = expr; pi += is; po += os;\
> +        x = *(itype*)pi;\
>          *(otype*)po = expr; pi += is; po += os;\
> +        x = *(itype*)pi;\
>          *(otype*)po = expr; pi += is; po += os;\
>      }\
>      while(po < end){\
> +        itype x = *(itype*)pi;\
>          *(otype*)po = expr; pi += is; po += os;\
>      }\
> +}\
> +\
> +static void CONVP_FUNC_NAME(ofmt, ifmt)(uint8_t *ddst, const uint8_t *ssrc, int len)\
> +{\
> +    const itype *src = (const itype *)ssrc;\
> +    otype *dst = (otype *)ddst;\
> +    for (int n = 0; n < len; n++){\
> +        itype x = src[n];\
> +        dst[n] = expr;\
> +    }\
>  }
>  
>  //FIXME put things below under ifdefs so we do not waste space for cases no codec will need
> -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_U8 ,  *(const uint8_t*)pi)
> -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80U)<<8)
> -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80U)<<24)
> -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_U8 , (uint64_t)((*(const uint8_t*)pi - 0x80U))<<56)
> -CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0f/ (1<<7)))
> -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0 / (1<<7)))
> -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S16, (*(const int16_t*)pi>>8) + 0x80)
> -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S16,  *(const int16_t*)pi)
> -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S16, *(const int16_t*)pi * (1 << 16))
> -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S16, (uint64_t)(*(const int16_t*)pi)<<48)
> -CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S16,  *(const int16_t*)pi*(1.0f/ (1<<15)))
> -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S16,  *(const int16_t*)pi*(1.0 / (1<<15)))
> -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S32, (*(const int32_t*)pi>>24) + 0x80)
> -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S32,  *(const int32_t*)pi>>16)
> -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S32,  *(const int32_t*)pi)
> -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S32, (uint64_t)(*(const int32_t*)pi)<<32)
> -CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S32,  *(const int32_t*)pi*(1.0f/ (1U<<31)))
> -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S32,  *(const int32_t*)pi*(1.0 / (1U<<31)))
> -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S64, (*(const int64_t*)pi>>56) + 0x80)
> -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S64,  *(const int64_t*)pi>>48)
> -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S64,  *(const int64_t*)pi>>32)
> -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S64,  *(const int64_t*)pi)
> -CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S64,  *(const int64_t*)pi*(1.0f/ (UINT64_C(1)<<63)))
> -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S64,  *(const int64_t*)pi*(1.0 / (UINT64_C(1)<<63)))
> -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_FLT, av_clip_uint8(  lrintf(*(const float*)pi * (1<<7)) + 0x80))
> -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, av_clip_int16(  lrintf(*(const float*)pi * (1<<15))))
> -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, av_clipl_int32(llrintf(*(const float*)pi * (1U<<31))))
> -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_FLT, llrintf(*(const float*)pi * (UINT64_C(1)<<63)))
> -CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_FLT, *(const float*)pi)
> -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_FLT, *(const float*)pi)
> -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_DBL, av_clip_uint8(  lrint(*(const double*)pi * (1<<7)) + 0x80))
> -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, av_clip_int16(  lrint(*(const double*)pi * (1<<15))))
> -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, av_clipl_int32(llrint(*(const double*)pi * (1U<<31))))
> -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_DBL, llrint(*(const double*)pi * (UINT64_C(1)<<63)))
> -CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_DBL, *(const double*)pi)
> -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, *(const double*)pi)
> -
> -#define FMT_PAIR_FUNC(out, in) [(out) + AV_SAMPLE_FMT_NB*(in)] = CONV_FUNC_NAME(out, in)
> -
> -static conv_func_type * const fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB*AV_SAMPLE_FMT_NB] = {
> +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_U8 , uint8_t, x)
> +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_U8 , uint8_t, (x - 0x80U)<<8)
> +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_U8 , uint8_t, (x - 0x80U)<<24)
> +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_U8 , uint8_t, (uint64_t)(x - 0x80U)<<56)
> +CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_U8 , uint8_t, (x - 0x80)*(1.0f/ (1<<7)))
> +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_U8 , uint8_t, (x - 0x80)*(1.0 / (1<<7)))
> +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S16, int16_t, (x>>8) + 0x80)
> +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S16, int16_t, x)
> +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S16, int16_t, x * (1 << 16))
> +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S16, int16_t, (uint64_t)(x)<<48)
> +CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S16, int16_t, x*(1.0f/ (1<<15)))
> +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S16, int16_t, x*(1.0 / (1<<15)))
> +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S32, int32_t, (x>>24) + 0x80)
> +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S32, int32_t, x>>16)
> +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S32, int32_t, x)
> +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S32, int32_t, (uint64_t)(x)<<32)
> +CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S32, int32_t,  x*(1.0f/ (1U<<31)))
> +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S32, int32_t,  x*(1.0 / (1U<<31)))
> +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S64, int64_t, (x>>56) + 0x80)
> +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S64, int64_t, x>>48)
> +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S64, int64_t, x>>32)
> +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S64, int64_t, x)
> +CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S64, int64_t, x*(1.0f/ (UINT64_C(1)<<63)))
> +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S64, int64_t, x*(1.0 / (UINT64_C(1)<<63)))
> +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(  lrintf(x * (1<<7)) + 0x80))
> +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(  lrintf(x * (1<<15))))
> +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(x * (1U<<31))))
> +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_FLT, float, llrintf(x * (UINT64_C(1)<<63)))
> +CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_FLT, float, x)
> +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_FLT, float, x)
> +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(  lrint(x * (1<<7)) + 0x80))
> +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(  lrint(x * (1<<15))))
> +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(x * (1U<<31))))
> +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_DBL, double, llrint(x * (UINT64_C(1)<<63)))
> +CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_DBL, double, x)
> +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, double, x)

I think the new cases are no longer const; is that intended?
(It would cast const to non-const.)

Except for that, the patch LGTM.

thx

[...]
Paul B Mahol May 13, 2023, 6:29 a.m. UTC | #2
On Sat, May 13, 2023 at 1:37 AM Michael Niedermayer <michael@niedermayer.cc>
wrote:

> On Thu, May 11, 2023 at 07:13:19PM +0200, Paul B Mahol wrote:
> > Attached.
> [...]
> > @@ -33,64 +33,86 @@
> >
> >
> >  #define CONV_FUNC_NAME(dst_fmt, src_fmt) conv_ ## src_fmt ## _to_ ##
> dst_fmt
> > +#define CONVP_FUNC_NAME(dst_fmt, src_fmt) convp_ ## src_fmt ## _to_ ##
> dst_fmt
> >
> >  //FIXME rounding ?
> > -#define CONV_FUNC(ofmt, otype, ifmt, expr)\
> > +#define CONV_FUNC(ofmt, otype, ifmt, itype, expr)\
> > +    \
> >  static void CONV_FUNC_NAME(ofmt, ifmt)(uint8_t *po, const uint8_t *pi,
> int is, int os, uint8_t *end)\
> >  {\
> >      uint8_t *end2 = end - 3*os;\
> >      while(po < end2){\
> > +        itype x = *(itype*)pi;\
> >          *(otype*)po = expr; pi += is; po += os;\
> > +        x = *(itype*)pi;\
> >          *(otype*)po = expr; pi += is; po += os;\
> > +        x = *(itype*)pi;\
> >          *(otype*)po = expr; pi += is; po += os;\
> > +        x = *(itype*)pi;\
> >          *(otype*)po = expr; pi += is; po += os;\
> >      }\
> >      while(po < end){\
> > +        itype x = *(itype*)pi;\
> >          *(otype*)po = expr; pi += is; po += os;\
> >      }\
> > +}\
> > +\
> > +static void CONVP_FUNC_NAME(ofmt, ifmt)(uint8_t *ddst, const uint8_t
> *ssrc, int len)\
> > +{\
> > +    const itype *src = (const itype *)ssrc;\
> > +    otype *dst = (otype *)ddst;\
> > +    for (int n = 0; n < len; n++){\
> > +        itype x = src[n];\
> > +        dst[n] = expr;\
> > +    }\
> >  }
> >
> >  //FIXME put things below under ifdefs so we do not waste space for
> cases no codec will need
> > -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_U8 ,  *(const
> uint8_t*)pi)
> > -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_U8 , (*(const
> uint8_t*)pi - 0x80U)<<8)
> > -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_U8 , (*(const
> uint8_t*)pi - 0x80U)<<24)
> > -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_U8 ,
> (uint64_t)((*(const uint8_t*)pi - 0x80U))<<56)
> > -CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_U8 , (*(const
> uint8_t*)pi - 0x80)*(1.0f/ (1<<7)))
> > -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_U8 , (*(const
> uint8_t*)pi - 0x80)*(1.0 / (1<<7)))
> > -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S16, (*(const
> int16_t*)pi>>8) + 0x80)
> > -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S16,  *(const
> int16_t*)pi)
> > -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S16, *(const
> int16_t*)pi * (1 << 16))
> > -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S16,
> (uint64_t)(*(const int16_t*)pi)<<48)
> > -CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S16,  *(const
> int16_t*)pi*(1.0f/ (1<<15)))
> > -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S16,  *(const
> int16_t*)pi*(1.0 / (1<<15)))
> > -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S32, (*(const
> int32_t*)pi>>24) + 0x80)
> > -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S32,  *(const
> int32_t*)pi>>16)
> > -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S32,  *(const
> int32_t*)pi)
> > -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S32,
> (uint64_t)(*(const int32_t*)pi)<<32)
> > -CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S32,  *(const
> int32_t*)pi*(1.0f/ (1U<<31)))
> > -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S32,  *(const
> int32_t*)pi*(1.0 / (1U<<31)))
> > -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S64, (*(const
> int64_t*)pi>>56) + 0x80)
> > -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S64,  *(const
> int64_t*)pi>>48)
> > -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S64,  *(const
> int64_t*)pi>>32)
> > -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S64,  *(const
> int64_t*)pi)
> > -CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S64,  *(const
> int64_t*)pi*(1.0f/ (UINT64_C(1)<<63)))
> > -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S64,  *(const
> int64_t*)pi*(1.0 / (UINT64_C(1)<<63)))
> > -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_FLT,
> av_clip_uint8(  lrintf(*(const float*)pi * (1<<7)) + 0x80))
> > -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT,
> av_clip_int16(  lrintf(*(const float*)pi * (1<<15))))
> > -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT,
> av_clipl_int32(llrintf(*(const float*)pi * (1U<<31))))
> > -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_FLT,
> llrintf(*(const float*)pi * (UINT64_C(1)<<63)))
> > -CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_FLT, *(const
> float*)pi)
> > -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_FLT, *(const
> float*)pi)
> > -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_DBL,
> av_clip_uint8(  lrint(*(const double*)pi * (1<<7)) + 0x80))
> > -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL,
> av_clip_int16(  lrint(*(const double*)pi * (1<<15))))
> > -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL,
> av_clipl_int32(llrint(*(const double*)pi * (1U<<31))))
> > -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_DBL, llrint(*(const
> double*)pi * (UINT64_C(1)<<63)))
> > -CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_DBL, *(const
> double*)pi)
> > -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, *(const
> double*)pi)
> > -
> > -#define FMT_PAIR_FUNC(out, in) [(out) + AV_SAMPLE_FMT_NB*(in)] =
> CONV_FUNC_NAME(out, in)
> > -
> > -static conv_func_type * const
> fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB*AV_SAMPLE_FMT_NB] = {
> > +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_U8 , uint8_t, x)
> > +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_U8 , uint8_t, (x -
> 0x80U)<<8)
> > +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_U8 , uint8_t, (x -
> 0x80U)<<24)
> > +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_U8 , uint8_t,
> (uint64_t)(x - 0x80U)<<56)
> > +CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_U8 , uint8_t, (x -
> 0x80)*(1.0f/ (1<<7)))
> > +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_U8 , uint8_t, (x -
> 0x80)*(1.0 / (1<<7)))
> > +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S16, int16_t,
> (x>>8) + 0x80)
> > +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S16, int16_t, x)
> > +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S16, int16_t, x *
> (1 << 16))
> > +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S16, int16_t,
> (uint64_t)(x)<<48)
> > +CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S16, int16_t,
> x*(1.0f/ (1<<15)))
> > +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S16, int16_t,
> x*(1.0 / (1<<15)))
> > +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S32, int32_t,
> (x>>24) + 0x80)
> > +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S32, int32_t, x>>16)
> > +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S32, int32_t, x)
> > +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S32, int32_t,
> (uint64_t)(x)<<32)
> > +CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S32, int32_t,
> x*(1.0f/ (1U<<31)))
> > +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S32, int32_t,
> x*(1.0 / (1U<<31)))
> > +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S64, int64_t,
> (x>>56) + 0x80)
> > +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S64, int64_t, x>>48)
> > +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S64, int64_t, x>>32)
> > +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S64, int64_t, x)
> > +CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S64, int64_t,
> x*(1.0f/ (UINT64_C(1)<<63)))
> > +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S64, int64_t,
> x*(1.0 / (UINT64_C(1)<<63)))
> > +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_FLT, float,
> av_clip_uint8(  lrintf(x * (1<<7)) + 0x80))
> > +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float,
> av_clip_int16(  lrintf(x * (1<<15))))
> > +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float,
> av_clipl_int32(llrintf(x * (1U<<31))))
> > +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_FLT, float,
> llrintf(x * (UINT64_C(1)<<63)))
> > +CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_FLT, float, x)
> > +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_FLT, float, x)
> > +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_DBL, double,
> av_clip_uint8(  lrint(x * (1<<7)) + 0x80))
> > +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double,
> av_clip_int16(  lrint(x * (1<<15))))
> > +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double,
> av_clipl_int32(llrint(x * (1U<<31))))
> > +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_DBL, double,
> llrint(x * (UINT64_C(1)<<63)))
> > +CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_DBL, double, x)
> > +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, double, x)
>
> I think the new cases are no longer const; is that intended?
> (It would cast const to non-const.)
>

You mean I removed const from the old macro?
I can fix that if that is the case.


>
> except that patch LGTM
>
> thx
>
> [...]
>
> --
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> The real ebay dictionary, page 2
> "100% positive feedback" - "All either got their money back or didnt
> complain"
> "Best seller ever, very honest" - "Seller refunded buyer after failed scam"
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Michael Niedermayer May 13, 2023, 2:55 p.m. UTC | #3
On Sat, May 13, 2023 at 08:29:37AM +0200, Paul B Mahol wrote:
> On Sat, May 13, 2023 at 1:37 AM Michael Niedermayer <michael@niedermayer.cc>
> wrote:
> 
> > On Thu, May 11, 2023 at 07:13:19PM +0200, Paul B Mahol wrote:
> > > Attached.
> > [...]
> > > @@ -33,64 +33,86 @@
> > >
> > >
> > >  #define CONV_FUNC_NAME(dst_fmt, src_fmt) conv_ ## src_fmt ## _to_ ##
> > dst_fmt
> > > +#define CONVP_FUNC_NAME(dst_fmt, src_fmt) convp_ ## src_fmt ## _to_ ##
> > dst_fmt
> > >
> > >  //FIXME rounding ?
> > > -#define CONV_FUNC(ofmt, otype, ifmt, expr)\
> > > +#define CONV_FUNC(ofmt, otype, ifmt, itype, expr)\
> > > +    \
> > >  static void CONV_FUNC_NAME(ofmt, ifmt)(uint8_t *po, const uint8_t *pi,
> > int is, int os, uint8_t *end)\
> > >  {\
> > >      uint8_t *end2 = end - 3*os;\
> > >      while(po < end2){\
> > > +        itype x = *(itype*)pi;\
> > >          *(otype*)po = expr; pi += is; po += os;\
> > > +        x = *(itype*)pi;\
> > >          *(otype*)po = expr; pi += is; po += os;\
> > > +        x = *(itype*)pi;\
> > >          *(otype*)po = expr; pi += is; po += os;\
> > > +        x = *(itype*)pi;\
> > >          *(otype*)po = expr; pi += is; po += os;\
> > >      }\
> > >      while(po < end){\
> > > +        itype x = *(itype*)pi;\
> > >          *(otype*)po = expr; pi += is; po += os;\
> > >      }\
> > > +}\
> > > +\
> > > +static void CONVP_FUNC_NAME(ofmt, ifmt)(uint8_t *ddst, const uint8_t
> > *ssrc, int len)\
> > > +{\
> > > +    const itype *src = (const itype *)ssrc;\
> > > +    otype *dst = (otype *)ddst;\
> > > +    for (int n = 0; n < len; n++){\
> > > +        itype x = src[n];\
> > > +        dst[n] = expr;\
> > > +    }\
> > >  }
> > >
> > >  //FIXME put things below under ifdefs so we do not waste space for
> > cases no codec will need
> > > -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_U8 ,  *(const
> > uint8_t*)pi)
> > > -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_U8 , (*(const
> > uint8_t*)pi - 0x80U)<<8)
> > > -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_U8 , (*(const
> > uint8_t*)pi - 0x80U)<<24)
> > > -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_U8 ,
> > (uint64_t)((*(const uint8_t*)pi - 0x80U))<<56)
> > > -CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_U8 , (*(const
> > uint8_t*)pi - 0x80)*(1.0f/ (1<<7)))
> > > -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_U8 , (*(const
> > uint8_t*)pi - 0x80)*(1.0 / (1<<7)))
> > > -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S16, (*(const
> > int16_t*)pi>>8) + 0x80)
> > > -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S16,  *(const
> > int16_t*)pi)
> > > -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S16, *(const
> > int16_t*)pi * (1 << 16))
> > > -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S16,
> > (uint64_t)(*(const int16_t*)pi)<<48)
> > > -CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S16,  *(const
> > int16_t*)pi*(1.0f/ (1<<15)))
> > > -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S16,  *(const
> > int16_t*)pi*(1.0 / (1<<15)))
> > > -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S32, (*(const
> > int32_t*)pi>>24) + 0x80)
> > > -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S32,  *(const
> > int32_t*)pi>>16)
> > > -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S32,  *(const
> > int32_t*)pi)
> > > -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S32,
> > (uint64_t)(*(const int32_t*)pi)<<32)
> > > -CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S32,  *(const
> > int32_t*)pi*(1.0f/ (1U<<31)))
> > > -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S32,  *(const
> > int32_t*)pi*(1.0 / (1U<<31)))
> > > -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S64, (*(const
> > int64_t*)pi>>56) + 0x80)
> > > -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S64,  *(const
> > int64_t*)pi>>48)
> > > -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S64,  *(const
> > int64_t*)pi>>32)
> > > -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S64,  *(const
> > int64_t*)pi)
> > > -CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S64,  *(const
> > int64_t*)pi*(1.0f/ (UINT64_C(1)<<63)))
> > > -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S64,  *(const
> > int64_t*)pi*(1.0 / (UINT64_C(1)<<63)))
> > > -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_FLT,
> > av_clip_uint8(  lrintf(*(const float*)pi * (1<<7)) + 0x80))
> > > -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT,
> > av_clip_int16(  lrintf(*(const float*)pi * (1<<15))))
> > > -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT,
> > av_clipl_int32(llrintf(*(const float*)pi * (1U<<31))))
> > > -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_FLT,
> > llrintf(*(const float*)pi * (UINT64_C(1)<<63)))
> > > -CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_FLT, *(const
> > float*)pi)
> > > -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_FLT, *(const
> > float*)pi)
> > > -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_DBL,
> > av_clip_uint8(  lrint(*(const double*)pi * (1<<7)) + 0x80))
> > > -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL,
> > av_clip_int16(  lrint(*(const double*)pi * (1<<15))))
> > > -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL,
> > av_clipl_int32(llrint(*(const double*)pi * (1U<<31))))
> > > -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_DBL, llrint(*(const
> > double*)pi * (UINT64_C(1)<<63)))
> > > -CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_DBL, *(const
> > double*)pi)
> > > -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, *(const
> > double*)pi)
> > > -
> > > -#define FMT_PAIR_FUNC(out, in) [(out) + AV_SAMPLE_FMT_NB*(in)] =
> > CONV_FUNC_NAME(out, in)
> > > -
> > > -static conv_func_type * const
> > fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB*AV_SAMPLE_FMT_NB] = {
> > > +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_U8 , uint8_t, x)
> > > +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_U8 , uint8_t, (x -
> > 0x80U)<<8)
> > > +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_U8 , uint8_t, (x -
> > 0x80U)<<24)
> > > +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_U8 , uint8_t,
> > (uint64_t)(x - 0x80U)<<56)
> > > +CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_U8 , uint8_t, (x -
> > 0x80)*(1.0f/ (1<<7)))
> > > +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_U8 , uint8_t, (x -
> > 0x80)*(1.0 / (1<<7)))
> > > +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S16, int16_t,
> > (x>>8) + 0x80)
> > > +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S16, int16_t, x)
> > > +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S16, int16_t, x *
> > (1 << 16))
> > > +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S16, int16_t,
> > (uint64_t)(x)<<48)
> > > +CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S16, int16_t,
> > x*(1.0f/ (1<<15)))
> > > +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S16, int16_t,
> > x*(1.0 / (1<<15)))
> > > +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S32, int32_t,
> > (x>>24) + 0x80)
> > > +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S32, int32_t, x>>16)
> > > +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S32, int32_t, x)
> > > +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S32, int32_t,
> > (uint64_t)(x)<<32)
> > > +CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S32, int32_t,
> > x*(1.0f/ (1U<<31)))
> > > +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S32, int32_t,
> > x*(1.0 / (1U<<31)))
> > > +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S64, int64_t,
> > (x>>56) + 0x80)
> > > +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S64, int64_t, x>>48)
> > > +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S64, int64_t, x>>32)
> > > +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S64, int64_t, x)
> > > +CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S64, int64_t,
> > x*(1.0f/ (UINT64_C(1)<<63)))
> > > +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S64, int64_t,
> > x*(1.0 / (UINT64_C(1)<<63)))
> > > +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_FLT, float,
> > av_clip_uint8(  lrintf(x * (1<<7)) + 0x80))
> > > +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float,
> > av_clip_int16(  lrintf(x * (1<<15))))
> > > +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float,
> > av_clipl_int32(llrintf(x * (1U<<31))))
> > > +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_FLT, float,
> > llrintf(x * (UINT64_C(1)<<63)))
> > > +CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_FLT, float, x)
> > > +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_FLT, float, x)
> > > +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_DBL, double,
> > av_clip_uint8(  lrint(x * (1<<7)) + 0x80))
> > > +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double,
> > av_clip_int16(  lrint(x * (1<<15))))
> > > +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double,
> > av_clipl_int32(llrint(x * (1U<<31))))
> > > +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_DBL, double,
> > llrint(x * (UINT64_C(1)<<63)))
> > > +CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_DBL, double, x)
> > > +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, double, x)
> >
> > I think the new cases are no longer const; is that intended?
> > (It would cast const to non-const.)
> >
> 
> You mean I removed const from the old macro?

yes


> Can fix that if that is the case.

thx

[...]
diff mbox series

Patch

From 5a8ab5b948423e6cde7b59df0d21f38dc0235155 Mon Sep 17 00:00:00 2001
From: Paul B Mahol <onemda@gmail.com>
Date: Thu, 11 May 2023 01:11:42 +0200
Subject: [PATCH 1/2] swresample/x86: add float<->double paths

Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libswresample/x86/audio_convert.asm    | 25 +++++++++++++++++++++++++
 libswresample/x86/audio_convert_init.c |  8 ++++++--
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/libswresample/x86/audio_convert.asm b/libswresample/x86/audio_convert.asm
index ad65008e23..82eda3758e 100644
--- a/libswresample/x86/audio_convert.asm
+++ b/libswresample/x86/audio_convert.asm
@@ -540,6 +540,26 @@  pack_8ch_%2_to_%1_u_int %+ SUFFIX:
     punpckhwd m1, m4
 %endmacro
 
+%macro FLOAT_TO_DOUBLE_N 6
+    shufps    %3, %1, %1, q3232
+    shufps    %4, %2, %2, q3232
+    cvtps2pd  %1, %1
+    cvtps2pd  %2, %2
+    cvtps2pd  %3, %3
+    cvtps2pd  %4, %4
+    SWAP 1,2
+%endmacro
+
+%macro DOUBLE_TO_FLOAT_N 6
+    cvtpd2ps  %1, %1
+    cvtpd2ps  %2, %2
+    cvtpd2ps  %3, %3
+    cvtpd2ps  %4, %4
+    shufps    %1, %2, q1010
+    shufps    %3, %4, q1010
+    SWAP 1,2
+%endmacro
+
 %macro INT32_TO_INT16_N 6
     psrad     m0, 16
     psrad     m1, 16
@@ -648,6 +668,11 @@  CONV float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
 CONV int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
 CONV int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
 
+CONV double, float, u, 3, 2, FLOAT_TO_DOUBLE_N, NOP_N
+CONV double, float, a, 3, 2, FLOAT_TO_DOUBLE_N, NOP_N
+CONV float, double, u, 2, 3, DOUBLE_TO_FLOAT_N, NOP_N
+CONV float, double, a, 2, 3, DOUBLE_TO_FLOAT_N, NOP_N
+
 PACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
 PACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
 PACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
diff --git a/libswresample/x86/audio_convert_init.c b/libswresample/x86/audio_convert_init.c
index f6d36f9ca6..e10b978c68 100644
--- a/libswresample/x86/audio_convert_init.c
+++ b/libswresample/x86/audio_convert_init.c
@@ -24,8 +24,8 @@ 
 #include "libswresample/audioconvert.h"
 
 #define PROTO(pre, in, out, cap) void ff ## pre ## in## _to_ ##out## _a_ ##cap(uint8_t **dst, const uint8_t **src, int len);
-#define PROTO2(pre, out, cap) PROTO(pre, int16, out, cap) PROTO(pre, int32, out, cap) PROTO(pre, float, out, cap)
-#define PROTO3(pre, cap) PROTO2(pre, int16, cap) PROTO2(pre, int32, cap) PROTO2(pre, float, cap)
+#define PROTO2(pre, out, cap) PROTO(pre, int16, out, cap) PROTO(pre, int32, out, cap) PROTO(pre, float, out, cap) PROTO(pre, double, out,cap)
+#define PROTO3(pre, cap) PROTO2(pre, int16, cap) PROTO2(pre, int32, cap) PROTO2(pre, float, cap) PROTO2(pre, double, cap)
 #define PROTO4(pre) PROTO3(pre, sse) PROTO3(pre, sse2) PROTO3(pre, ssse3) PROTO3(pre, sse4) PROTO3(pre, avx) PROTO3(pre, avx2)
 PROTO4(_)
 PROTO4(_pack_2ch_)
@@ -72,6 +72,10 @@  MULTI_CAPS_FUNC(SSE2, sse2)
             ac->simd_f =  ff_float_to_int32_a_sse2;
         if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP)
             ac->simd_f =  ff_float_to_int16_a_sse2;
+        if(   out_fmt == AV_SAMPLE_FMT_DBL  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_DBLP && in_fmt == AV_SAMPLE_FMT_FLTP)
+            ac->simd_f =  ff_float_to_double_a_sse2;
+        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_DBL || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_DBLP)
+            ac->simd_f =  ff_double_to_float_a_sse2;
 
         if(channels == 2) {
             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
-- 
2.39.1