diff mbox

[FFmpeg-devel,1/2] avutil: add float_dsp.vector_dmul

Message ID 20180912104245.5326-1-onemda@gmail.com
State Accepted
Commit bb16a0624a2f98d21bac3f42a731c4c70f06aad3
Headers show

Commit Message

Paul B Mahol Sept. 12, 2018, 10:42 a.m. UTC
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavutil/float_dsp.c |  9 +++++++++
 libavutil/float_dsp.h | 16 ++++++++++++++++
 2 files changed, 25 insertions(+)

Comments

James Almer Sept. 12, 2018, 1:30 p.m. UTC | #1
On 9/12/2018 7:42 AM, Paul B Mahol wrote:
> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
>  libavutil/float_dsp.c |  9 +++++++++
>  libavutil/float_dsp.h | 16 ++++++++++++++++
>  2 files changed, 25 insertions(+)

Ah, I had a patch like this lying around but never got around to sending it.
I still have the x86 SIMD implementation, though, so I'll send that later.

> 
> diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
> index 1d4911d815..6e28d71b57 100644
> --- a/libavutil/float_dsp.c
> +++ b/libavutil/float_dsp.c
> @@ -32,6 +32,14 @@ static void vector_fmul_c(float *dst, const float *src0, const float *src1,
>          dst[i] = src0[i] * src1[i];
>  }
>  
> +static void vector_dmul_c(double *dst, const double *src0, const double *src1,
> +                          int len)
> +{
> +    int i;
> +    for (i = 0; i < len; i++)
> +        dst[i] = src0[i] * src1[i];
> +}
> +
>  static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
>                                   int len)
>  {
> @@ -131,6 +139,7 @@ av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
>          return NULL;
>  
>      fdsp->vector_fmul = vector_fmul_c;
> +    fdsp->vector_dmul = vector_dmul_c;
>      fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
>      fdsp->vector_fmul_scalar = vector_fmul_scalar_c;
>      fdsp->vector_dmac_scalar = vector_dmac_scalar_c;
> diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
> index 2c24d93471..9c664592bd 100644
> --- a/libavutil/float_dsp.h
> +++ b/libavutil/float_dsp.h
> @@ -173,6 +173,22 @@ typedef struct AVFloatDSPContext {
>       * @return sum of elementwise products
>       */
>      float (*scalarproduct_float)(const float *v1, const float *v2, int len);
> +
> +    /**
> +     * Calculate the entry wise product of two vectors of doubles and store the result in
> +     * a vector of doubles.
> +     *
> +     * @param dst  output vector
> +     *             constraints: 32-byte aligned
> +     * @param src0 first input vector
> +     *             constraints: 32-byte aligned
> +     * @param src1 second input vector
> +     *             constraints: 32-byte aligned
> +     * @param len  number of elements in the input
> +     *             constraints: multiple of 16

Why not 8?

> +     */
> +    void (*vector_dmul)(double *dst, const double *src0, const double *src1,
> +                        int len);
>  } AVFloatDSPContext;
>  
>  /**
> 

LGTM.
Paul B Mahol Sept. 12, 2018, 2:48 p.m. UTC | #2
On 9/12/18, James Almer <jamrial@gmail.com> wrote:
> On 9/12/2018 7:42 AM, Paul B Mahol wrote:
>> Signed-off-by: Paul B Mahol <onemda@gmail.com>
>> ---
>>  libavutil/float_dsp.c |  9 +++++++++
>>  libavutil/float_dsp.h | 16 ++++++++++++++++
>>  2 files changed, 25 insertions(+)
>
> Ah, i had a patch like this lying around but never got to send it.
> I still have the x86 simd implementation, though, so I'll send that later.
>
>>
>> diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
>> index 1d4911d815..6e28d71b57 100644
>> --- a/libavutil/float_dsp.c
>> +++ b/libavutil/float_dsp.c
>> @@ -32,6 +32,14 @@ static void vector_fmul_c(float *dst, const float
>> *src0, const float *src1,
>>          dst[i] = src0[i] * src1[i];
>>  }
>>
>> +static void vector_dmul_c(double *dst, const double *src0, const double
>> *src1,
>> +                          int len)
>> +{
>> +    int i;
>> +    for (i = 0; i < len; i++)
>> +        dst[i] = src0[i] * src1[i];
>> +}
>> +
>>  static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
>>                                   int len)
>>  {
>> @@ -131,6 +139,7 @@ av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int
>> bit_exact)
>>          return NULL;
>>
>>      fdsp->vector_fmul = vector_fmul_c;
>> +    fdsp->vector_dmul = vector_dmul_c;
>>      fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
>>      fdsp->vector_fmul_scalar = vector_fmul_scalar_c;
>>      fdsp->vector_dmac_scalar = vector_dmac_scalar_c;
>> diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
>> index 2c24d93471..9c664592bd 100644
>> --- a/libavutil/float_dsp.h
>> +++ b/libavutil/float_dsp.h
>> @@ -173,6 +173,22 @@ typedef struct AVFloatDSPContext {
>>       * @return sum of elementwise products
>>       */
>>      float (*scalarproduct_float)(const float *v1, const float *v2, int
>> len);
>> +
>> +    /**
>> +     * Calculate the entry wise product of two vectors of doubles and
>> store the result in
>> +     * a vector of doubles.
>> +     *
>> +     * @param dst  output vector
>> +     *             constraints: 32-byte aligned
>> +     * @param src0 first input vector
>> +     *             constraints: 32-byte aligned
>> +     * @param src1 second input vector
>> +     *             constraints: 32-byte aligned
>> +     * @param len  number of elements in the input
>> +     *             constraints: multiple of 16
>
> Why not 8?

It is what the float variant uses, and for good reason.

>
>> +     */
>> +    void (*vector_dmul)(double *dst, const double *src0, const double
>> *src1,
>> +                        int len);
>>  } AVFloatDSPContext;
>>
>>  /**
>>
>
> LGTM.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
diff mbox

Patch

diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
index 1d4911d815..6e28d71b57 100644
--- a/libavutil/float_dsp.c
+++ b/libavutil/float_dsp.c
@@ -32,6 +32,14 @@  static void vector_fmul_c(float *dst, const float *src0, const float *src1,
         dst[i] = src0[i] * src1[i];
 }
 
+static void vector_dmul_c(double *dst, const double *src0, const double *src1,
+                          int len)
+{
+    int i;
+    for (i = 0; i < len; i++)
+        dst[i] = src0[i] * src1[i];
+}
+
 static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
                                  int len)
 {
@@ -131,6 +139,7 @@  av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
         return NULL;
 
     fdsp->vector_fmul = vector_fmul_c;
+    fdsp->vector_dmul = vector_dmul_c;
     fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
     fdsp->vector_fmul_scalar = vector_fmul_scalar_c;
     fdsp->vector_dmac_scalar = vector_dmac_scalar_c;
diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
index 2c24d93471..9c664592bd 100644
--- a/libavutil/float_dsp.h
+++ b/libavutil/float_dsp.h
@@ -173,6 +173,22 @@  typedef struct AVFloatDSPContext {
      * @return sum of elementwise products
      */
     float (*scalarproduct_float)(const float *v1, const float *v2, int len);
+
+    /**
+     * Calculate the entry wise product of two vectors of doubles and store the result in
+     * a vector of doubles.
+     *
+     * @param dst  output vector
+     *             constraints: 32-byte aligned
+     * @param src0 first input vector
+     *             constraints: 32-byte aligned
+     * @param src1 second input vector
+     *             constraints: 32-byte aligned
+     * @param len  number of elements in the input
+     *             constraints: multiple of 16
+     */
+    void (*vector_dmul)(double *dst, const double *src0, const double *src1,
+                        int len);
 } AVFloatDSPContext;
 
 /**