diff mbox series

[FFmpeg-devel,PATCHv2,1/5] lavu/float_dsp: add double-precision scalar product

Message ID 20240530190659.65309-1-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,PATCHv2,1/5] lavu/float_dsp: add double-precision scalar product | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont May 30, 2024, 7:06 p.m. UTC
The function pointer is appended to the structure for backward binary
compatibility. Fortunately, this is allocated by libavutil, not by the
user, so increasing the structure size is safe.
---
 libavutil/float_dsp.c | 12 ++++++++++++
 libavutil/float_dsp.h | 31 ++++++++++++++++++++++++++++++-
 2 files changed, 42 insertions(+), 1 deletion(-)

Comments

James Almer May 30, 2024, 7:10 p.m. UTC | #1
On 5/30/2024 4:06 PM, Rémi Denis-Courmont wrote:
> The function pointer is appended to the structure for backward binary
> compatibility. Fortunately, this is allocated by libavutil, not by the
> user, so increasing the structure size is safe.
> ---
>   libavutil/float_dsp.c | 12 ++++++++++++
>   libavutil/float_dsp.h | 31 ++++++++++++++++++++++++++++++-
>   2 files changed, 42 insertions(+), 1 deletion(-)
> 
> diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
> index e9fb023466..08bbc85e3e 100644
> --- a/libavutil/float_dsp.c
> +++ b/libavutil/float_dsp.c
> @@ -132,6 +132,17 @@ float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
>       return p;
>   }
>   
> +double ff_scalarproduct_double_c(const double *v1, const double *v2,
> +                                 size_t len)
> +{
> +    double p = 0.0;
> +
> +    for (size_t i = 0; i < len; i++)
> +        p += v1[i] * v2[i];
> +
> +    return p;
> +}
> +
>   av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
>   {
>       AVFloatDSPContext *fdsp = av_mallocz(sizeof(AVFloatDSPContext));
> @@ -149,6 +160,7 @@ av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
>       fdsp->vector_fmul_reverse = vector_fmul_reverse_c;
>       fdsp->butterflies_float = butterflies_float_c;
>       fdsp->scalarproduct_float = avpriv_scalarproduct_float_c;
> +    fdsp->scalarproduct_double = ff_scalarproduct_double_c;
>   
>   #if ARCH_AARCH64
>       ff_float_dsp_init_aarch64(fdsp);
> diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
> index 342a8715c5..5053aa240d 100644
> --- a/libavutil/float_dsp.h
> +++ b/libavutil/float_dsp.h
> @@ -19,6 +19,8 @@
>   #ifndef AVUTIL_FLOAT_DSP_H
>   #define AVUTIL_FLOAT_DSP_H
>   
> +#include <stddef.h>
> +
>   typedef struct AVFloatDSPContext {
>       /**
>        * Calculate the entry wise product of two vectors of floats and store the result in
> @@ -187,19 +189,46 @@ typedef struct AVFloatDSPContext {
>        */
>       void (*vector_dmul)(double *dst, const double *src0, const double *src1,
>                           int len);
> +
> +    /**
> +     * Calculate the scalar product of two vectors of doubles.
> +     *
> +     * @param v1  first vector
> +     * @param v2  second vector
> +     * @param len length of vectors
> +     *
> +     * @return inner product of the vectors
> +     */
> +    double (*scalarproduct_double)(const double *v1, const double *v2,
> +                                   size_t len);
>   } AVFloatDSPContext;
>   
>   /**
> - * Return the scalar product of two vectors.
> + * Return the scalar product of two vectors of floats.
>    *
>    * @param v1  first input vector
> + *            constraints: 32-byte aligned
>    * @param v2  first input vector
> + *            constraints: 32-byte aligned
>    * @param len number of elements
> + *            constraints: multiple of 16

Why are you adding this to the doxy for scalarproduct_float()? Those 
constraints are not correct for it. They are for scalarproduct_double() 
which you're adding now.

>    *
>    * @return sum of elementwise products
>    */
>   float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len);
>   
> +/**
> + * Return the scalar product of two vectors of doubles.
> + *
> + * @param v1  first input vector
> + * @param v2  first input vector
> + * @param len number of elements
> + *
> + * @return inner product of the vectors
> + */
> +double ff_scalarproduct_double_c(const double *v1, const double *v2,
> +                                 size_t len);
> +
>   void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp);
>   void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp);
>   void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict);
Rémi Denis-Courmont May 30, 2024, 7:21 p.m. UTC | #2
Le torstaina 30. toukokuuta 2024, 22.10.28 EEST James Almer a écrit :
> Why are you adding this to the doxy for scalarproduct_float()? Those
> constraints are not correct for it. They are for scalarproduct_double()
> which you're adding now.

Because copy-paste error.
James Almer May 30, 2024, 7:28 p.m. UTC | #3
On 5/30/2024 4:21 PM, Rémi Denis-Courmont wrote:
> Le torstaina 30. toukokuuta 2024, 22.10.28 EEST James Almer a écrit :
>> Why are you adding this to the doxy for scalarproduct_float()? Those
>> constraints are not correct for it. They are for scalarproduct_double()
>> which you're adding now.
> 
> Because copy-paste error.

Ok, patchset LGTM after you amend that.
Rémi Denis-Courmont May 30, 2024, 7:31 p.m. UTC | #4
Le torstaina 30. toukokuuta 2024, 22.06.55 EEST Rémi Denis-Courmont a écrit :
> The function pointer is appended to the structure for backward binary
> compatibility. Fortunately, this is allocated by libavutil, not by the
> user, so increasing the structure size is safe.
> ---
>  libavutil/float_dsp.c | 12 ++++++++++++
>  libavutil/float_dsp.h | 31 ++++++++++++++++++++++++++++++-
>  2 files changed, 42 insertions(+), 1 deletion(-)
> 
> diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
> index e9fb023466..08bbc85e3e 100644
> --- a/libavutil/float_dsp.c
> +++ b/libavutil/float_dsp.c
> @@ -132,6 +132,17 @@ float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
>      return p;
>  }
> 
> +double ff_scalarproduct_double_c(const double *v1, const double *v2,
> +                                 size_t len)
> +{
> +    double p = 0.0;
> +
> +    for (size_t i = 0; i < len; i++)
> +        p += v1[i] * v2[i];
> +
> +    return p;
> +}
> +

If somebody wants to write x86 assembly, they can probably borrow most of the 
code for evaluate_lls. It is a double precision scalar product with a little 
bit of extra fluff in the prologue.
James Almer May 30, 2024, 7:33 p.m. UTC | #5
On 5/30/2024 4:31 PM, Rémi Denis-Courmont wrote:
> Le torstaina 30. toukokuuta 2024, 22.06.55 EEST Rémi Denis-Courmont a écrit :
>> The function pointer is appended to the structure for backward binary
>> compatibility. Fortunately, this is allocated by libavutil, not by the
>> user, so increasing the structure size is safe.
>> ---
>>   libavutil/float_dsp.c | 12 ++++++++++++
>>   libavutil/float_dsp.h | 31 ++++++++++++++++++++++++++++++-
>>   2 files changed, 42 insertions(+), 1 deletion(-)
>>
>> diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
>> index e9fb023466..08bbc85e3e 100644
>> --- a/libavutil/float_dsp.c
>> +++ b/libavutil/float_dsp.c
>> @@ -132,6 +132,17 @@ float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
>>       return p;
>>   }
>>
>> +double ff_scalarproduct_double_c(const double *v1, const double *v2,
>> +                                 size_t len)
>> +{
>> +    double p = 0.0;
>> +
>> +    for (size_t i = 0; i < len; i++)
>> +        p += v1[i] * v2[i];
>> +
>> +    return p;
>> +}
>> +
> 
> If somebody wants to write x86 assembly, they can probably borrow most of the
> code for evaluate_lls. It is a double precision scalar product with a little
> bit of extra fluff in the prologue.

I already did, I'm just waiting for this set to be pushed before sending it.
diff mbox series

Patch

diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
index e9fb023466..08bbc85e3e 100644
--- a/libavutil/float_dsp.c
+++ b/libavutil/float_dsp.c
@@ -132,6 +132,17 @@  float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
     return p;
 }
 
+double ff_scalarproduct_double_c(const double *v1, const double *v2,
+                                 size_t len)
+{
+    double p = 0.0;
+
+    for (size_t i = 0; i < len; i++)
+        p += v1[i] * v2[i];
+
+    return p;
+}
+
 av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
 {
     AVFloatDSPContext *fdsp = av_mallocz(sizeof(AVFloatDSPContext));
@@ -149,6 +160,7 @@  av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
     fdsp->vector_fmul_reverse = vector_fmul_reverse_c;
     fdsp->butterflies_float = butterflies_float_c;
     fdsp->scalarproduct_float = avpriv_scalarproduct_float_c;
+    fdsp->scalarproduct_double = ff_scalarproduct_double_c;
 
 #if ARCH_AARCH64
     ff_float_dsp_init_aarch64(fdsp);
diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
index 342a8715c5..5053aa240d 100644
--- a/libavutil/float_dsp.h
+++ b/libavutil/float_dsp.h
@@ -19,6 +19,8 @@ 
 #ifndef AVUTIL_FLOAT_DSP_H
 #define AVUTIL_FLOAT_DSP_H
 
+#include <stddef.h>
+
 typedef struct AVFloatDSPContext {
     /**
      * Calculate the entry wise product of two vectors of floats and store the result in
@@ -187,19 +189,46 @@  typedef struct AVFloatDSPContext {
      */
     void (*vector_dmul)(double *dst, const double *src0, const double *src1,
                         int len);
+
+    /**
+     * Calculate the scalar product of two vectors of doubles.
+     *
+     * @param v1  first vector
+     * @param v2  second vector
+     * @param len length of vectors
+     *
+     * @return inner product of the vectors
+     */
+    double (*scalarproduct_double)(const double *v1, const double *v2,
+                                   size_t len);
 } AVFloatDSPContext;
 
 /**
- * Return the scalar product of two vectors.
+ * Return the scalar product of two vectors of floats.
  *
  * @param v1  first input vector
+ *            constraints: 32-byte aligned
  * @param v2  first input vector
+ *            constraints: 32-byte aligned
  * @param len number of elements
+ *            constraints: multiple of 16
  *
  * @return sum of elementwise products
  */
 float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len);
 
+/**
+ * Return the scalar product of two vectors of doubles.
+ *
+ * @param v1  first input vector
+ * @param v2  second input vector
+ * @param len number of elements
+ *
+ * @return inner product of the vectors
+ */
+double ff_scalarproduct_double_c(const double *v1, const double *v2,
+                                 size_t len);
+
 void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp);
 void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp);
 void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict);