diff mbox series

[FFmpeg-devel,PATCHv4,1/4] lavc/h263dsp: add DCT dequantisation functions

Message ID 20240610202322.786800-1-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,PATCHv4,1/4] lavc/h263dsp: add DCT dequantisation functions | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont June 10, 2024, 8:23 p.m. UTC
Note that optimised implementations of these functions will be taken
into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra}
are *not* overloaded by existing optimisations.
---
 libavcodec/h263dsp.c | 25 +++++++++++++++++++++++++
 libavcodec/h263dsp.h |  4 ++++
 2 files changed, 29 insertions(+)

Comments

Andreas Rheinhardt June 11, 2024, 10:30 a.m. UTC | #1
Rémi Denis-Courmont:
> Note that optimised implementations of these functions will be taken
> into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra}
> are *not* overloaded by existing optimisations.
> ---
>  libavcodec/h263dsp.c | 25 +++++++++++++++++++++++++
>  libavcodec/h263dsp.h |  4 ++++
>  2 files changed, 29 insertions(+)
> 
> diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
> index 6a13353499..755d7077bf 100644
> --- a/libavcodec/h263dsp.c
> +++ b/libavcodec/h263dsp.c
> @@ -23,6 +23,29 @@
>  #include "config.h"
>  #include "h263dsp.h"
>  
> +static void h263_dct_unquantize_inter_c(int16_t *block, size_t nCoeffs,
> +                                        int qmul, int qadd)
> +{
> +    for (size_t i = 0; i <= nCoeffs; i++) {
> +        int level = block[i];
> +
> +        if (level) {
> +            if (level < 0)
> +                level = level * qmul - qadd;
> +            else
> +                level = level * qmul + qadd;
> +            block[i] = level;
> +        }
> +    }
> +}
> +
> +static void h263_dct_unquantize_intra_c(int16_t *block, size_t nCoeffs,
> +                                        int qmul, int qadd)
> +{
> +    if (nCoeffs > 0)

Great, a branch. Why don't you just use a signed type?

> +        h263_dct_unquantize_inter_c(block + 1, nCoeffs - 1, qmul, qadd);
> +}
> +
>  const uint8_t ff_h263_loop_filter_strength[32] = {
>      0, 1, 1, 2, 2, 3, 3,  4,  4,  4,  5,  5,  6,  6,  7, 7,
>      7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12
> @@ -116,6 +139,8 @@ static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale)
>  
>  av_cold void ff_h263dsp_init(H263DSPContext *ctx)
>  {
> +    ctx->h263_dct_unquantize_intra = h263_dct_unquantize_intra_c;
> +    ctx->h263_dct_unquantize_inter = h263_dct_unquantize_inter_c;
>      ctx->h263_h_loop_filter = h263_h_loop_filter_c;
>      ctx->h263_v_loop_filter = h263_v_loop_filter_c;
>  
> diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h
> index 2dccd23392..0ecbe83314 100644
> --- a/libavcodec/h263dsp.h
> +++ b/libavcodec/h263dsp.h
> @@ -24,6 +24,10 @@
>  extern const uint8_t ff_h263_loop_filter_strength[32];
>  
>  typedef struct H263DSPContext {
> +    void (*h263_dct_unquantize_intra)(int16_t *block /* align 16 */,
> +                                      size_t len, int mul, int add);
> +    void (*h263_dct_unquantize_inter)(int16_t *block /* align 16 */,
> +                                      size_t len, int mul, int add);
>      void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
>      void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
>  } H263DSPContext;
Rémi Denis-Courmont June 12, 2024, 3:41 a.m. UTC | #2
Le tiistaina 11. kesäkuuta 2024, 13.30.28 EEST Andreas Rheinhardt a écrit :
> > +static void h263_dct_unquantize_intra_c(int16_t *block, size_t nCoeffs,
> > +                                        int qmul, int qadd)
> > +{
> > +    if (nCoeffs > 0)
> 
> Great, a branch.

Okay so you want sarcasms, have at it.

If you had ever looked at optimised compilations, you would have noticed that 
optimising compiler emits that branch implicitly in scenarii like the current 
code to deal with that case, thus rewriting the code as

if (nCoeffs > 0) {
    do { ...; i++ } while (i < nCoeffs);
}

This patch is simply making the branch explicit so we can share the otherwise 
identical C code.

> Why don't you just use a signed type?

Just so you know, the correct type for positive sizes is size_t.
You know, a size, like *not* a stride.
Andreas Rheinhardt June 12, 2024, 3:58 a.m. UTC | #3
Rémi Denis-Courmont:
> Le tiistaina 11. kesäkuuta 2024, 13.30.28 EEST Andreas Rheinhardt a écrit :
>>> +static void h263_dct_unquantize_intra_c(int16_t *block, size_t nCoeffs,
>>> +                                        int qmul, int qadd)
>>> +{
>>> +    if (nCoeffs > 0)
>>
>> Great, a branch.
> 
> Okay so you want sarcasms, have at it.
> 
> If you had ever looked at optimised compilations, you would have noticed that 
> optimising compiler emits that branch implicitly in scenarii like the current 
> code to deal with that case, thus rewriting the code as
> 
> if (nCoeffs > 0) {
>     do { ...; i++ } while (i < nCoeffs);
> }
> 
> This patch is simply making the branch explicit so we can share the otherwise 
> identical C code.

With the current code the initial branch can be avoided in the ac_pred case.

> 
>> Why don't you just use a signed type?
> 
> Just so you know, the correct type for positive sizes is size_t.
> You know, a size, like *not* a stride.
>
James Almer June 12, 2024, 5:28 p.m. UTC | #4
On 6/10/2024 5:23 PM, Rémi Denis-Courmont wrote:
> Note that optimised implementations of these functions will be taken
> into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra}
> are *not* overloaded by existing optimisations.
> ---
>   libavcodec/h263dsp.c | 25 +++++++++++++++++++++++++
>   libavcodec/h263dsp.h |  4 ++++
>   2 files changed, 29 insertions(+)
> 
> diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
> index 6a13353499..755d7077bf 100644
> --- a/libavcodec/h263dsp.c
> +++ b/libavcodec/h263dsp.c
> @@ -23,6 +23,29 @@
>   #include "config.h"
>   #include "h263dsp.h"
>   
> +static void h263_dct_unquantize_inter_c(int16_t *block, size_t nCoeffs,
> +                                        int qmul, int qadd)
> +{
> +    for (size_t i = 0; i <= nCoeffs; i++) {
> +        int level = block[i];
> +
> +        if (level) {
> +            if (level < 0)
> +                level = level * qmul - qadd;
> +            else
> +                level = level * qmul + qadd;
> +            block[i] = level;
> +        }
> +    }
> +}
> +
> +static void h263_dct_unquantize_intra_c(int16_t *block, size_t nCoeffs,
> +                                        int qmul, int qadd)
> +{
> +    if (nCoeffs > 0)
> +        h263_dct_unquantize_inter_c(block + 1, nCoeffs - 1, qmul, qadd);

I think you can avoid this branch by doing:

     for (size_t i = 1; i <= nCoeffs; i++) {
         int level = block[i];

         if (level) {
             if (level < 0)
                 level = level * qmul - qadd;
             else
                 level = level * qmul + qadd;
             block[i] = level;
         }
     }

> +}
> +
>   const uint8_t ff_h263_loop_filter_strength[32] = {
>       0, 1, 1, 2, 2, 3, 3,  4,  4,  4,  5,  5,  6,  6,  7, 7,
>       7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12
> @@ -116,6 +139,8 @@ static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale)
>   
>   av_cold void ff_h263dsp_init(H263DSPContext *ctx)
>   {
> +    ctx->h263_dct_unquantize_intra = h263_dct_unquantize_intra_c;
> +    ctx->h263_dct_unquantize_inter = h263_dct_unquantize_inter_c;
>       ctx->h263_h_loop_filter = h263_h_loop_filter_c;
>       ctx->h263_v_loop_filter = h263_v_loop_filter_c;
>   
> diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h
> index 2dccd23392..0ecbe83314 100644
> --- a/libavcodec/h263dsp.h
> +++ b/libavcodec/h263dsp.h
> @@ -24,6 +24,10 @@
>   extern const uint8_t ff_h263_loop_filter_strength[32];
>   
>   typedef struct H263DSPContext {
> +    void (*h263_dct_unquantize_intra)(int16_t *block /* align 16 */,
> +                                      size_t len, int mul, int add);
> +    void (*h263_dct_unquantize_inter)(int16_t *block /* align 16 */,
> +                                      size_t len, int mul, int add);
>       void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
>       void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
>   } H263DSPContext;
diff mbox series

Patch

diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
index 6a13353499..755d7077bf 100644
--- a/libavcodec/h263dsp.c
+++ b/libavcodec/h263dsp.c
@@ -23,6 +23,29 @@ 
 #include "config.h"
 #include "h263dsp.h"
 
+static void h263_dct_unquantize_inter_c(int16_t *block, size_t nCoeffs,
+                                        int qmul, int qadd)
+{
+    for (size_t i = 0; i <= nCoeffs; i++) {
+        int level = block[i];
+
+        if (level) {
+            if (level < 0)
+                level = level * qmul - qadd;
+            else
+                level = level * qmul + qadd;
+            block[i] = level;
+        }
+    }
+}
+
+static void h263_dct_unquantize_intra_c(int16_t *block, size_t nCoeffs,
+                                        int qmul, int qadd)
+{
+    if (nCoeffs > 0)
+        h263_dct_unquantize_inter_c(block + 1, nCoeffs - 1, qmul, qadd);
+}
+
 const uint8_t ff_h263_loop_filter_strength[32] = {
     0, 1, 1, 2, 2, 3, 3,  4,  4,  4,  5,  5,  6,  6,  7, 7,
     7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12
@@ -116,6 +139,8 @@  static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale)
 
 av_cold void ff_h263dsp_init(H263DSPContext *ctx)
 {
+    ctx->h263_dct_unquantize_intra = h263_dct_unquantize_intra_c;
+    ctx->h263_dct_unquantize_inter = h263_dct_unquantize_inter_c;
     ctx->h263_h_loop_filter = h263_h_loop_filter_c;
     ctx->h263_v_loop_filter = h263_v_loop_filter_c;
 
diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h
index 2dccd23392..0ecbe83314 100644
--- a/libavcodec/h263dsp.h
+++ b/libavcodec/h263dsp.h
@@ -24,6 +24,10 @@ 
 extern const uint8_t ff_h263_loop_filter_strength[32];
 
 typedef struct H263DSPContext {
+    void (*h263_dct_unquantize_intra)(int16_t *block /* align 16 */,
+                                      size_t len, int mul, int add);
+    void (*h263_dct_unquantize_inter)(int16_t *block /* align 16 */,
+                                      size_t len, int mul, int add);
     void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
     void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
 } H263DSPContext;