diff mbox series

[FFmpeg-devel,2/4] lavc/mpegvideo: use H263DSP dequant function

Message ID 20240612044723.175502-2-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,PATCHv5,1/4] lavc/h263dsp: add DCT dequantisation functions | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont June 12, 2024, 4:47 a.m. UTC
---
 configure              |  4 ++--
 libavcodec/mpegvideo.c | 46 +++++++++++-------------------------------
 2 files changed, 14 insertions(+), 36 deletions(-)

Comments

James Almer June 14, 2024, 2:33 p.m. UTC | #1
On 6/12/2024 1:47 AM, Rémi Denis-Courmont wrote:
> ---
>   configure              |  4 ++--
>   libavcodec/mpegvideo.c | 46 +++++++++++-------------------------------
>   2 files changed, 14 insertions(+), 36 deletions(-)
> 
> diff --git a/configure b/configure
> index 6baa9b0646..eb9d1b1f5d 100755
> --- a/configure
> +++ b/configure
> @@ -2957,8 +2957,8 @@ ftr_decoder_select="adts_header"
>   g2m_decoder_deps="zlib"
>   g2m_decoder_select="blockdsp idctdsp jpegtables"
>   g729_decoder_select="audiodsp"
> -h261_decoder_select="mpegvideodec"
> -h261_encoder_select="mpegvideoenc"
> +h261_decoder_select="h263dsp mpegvideodec"
> +h261_encoder_select="h263dsp mpegvideoenc"
>   h263_decoder_select="h263_parser h263dsp mpegvideodec qpeldsp"
>   h263_encoder_select="h263dsp mpegvideoenc"
>   h263i_decoder_select="h263_decoder"
> diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
> index 7af823b8bd..b35fd37083 100644
> --- a/libavcodec/mpegvideo.c
> +++ b/libavcodec/mpegvideo.c
> @@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
>   static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>                                     int16_t *block, int n, int qscale)
>   {
> -    int i, level, qmul, qadd;
> -    int nCoeffs;
> +    int qmul = qscale << 1;
> +    int qadd, nCoeffs;
>   
>       av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
>   
> -    qmul = qscale << 1;
> -
>       if (!s->h263_aic) {
>           block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
>           qadd = (qscale - 1) | 1;
> @@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>           qadd = 0;
>       }
>       if(s->ac_pred)
> -        nCoeffs=63;
> +        nCoeffs = 64;
>       else
> -        nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
> +        nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
>   
> -    for(i=1; i<=nCoeffs; i++) {
> -        level = block[i];
> -        if (level) {
> -            if (level < 0) {
> -                level = level * qmul - qadd;
> -            } else {
> -                level = level * qmul + qadd;
> -            }
> -            block[i] = level;
> -        }
> -    }
> +    s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd);

Looking further into this, you're adding a function pointer call in a 
function that's already called from a function pointer. And both x86 and 
arm have asm optimized versions of this entire method, which includes 
all the setup before the loop.

Can't you do the same for riscv? Is there anything preventing you from 
accessing fields at specific offsets within MpegEncContext?

>   }
>   
>   static void dct_unquantize_h263_inter_c(MpegEncContext *s,
>                                     int16_t *block, int n, int qscale)
>   {
> -    int i, level, qmul, qadd;
> +    int qmul = qscale << 1;
> +    int qadd = (qscale - 1) | 1;
>       int nCoeffs;
>   
>       av_assert2(s->block_last_index[n]>=0);
>   
> -    qadd = (qscale - 1) | 1;
> -    qmul = qscale << 1;
> -
> -    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
> -
> -    for(i=0; i<=nCoeffs; i++) {
> -        level = block[i];
> -        if (level) {
> -            if (level < 0) {
> -                level = level * qmul - qadd;
> -            } else {
> -                level = level * qmul + qadd;
> -            }
> -            block[i] = level;
> -        }
> -    }
> +    nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]] + 1;
> +    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
>   }
>   
>   
> @@ -275,6 +250,9 @@ static void gray8(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h)
>   static av_cold int dct_init(MpegEncContext *s)
>   {
>       ff_blockdsp_init(&s->bdsp);
> +#if CONFIG_H263DSP
> +    ff_h263dsp_init(&s->h263dsp);
> +#endif
>       ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
>       ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);
>
Rémi Denis-Courmont June 14, 2024, 2:45 p.m. UTC | #2
Le 14 juin 2024 17:33:16 GMT+03:00, James Almer <jamrial@gmail.com> a écrit :
>On 6/12/2024 1:47 AM, Rémi Denis-Courmont wrote:
>> ---
>>   configure              |  4 ++--
>>   libavcodec/mpegvideo.c | 46 +++++++++++-------------------------------
>>   2 files changed, 14 insertions(+), 36 deletions(-)
>> 
>> diff --git a/configure b/configure
>> index 6baa9b0646..eb9d1b1f5d 100755
>> --- a/configure
>> +++ b/configure
>> @@ -2957,8 +2957,8 @@ ftr_decoder_select="adts_header"
>>   g2m_decoder_deps="zlib"
>>   g2m_decoder_select="blockdsp idctdsp jpegtables"
>>   g729_decoder_select="audiodsp"
>> -h261_decoder_select="mpegvideodec"
>> -h261_encoder_select="mpegvideoenc"
>> +h261_decoder_select="h263dsp mpegvideodec"
>> +h261_encoder_select="h263dsp mpegvideoenc"
>>   h263_decoder_select="h263_parser h263dsp mpegvideodec qpeldsp"
>>   h263_encoder_select="h263dsp mpegvideoenc"
>>   h263i_decoder_select="h263_decoder"
>> diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
>> index 7af823b8bd..b35fd37083 100644
>> --- a/libavcodec/mpegvideo.c
>> +++ b/libavcodec/mpegvideo.c
>> @@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
>>   static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>>                                     int16_t *block, int n, int qscale)
>>   {
>> -    int i, level, qmul, qadd;
>> -    int nCoeffs;
>> +    int qmul = qscale << 1;
>> +    int qadd, nCoeffs;
>>         av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
>>   -    qmul = qscale << 1;
>> -
>>       if (!s->h263_aic) {
>>           block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
>>           qadd = (qscale - 1) | 1;
>> @@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>>           qadd = 0;
>>       }
>>       if(s->ac_pred)
>> -        nCoeffs=63;
>> +        nCoeffs = 64;
>>       else
>> -        nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
>> +        nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
>>   -    for(i=1; i<=nCoeffs; i++) {
>> -        level = block[i];
>> -        if (level) {
>> -            if (level < 0) {
>> -                level = level * qmul - qadd;
>> -            } else {
>> -                level = level * qmul + qadd;
>> -            }
>> -            block[i] = level;
>> -        }
>> -    }
>> +    s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd);
>
>Looking further into this, you're adding a function pointer call in a function that's already called from a function pointer. And both x86 and arm have asm optimized versions of this entire method, which includes all the setup before the loop.

I am not interested in copying existing bad design. Note that the Arm code uses intrinsics. I don't want to use RVV intrinsics especially just for this. And x86 only has MMX.

>Can't you do the same for riscv?

Sure, RV can address fixed offsets up to +/-2 KiB. If someone *else* adds a assembler-friendly header file that defines the offsets to the relevant context fields, and rewrites the checkasm code to match, that is.

> Is there anything preventing you from accessing fields at specific offsets within MpegEncContext?

My strong belief that that would be technically wrong, yes.
Rémi Denis-Courmont June 14, 2024, 3:11 p.m. UTC | #3
Le perjantaina 14. kesäkuuta 2024, 17.45.50 EEST Rémi Denis-Courmont a écrit :
> And x86 only has MMX.

And the x86 code uses inline assembler. That's not viable on any ISA with sane 
conventions such as Arm or RISC-V. The compiler will reject our vector 
clobbers, unless the Vector extension is included in the compilation target. 
But then that breaks run-time detection, and will be rejected by all Linux 
distros.
James Almer June 14, 2024, 3:41 p.m. UTC | #4
On 6/14/2024 12:11 PM, Rémi Denis-Courmont wrote:
> Le perjantaina 14. kesäkuuta 2024, 17.45.50 EEST Rémi Denis-Courmont a écrit :
>> And x86 only has MMX.
> 
> And the x86 code uses inline assembler. That's not viable on any ISA with sane
> conventions such as Arm or RISC-V. The compiler will reject our vector
> clobbers, unless the Vector extension is included in the compilation target.
> But then that breaks run-time detection, and will be rejected by all Linux
> distros.

I was thinking on removing that inline version and probably replacing it 
with sse2, using the dsp prototypes you introduce in this set.
Rémi Denis-Courmont June 14, 2024, 7:03 p.m. UTC | #5
Le perjantaina 14. kesäkuuta 2024, 18.41.48 EEST James Almer a écrit :
> On 6/14/2024 12:11 PM, Rémi Denis-Courmont wrote:
> > Le perjantaina 14. kesäkuuta 2024, 17.45.50 EEST Rémi Denis-Courmont a 
écrit :
> >> And x86 only has MMX.
> > 
> > And the x86 code uses inline assembler. That's not viable on any ISA with
> > sane conventions such as Arm or RISC-V. The compiler will reject our
> > vector clobbers, unless the Vector extension is included in the
> > compilation target. But then that breaks run-time detection, and will be
> > rejected by all Linux distros.
> 
> I was thinking on removing that inline version and probably replacing it
> with sse2, using the dsp prototypes you introduce in this set.

I saw that, but we can't have the cake and eat it. Those DSP callbacks can't 
exist without adding a layer of indirection.
diff mbox series

Patch

diff --git a/configure b/configure
index 6baa9b0646..eb9d1b1f5d 100755
--- a/configure
+++ b/configure
@@ -2957,8 +2957,8 @@  ftr_decoder_select="adts_header"
 g2m_decoder_deps="zlib"
 g2m_decoder_select="blockdsp idctdsp jpegtables"
 g729_decoder_select="audiodsp"
-h261_decoder_select="mpegvideodec"
-h261_encoder_select="mpegvideoenc"
+h261_decoder_select="h263dsp mpegvideodec"
+h261_encoder_select="h263dsp mpegvideoenc"
 h263_decoder_select="h263_parser h263dsp mpegvideodec qpeldsp"
 h263_encoder_select="h263dsp mpegvideoenc"
 h263i_decoder_select="h263_decoder"
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 7af823b8bd..b35fd37083 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -201,13 +201,11 @@  static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
                                   int16_t *block, int n, int qscale)
 {
-    int i, level, qmul, qadd;
-    int nCoeffs;
+    int qmul = qscale << 1;
+    int qadd, nCoeffs;
 
     av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
 
-    qmul = qscale << 1;
-
     if (!s->h263_aic) {
         block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
         qadd = (qscale - 1) | 1;
@@ -215,47 +213,24 @@  static void dct_unquantize_h263_intra_c(MpegEncContext *s,
         qadd = 0;
     }
     if(s->ac_pred)
-        nCoeffs=63;
+        nCoeffs = 64;
     else
-        nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
+        nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
 
-    for(i=1; i<=nCoeffs; i++) {
-        level = block[i];
-        if (level) {
-            if (level < 0) {
-                level = level * qmul - qadd;
-            } else {
-                level = level * qmul + qadd;
-            }
-            block[i] = level;
-        }
-    }
+    s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd);
 }
 
 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
                                   int16_t *block, int n, int qscale)
 {
-    int i, level, qmul, qadd;
+    int qmul = qscale << 1;
+    int qadd = (qscale - 1) | 1;
     int nCoeffs;
 
     av_assert2(s->block_last_index[n]>=0);
 
-    qadd = (qscale - 1) | 1;
-    qmul = qscale << 1;
-
-    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
-
-    for(i=0; i<=nCoeffs; i++) {
-        level = block[i];
-        if (level) {
-            if (level < 0) {
-                level = level * qmul - qadd;
-            } else {
-                level = level * qmul + qadd;
-            }
-            block[i] = level;
-        }
-    }
+    nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]] + 1;
+    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
 }
 
 
@@ -275,6 +250,9 @@  static void gray8(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h)
 static av_cold int dct_init(MpegEncContext *s)
 {
     ff_blockdsp_init(&s->bdsp);
+#if CONFIG_H263DSP
+    ff_h263dsp_init(&s->h263dsp);
+#endif
     ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
     ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);