Message ID | 20240612044723.175502-2-remi@remlab.net |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,PATCHv5,1/4] lavc/h263dsp: add DCT dequantisation functions | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
On 6/12/2024 1:47 AM, Rémi Denis-Courmont wrote: > --- > configure | 4 ++-- > libavcodec/mpegvideo.c | 46 +++++++++++------------------------------- > 2 files changed, 14 insertions(+), 36 deletions(-) > > diff --git a/configure b/configure > index 6baa9b0646..eb9d1b1f5d 100755 > --- a/configure > +++ b/configure > @@ -2957,8 +2957,8 @@ ftr_decoder_select="adts_header" > g2m_decoder_deps="zlib" > g2m_decoder_select="blockdsp idctdsp jpegtables" > g729_decoder_select="audiodsp" > -h261_decoder_select="mpegvideodec" > -h261_encoder_select="mpegvideoenc" > +h261_decoder_select="h263dsp mpegvideodec" > +h261_encoder_select="h263dsp mpegvideoenc" > h263_decoder_select="h263_parser h263dsp mpegvideodec qpeldsp" > h263_encoder_select="h263dsp mpegvideoenc" > h263i_decoder_select="h263_decoder" > diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c > index 7af823b8bd..b35fd37083 100644 > --- a/libavcodec/mpegvideo.c > +++ b/libavcodec/mpegvideo.c > @@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, > static void dct_unquantize_h263_intra_c(MpegEncContext *s, > int16_t *block, int n, int qscale) > { > - int i, level, qmul, qadd; > - int nCoeffs; > + int qmul = qscale << 1; > + int qadd, nCoeffs; > > av_assert2(s->block_last_index[n]>=0 || s->h263_aic); > > - qmul = qscale << 1; > - > if (!s->h263_aic) { > block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale; > qadd = (qscale - 1) | 1; > @@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s, > qadd = 0; > } > if(s->ac_pred) > - nCoeffs=63; > + nCoeffs = 64; > else > - nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; > + nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1; > > - for(i=1; i<=nCoeffs; i++) { > - level = block[i]; > - if (level) { > - if (level < 0) { > - level = level * qmul - qadd; > - } else { > - level = level * qmul + qadd; > - } > - block[i] = level; > - } > - } > + s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd); Looking further into this, you're adding a function pointer call in a function that's already called from a function pointer. And both x86 and arm have asm optimized versions of this entire method, which includes all the setup before the loop. Can't you do the same for riscv? Is there anything preventing you from accessing fields at specific offsets within MpegEncContext? > } > > static void dct_unquantize_h263_inter_c(MpegEncContext *s, > int16_t *block, int n, int qscale) > { > - int i, level, qmul, qadd; > + int qmul = qscale << 1; > + int qadd = (qscale - 1) | 1; > int nCoeffs; > > av_assert2(s->block_last_index[n]>=0); > > - qadd = (qscale - 1) | 1; > - qmul = qscale << 1; > - > - nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; > - > - for(i=0; i<=nCoeffs; i++) { > - level = block[i]; > - if (level) { > - if (level < 0) { > - level = level * qmul - qadd; > - } else { > - level = level * qmul + qadd; > - } > - block[i] = level; > - } > - } > + nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]] + 1; > + s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd); > } > > > @@ -275,6 +250,9 @@ static void gray8(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h) > static av_cold int dct_init(MpegEncContext *s) > { > ff_blockdsp_init(&s->bdsp); > +#if CONFIG_H263DSP > + ff_h263dsp_init(&s->h263dsp); > +#endif > ff_hpeldsp_init(&s->hdsp, s->avctx->flags); > ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample); >
Le 14 juin 2024 17:33:16 GMT+03:00, James Almer <jamrial@gmail.com> a écrit : >On 6/12/2024 1:47 AM, Rémi Denis-Courmont wrote: >> --- >> configure | 4 ++-- >> libavcodec/mpegvideo.c | 46 +++++++++++------------------------------- >> 2 files changed, 14 insertions(+), 36 deletions(-) >> >> diff --git a/configure b/configure >> index 6baa9b0646..eb9d1b1f5d 100755 >> --- a/configure >> +++ b/configure >> @@ -2957,8 +2957,8 @@ ftr_decoder_select="adts_header" >> g2m_decoder_deps="zlib" >> g2m_decoder_select="blockdsp idctdsp jpegtables" >> g729_decoder_select="audiodsp" >> -h261_decoder_select="mpegvideodec" >> -h261_encoder_select="mpegvideoenc" >> +h261_decoder_select="h263dsp mpegvideodec" >> +h261_encoder_select="h263dsp mpegvideoenc" >> h263_decoder_select="h263_parser h263dsp mpegvideodec qpeldsp" >> h263_encoder_select="h263dsp mpegvideoenc" >> h263i_decoder_select="h263_decoder" >> diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c >> index 7af823b8bd..b35fd37083 100644 >> --- a/libavcodec/mpegvideo.c >> +++ b/libavcodec/mpegvideo.c >> @@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, >> static void dct_unquantize_h263_intra_c(MpegEncContext *s, >> int16_t *block, int n, int qscale) >> { >> - int i, level, qmul, qadd; >> - int nCoeffs; >> + int qmul = qscale << 1; >> + int qadd, nCoeffs; >> av_assert2(s->block_last_index[n]>=0 || s->h263_aic); >> - qmul = qscale << 1; >> - >> if (!s->h263_aic) { >> block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale; >> qadd = (qscale - 1) | 1; >> @@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s, >> qadd = 0; >> } >> if(s->ac_pred) >> - nCoeffs=63; >> + nCoeffs = 64; >> else >> - nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; >> + nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1; >> - for(i=1; i<=nCoeffs; i++) { >> - level = block[i]; >> - if (level) { >> - if (level < 0) { >> - level = level * qmul - qadd; >> - } else { >> - level = level * qmul + qadd; >> - } >> - block[i] = level; >> - } >> - } >> + s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd); > >Looking further into this, you're adding a function pointer call in a function that's already called from a function pointer. And both x86 and arm have asm optimized versions of this entire method, which includes all the setup before the loop. I am not interested in copying existing bad design. Note that the Arm code uses intrinsics. I don't want to use RVV intrinsics especially just for this. And x86 only has MMX. >Can't you do the same for riscv? Sure, RV can address fixed offsets up to +/-2 KiB. If someone *else* adds a assembler-friendly header file that defines the offsets to the relevant context fields, and rewrites the checkasm code to match, that is. > Is there anything preventing you from accessing fields at specific offsets within MpegEncContext? My strong belief that that would be technically wrong, yes.
Le perjantaina 14. kesäkuuta 2024, 17.45.50 EEST Rémi Denis-Courmont a écrit :
> And x86 only has MMX.
And the x86 code uses inline assembler. That's not viable on any ISA with sane
conventions such as Arm or RISC-V. The compiler will reject our vector
clobbers, unless the Vector extension is included in the compilation target.
But then that breaks run-time detection, and will be rejected by all Linux
distros.
On 6/14/2024 12:11 PM, Rémi Denis-Courmont wrote: > Le perjantaina 14. kesäkuuta 2024, 17.45.50 EEST Rémi Denis-Courmont a écrit : >> And x86 only has MMX. > > And the x86 code uses inline assembler. That's not viable on any ISA with sane > conventions such as Arm or RISC-V. The compiler will reject our vector > clobbers, unless the Vector extension is included in the compilation target. > But then that breaks run-time detection, and will be rejected by all Linux > distros. I was thinking on removing that inline version and probably replacing it with sse2, using the dsp prototypes you introduce in this set.
Le perjantaina 14. kesäkuuta 2024, 18.41.48 EEST James Almer a écrit : > On 6/14/2024 12:11 PM, Rémi Denis-Courmont wrote: > > Le perjantaina 14. kesäkuuta 2024, 17.45.50 EEST Rémi Denis-Courmont a écrit : > >> And x86 only has MMX. > > > > And the x86 code uses inline assembler. That's not viable on any ISA with > > sane conventions such as Arm or RISC-V. The compiler will reject our > > vector clobbers, unless the Vector extension is included in the > > compilation target. But then that breaks run-time detection, and will be > > rejected by all Linux distros. > > I was thinking on removing that inline version and probably replacing it > with sse2, using the dsp prototypes you introduce in this set. I saw that, but we can't have the cake and eat it. Those DSP callbacks can't exist without adding a layer of indirection.
diff --git a/configure b/configure index 6baa9b0646..eb9d1b1f5d 100755 --- a/configure +++ b/configure @@ -2957,8 +2957,8 @@ ftr_decoder_select="adts_header" g2m_decoder_deps="zlib" g2m_decoder_select="blockdsp idctdsp jpegtables" g729_decoder_select="audiodsp" -h261_decoder_select="mpegvideodec" -h261_encoder_select="mpegvideoenc" +h261_decoder_select="h263dsp mpegvideodec" +h261_encoder_select="h263dsp mpegvideoenc" h263_decoder_select="h263_parser h263dsp mpegvideodec qpeldsp" h263_encoder_select="h263dsp mpegvideoenc" h263i_decoder_select="h263_decoder" diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index 7af823b8bd..b35fd37083 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, static void dct_unquantize_h263_intra_c(MpegEncContext *s, int16_t *block, int n, int qscale) { - int i, level, qmul, qadd; - int nCoeffs; + int qmul = qscale << 1; + int qadd, nCoeffs; av_assert2(s->block_last_index[n]>=0 || s->h263_aic); - qmul = qscale << 1; - if (!s->h263_aic) { block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale; qadd = (qscale - 1) | 1; @@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s, qadd = 0; } if(s->ac_pred) - nCoeffs=63; + nCoeffs = 64; else - nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; + nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1; - for(i=1; i<=nCoeffs; i++) { - level = block[i]; - if (level) { - if (level < 0) { - level = level * qmul - qadd; - } else { - level = level * qmul + qadd; - } - block[i] = level; - } - } + s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd); } static void dct_unquantize_h263_inter_c(MpegEncContext *s, int16_t *block, int n, int qscale) { - int i, level, qmul, qadd; + int qmul = qscale << 1; + int qadd = (qscale - 1) | 1; int nCoeffs; av_assert2(s->block_last_index[n]>=0); - qadd = (qscale - 1) | 1; - qmul = qscale << 1; - - nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; - - for(i=0; i<=nCoeffs; i++) { - level = block[i]; - if (level) { - if (level < 0) { - level = level * qmul - qadd; - } else { - level = level * qmul + qadd; - } - block[i] = level; - } - } + nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]] + 1; + s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd); } @@ -275,6 +250,9 @@ static void gray8(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h) static av_cold int dct_init(MpegEncContext *s) { ff_blockdsp_init(&s->bdsp); +#if CONFIG_H263DSP + ff_h263dsp_init(&s->h263dsp); +#endif ff_hpeldsp_init(&s->hdsp, s->avctx->flags); ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);