Message ID | 20240609162347.2541907-2-remi@remlab.net |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,PATCHv3,1/4] lavc/h263dsp: add DCT dequantisation functions | expand |
Context | Check | Description |
---|---|---|
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
On Sun, Jun 09, 2024 at 07:23:45PM +0300, Rémi Denis-Courmont wrote: > --- > libavcodec/mpegvideo.c | 44 ++++++++++-------------------------------- > 1 file changed, 10 insertions(+), 34 deletions(-) > > diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c > index 7af823b8bd..9be0fecc8d 100644 > --- a/libavcodec/mpegvideo.c > +++ b/libavcodec/mpegvideo.c > @@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, > static void dct_unquantize_h263_intra_c(MpegEncContext *s, > int16_t *block, int n, int qscale) > { > - int i, level, qmul, qadd; > - int nCoeffs; > + int qmul = qscale << 1; > + int qadd, nCoeffs; > > av_assert2(s->block_last_index[n]>=0 || s->h263_aic); > > - qmul = qscale << 1; > - > if (!s->h263_aic) { > block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale; > qadd = (qscale - 1) | 1; > @@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s, > qadd = 0; > } > if(s->ac_pred) > - nCoeffs=63; > + nCoeffs = 64; > else > - nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; > + nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1; > > - for(i=1; i<=nCoeffs; i++) { > - level = block[i]; > - if (level) { > - if (level < 0) { > - level = level * qmul - qadd; > - } else { > - level = level * qmul + qadd; > - } > - block[i] = level; > - } > - } > + s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd); > } > > static void dct_unquantize_h263_inter_c(MpegEncContext *s, > int16_t *block, int n, int qscale) > { > - int i, level, qmul, qadd; > + int qmul = qscale << 1; > + int qadd = (qscale - 1) | 1; > int nCoeffs; > > av_assert2(s->block_last_index[n]>=0); > > - qadd = (qscale - 1) | 1; > - qmul = qscale << 1; > - > - nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; > - > - for(i=0; i<=nCoeffs; i++) { > - level = block[i]; > - if (level) { > - if (level < 0) { > - level = level * qmul - qadd; > - } else { > - level = level * qmul + qadd; > - } > - block[i] = level; > - } > - } > + nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]] + 1; > + s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd); It would be better if the "+ 1" would not be needed and teh functions would keep using "<=" thx [...]
Le 10 juin 2024 14:41:31 GMT+03:00, Michael Niedermayer <michael@niedermayer.cc> a écrit : >On Sun, Jun 09, 2024 at 07:23:45PM +0300, Rémi Denis-Courmont wrote: >> --- >> libavcodec/mpegvideo.c | 44 ++++++++++-------------------------------- >> 1 file changed, 10 insertions(+), 34 deletions(-) >> >> diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c >> index 7af823b8bd..9be0fecc8d 100644 >> --- a/libavcodec/mpegvideo.c >> +++ b/libavcodec/mpegvideo.c >> @@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, >> static void dct_unquantize_h263_intra_c(MpegEncContext *s, >> int16_t *block, int n, int qscale) >> { >> - int i, level, qmul, qadd; >> - int nCoeffs; >> + int qmul = qscale << 1; >> + int qadd, nCoeffs; >> >> av_assert2(s->block_last_index[n]>=0 || s->h263_aic); >> >> - qmul = qscale << 1; >> - >> if (!s->h263_aic) { >> block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale; >> qadd = (qscale - 1) | 1; >> @@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s, >> qadd = 0; >> } >> if(s->ac_pred) >> - nCoeffs=63; >> + nCoeffs = 64; >> else >> - nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; >> + nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1; >> >> - for(i=1; i<=nCoeffs; i++) { >> - level = block[i]; >> - if (level) { >> - if (level < 0) { >> - level = level * qmul - qadd; >> - } else { >> - level = level * qmul + qadd; >> - } >> - block[i] = level; >> - } >> - } >> + s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd); >> } >> >> static void dct_unquantize_h263_inter_c(MpegEncContext *s, >> int16_t *block, int n, int qscale) >> { >> - int i, level, qmul, qadd; >> + int qmul = qscale << 1; >> + int qadd = (qscale - 1) | 1; >> int nCoeffs; >> >> av_assert2(s->block_last_index[n]>=0); >> >> - qadd = (qscale - 1) | 1; >> - qmul = qscale << 1; >> - >> - nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; >> - >> - for(i=0; i<=nCoeffs; i++) { >> - level = block[i]; >> - if (level) { >> - if (level < 0) { >> - level = level * qmul - qadd; >> - } else { >> - level = level * qmul + qadd; >> - } >> - block[i] = level; >> - } >> - } >> + nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]] + 1; >> + s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd); > >It would be better if the "+ 1" would not be needed and teh functions >would keep using "<=" Why? AFAICT, that just makes the assembler more confusing by requiring an unusual boundary check. And then for SVE and RVV, adding one is unavoidable possible, as we need the element count to predicate the vector ops. So we'd just end up having to add one in the assembler instead of C.
On Mon, Jun 10, 2024 at 03:14:14PM +0300, Rémi Denis-Courmont wrote: > > > Le 10 juin 2024 14:41:31 GMT+03:00, Michael Niedermayer <michael@niedermayer.cc> a écrit : > >On Sun, Jun 09, 2024 at 07:23:45PM +0300, Rémi Denis-Courmont wrote: > >> --- > >> libavcodec/mpegvideo.c | 44 ++++++++++-------------------------------- > >> 1 file changed, 10 insertions(+), 34 deletions(-) > >> > >> diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c > >> index 7af823b8bd..9be0fecc8d 100644 > >> --- a/libavcodec/mpegvideo.c > >> +++ b/libavcodec/mpegvideo.c > >> @@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, > >> static void dct_unquantize_h263_intra_c(MpegEncContext *s, > >> int16_t *block, int n, int qscale) > >> { > >> - int i, level, qmul, qadd; > >> - int nCoeffs; > >> + int qmul = qscale << 1; > >> + int qadd, nCoeffs; > >> > >> av_assert2(s->block_last_index[n]>=0 || s->h263_aic); > >> > >> - qmul = qscale << 1; > >> - > >> if (!s->h263_aic) { > >> block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale; > >> qadd = (qscale - 1) | 1; > >> @@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s, > >> qadd = 0; > >> } > >> if(s->ac_pred) > >> - nCoeffs=63; > >> + nCoeffs = 64; > >> else > >> - nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; > >> + nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1; > >> > >> - for(i=1; i<=nCoeffs; i++) { > >> - level = block[i]; > >> - if (level) { > >> - if (level < 0) { > >> - level = level * qmul - qadd; > >> - } else { > >> - level = level * qmul + qadd; > >> - } > >> - block[i] = level; > >> - } > >> - } > >> + s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd); > >> } > >> > >> static void dct_unquantize_h263_inter_c(MpegEncContext *s, > >> int16_t *block, int n, int qscale) > >> { > >> - int i, level, qmul, qadd; > >> + int qmul = qscale << 1; > >> + int qadd = (qscale - 1) | 1; > >> int nCoeffs; > >> > >> av_assert2(s->block_last_index[n]>=0); > >> > >> - qadd = (qscale - 1) | 1; > >> - qmul = qscale << 1; > >> - > >> - nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; > >> - > >> - for(i=0; i<=nCoeffs; i++) { > >> - level = block[i]; > >> - if (level) { > >> - if (level < 0) { > >> - level = level * qmul - qadd; > >> - } else { > >> - level = level * qmul + qadd; > >> - } > >> - block[i] = level; > >> - } > >> - } > >> + nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]] + 1; > >> + s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd); > > > >It would be better if the "+ 1" would not be needed and teh functions > >would keep using "<=" > > Why? because its 1 instruction less and the compieler cannot optimize it out. > AFAICT, that just makes the assembler more confusing by requiring an unusual boundary check. > > And then for SVE and RVV, adding one is unavoidable possible, as we need the element count to predicate the vector ops. So we'd just end up having to add one in the assembler instead of C. [...]
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index 7af823b8bd..9be0fecc8d 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, static void dct_unquantize_h263_intra_c(MpegEncContext *s, int16_t *block, int n, int qscale) { - int i, level, qmul, qadd; - int nCoeffs; + int qmul = qscale << 1; + int qadd, nCoeffs; av_assert2(s->block_last_index[n]>=0 || s->h263_aic); - qmul = qscale << 1; - if (!s->h263_aic) { block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale; qadd = (qscale - 1) | 1; @@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s, qadd = 0; } if(s->ac_pred) - nCoeffs=63; + nCoeffs = 64; else - nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; + nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1; - for(i=1; i<=nCoeffs; i++) { - level = block[i]; - if (level) { - if (level < 0) { - level = level * qmul - qadd; - } else { - level = level * qmul + qadd; - } - block[i] = level; - } - } + s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd); } static void dct_unquantize_h263_inter_c(MpegEncContext *s, int16_t *block, int n, int qscale) { - int i, level, qmul, qadd; + int qmul = qscale << 1; + int qadd = (qscale - 1) | 1; int nCoeffs; av_assert2(s->block_last_index[n]>=0); - qadd = (qscale - 1) | 1; - qmul = qscale << 1; - - nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; - - for(i=0; i<=nCoeffs; i++) { - level = block[i]; - if (level) { - if (level < 0) { - level = level * qmul - qadd; - } else { - level = level * qmul + qadd; - } - block[i] = level; - } - } + nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]] + 1; + s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd); } @@ -275,6 +250,7 @@ static void gray8(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h) static av_cold int dct_init(MpegEncContext *s) { ff_blockdsp_init(&s->bdsp); + ff_h263dsp_init(&s->h263dsp); ff_hpeldsp_init(&s->hdsp, s->avctx->flags); ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);