[FFmpeg-devel] mdec: use correctly permutated quant matrix for dequantization.

Submitted by Ronald S. Bultje on June 20, 2017, 12:47 p.m.

Details

Message ID 1497962853-21428-1-git-send-email-rsbultje@gmail.com
State New
Headers show

Commit Message

Ronald S. Bultje June 20, 2017, 12:47 p.m.
This allows using non-simple (e.g. simplemmx) IDCT implementations.
The result is not bitexact (which is why the fate test continues to
use -idct simple), but the PSNR between C/MMX goes from ~35 to ~90.
---
 libavcodec/mdec.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

Comments

James Darnley June 20, 2017, 4:04 p.m.
On 2017-06-20 14:47, Ronald S. Bultje wrote:
> This allows using non-simple (e.g. simplemmx) IDCT implementations.
> The result is not bitexact (which is why the fate test continues to
> use -idct simple), but the PSNR between C/MMX goes from ~35 to ~90.
> ---
>  libavcodec/mdec.c | 14 ++++++++++----
>  1 file changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/libavcodec/mdec.c b/libavcodec/mdec.c
> index 97bfebb..59658b3 100644
> --- a/libavcodec/mdec.c
> +++ b/libavcodec/mdec.c
> @@ -49,6 +49,7 @@ typedef struct MDECContext {
>      int mb_height;
>      int mb_x, mb_y;
>      DECLARE_ALIGNED(16, int16_t, block)[6][64];
> +    DECLARE_ALIGNED(16, uint16_t, quant_matrix)[64];
>      uint8_t *bitstream_buffer;
>      unsigned int bitstream_buffer_size;
>      int block_last_index[6];
> @@ -61,7 +62,7 @@ static inline int mdec_decode_block_intra(MDECContext *a, int16_t *block, int n)
>      int component;
>      RLTable *rl = &ff_rl_mpeg1;
>      uint8_t * const scantable = a->scantable.permutated;
> -    const uint16_t *quant_matrix = ff_mpeg1_default_intra_matrix;
> +    const uint16_t *quant_matrix = a->quant_matrix;
>      const int qscale = a->qscale;
>  
>      /* DC coefficient */
> @@ -212,9 +213,7 @@ static int decode_frame(AVCodecContext *avctx,
>  static av_cold int decode_init(AVCodecContext *avctx)
>  {
>      MDECContext * const a = avctx->priv_data;
> -
> -    if (avctx->idct_algo == FF_IDCT_AUTO)
> -        avctx->idct_algo = FF_IDCT_SIMPLE;
> +    int i;
>  
>      a->mb_width  = (avctx->coded_width  + 15) / 16;
>      a->mb_height = (avctx->coded_height + 15) / 16;
> @@ -231,6 +230,13 @@ static av_cold int decode_init(AVCodecContext *avctx)
>      avctx->pix_fmt  = AV_PIX_FMT_YUVJ420P;
>      avctx->color_range = AVCOL_RANGE_JPEG;
>  
> +    /* init q matrix */
> +    for (i = 0; i < 64; i++) {
> +        int j = a->idsp.idct_permutation[i];
> +
> +        a->quant_matrix[j] = ff_mpeg1_default_intra_matrix[i];
> +    }
> +
>      return 0;
>  }
>  
> 

That patch seems to work.  FATE doesn't complain when I apply on top of
my others after dropping the one Michael complained about.  That is what
it was supposed to do, right?
Ronald S. Bultje June 20, 2017, 4:16 p.m.
Hi,

On Tue, Jun 20, 2017 at 12:04 PM, James Darnley <jdarnley@obe.tv> wrote:

> On 2017-06-20 14:47, Ronald S. Bultje wrote:
> > This allows using non-simple (e.g. simplemmx) IDCT implementations.
> > The result is not bitexact (which is why the fate test continues to
> > use -idct simple), but the PSNR between C/MMX goes from ~35 to ~90.
> > ---
> >  libavcodec/mdec.c | 14 ++++++++++----
> >  1 file changed, 10 insertions(+), 4 deletions(-)
> >
> > diff --git a/libavcodec/mdec.c b/libavcodec/mdec.c
> > index 97bfebb..59658b3 100644
> > --- a/libavcodec/mdec.c
> > +++ b/libavcodec/mdec.c
> > @@ -49,6 +49,7 @@ typedef struct MDECContext {
> >      int mb_height;
> >      int mb_x, mb_y;
> >      DECLARE_ALIGNED(16, int16_t, block)[6][64];
> > +    DECLARE_ALIGNED(16, uint16_t, quant_matrix)[64];
> >      uint8_t *bitstream_buffer;
> >      unsigned int bitstream_buffer_size;
> >      int block_last_index[6];
> > @@ -61,7 +62,7 @@ static inline int mdec_decode_block_intra(MDECContext
> *a, int16_t *block, int n)
> >      int component;
> >      RLTable *rl = &ff_rl_mpeg1;
> >      uint8_t * const scantable = a->scantable.permutated;
> > -    const uint16_t *quant_matrix = ff_mpeg1_default_intra_matrix;
> > +    const uint16_t *quant_matrix = a->quant_matrix;
> >      const int qscale = a->qscale;
> >
> >      /* DC coefficient */
> > @@ -212,9 +213,7 @@ static int decode_frame(AVCodecContext *avctx,
> >  static av_cold int decode_init(AVCodecContext *avctx)
> >  {
> >      MDECContext * const a = avctx->priv_data;
> > -
> > -    if (avctx->idct_algo == FF_IDCT_AUTO)
> > -        avctx->idct_algo = FF_IDCT_SIMPLE;
> > +    int i;
> >
> >      a->mb_width  = (avctx->coded_width  + 15) / 16;
> >      a->mb_height = (avctx->coded_height + 15) / 16;
> > @@ -231,6 +230,13 @@ static av_cold int decode_init(AVCodecContext
> *avctx)
> >      avctx->pix_fmt  = AV_PIX_FMT_YUVJ420P;
> >      avctx->color_range = AVCOL_RANGE_JPEG;
> >
> > +    /* init q matrix */
> > +    for (i = 0; i < 64; i++) {
> > +        int j = a->idsp.idct_permutation[i];
> > +
> > +        a->quant_matrix[j] = ff_mpeg1_default_intra_matrix[i];
> > +    }
> > +
> >      return 0;
> >  }
> >
> >
>
> That patch seems to work.  FATE doesn't complain when I apply on top of
> my others after dropping the one Michael complained about.  That is what
> it was supposed to do, right?


Yes, it's intended to be an alternative to that patch, based on Michael's
review/comments.

Ronald
James Darnley June 20, 2017, 4:40 p.m.
On 2017-06-20 18:16, Ronald S. Bultje wrote:
> On Tue, Jun 20, 2017 at 12:04 PM, James Darnley <jdarnley@obe.tv> wrote:
>>> @@ -231,6 +230,13 @@ static av_cold int decode_init(AVCodecContext
>> *avctx)
>>>      avctx->pix_fmt  = AV_PIX_FMT_YUVJ420P;
>>>      avctx->color_range = AVCOL_RANGE_JPEG;
>>>
>>> +    /* init q matrix */
>>> +    for (i = 0; i < 64; i++) {
>>> +        int j = a->idsp.idct_permutation[i];
>>> +
>>> +        a->quant_matrix[j] = ff_mpeg1_default_intra_matrix[i];
>>> +    }
>>> +
>>>      return 0;
>>>  }
>>>
>>>
>>
>> That patch seems to work.  FATE doesn't complain when I apply on top of
>> my others after dropping the one Michael complained about.  That is what
>> it was supposed to do, right?
> 
> 
> Yes, it's intended to be an alternative to that patch, based on Michael's
> review/comments.

Good.  Are you going to push it?  Do you want me to?  Are you waiting
for something else?

>>> @@ -212,9 +213,7 @@ static int decode_frame(AVCodecContext *avctx,
>>>  static av_cold int decode_init(AVCodecContext *avctx)
>>>  {
>>>      MDECContext * const a = avctx->priv_data;
>>> -
>>> -    if (avctx->idct_algo == FF_IDCT_AUTO)
>>> -        avctx->idct_algo = FF_IDCT_SIMPLE;
>>> +    int i;
>>>
>>>      a->mb_width  = (avctx->coded_width  + 15) / 16;
>>>      a->mb_height = (avctx->coded_height + 15) / 16;

I have just one suggestion to make this time.  I think you should
mention that you remove the idct override in the commit message.
Michael Niedermayer June 20, 2017, 9:22 p.m.
On Tue, Jun 20, 2017 at 08:47:33AM -0400, Ronald S. Bultje wrote:
> This allows using non-simple (e.g. simplemmx) IDCT implementations.
> The result is not bitexact (which is why the fate test continues to
> use -idct simple), but the PSNR between C/MMX goes from ~35 to ~90.
> ---
>  libavcodec/mdec.c | 14 ++++++++++----
>  1 file changed, 10 insertions(+), 4 deletions(-)
> 
[...]

> @@ -212,9 +213,7 @@ static int decode_frame(AVCodecContext *avctx,
>  static av_cold int decode_init(AVCodecContext *avctx)
>  {
>      MDECContext * const a = avctx->priv_data;
> -
> -    if (avctx->idct_algo == FF_IDCT_AUTO)
> -        avctx->idct_algo = FF_IDCT_SIMPLE;

iam not sure this should be in this patch

rest LGTM

thx

[...]
Ronald S. Bultje June 21, 2017, 12:18 a.m.
Hi,

On Tue, Jun 20, 2017 at 5:22 PM, Michael Niedermayer <michael@niedermayer.cc
> wrote:

> On Tue, Jun 20, 2017 at 08:47:33AM -0400, Ronald S. Bultje wrote:
> > This allows using non-simple (e.g. simplemmx) IDCT implementations.
> > The result is not bitexact (which is why the fate test continues to
> > use -idct simple), but the PSNR between C/MMX goes from ~35 to ~90.
> > ---
> >  libavcodec/mdec.c | 14 ++++++++++----
> >  1 file changed, 10 insertions(+), 4 deletions(-)
> >
> [...]
>
> > @@ -212,9 +213,7 @@ static int decode_frame(AVCodecContext *avctx,
> >  static av_cold int decode_init(AVCodecContext *avctx)
> >  {
> >      MDECContext * const a = avctx->priv_data;
> > -
> > -    if (avctx->idct_algo == FF_IDCT_AUTO)
> > -        avctx->idct_algo = FF_IDCT_SIMPLE;
>
> iam not sure this should be in this patch


Can you be more precise? Would you like this in a separate patch? Or do you
think it's wrong?

I believe this code was there because the PSNR between FF_IDCT_SIMPLE and
SIMPLEMMX was 35, which is pretty terrible. This was probably because of
the bug in the dequant permutation that's fixed in this patch. After this
patch, the PSNR between the two is 90, which is pretty decent and good
enough for me to enable it by default (although not in fate).

Ronald
Michael Niedermayer June 21, 2017, 12:45 a.m.
On Tue, Jun 20, 2017 at 08:18:20PM -0400, Ronald S. Bultje wrote:
> Hi,
> 
> On Tue, Jun 20, 2017 at 5:22 PM, Michael Niedermayer <michael@niedermayer.cc
> > wrote:
> 
> > On Tue, Jun 20, 2017 at 08:47:33AM -0400, Ronald S. Bultje wrote:
> > > This allows using non-simple (e.g. simplemmx) IDCT implementations.
> > > The result is not bitexact (which is why the fate test continues to
> > > use -idct simple), but the PSNR between C/MMX goes from ~35 to ~90.
> > > ---
> > >  libavcodec/mdec.c | 14 ++++++++++----
> > >  1 file changed, 10 insertions(+), 4 deletions(-)
> > >
> > [...]
> >
> > > @@ -212,9 +213,7 @@ static int decode_frame(AVCodecContext *avctx,
> > >  static av_cold int decode_init(AVCodecContext *avctx)
> > >  {
> > >      MDECContext * const a = avctx->priv_data;
> > > -
> > > -    if (avctx->idct_algo == FF_IDCT_AUTO)
> > > -        avctx->idct_algo = FF_IDCT_SIMPLE;
> >
> > iam not sure this should be in this patch
> 
> 
> Can you be more precise?

> Would you like this in a separate patch?

yes


> Or do you
> think it's wrong?

no, i dont think the change is wrong, but thats just thinking

The code was added in:

commit e3e3c82555e2382125195c1ba9f34b5a43299abc
Author: Vitor Sessak <vitor1001@gmail.com>
Date:   Sun Jan 2 11:16:21 2011 +0000

    Make PSX MDEC decoder output YUVJ420 and always use IDCT_SIMPLE. This
    makes the output much closer to original Playstation hardware.



> 
> I believe this code was there because the PSNR between FF_IDCT_SIMPLE and
> SIMPLEMMX was 35, which is pretty terrible. This was probably because of
> the bug in the dequant permutation that's fixed in this patch. After this
> patch, the PSNR between the two is 90, which is pretty decent and good
> enough for me to enable it by default (although not in fate).
> 
> Ronald
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Ronald S. Bultje June 21, 2017, 1:02 p.m.
Hi,

On Tue, Jun 20, 2017 at 8:45 PM, Michael Niedermayer <michael@niedermayer.cc
> wrote:

> On Tue, Jun 20, 2017 at 08:18:20PM -0400, Ronald S. Bultje wrote:
> > Hi,
> >
> > On Tue, Jun 20, 2017 at 5:22 PM, Michael Niedermayer
> <michael@niedermayer.cc
> > > wrote:
> >
> > > On Tue, Jun 20, 2017 at 08:47:33AM -0400, Ronald S. Bultje wrote:
> > > > This allows using non-simple (e.g. simplemmx) IDCT implementations.
> > > > The result is not bitexact (which is why the fate test continues to
> > > > use -idct simple), but the PSNR between C/MMX goes from ~35 to ~90.
> > > > ---
> > > >  libavcodec/mdec.c | 14 ++++++++++----
> > > >  1 file changed, 10 insertions(+), 4 deletions(-)
> > > >
> > > [...]
> > >
> > > > @@ -212,9 +213,7 @@ static int decode_frame(AVCodecContext *avctx,
> > > >  static av_cold int decode_init(AVCodecContext *avctx)
> > > >  {
> > > >      MDECContext * const a = avctx->priv_data;
> > > > -
> > > > -    if (avctx->idct_algo == FF_IDCT_AUTO)
> > > > -        avctx->idct_algo = FF_IDCT_SIMPLE;
> > >
> > > iam not sure this should be in this patch
> >
> >
> > Can you be more precise?
>
> > Would you like this in a separate patch?
>
> yes


OK, pushed separately then.

Ronald

Patch hide | download patch | download mbox

diff --git a/libavcodec/mdec.c b/libavcodec/mdec.c
index 97bfebb..59658b3 100644
--- a/libavcodec/mdec.c
+++ b/libavcodec/mdec.c
@@ -49,6 +49,7 @@  typedef struct MDECContext {
     int mb_height;
     int mb_x, mb_y;
     DECLARE_ALIGNED(16, int16_t, block)[6][64];
+    DECLARE_ALIGNED(16, uint16_t, quant_matrix)[64];
     uint8_t *bitstream_buffer;
     unsigned int bitstream_buffer_size;
     int block_last_index[6];
@@ -61,7 +62,7 @@  static inline int mdec_decode_block_intra(MDECContext *a, int16_t *block, int n)
     int component;
     RLTable *rl = &ff_rl_mpeg1;
     uint8_t * const scantable = a->scantable.permutated;
-    const uint16_t *quant_matrix = ff_mpeg1_default_intra_matrix;
+    const uint16_t *quant_matrix = a->quant_matrix;
     const int qscale = a->qscale;
 
     /* DC coefficient */
@@ -212,9 +213,7 @@  static int decode_frame(AVCodecContext *avctx,
 static av_cold int decode_init(AVCodecContext *avctx)
 {
     MDECContext * const a = avctx->priv_data;
-
-    if (avctx->idct_algo == FF_IDCT_AUTO)
-        avctx->idct_algo = FF_IDCT_SIMPLE;
+    int i;
 
     a->mb_width  = (avctx->coded_width  + 15) / 16;
     a->mb_height = (avctx->coded_height + 15) / 16;
@@ -231,6 +230,13 @@  static av_cold int decode_init(AVCodecContext *avctx)
     avctx->pix_fmt  = AV_PIX_FMT_YUVJ420P;
     avctx->color_range = AVCOL_RANGE_JPEG;
 
+    /* init q matrix */
+    for (i = 0; i < 64; i++) {
+        int j = a->idsp.idct_permutation[i];
+
+        a->quant_matrix[j] = ff_mpeg1_default_intra_matrix[i];
+    }
+
     return 0;
 }