Message ID | 1497962853-21428-1-git-send-email-rsbultje@gmail.com |
---|---|
State | New |
Headers | show |
On 2017-06-20 14:47, Ronald S. Bultje wrote: > This allows using non-simple (e.g. simplemmx) IDCT implementations. > The result is not bitexact (which is why the fate test continues to > use -idct simple), but the PSNR between C/MMX goes from ~35 to ~90. > --- > libavcodec/mdec.c | 14 ++++++++++---- > 1 file changed, 10 insertions(+), 4 deletions(-) > > diff --git a/libavcodec/mdec.c b/libavcodec/mdec.c > index 97bfebb..59658b3 100644 > --- a/libavcodec/mdec.c > +++ b/libavcodec/mdec.c > @@ -49,6 +49,7 @@ typedef struct MDECContext { > int mb_height; > int mb_x, mb_y; > DECLARE_ALIGNED(16, int16_t, block)[6][64]; > + DECLARE_ALIGNED(16, uint16_t, quant_matrix)[64]; > uint8_t *bitstream_buffer; > unsigned int bitstream_buffer_size; > int block_last_index[6]; > @@ -61,7 +62,7 @@ static inline int mdec_decode_block_intra(MDECContext *a, int16_t *block, int n) > int component; > RLTable *rl = &ff_rl_mpeg1; > uint8_t * const scantable = a->scantable.permutated; > - const uint16_t *quant_matrix = ff_mpeg1_default_intra_matrix; > + const uint16_t *quant_matrix = a->quant_matrix; > const int qscale = a->qscale; > > /* DC coefficient */ > @@ -212,9 +213,7 @@ static int decode_frame(AVCodecContext *avctx, > static av_cold int decode_init(AVCodecContext *avctx) > { > MDECContext * const a = avctx->priv_data; > - > - if (avctx->idct_algo == FF_IDCT_AUTO) > - avctx->idct_algo = FF_IDCT_SIMPLE; > + int i; > > a->mb_width = (avctx->coded_width + 15) / 16; > a->mb_height = (avctx->coded_height + 15) / 16; > @@ -231,6 +230,13 @@ static av_cold int decode_init(AVCodecContext *avctx) > avctx->pix_fmt = AV_PIX_FMT_YUVJ420P; > avctx->color_range = AVCOL_RANGE_JPEG; > > + /* init q matrix */ > + for (i = 0; i < 64; i++) { > + int j = a->idsp.idct_permutation[i]; > + > + a->quant_matrix[j] = ff_mpeg1_default_intra_matrix[i]; > + } > + > return 0; > } > > That patch seems to work. FATE doesn't complain when I apply on top of my others after dropping the one Michael complained about. That is what it was supposed to do, right?
Hi, On Tue, Jun 20, 2017 at 12:04 PM, James Darnley <jdarnley@obe.tv> wrote: > On 2017-06-20 14:47, Ronald S. Bultje wrote: > > This allows using non-simple (e.g. simplemmx) IDCT implementations. > > The result is not bitexact (which is why the fate test continues to > > use -idct simple), but the PSNR between C/MMX goes from ~35 to ~90. > > --- > > libavcodec/mdec.c | 14 ++++++++++---- > > 1 file changed, 10 insertions(+), 4 deletions(-) > > > > diff --git a/libavcodec/mdec.c b/libavcodec/mdec.c > > index 97bfebb..59658b3 100644 > > --- a/libavcodec/mdec.c > > +++ b/libavcodec/mdec.c > > @@ -49,6 +49,7 @@ typedef struct MDECContext { > > int mb_height; > > int mb_x, mb_y; > > DECLARE_ALIGNED(16, int16_t, block)[6][64]; > > + DECLARE_ALIGNED(16, uint16_t, quant_matrix)[64]; > > uint8_t *bitstream_buffer; > > unsigned int bitstream_buffer_size; > > int block_last_index[6]; > > @@ -61,7 +62,7 @@ static inline int mdec_decode_block_intra(MDECContext > *a, int16_t *block, int n) > > int component; > > RLTable *rl = &ff_rl_mpeg1; > > uint8_t * const scantable = a->scantable.permutated; > > - const uint16_t *quant_matrix = ff_mpeg1_default_intra_matrix; > > + const uint16_t *quant_matrix = a->quant_matrix; > > const int qscale = a->qscale; > > > > /* DC coefficient */ > > @@ -212,9 +213,7 @@ static int decode_frame(AVCodecContext *avctx, > > static av_cold int decode_init(AVCodecContext *avctx) > > { > > MDECContext * const a = avctx->priv_data; > > - > > - if (avctx->idct_algo == FF_IDCT_AUTO) > > - avctx->idct_algo = FF_IDCT_SIMPLE; > > + int i; > > > > a->mb_width = (avctx->coded_width + 15) / 16; > > a->mb_height = (avctx->coded_height + 15) / 16; > > @@ -231,6 +230,13 @@ static av_cold int decode_init(AVCodecContext > *avctx) > > avctx->pix_fmt = AV_PIX_FMT_YUVJ420P; > > avctx->color_range = AVCOL_RANGE_JPEG; > > > > + /* init q matrix */ > > + for (i = 0; i < 64; i++) { > > + int j = a->idsp.idct_permutation[i]; > > + > > + a->quant_matrix[j] = ff_mpeg1_default_intra_matrix[i]; > > + } > > + > > return 0; > > } > > > > > > That patch seems to work. FATE doesn't complain when I apply on top of > my others after dropping the one Michael complained about. That is what > it was supposed to do, right? Yes, it's intended to be an alternative to that patch, based on Michael's review/comments. Ronald
On 2017-06-20 18:16, Ronald S. Bultje wrote: > On Tue, Jun 20, 2017 at 12:04 PM, James Darnley <jdarnley@obe.tv> wrote: >>> @@ -231,6 +230,13 @@ static av_cold int decode_init(AVCodecContext >> *avctx) >>> avctx->pix_fmt = AV_PIX_FMT_YUVJ420P; >>> avctx->color_range = AVCOL_RANGE_JPEG; >>> >>> + /* init q matrix */ >>> + for (i = 0; i < 64; i++) { >>> + int j = a->idsp.idct_permutation[i]; >>> + >>> + a->quant_matrix[j] = ff_mpeg1_default_intra_matrix[i]; >>> + } >>> + >>> return 0; >>> } >>> >>> >> >> That patch seems to work. FATE doesn't complain when I apply on top of >> my others after dropping the one Michael complained about. That is what >> it was supposed to do, right? > > > Yes, it's intended to be an alternative to that patch, based on Michael's > review/comments. Good. Are you going to push it? Do you want me to? Are you waiting for something else? >>> @@ -212,9 +213,7 @@ static int decode_frame(AVCodecContext *avctx, >>> static av_cold int decode_init(AVCodecContext *avctx) >>> { >>> MDECContext * const a = avctx->priv_data; >>> - >>> - if (avctx->idct_algo == FF_IDCT_AUTO) >>> - avctx->idct_algo = FF_IDCT_SIMPLE; >>> + int i; >>> >>> a->mb_width = (avctx->coded_width + 15) / 16; >>> a->mb_height = (avctx->coded_height + 15) / 16; I have just one suggestion to make this time. I think you should mention that you remove the idct override in the commit message.
On Tue, Jun 20, 2017 at 08:47:33AM -0400, Ronald S. Bultje wrote: > This allows using non-simple (e.g. simplemmx) IDCT implementations. > The result is not bitexact (which is why the fate test continues to > use -idct simple), but the PSNR between C/MMX goes from ~35 to ~90. > --- > libavcodec/mdec.c | 14 ++++++++++---- > 1 file changed, 10 insertions(+), 4 deletions(-) > [...] > @@ -212,9 +213,7 @@ static int decode_frame(AVCodecContext *avctx, > static av_cold int decode_init(AVCodecContext *avctx) > { > MDECContext * const a = avctx->priv_data; > - > - if (avctx->idct_algo == FF_IDCT_AUTO) > - avctx->idct_algo = FF_IDCT_SIMPLE; iam not sure this should be in this patch rest LGTM thx [...]
Hi, On Tue, Jun 20, 2017 at 5:22 PM, Michael Niedermayer <michael@niedermayer.cc > wrote: > On Tue, Jun 20, 2017 at 08:47:33AM -0400, Ronald S. Bultje wrote: > > This allows using non-simple (e.g. simplemmx) IDCT implementations. > > The result is not bitexact (which is why the fate test continues to > > use -idct simple), but the PSNR between C/MMX goes from ~35 to ~90. > > --- > > libavcodec/mdec.c | 14 ++++++++++---- > > 1 file changed, 10 insertions(+), 4 deletions(-) > > > [...] > > > @@ -212,9 +213,7 @@ static int decode_frame(AVCodecContext *avctx, > > static av_cold int decode_init(AVCodecContext *avctx) > > { > > MDECContext * const a = avctx->priv_data; > > - > > - if (avctx->idct_algo == FF_IDCT_AUTO) > > - avctx->idct_algo = FF_IDCT_SIMPLE; > > iam not sure this should be in this patch Can you be more precise? Would you like this in a separate patch? Or do you think it's wrong? I believe this code was there because the PSNR between FF_IDCT_SIMPLE and SIMPLEMMX was 35, which is pretty terrible. This was probably because of the bug in the dequant permutation that's fixed in this patch. After this patch, the PSNR between the two is 90, which is pretty decent and good enough for me to enable it by default (although not in fate). Ronald
On Tue, Jun 20, 2017 at 08:18:20PM -0400, Ronald S. Bultje wrote: > Hi, > > On Tue, Jun 20, 2017 at 5:22 PM, Michael Niedermayer <michael@niedermayer.cc > > wrote: > > > On Tue, Jun 20, 2017 at 08:47:33AM -0400, Ronald S. Bultje wrote: > > > This allows using non-simple (e.g. simplemmx) IDCT implementations. > > > The result is not bitexact (which is why the fate test continues to > > > use -idct simple), but the PSNR between C/MMX goes from ~35 to ~90. > > > --- > > > libavcodec/mdec.c | 14 ++++++++++---- > > > 1 file changed, 10 insertions(+), 4 deletions(-) > > > > > [...] > > > > > @@ -212,9 +213,7 @@ static int decode_frame(AVCodecContext *avctx, > > > static av_cold int decode_init(AVCodecContext *avctx) > > > { > > > MDECContext * const a = avctx->priv_data; > > > - > > > - if (avctx->idct_algo == FF_IDCT_AUTO) > > > - avctx->idct_algo = FF_IDCT_SIMPLE; > > > > iam not sure this should be in this patch > > > Can you be more precise? > Would you like this in a separate patch? yes > Or do you > think it's wrong? no, i dont think the change is wrong, but thats just thinking The code was added in: commit e3e3c82555e2382125195c1ba9f34b5a43299abc Author: Vitor Sessak <vitor1001@gmail.com> Date: Sun Jan 2 11:16:21 2011 +0000 Make PSX MDEC decoder output YUVJ420 and always use IDCT_SIMPLE. This makes the output much closer to original Playstation hardware. > > I believe this code was there because the PSNR between FF_IDCT_SIMPLE and > SIMPLEMMX was 35, which is pretty terrible. This was probably because of > the bug in the dequant permutation that's fixed in this patch. After this > patch, the PSNR between the two is 90, which is pretty decent and good > enough for me to enable it by default (although not in fate). > > Ronald > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Hi, On Tue, Jun 20, 2017 at 8:45 PM, Michael Niedermayer <michael@niedermayer.cc > wrote: > On Tue, Jun 20, 2017 at 08:18:20PM -0400, Ronald S. Bultje wrote: > > Hi, > > > > On Tue, Jun 20, 2017 at 5:22 PM, Michael Niedermayer > <michael@niedermayer.cc > > > wrote: > > > > > On Tue, Jun 20, 2017 at 08:47:33AM -0400, Ronald S. Bultje wrote: > > > > This allows using non-simple (e.g. simplemmx) IDCT implementations. > > > > The result is not bitexact (which is why the fate test continues to > > > > use -idct simple), but the PSNR between C/MMX goes from ~35 to ~90. > > > > --- > > > > libavcodec/mdec.c | 14 ++++++++++---- > > > > 1 file changed, 10 insertions(+), 4 deletions(-) > > > > > > > [...] > > > > > > > @@ -212,9 +213,7 @@ static int decode_frame(AVCodecContext *avctx, > > > > static av_cold int decode_init(AVCodecContext *avctx) > > > > { > > > > MDECContext * const a = avctx->priv_data; > > > > - > > > > - if (avctx->idct_algo == FF_IDCT_AUTO) > > > > - avctx->idct_algo = FF_IDCT_SIMPLE; > > > > > > iam not sure this should be in this patch > > > > > > Can you be more precise? > > > Would you like this in a separate patch? > > yes OK, pushed separately then. Ronald
diff --git a/libavcodec/mdec.c b/libavcodec/mdec.c index 97bfebb..59658b3 100644 --- a/libavcodec/mdec.c +++ b/libavcodec/mdec.c @@ -49,6 +49,7 @@ typedef struct MDECContext { int mb_height; int mb_x, mb_y; DECLARE_ALIGNED(16, int16_t, block)[6][64]; + DECLARE_ALIGNED(16, uint16_t, quant_matrix)[64]; uint8_t *bitstream_buffer; unsigned int bitstream_buffer_size; int block_last_index[6]; @@ -61,7 +62,7 @@ static inline int mdec_decode_block_intra(MDECContext *a, int16_t *block, int n) int component; RLTable *rl = &ff_rl_mpeg1; uint8_t * const scantable = a->scantable.permutated; - const uint16_t *quant_matrix = ff_mpeg1_default_intra_matrix; + const uint16_t *quant_matrix = a->quant_matrix; const int qscale = a->qscale; /* DC coefficient */ @@ -212,9 +213,7 @@ static int decode_frame(AVCodecContext *avctx, static av_cold int decode_init(AVCodecContext *avctx) { MDECContext * const a = avctx->priv_data; - - if (avctx->idct_algo == FF_IDCT_AUTO) - avctx->idct_algo = FF_IDCT_SIMPLE; + int i; a->mb_width = (avctx->coded_width + 15) / 16; a->mb_height = (avctx->coded_height + 15) / 16; @@ -231,6 +230,13 @@ static av_cold int decode_init(AVCodecContext *avctx) avctx->pix_fmt = AV_PIX_FMT_YUVJ420P; avctx->color_range = AVCOL_RANGE_JPEG; + /* init q matrix */ + for (i = 0; i < 64; i++) { + int j = a->idsp.idct_permutation[i]; + + a->quant_matrix[j] = ff_mpeg1_default_intra_matrix[i]; + } + return 0; }