Message ID | 20230315233445.5282-1-michael@niedermayer.cc |
---|---|
State | Accepted |
Commit | 771c27119d13c3c1d0ced53dbb72a65e8ad2d9c7 |
Headers | show |
Series | [FFmpeg-devel,1/3] avfilter/vf_uspp: update to new APIs | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
On 3/15/2023 8:34 PM, Michael Niedermayer wrote: > Signed-off-by: Michael Niedermayer <michael@niedermayer.cc> > --- > configure | 1 - > libavfilter/vf_uspp.c | 49 ++++++++++++++++++++++++++++++++++++------- > 2 files changed, 41 insertions(+), 9 deletions(-) > > diff --git a/configure b/configure > index 03d3c429a5..0370e25577 100755 > --- a/configure > +++ b/configure > @@ -7359,7 +7359,6 @@ enable frame_thread_encoder > # they are kept disabled for now, but will be removed if > # nobody updates and re-enables them > disable mcdeint_filter > -disable uspp_filter > > enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; } > > diff --git a/libavfilter/vf_uspp.c b/libavfilter/vf_uspp.c > index 051de00771..43114e1b50 100644 > --- a/libavfilter/vf_uspp.c > +++ b/libavfilter/vf_uspp.c > @@ -53,6 +53,7 @@ typedef struct USPPContext { > int outbuf_size; > uint8_t *outbuf; > AVCodecContext *avctx_enc[BLOCK*BLOCK]; > + AVCodecContext *avctx_dec[BLOCK*BLOCK]; Wouldn't it be more efficient to try to implement recon_frame support on more encoders, including snow, and limit this filter to those in patch 2/2? I suggest this mainly seeing how you're creating 256 encoders and now also 256 decoders, all of which might be threaded since you're not forcing them to use a single thread, and in patch 3 you'll add slice threading on top of that. > AVPacket *pkt; > AVFrame *frame; > AVFrame *frame_dec; > @@ -244,7 +245,6 @@ static void filter(USPPContext *p, uint8_t *dst[3], uint8_t *src[3], > const int BLOCKc = BLOCK >> p->hsub; > int offset; > AVPacket *pkt = p->pkt; > - int got_pkt_ptr; > > av_packet_unref(pkt); > pkt->data = p->outbuf; > @@ -255,14 +255,28 @@ static void filter(USPPContext *p, uint8_t *dst[3], uint8_t *src[3], > p->frame->data[2] = p->src[2] + x1c + y1c * p->frame->linesize[2]; > p->frame->format = p->avctx_enc[i]->pix_fmt; > > - ret = avcodec_encode_video2(p->avctx_enc[i], pkt, p->frame, &got_pkt_ptr); > + ret = avcodec_send_frame(p->avctx_enc[i], p->frame); > if (ret < 0) { > - av_log(p->avctx_enc[i], AV_LOG_ERROR, "Encoding failed\n"); > + av_log(p->avctx_enc[i], AV_LOG_ERROR, "Error sending a frame for encoding\n"); > + continue; > + } > + ret = avcodec_receive_packet(p->avctx_enc[i], pkt); > + if (ret < 0) { > + av_log(p->avctx_enc[i], AV_LOG_ERROR, "Error receiving a packet from encoding\n"); > continue; > } > - av_packet_unref(pkt); > > - p->frame_dec = p->avctx_enc[i]->coded_frame; > + ret = avcodec_send_packet(p->avctx_dec[i], pkt); > + av_packet_unref(pkt); > + if (ret < 0) { > + av_log(p->avctx_dec[i], AV_LOG_ERROR, "Error sending a packet for decoding\n"); > + continue; > + } > + ret = avcodec_receive_frame(p->avctx_dec[i], p->frame_dec); > + if (ret < 0) { > + av_log(p->avctx_dec[i], AV_LOG_ERROR, "Error receiving a frame from decoding\n"); > + continue; > + } > > offset = (BLOCK-x1) + (BLOCK-y1) * p->frame_dec->linesize[0]; > > @@ -315,10 +329,15 @@ static int config_input(AVFilterLink *inlink) > int i; > > const AVCodec *enc = avcodec_find_encoder(AV_CODEC_ID_SNOW); > + const AVCodec *dec = avcodec_find_decoder(AV_CODEC_ID_SNOW); > if (!enc) { > av_log(ctx, AV_LOG_ERROR, "SNOW encoder not found.\n"); > return AVERROR(EINVAL); > } > + if (!dec) { > + av_log(ctx, AV_LOG_ERROR, "SNOW decoder not found.\n"); > + return AVERROR(EINVAL); > + } > > uspp->hsub = desc->log2_chroma_w; > uspp->vsub = desc->log2_chroma_h; > @@ -341,15 +360,20 @@ static int config_input(AVFilterLink *inlink) > } > > for (i = 0; i < (1<<uspp->log2_count); i++) { > - AVCodecContext *avctx_enc; > + AVCodecContext *avctx_enc, *avctx_dec; > AVDictionary *opts = NULL; > int ret; > > if (!(uspp->avctx_enc[i] = avcodec_alloc_context3(NULL))) > return AVERROR(ENOMEM); > + if (!(uspp->avctx_dec[i] = avcodec_alloc_context3(NULL))) > + return AVERROR(ENOMEM); > > avctx_enc = uspp->avctx_enc[i]; > + avctx_dec = uspp->avctx_dec[i]; > + avctx_dec->width = > avctx_enc->width = width + BLOCK; > + avctx_dec->height = > avctx_enc->height = height + BLOCK; > avctx_enc->time_base = (AVRational){1,25}; // meaningless > avctx_enc->gop_size = INT_MAX; > @@ -358,17 +382,24 @@ static int config_input(AVFilterLink *inlink) > avctx_enc->flags = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_LOW_DELAY; > avctx_enc->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; > avctx_enc->global_quality = 123; > - av_dict_set(&opts, "no_bitstream", "1", 0); > ret = avcodec_open2(avctx_enc, enc, &opts); > av_dict_free(&opts); > if (ret < 0) > return ret; > av_assert0(avctx_enc->codec); > + > + > + ret = avcodec_open2(avctx_dec, dec, NULL); > + if (ret < 0) > + return ret; > + > } > > uspp->outbuf_size = (width + BLOCK) * (height + BLOCK) * 10; > if (!(uspp->frame = av_frame_alloc())) > return AVERROR(ENOMEM); > + if (!(uspp->frame_dec = av_frame_alloc())) > + return AVERROR(ENOMEM); > if (!(uspp->pkt = av_packet_alloc())) > return AVERROR(ENOMEM); > if (!(uspp->outbuf = av_malloc(uspp->outbuf_size))) > @@ -460,8 +491,10 @@ static av_cold void uninit(AVFilterContext *ctx) > av_freep(&uspp->src[i]); > } > > - for (i = 0; i < (1 << uspp->log2_count); i++) > + for (i = 0; i < (1 << uspp->log2_count); i++) { > avcodec_free_context(&uspp->avctx_enc[i]); > + avcodec_free_context(&uspp->avctx_dec[i]); > + } > > av_freep(&uspp->non_b_qp_table); > av_freep(&uspp->outbuf);
On Thu, Mar 16, 2023 at 01:13:54PM -0300, James Almer wrote: > On 3/15/2023 8:34 PM, Michael Niedermayer wrote: > > Signed-off-by: Michael Niedermayer <michael@niedermayer.cc> > > --- > > configure | 1 - > > libavfilter/vf_uspp.c | 49 ++++++++++++++++++++++++++++++++++++------- > > 2 files changed, 41 insertions(+), 9 deletions(-) > > > > diff --git a/configure b/configure > > index 03d3c429a5..0370e25577 100755 > > --- a/configure > > +++ b/configure > > @@ -7359,7 +7359,6 @@ enable frame_thread_encoder > > # they are kept disabled for now, but will be removed if > > # nobody updates and re-enables them > > disable mcdeint_filter > > -disable uspp_filter > > enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; } > > diff --git a/libavfilter/vf_uspp.c b/libavfilter/vf_uspp.c > > index 051de00771..43114e1b50 100644 > > --- a/libavfilter/vf_uspp.c > > +++ b/libavfilter/vf_uspp.c > > @@ -53,6 +53,7 @@ typedef struct USPPContext { > > int outbuf_size; > > uint8_t *outbuf; > > AVCodecContext *avctx_enc[BLOCK*BLOCK]; > > + AVCodecContext *avctx_dec[BLOCK*BLOCK]; > > Wouldn't it be more efficient to try to implement recon_frame support on > more encoders, including snow, and limit this filter to those in patch 2/2? recon_frame support makes sense, but i think we should have generic support first. Then when recon_frame is available use it. First make it work then make it work fast where an optimization is possible > I suggest this mainly seeing how you're creating 256 encoders and now also > 256 decoders, all of which might be threaded since you're not forcing them > to use a single thread, and in patch 3 you'll add slice threading on top of > that. ill add a thread_count = 1 so we dont multiply the threading. if another variant is faster, of course that should be preferred [...] thx
On Thu, Mar 16, 2023 at 10:15:44PM +0100, Michael Niedermayer wrote: > On Thu, Mar 16, 2023 at 01:13:54PM -0300, James Almer wrote: > > On 3/15/2023 8:34 PM, Michael Niedermayer wrote: > > > Signed-off-by: Michael Niedermayer <michael@niedermayer.cc> > > > --- > > > configure | 1 - > > > libavfilter/vf_uspp.c | 49 ++++++++++++++++++++++++++++++++++++------- > > > 2 files changed, 41 insertions(+), 9 deletions(-) > > > > > > diff --git a/configure b/configure > > > index 03d3c429a5..0370e25577 100755 > > > --- a/configure > > > +++ b/configure > > > @@ -7359,7 +7359,6 @@ enable frame_thread_encoder > > > # they are kept disabled for now, but will be removed if > > > # nobody updates and re-enables them > > > disable mcdeint_filter > > > -disable uspp_filter > > > enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; } > > > diff --git a/libavfilter/vf_uspp.c b/libavfilter/vf_uspp.c > > > index 051de00771..43114e1b50 100644 > > > --- a/libavfilter/vf_uspp.c > > > +++ b/libavfilter/vf_uspp.c > > > @@ -53,6 +53,7 @@ typedef struct USPPContext { > > > int outbuf_size; > > > uint8_t *outbuf; > > > AVCodecContext *avctx_enc[BLOCK*BLOCK]; > > > + AVCodecContext *avctx_dec[BLOCK*BLOCK]; > > > > Wouldn't it be more efficient to try to implement recon_frame support on > > more encoders, including snow, and limit this filter to those in patch 2/2? > > recon_frame support makes sense, but i think we should have generic support > first. Then when recon_frame is available use it. > First make it work then make it work fast where an optimization is possible will apply the patchset later today and then look into recon_frame in snow and uspp thx [...]
diff --git a/configure b/configure index 03d3c429a5..0370e25577 100755 --- a/configure +++ b/configure @@ -7359,7 +7359,6 @@ enable frame_thread_encoder # they are kept disabled for now, but will be removed if # nobody updates and re-enables them disable mcdeint_filter -disable uspp_filter enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; } diff --git a/libavfilter/vf_uspp.c b/libavfilter/vf_uspp.c index 051de00771..43114e1b50 100644 --- a/libavfilter/vf_uspp.c +++ b/libavfilter/vf_uspp.c @@ -53,6 +53,7 @@ typedef struct USPPContext { int outbuf_size; uint8_t *outbuf; AVCodecContext *avctx_enc[BLOCK*BLOCK]; + AVCodecContext *avctx_dec[BLOCK*BLOCK]; AVPacket *pkt; AVFrame *frame; AVFrame *frame_dec; @@ -244,7 +245,6 @@ static void filter(USPPContext *p, uint8_t *dst[3], uint8_t *src[3], const int BLOCKc = BLOCK >> p->hsub; int offset; AVPacket *pkt = p->pkt; - int got_pkt_ptr; av_packet_unref(pkt); pkt->data = p->outbuf; @@ -255,14 +255,28 @@ static void filter(USPPContext *p, uint8_t *dst[3], uint8_t *src[3], p->frame->data[2] = p->src[2] + x1c + y1c * p->frame->linesize[2]; p->frame->format = p->avctx_enc[i]->pix_fmt; - ret = avcodec_encode_video2(p->avctx_enc[i], pkt, p->frame, &got_pkt_ptr); + ret = avcodec_send_frame(p->avctx_enc[i], p->frame); if (ret < 0) { - av_log(p->avctx_enc[i], AV_LOG_ERROR, "Encoding failed\n"); + av_log(p->avctx_enc[i], AV_LOG_ERROR, "Error sending a frame for encoding\n"); + continue; + } + ret = avcodec_receive_packet(p->avctx_enc[i], pkt); + if (ret < 0) { + av_log(p->avctx_enc[i], AV_LOG_ERROR, "Error receiving a packet from encoding\n"); continue; } - av_packet_unref(pkt); - p->frame_dec = p->avctx_enc[i]->coded_frame; + ret = avcodec_send_packet(p->avctx_dec[i], pkt); + av_packet_unref(pkt); + if (ret < 0) { + av_log(p->avctx_dec[i], AV_LOG_ERROR, "Error sending a packet for decoding\n"); + continue; + } + ret = avcodec_receive_frame(p->avctx_dec[i], p->frame_dec); + if (ret < 0) { + av_log(p->avctx_dec[i], AV_LOG_ERROR, "Error receiving a frame from decoding\n"); + continue; + } offset = (BLOCK-x1) + (BLOCK-y1) * p->frame_dec->linesize[0]; @@ -315,10 +329,15 @@ static int config_input(AVFilterLink *inlink) int i; const AVCodec *enc = avcodec_find_encoder(AV_CODEC_ID_SNOW); + const AVCodec *dec = avcodec_find_decoder(AV_CODEC_ID_SNOW); if (!enc) { av_log(ctx, AV_LOG_ERROR, "SNOW encoder not found.\n"); return AVERROR(EINVAL); } + if (!dec) { + av_log(ctx, AV_LOG_ERROR, "SNOW decoder not found.\n"); + return AVERROR(EINVAL); + } uspp->hsub = desc->log2_chroma_w; uspp->vsub = desc->log2_chroma_h; @@ -341,15 +360,20 @@ static int config_input(AVFilterLink *inlink) } for (i = 0; i < (1<<uspp->log2_count); i++) { - AVCodecContext *avctx_enc; + AVCodecContext *avctx_enc, *avctx_dec; AVDictionary *opts = NULL; int ret; if (!(uspp->avctx_enc[i] = avcodec_alloc_context3(NULL))) return AVERROR(ENOMEM); + if (!(uspp->avctx_dec[i] = avcodec_alloc_context3(NULL))) + return AVERROR(ENOMEM); avctx_enc = uspp->avctx_enc[i]; + avctx_dec = uspp->avctx_dec[i]; + avctx_dec->width = avctx_enc->width = width + BLOCK; + avctx_dec->height = avctx_enc->height = height + BLOCK; avctx_enc->time_base = (AVRational){1,25}; // meaningless avctx_enc->gop_size = INT_MAX; @@ -358,17 +382,24 @@ static int config_input(AVFilterLink *inlink) avctx_enc->flags = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_LOW_DELAY; avctx_enc->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; avctx_enc->global_quality = 123; - av_dict_set(&opts, "no_bitstream", "1", 0); ret = avcodec_open2(avctx_enc, enc, &opts); av_dict_free(&opts); if (ret < 0) return ret; av_assert0(avctx_enc->codec); + + + ret = avcodec_open2(avctx_dec, dec, NULL); + if (ret < 0) + return ret; + } uspp->outbuf_size = (width + BLOCK) * (height + BLOCK) * 10; if (!(uspp->frame = av_frame_alloc())) return AVERROR(ENOMEM); + if (!(uspp->frame_dec = av_frame_alloc())) + return AVERROR(ENOMEM); if (!(uspp->pkt = av_packet_alloc())) return AVERROR(ENOMEM); if (!(uspp->outbuf = av_malloc(uspp->outbuf_size))) @@ -460,8 +491,10 @@ static av_cold void uninit(AVFilterContext *ctx) av_freep(&uspp->src[i]); } - for (i = 0; i < (1 << uspp->log2_count); i++) + for (i = 0; i < (1 << uspp->log2_count); i++) { avcodec_free_context(&uspp->avctx_enc[i]); + avcodec_free_context(&uspp->avctx_dec[i]); + } av_freep(&uspp->non_b_qp_table); av_freep(&uspp->outbuf);
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc> --- configure | 1 - libavfilter/vf_uspp.c | 49 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 41 insertions(+), 9 deletions(-)