[FFmpeg-devel,1/3] avfilter/vf_uspp: update to new APIs

Message ID	20230315233445.5282-1-michael@niedermayer.cc
State	Accepted
Commit	771c27119d13c3c1d0ced53dbb72a65e8ad2d9c7
Headers	show Delivered-To: ffmpegpatchwork2@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; sender: michael@niedermayer.cc) by mail.gandi.net (Postfix) with ESMTPSA id 37BADE0008 for <ffmpeg-devel@ffmpeg.org>; Wed, 15 Mar 2023 23:34:45 +0000 (UTC) From: Michael Niedermayer <michael@niedermayer.cc> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Date: Thu, 16 Mar 2023 00:34:43 +0100 Message-Id: <20230315233445.5282-1-michael@niedermayer.cc> Subject: [FFmpeg-devel] [PATCH 1/3] avfilter/vf_uspp: update to new APIs Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Series	[FFmpeg-devel,1/3] avfilter/vf_uspp: update to new APIs \| expand [FFmpeg-devel,1/3] avfilter/vf_uspp: update to new APIs [FFmpeg-devel,2/3] avfilter/vf_uspp: Support any codec [FFmpeg-devel,3/3] avfilter/vf_uspp: 1000% faster with threads

Message ID

20230315233445.5282-1-michael@niedermayer.cc

State

Accepted

Commit

771c27119d13c3c1d0ced53dbb72a65e8ad2d9c7

Headers

Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org
 designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100;
From: Michael Niedermayer <michael@niedermayer.cc>
To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Date: Thu, 16 Mar 2023 00:34:43 +0100
Message-Id: <20230315233445.5282-1-michael@niedermayer.cc>
Subject: [FFmpeg-devel] [PATCH 1/3] avfilter/vf_uspp: update to new APIs
Precedence: list
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Series

[FFmpeg-devel,1/3] avfilter/vf_uspp: update to new APIs | expand

Checks

Context	Check	Description
yinshiyou/make_loongarch64	success	Make finished
yinshiyou/make_fate_loongarch64	success	Make fate finished
andriy/make_x86	success	Make finished
andriy/make_fate_x86	success	Make fate finished

Context

Check

Description

yinshiyou/make_loongarch64

success

Make finished

yinshiyou/make_fate_loongarch64

success

Make fate finished

andriy/make_x86

success

Make finished

andriy/make_fate_x86

success

Make fate finished

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc> --- configure | 1 - libavfilter/vf_uspp.c | 49 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 41 insertions(+), 9 deletions(-)

Comments

James Almer March 16, 2023, 4:13 p.m. UTC | #1

On 3/15/2023 8:34 PM, Michael Niedermayer wrote:
> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> ---
>   configure             |  1 -
>   libavfilter/vf_uspp.c | 49 ++++++++++++++++++++++++++++++++++++-------
>   2 files changed, 41 insertions(+), 9 deletions(-)
> 
> diff --git a/configure b/configure
> index 03d3c429a5..0370e25577 100755
> --- a/configure
> +++ b/configure
> @@ -7359,7 +7359,6 @@ enable frame_thread_encoder
>   # they are kept disabled for now, but will be removed if
>   # nobody updates and re-enables them
>   disable mcdeint_filter
> -disable uspp_filter
>   
>   enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; }
>   
> diff --git a/libavfilter/vf_uspp.c b/libavfilter/vf_uspp.c
> index 051de00771..43114e1b50 100644
> --- a/libavfilter/vf_uspp.c
> +++ b/libavfilter/vf_uspp.c
> @@ -53,6 +53,7 @@ typedef struct USPPContext {
>       int outbuf_size;
>       uint8_t *outbuf;
>       AVCodecContext *avctx_enc[BLOCK*BLOCK];
> +    AVCodecContext *avctx_dec[BLOCK*BLOCK];

Wouldn't it be more efficient to try to implement recon_frame support on 
more encoders, including snow, and limit this filter to those in patch 2/2?
I suggest this mainly seeing how you're creating 256 encoders and now 
also 256 decoders, all of which might be threaded since you're not 
forcing them to use a single thread, and in patch 3 you'll add slice 
threading on top of that.

>       AVPacket *pkt;
>       AVFrame *frame;
>       AVFrame *frame_dec;
> @@ -244,7 +245,6 @@ static void filter(USPPContext *p, uint8_t *dst[3], uint8_t *src[3],
>           const int BLOCKc = BLOCK >> p->hsub;
>           int offset;
>           AVPacket *pkt = p->pkt;
> -        int got_pkt_ptr;
>   
>           av_packet_unref(pkt);
>           pkt->data = p->outbuf;
> @@ -255,14 +255,28 @@ static void filter(USPPContext *p, uint8_t *dst[3], uint8_t *src[3],
>           p->frame->data[2] = p->src[2] + x1c  + y1c  * p->frame->linesize[2];
>           p->frame->format  = p->avctx_enc[i]->pix_fmt;
>   
> -        ret = avcodec_encode_video2(p->avctx_enc[i], pkt, p->frame, &got_pkt_ptr);
> +        ret = avcodec_send_frame(p->avctx_enc[i], p->frame);
>           if (ret < 0) {
> -            av_log(p->avctx_enc[i], AV_LOG_ERROR, "Encoding failed\n");
> +            av_log(p->avctx_enc[i], AV_LOG_ERROR, "Error sending a frame for encoding\n");
> +            continue;
> +        }
> +        ret = avcodec_receive_packet(p->avctx_enc[i], pkt);
> +        if (ret < 0) {
> +            av_log(p->avctx_enc[i], AV_LOG_ERROR, "Error receiving a packet from encoding\n");
>               continue;
>           }
> -        av_packet_unref(pkt);
>   
> -        p->frame_dec = p->avctx_enc[i]->coded_frame;
> +        ret = avcodec_send_packet(p->avctx_dec[i], pkt);
> +        av_packet_unref(pkt);
> +        if (ret < 0) {
> +            av_log(p->avctx_dec[i], AV_LOG_ERROR, "Error sending a packet for decoding\n");
> +            continue;
> +        }
> +        ret = avcodec_receive_frame(p->avctx_dec[i], p->frame_dec);
> +        if (ret < 0) {
> +            av_log(p->avctx_dec[i], AV_LOG_ERROR, "Error receiving a frame from decoding\n");
> +            continue;
> +        }
>   
>           offset = (BLOCK-x1) + (BLOCK-y1) * p->frame_dec->linesize[0];
>   
> @@ -315,10 +329,15 @@ static int config_input(AVFilterLink *inlink)
>       int i;
>   
>       const AVCodec *enc = avcodec_find_encoder(AV_CODEC_ID_SNOW);
> +    const AVCodec *dec = avcodec_find_decoder(AV_CODEC_ID_SNOW);
>       if (!enc) {
>           av_log(ctx, AV_LOG_ERROR, "SNOW encoder not found.\n");
>           return AVERROR(EINVAL);
>       }
> +    if (!dec) {
> +        av_log(ctx, AV_LOG_ERROR, "SNOW decoder not found.\n");
> +        return AVERROR(EINVAL);
> +    }
>   
>       uspp->hsub = desc->log2_chroma_w;
>       uspp->vsub = desc->log2_chroma_h;
> @@ -341,15 +360,20 @@ static int config_input(AVFilterLink *inlink)
>       }
>   
>       for (i = 0; i < (1<<uspp->log2_count); i++) {
> -        AVCodecContext *avctx_enc;
> +        AVCodecContext *avctx_enc, *avctx_dec;
>           AVDictionary *opts = NULL;
>           int ret;
>   
>           if (!(uspp->avctx_enc[i] = avcodec_alloc_context3(NULL)))
>               return AVERROR(ENOMEM);
> +        if (!(uspp->avctx_dec[i] = avcodec_alloc_context3(NULL)))
> +            return AVERROR(ENOMEM);
>   
>           avctx_enc = uspp->avctx_enc[i];
> +        avctx_dec = uspp->avctx_dec[i];
> +        avctx_dec->width =
>           avctx_enc->width = width + BLOCK;
> +        avctx_dec->height =
>           avctx_enc->height = height + BLOCK;
>           avctx_enc->time_base = (AVRational){1,25};  // meaningless
>           avctx_enc->gop_size = INT_MAX;
> @@ -358,17 +382,24 @@ static int config_input(AVFilterLink *inlink)
>           avctx_enc->flags = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_LOW_DELAY;
>           avctx_enc->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
>           avctx_enc->global_quality = 123;
> -        av_dict_set(&opts, "no_bitstream", "1", 0);
>           ret = avcodec_open2(avctx_enc, enc, &opts);
>           av_dict_free(&opts);
>           if (ret < 0)
>               return ret;
>           av_assert0(avctx_enc->codec);
> +
> +
> +        ret = avcodec_open2(avctx_dec, dec, NULL);
> +        if (ret < 0)
> +            return ret;
> +
>       }
>   
>       uspp->outbuf_size = (width + BLOCK) * (height + BLOCK) * 10;
>       if (!(uspp->frame = av_frame_alloc()))
>           return AVERROR(ENOMEM);
> +    if (!(uspp->frame_dec = av_frame_alloc()))
> +        return AVERROR(ENOMEM);
>       if (!(uspp->pkt = av_packet_alloc()))
>           return AVERROR(ENOMEM);
>       if (!(uspp->outbuf = av_malloc(uspp->outbuf_size)))
> @@ -460,8 +491,10 @@ static av_cold void uninit(AVFilterContext *ctx)
>           av_freep(&uspp->src[i]);
>       }
>   
> -    for (i = 0; i < (1 << uspp->log2_count); i++)
> +    for (i = 0; i < (1 << uspp->log2_count); i++) {
>           avcodec_free_context(&uspp->avctx_enc[i]);
> +        avcodec_free_context(&uspp->avctx_dec[i]);
> +    }
>   
>       av_freep(&uspp->non_b_qp_table);
>       av_freep(&uspp->outbuf);

Michael Niedermayer March 16, 2023, 9:15 p.m. UTC | #2

On Thu, Mar 16, 2023 at 01:13:54PM -0300, James Almer wrote:
> On 3/15/2023 8:34 PM, Michael Niedermayer wrote:
> > Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> > ---
> >   configure             |  1 -
> >   libavfilter/vf_uspp.c | 49 ++++++++++++++++++++++++++++++++++++-------
> >   2 files changed, 41 insertions(+), 9 deletions(-)
> > 
> > diff --git a/configure b/configure
> > index 03d3c429a5..0370e25577 100755
> > --- a/configure
> > +++ b/configure
> > @@ -7359,7 +7359,6 @@ enable frame_thread_encoder
> >   # they are kept disabled for now, but will be removed if
> >   # nobody updates and re-enables them
> >   disable mcdeint_filter
> > -disable uspp_filter
> >   enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; }
> > diff --git a/libavfilter/vf_uspp.c b/libavfilter/vf_uspp.c
> > index 051de00771..43114e1b50 100644
> > --- a/libavfilter/vf_uspp.c
> > +++ b/libavfilter/vf_uspp.c
> > @@ -53,6 +53,7 @@ typedef struct USPPContext {
> >       int outbuf_size;
> >       uint8_t *outbuf;
> >       AVCodecContext *avctx_enc[BLOCK*BLOCK];
> > +    AVCodecContext *avctx_dec[BLOCK*BLOCK];
> 
> Wouldn't it be more efficient to try to implement recon_frame support on
> more encoders, including snow, and limit this filter to those in patch 2/2?

recon_frame support makes sense, but i think we should have generic support
first. Then when recon_frame is available use it.
First make it work then make it work fast where an optimization is possible


> I suggest this mainly seeing how you're creating 256 encoders and now also
> 256 decoders, all of which might be threaded since you're not forcing them
> to use a single thread, and in patch 3 you'll add slice threading on top of
> that.

ill add a thread_count = 1 so we dont multiply the threading.
if another variant is faster, of course that should be preferred

[...]

thx

Michael Niedermayer March 18, 2023, 12:31 p.m. UTC | #3

On Thu, Mar 16, 2023 at 10:15:44PM +0100, Michael Niedermayer wrote:
> On Thu, Mar 16, 2023 at 01:13:54PM -0300, James Almer wrote:
> > On 3/15/2023 8:34 PM, Michael Niedermayer wrote:
> > > Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> > > ---
> > >   configure             |  1 -
> > >   libavfilter/vf_uspp.c | 49 ++++++++++++++++++++++++++++++++++++-------
> > >   2 files changed, 41 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/configure b/configure
> > > index 03d3c429a5..0370e25577 100755
> > > --- a/configure
> > > +++ b/configure
> > > @@ -7359,7 +7359,6 @@ enable frame_thread_encoder
> > >   # they are kept disabled for now, but will be removed if
> > >   # nobody updates and re-enables them
> > >   disable mcdeint_filter
> > > -disable uspp_filter
> > >   enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; }
> > > diff --git a/libavfilter/vf_uspp.c b/libavfilter/vf_uspp.c
> > > index 051de00771..43114e1b50 100644
> > > --- a/libavfilter/vf_uspp.c
> > > +++ b/libavfilter/vf_uspp.c
> > > @@ -53,6 +53,7 @@ typedef struct USPPContext {
> > >       int outbuf_size;
> > >       uint8_t *outbuf;
> > >       AVCodecContext *avctx_enc[BLOCK*BLOCK];
> > > +    AVCodecContext *avctx_dec[BLOCK*BLOCK];
> > 
> > Wouldn't it be more efficient to try to implement recon_frame support on
> > more encoders, including snow, and limit this filter to those in patch 2/2?
> 
> recon_frame support makes sense, but i think we should have generic support
> first. Then when recon_frame is available use it.
> First make it work then make it work fast where an optimization is possible

will apply the patchset later today and then look into recon_frame in snow and uspp

thx

[...]

diff --git a/configure b/configure
index 03d3c429a5..0370e25577 100755
--- a/configure
+++ b/configure
@@ -7359,7 +7359,6 @@  enable frame_thread_encoder
 # they are kept disabled for now, but will be removed if
 # nobody updates and re-enables them
 disable mcdeint_filter
-disable uspp_filter
 
 enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; }
 
diff --git a/libavfilter/vf_uspp.c b/libavfilter/vf_uspp.c
index 051de00771..43114e1b50 100644
--- a/libavfilter/vf_uspp.c
+++ b/libavfilter/vf_uspp.c
@@ -53,6 +53,7 @@  typedef struct USPPContext {
     int outbuf_size;
     uint8_t *outbuf;
     AVCodecContext *avctx_enc[BLOCK*BLOCK];
+    AVCodecContext *avctx_dec[BLOCK*BLOCK];
     AVPacket *pkt;
     AVFrame *frame;
     AVFrame *frame_dec;
@@ -244,7 +245,6 @@  static void filter(USPPContext *p, uint8_t *dst[3], uint8_t *src[3],
         const int BLOCKc = BLOCK >> p->hsub;
         int offset;
         AVPacket *pkt = p->pkt;
-        int got_pkt_ptr;
 
         av_packet_unref(pkt);
         pkt->data = p->outbuf;
@@ -255,14 +255,28 @@  static void filter(USPPContext *p, uint8_t *dst[3], uint8_t *src[3],
         p->frame->data[2] = p->src[2] + x1c  + y1c  * p->frame->linesize[2];
         p->frame->format  = p->avctx_enc[i]->pix_fmt;
 
-        ret = avcodec_encode_video2(p->avctx_enc[i], pkt, p->frame, &got_pkt_ptr);
+        ret = avcodec_send_frame(p->avctx_enc[i], p->frame);
         if (ret < 0) {
-            av_log(p->avctx_enc[i], AV_LOG_ERROR, "Encoding failed\n");
+            av_log(p->avctx_enc[i], AV_LOG_ERROR, "Error sending a frame for encoding\n");
+            continue;
+        }
+        ret = avcodec_receive_packet(p->avctx_enc[i], pkt);
+        if (ret < 0) {
+            av_log(p->avctx_enc[i], AV_LOG_ERROR, "Error receiving a packet from encoding\n");
             continue;
         }
-        av_packet_unref(pkt);
 
-        p->frame_dec = p->avctx_enc[i]->coded_frame;
+        ret = avcodec_send_packet(p->avctx_dec[i], pkt);
+        av_packet_unref(pkt);
+        if (ret < 0) {
+            av_log(p->avctx_dec[i], AV_LOG_ERROR, "Error sending a packet for decoding\n");
+            continue;
+        }
+        ret = avcodec_receive_frame(p->avctx_dec[i], p->frame_dec);
+        if (ret < 0) {
+            av_log(p->avctx_dec[i], AV_LOG_ERROR, "Error receiving a frame from decoding\n");
+            continue;
+        }
 
         offset = (BLOCK-x1) + (BLOCK-y1) * p->frame_dec->linesize[0];
 
@@ -315,10 +329,15 @@  static int config_input(AVFilterLink *inlink)
     int i;
 
     const AVCodec *enc = avcodec_find_encoder(AV_CODEC_ID_SNOW);
+    const AVCodec *dec = avcodec_find_decoder(AV_CODEC_ID_SNOW);
     if (!enc) {
         av_log(ctx, AV_LOG_ERROR, "SNOW encoder not found.\n");
         return AVERROR(EINVAL);
     }
+    if (!dec) {
+        av_log(ctx, AV_LOG_ERROR, "SNOW decoder not found.\n");
+        return AVERROR(EINVAL);
+    }
 
     uspp->hsub = desc->log2_chroma_w;
     uspp->vsub = desc->log2_chroma_h;
@@ -341,15 +360,20 @@  static int config_input(AVFilterLink *inlink)
     }
 
     for (i = 0; i < (1<<uspp->log2_count); i++) {
-        AVCodecContext *avctx_enc;
+        AVCodecContext *avctx_enc, *avctx_dec;
         AVDictionary *opts = NULL;
         int ret;
 
         if (!(uspp->avctx_enc[i] = avcodec_alloc_context3(NULL)))
             return AVERROR(ENOMEM);
+        if (!(uspp->avctx_dec[i] = avcodec_alloc_context3(NULL)))
+            return AVERROR(ENOMEM);
 
         avctx_enc = uspp->avctx_enc[i];
+        avctx_dec = uspp->avctx_dec[i];
+        avctx_dec->width =
         avctx_enc->width = width + BLOCK;
+        avctx_dec->height =
         avctx_enc->height = height + BLOCK;
         avctx_enc->time_base = (AVRational){1,25};  // meaningless
         avctx_enc->gop_size = INT_MAX;
@@ -358,17 +382,24 @@  static int config_input(AVFilterLink *inlink)
         avctx_enc->flags = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_LOW_DELAY;
         avctx_enc->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
         avctx_enc->global_quality = 123;
-        av_dict_set(&opts, "no_bitstream", "1", 0);
         ret = avcodec_open2(avctx_enc, enc, &opts);
         av_dict_free(&opts);
         if (ret < 0)
             return ret;
         av_assert0(avctx_enc->codec);
+
+
+        ret = avcodec_open2(avctx_dec, dec, NULL);
+        if (ret < 0)
+            return ret;
+
     }
 
     uspp->outbuf_size = (width + BLOCK) * (height + BLOCK) * 10;
     if (!(uspp->frame = av_frame_alloc()))
         return AVERROR(ENOMEM);
+    if (!(uspp->frame_dec = av_frame_alloc()))
+        return AVERROR(ENOMEM);
     if (!(uspp->pkt = av_packet_alloc()))
         return AVERROR(ENOMEM);
     if (!(uspp->outbuf = av_malloc(uspp->outbuf_size)))
@@ -460,8 +491,10 @@  static av_cold void uninit(AVFilterContext *ctx)
         av_freep(&uspp->src[i]);
     }
 
-    for (i = 0; i < (1 << uspp->log2_count); i++)
+    for (i = 0; i < (1 << uspp->log2_count); i++) {
         avcodec_free_context(&uspp->avctx_enc[i]);
+        avcodec_free_context(&uspp->avctx_dec[i]);
+    }
 
     av_freep(&uspp->non_b_qp_table);
     av_freep(&uspp->outbuf);

[FFmpeg-devel,1/3] avfilter/vf_uspp: update to new APIs

Checks

Commit Message

Comments

Patch