diff mbox series

[FFmpeg-devel,2/7] lavc/pngdec: perform APNG blending in-place

Message ID 20210402144033.17410-2-anton@khirnov.net
State Accepted
Commit 5a50bd88db670f8c030a814e4cdb2a880dc1d4f4
Headers show
Series [FFmpeg-devel,1/7] tests/fate: add a test for APNG_DISPOSE_OP_BACKGROUND | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate fail Make fate failed
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate warning Make fate failed

Commit Message

Anton Khirnov April 2, 2021, 2:40 p.m. UTC
Saves an allocation+free and two frame copies per each frame.
---
 libavcodec/pngdec.c | 51 +++++++++++++++++++++++++--------------------
 1 file changed, 28 insertions(+), 23 deletions(-)

Comments

James Almer April 2, 2021, 2:54 p.m. UTC | #1
On 4/2/2021 11:40 AM, Anton Khirnov wrote:
> Saves an allocation+free and two frame copies per each frame.
> ---
>   libavcodec/pngdec.c | 51 +++++++++++++++++++++++++--------------------
>   1 file changed, 28 insertions(+), 23 deletions(-)
> 
> diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
> index 63c22063d9..095e4e86c2 100644
> --- a/libavcodec/pngdec.c
> +++ b/libavcodec/pngdec.c
> @@ -1068,8 +1068,12 @@ static void handle_p_frame_png(PNGDecContext *s, AVFrame *p)
>   static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
>                                  AVFrame *p)
>   {
> +    uint8_t       *dst        = p->data[0];
> +    ptrdiff_t      dst_stride = p->linesize[0];
> +    const uint8_t *src        = s->last_picture.f->data[0];
> +    ptrdiff_t      src_stride = s->last_picture.f->linesize[0];
> +
>       size_t x, y;
> -    uint8_t *buffer;
>   
>       if (s->blend_op == APNG_BLEND_OP_OVER &&
>           avctx->pix_fmt != AV_PIX_FMT_RGBA &&
> @@ -1089,26 +1093,32 @@ static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
>           if (ret < 0)
>               return ret;
>   
> +        src        = s->last_picture.f->data[0];
> +        src_stride = s->last_picture.f->linesize[0];

Is calling av_frame_make_writable(s->last_picture.f) valid at all? 
Shouldn't the frame's buffer be freed with ff_thread_release_buffer()? 
Especially if a non threading safe get_buffer2() callback was used.

Considering you can't call ff_thread_get_buffer() at this point to get a 
new writable buffer to av_frame_copy() to, since this is long after 
ff_thread_finish_setup() was called, not sure what else could be done.

> +
>           for (y = s->last_y_offset; y < s->last_y_offset + s->last_h; y++) {
> -            memset(s->last_picture.f->data[0] + s->image_linesize * y +
> +            memset(s->last_picture.f->data[0] + src_stride * y +
>                      s->bpp * s->last_x_offset, 0, s->bpp * s->last_w);
>           }
>       }
>   
> -    buffer = av_memdup(s->last_picture.f->data[0], s->image_linesize * s->height);
> -    if (!buffer)
> -        return AVERROR(ENOMEM);
> +    // copy unchanged rectangles from the last frame
> +    for (y = 0; y < s->y_offset; y++)
> +        memcpy(dst + y * dst_stride, src + y * src_stride, p->width * s->bpp);
> +    for (y = s->y_offset; y < s->y_offset + s->cur_h; y++) {
> +        memcpy(dst + y * dst_stride, src + y * src_stride, s->x_offset * s->bpp);
> +        memcpy(dst + y * dst_stride + (s->x_offset + s->cur_w) * s->bpp,
> +               src + y * src_stride + (s->x_offset + s->cur_w) * s->bpp,
> +               (p->width - s->cur_w - s->x_offset) * s->bpp);
> +    }
> +    for (y = s->y_offset + s->cur_h; y < p->height; y++)
> +        memcpy(dst + y * dst_stride, src + y * src_stride, p->width * s->bpp);
>   
> -    // Perform blending
> -    if (s->blend_op == APNG_BLEND_OP_SOURCE) {
> -        for (y = s->y_offset; y < s->y_offset + s->cur_h; ++y) {
> -            size_t row_start = s->image_linesize * y + s->bpp * s->x_offset;
> -            memcpy(buffer + row_start, p->data[0] + row_start, s->bpp * s->cur_w);
> -        }
> -    } else { // APNG_BLEND_OP_OVER
> +    if (s->blend_op == APNG_BLEND_OP_OVER) {
> +        // Perform blending
>           for (y = s->y_offset; y < s->y_offset + s->cur_h; ++y) {
> -            uint8_t *foreground = p->data[0] + s->image_linesize * y + s->bpp * s->x_offset;
> -            uint8_t *background = buffer + s->image_linesize * y + s->bpp * s->x_offset;
> +            uint8_t       *foreground = dst + dst_stride * y + s->bpp * s->x_offset;
> +            const uint8_t *background = src + src_stride * y + s->bpp * s->x_offset;
>               for (x = s->x_offset; x < s->x_offset + s->cur_w; ++x, foreground += s->bpp, background += s->bpp) {
>                   size_t b;
>                   uint8_t foreground_alpha, background_alpha, output_alpha;
> @@ -1135,18 +1145,17 @@ static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
>                       break;
>                   }
>   
> -                if (foreground_alpha == 0)
> +                if (foreground_alpha == 255)
>                       continue;
>   
> -                if (foreground_alpha == 255) {
> -                    memcpy(background, foreground, s->bpp);
> +                if (foreground_alpha == 0) {
> +                    memcpy(foreground, background, s->bpp);
>                       continue;
>                   }
>   
>                   if (avctx->pix_fmt == AV_PIX_FMT_PAL8) {
>                       // TODO: Alpha blending with PAL8 will likely need the entire image converted over to RGBA first
>                       avpriv_request_sample(avctx, "Alpha blending palette samples");
> -                    background[0] = foreground[0];
>                       continue;
>                   }
>   
> @@ -1164,15 +1173,11 @@ static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
>                       }
>                   }
>                   output[b] = output_alpha;
> -                memcpy(background, output, s->bpp);
> +                memcpy(foreground, output, s->bpp);
>               }
>           }
>       }
>   
> -    // Copy blended buffer into the frame and free
> -    memcpy(p->data[0], buffer, s->image_linesize * s->height);
> -    av_free(buffer);
> -
>       return 0;
>   }
>   
>
Anton Khirnov April 3, 2021, 7:48 a.m. UTC | #2
Quoting James Almer (2021-04-02 16:54:47)
> On 4/2/2021 11:40 AM, Anton Khirnov wrote:
> > Saves an allocation+free and two frame copies per each frame.
> > ---
> >   libavcodec/pngdec.c | 51 +++++++++++++++++++++++++--------------------
> >   1 file changed, 28 insertions(+), 23 deletions(-)
> > 
> > diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
> > index 63c22063d9..095e4e86c2 100644
> > --- a/libavcodec/pngdec.c
> > +++ b/libavcodec/pngdec.c
> > @@ -1068,8 +1068,12 @@ static void handle_p_frame_png(PNGDecContext *s, AVFrame *p)
> >   static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
> >                                  AVFrame *p)
> >   {
> > +    uint8_t       *dst        = p->data[0];
> > +    ptrdiff_t      dst_stride = p->linesize[0];
> > +    const uint8_t *src        = s->last_picture.f->data[0];
> > +    ptrdiff_t      src_stride = s->last_picture.f->linesize[0];
> > +
> >       size_t x, y;
> > -    uint8_t *buffer;
> >   
> >       if (s->blend_op == APNG_BLEND_OP_OVER &&
> >           avctx->pix_fmt != AV_PIX_FMT_RGBA &&
> > @@ -1089,26 +1093,32 @@ static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
> >           if (ret < 0)
> >               return ret;
> >   
> > +        src        = s->last_picture.f->data[0];
> > +        src_stride = s->last_picture.f->linesize[0];
> 
> Is calling av_frame_make_writable(s->last_picture.f) valid at all? 
> Shouldn't the frame's buffer be freed with ff_thread_release_buffer()? 
> Especially if a non threading safe get_buffer2() callback was used.
> 
> Considering you can't call ff_thread_get_buffer() at this point to get a 
> new writable buffer to av_frame_copy() to, since this is long after 
> ff_thread_finish_setup() was called, not sure what else could be done.

You're right, I guess making an unconditional copy local to this
function is the only way to make it compatible with thread safe
callbacks.

Will send a new patch.
Michael Niedermayer April 7, 2021, 7:21 p.m. UTC | #3
On Sat, Apr 03, 2021 at 09:48:18AM +0200, Anton Khirnov wrote:
> Quoting James Almer (2021-04-02 16:54:47)
> > On 4/2/2021 11:40 AM, Anton Khirnov wrote:
> > > Saves an allocation+free and two frame copies per each frame.
> > > ---
> > >   libavcodec/pngdec.c | 51 +++++++++++++++++++++++++--------------------
> > >   1 file changed, 28 insertions(+), 23 deletions(-)
> > > 
> > > diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
> > > index 63c22063d9..095e4e86c2 100644
> > > --- a/libavcodec/pngdec.c
> > > +++ b/libavcodec/pngdec.c
> > > @@ -1068,8 +1068,12 @@ static void handle_p_frame_png(PNGDecContext *s, AVFrame *p)
> > >   static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
> > >                                  AVFrame *p)
> > >   {
> > > +    uint8_t       *dst        = p->data[0];
> > > +    ptrdiff_t      dst_stride = p->linesize[0];
> > > +    const uint8_t *src        = s->last_picture.f->data[0];
> > > +    ptrdiff_t      src_stride = s->last_picture.f->linesize[0];
> > > +
> > >       size_t x, y;
> > > -    uint8_t *buffer;
> > >   
> > >       if (s->blend_op == APNG_BLEND_OP_OVER &&
> > >           avctx->pix_fmt != AV_PIX_FMT_RGBA &&
> > > @@ -1089,26 +1093,32 @@ static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
> > >           if (ret < 0)
> > >               return ret;
> > >   
> > > +        src        = s->last_picture.f->data[0];
> > > +        src_stride = s->last_picture.f->linesize[0];
> > 
> > Is calling av_frame_make_writable(s->last_picture.f) valid at all? 
> > Shouldn't the frame's buffer be freed with ff_thread_release_buffer()? 
> > Especially if a non threading safe get_buffer2() callback was used.
> > 
> > Considering you can't call ff_thread_get_buffer() at this point to get a 
> > new writable buffer to av_frame_copy() to, since this is long after 
> > ff_thread_finish_setup() was called, not sure what else could be done.
> 
> You're right, I guess making an unconditional copy local to this
> function is the only way to make it compatible with thread safe
> callbacks.
> 
> Will send a new patch.

Do you want this in the 4.4 release ?
iam asking as most other things are resolved ATM (might of course change
and more release blockers might appear)

thx

[...]
Anton Khirnov April 7, 2021, 7:37 p.m. UTC | #4
Quoting Michael Niedermayer (2021-04-07 21:21:39)
> On Sat, Apr 03, 2021 at 09:48:18AM +0200, Anton Khirnov wrote:
> > Quoting James Almer (2021-04-02 16:54:47)
> > > On 4/2/2021 11:40 AM, Anton Khirnov wrote:
> > > > Saves an allocation+free and two frame copies per each frame.
> > > > ---
> > > >   libavcodec/pngdec.c | 51 +++++++++++++++++++++++++--------------------
> > > >   1 file changed, 28 insertions(+), 23 deletions(-)
> > > > 
> > > > diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
> > > > index 63c22063d9..095e4e86c2 100644
> > > > --- a/libavcodec/pngdec.c
> > > > +++ b/libavcodec/pngdec.c
> > > > @@ -1068,8 +1068,12 @@ static void handle_p_frame_png(PNGDecContext *s, AVFrame *p)
> > > >   static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
> > > >                                  AVFrame *p)
> > > >   {
> > > > +    uint8_t       *dst        = p->data[0];
> > > > +    ptrdiff_t      dst_stride = p->linesize[0];
> > > > +    const uint8_t *src        = s->last_picture.f->data[0];
> > > > +    ptrdiff_t      src_stride = s->last_picture.f->linesize[0];
> > > > +
> > > >       size_t x, y;
> > > > -    uint8_t *buffer;
> > > >   
> > > >       if (s->blend_op == APNG_BLEND_OP_OVER &&
> > > >           avctx->pix_fmt != AV_PIX_FMT_RGBA &&
> > > > @@ -1089,26 +1093,32 @@ static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
> > > >           if (ret < 0)
> > > >               return ret;
> > > >   
> > > > +        src        = s->last_picture.f->data[0];
> > > > +        src_stride = s->last_picture.f->linesize[0];
> > > 
> > > Is calling av_frame_make_writable(s->last_picture.f) valid at all? 
> > > Shouldn't the frame's buffer be freed with ff_thread_release_buffer()? 
> > > Especially if a non threading safe get_buffer2() callback was used.
> > > 
> > > Considering you can't call ff_thread_get_buffer() at this point to get a 
> > > new writable buffer to av_frame_copy() to, since this is long after 
> > > ff_thread_finish_setup() was called, not sure what else could be done.
> > 
> > You're right, I guess making an unconditional copy local to this
> > function is the only way to make it compatible with thread safe
> > callbacks.
> > 
> > Will send a new patch.
> 
> Do you want this in the 4.4 release ?
> iam asking as most other things are resolved ATM (might of course change
> and more release blockers might appear)

This set fixes a number of major issues in pngdec, so all non-test
patches except for the last one definitely should be in the release.

I've been waiting for more comments/busy with other things, but will
probably push it tomorrow.
Or feel free to do it first.
diff mbox series

Patch

diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
index 63c22063d9..095e4e86c2 100644
--- a/libavcodec/pngdec.c
+++ b/libavcodec/pngdec.c
@@ -1068,8 +1068,12 @@  static void handle_p_frame_png(PNGDecContext *s, AVFrame *p)
 static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
                                AVFrame *p)
 {
+    uint8_t       *dst        = p->data[0];
+    ptrdiff_t      dst_stride = p->linesize[0];
+    const uint8_t *src        = s->last_picture.f->data[0];
+    ptrdiff_t      src_stride = s->last_picture.f->linesize[0];
+
     size_t x, y;
-    uint8_t *buffer;
 
     if (s->blend_op == APNG_BLEND_OP_OVER &&
         avctx->pix_fmt != AV_PIX_FMT_RGBA &&
@@ -1089,26 +1093,32 @@  static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
         if (ret < 0)
             return ret;
 
+        src        = s->last_picture.f->data[0];
+        src_stride = s->last_picture.f->linesize[0];
+
         for (y = s->last_y_offset; y < s->last_y_offset + s->last_h; y++) {
-            memset(s->last_picture.f->data[0] + s->image_linesize * y +
+            memset(s->last_picture.f->data[0] + src_stride * y +
                    s->bpp * s->last_x_offset, 0, s->bpp * s->last_w);
         }
     }
 
-    buffer = av_memdup(s->last_picture.f->data[0], s->image_linesize * s->height);
-    if (!buffer)
-        return AVERROR(ENOMEM);
+    // copy unchanged rectangles from the last frame
+    for (y = 0; y < s->y_offset; y++)
+        memcpy(dst + y * dst_stride, src + y * src_stride, p->width * s->bpp);
+    for (y = s->y_offset; y < s->y_offset + s->cur_h; y++) {
+        memcpy(dst + y * dst_stride, src + y * src_stride, s->x_offset * s->bpp);
+        memcpy(dst + y * dst_stride + (s->x_offset + s->cur_w) * s->bpp,
+               src + y * src_stride + (s->x_offset + s->cur_w) * s->bpp,
+               (p->width - s->cur_w - s->x_offset) * s->bpp);
+    }
+    for (y = s->y_offset + s->cur_h; y < p->height; y++)
+        memcpy(dst + y * dst_stride, src + y * src_stride, p->width * s->bpp);
 
-    // Perform blending
-    if (s->blend_op == APNG_BLEND_OP_SOURCE) {
-        for (y = s->y_offset; y < s->y_offset + s->cur_h; ++y) {
-            size_t row_start = s->image_linesize * y + s->bpp * s->x_offset;
-            memcpy(buffer + row_start, p->data[0] + row_start, s->bpp * s->cur_w);
-        }
-    } else { // APNG_BLEND_OP_OVER
+    if (s->blend_op == APNG_BLEND_OP_OVER) {
+        // Perform blending
         for (y = s->y_offset; y < s->y_offset + s->cur_h; ++y) {
-            uint8_t *foreground = p->data[0] + s->image_linesize * y + s->bpp * s->x_offset;
-            uint8_t *background = buffer + s->image_linesize * y + s->bpp * s->x_offset;
+            uint8_t       *foreground = dst + dst_stride * y + s->bpp * s->x_offset;
+            const uint8_t *background = src + src_stride * y + s->bpp * s->x_offset;
             for (x = s->x_offset; x < s->x_offset + s->cur_w; ++x, foreground += s->bpp, background += s->bpp) {
                 size_t b;
                 uint8_t foreground_alpha, background_alpha, output_alpha;
@@ -1135,18 +1145,17 @@  static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
                     break;
                 }
 
-                if (foreground_alpha == 0)
+                if (foreground_alpha == 255)
                     continue;
 
-                if (foreground_alpha == 255) {
-                    memcpy(background, foreground, s->bpp);
+                if (foreground_alpha == 0) {
+                    memcpy(foreground, background, s->bpp);
                     continue;
                 }
 
                 if (avctx->pix_fmt == AV_PIX_FMT_PAL8) {
                     // TODO: Alpha blending with PAL8 will likely need the entire image converted over to RGBA first
                     avpriv_request_sample(avctx, "Alpha blending palette samples");
-                    background[0] = foreground[0];
                     continue;
                 }
 
@@ -1164,15 +1173,11 @@  static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
                     }
                 }
                 output[b] = output_alpha;
-                memcpy(background, output, s->bpp);
+                memcpy(foreground, output, s->bpp);
             }
         }
     }
 
-    // Copy blended buffer into the frame and free
-    memcpy(p->data[0], buffer, s->image_linesize * s->height);
-    av_free(buffer);
-
     return 0;
 }