diff mbox

[FFmpeg-devel,v5,3/3] lavc/libdavs2.c: reduce memcpy

Message ID 1563779635-20749-4-git-send-email-hwrenx@126.com
State New
Headers show

Commit Message

hwren July 22, 2019, 7:13 a.m. UTC
Can effectively improve decoding speed when memcpy becomes a limitation
for processing high-resolution sources.
Tested on an i7-8700K with `ffmpeg -i 7680x4320.avs2 -vsync 0 -f null -`:
performance improved from 23 fps to 42 fps.

Signed-off-by: hwrenx <hwrenx@126.com>
---
 libavcodec/libdavs2.c | 52 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 29 insertions(+), 23 deletions(-)
diff mbox

Patch

diff --git a/libavcodec/libdavs2.c b/libavcodec/libdavs2.c
index 6d66209..32c07fe 100644
--- a/libavcodec/libdavs2.c
+++ b/libavcodec/libdavs2.c
@@ -73,13 +73,22 @@  static av_cold int davs2_init(AVCodecContext *avctx)
     return 0;
 }
 
+static void davs2_frame_unref(void *opaque, uint8_t *data) {
+    DAVS2Context    *cad = (DAVS2Context *)opaque;
+    davs2_picture_t  pic;
+
+    pic.magic = (davs2_picture_t *)data;
+
+    if (cad->decoder) {
+        davs2_decoder_frame_unref(cad->decoder, &pic);
+    }
+}
+
 static int davs2_dump_frames(AVCodecContext *avctx, davs2_picture_t *pic, int *got_frame,
                              davs2_seq_info_t *headerset, int ret_type, AVFrame *frame)
 {
     DAVS2Context *cad    = avctx->priv_data;
-    int bytes_per_sample = pic->bytes_per_sample;
-    int plane = 0;
-    int line  = 0;
+    int plane;
 
     if (!headerset) {
         *got_frame = 0;
@@ -117,29 +126,28 @@  static int davs2_dump_frames(AVCodecContext *avctx, davs2_picture_t *pic, int *g
         return AVERROR_EXTERNAL;
     }
 
-    for (plane = 0; plane < 3; ++plane) {
-        int size_line = pic->widths[plane] * bytes_per_sample;
-        frame->buf[plane]  = av_buffer_alloc(size_line * pic->lines[plane]);
-
-        if (!frame->buf[plane]){
-            av_log(avctx, AV_LOG_ERROR, "Decoder error: allocation failure, can't dump frames.\n");
-            return AVERROR(ENOMEM);
-        }
-
-        frame->data[plane]     = frame->buf[plane]->data;
-        frame->linesize[plane] = size_line;
-
-        for (line = 0; line < pic->lines[plane]; ++line)
-            memcpy(frame->data[plane] + line * size_line,
-                   pic->planes[plane] + line * pic->strides[plane],
-                   pic->widths[plane] * bytes_per_sample);
-    }
-
     frame->width     = cad->headerset.width;
     frame->height    = cad->headerset.height;
     frame->pts       = cad->out_frame.pts;
     frame->format    = avctx->pix_fmt;
 
+    /* handle the actual picture in magic */
+    frame->buf[0]    = av_buffer_create((uint8_t *)pic->magic,
+                                        sizeof(davs2_picture_t *),
+                                        davs2_frame_unref,
+                                        (void *)cad,
+                                        AV_BUFFER_FLAG_READONLY);
+    if (!frame->buf[0]) {
+        av_log(avctx, AV_LOG_ERROR,
+            "Decoder error: allocation failure, can't dump frames.\n");
+        return AVERROR(ENOMEM);
+    }
+
+    for (plane = 0; plane < 3; ++plane) {
+        frame->linesize[plane] = pic->strides[plane];
+        frame->data[plane] = pic->planes[plane];
+    }
+
     *got_frame = 1;
     return 0;
 }
@@ -171,7 +179,6 @@  static int send_delayed_frame(AVCodecContext *avctx, AVFrame *frame, int *got_fr
     }
     if (ret == DAVS2_GOT_FRAME) {
         ret = davs2_dump_frames(avctx, &cad->out_frame, got_frame, &cad->headerset, ret, frame);
-        davs2_decoder_frame_unref(cad->decoder, &cad->out_frame);
     }
     return ret;
 }
@@ -220,7 +227,6 @@  static int davs2_decode_frame(AVCodecContext *avctx, void *data,
 
     if (ret != DAVS2_DEFAULT) {
         ret = davs2_dump_frames(avctx, &cad->out_frame, got_frame, &cad->headerset, ret, frame);
-        davs2_decoder_frame_unref(cad->decoder, &cad->out_frame);
     }
 
     return ret == 0 ? buf_size : ret;