[FFmpeg-devel,v1,2/2] avcodec/v210dec: add slice threading support

Submitted by lance.lmwang@gmail.com on Aug. 30, 2019, 3:37 a.m.

Details

Message ID 20190830033752.26454-2-lance.lmwang@gmail.com
State New
Headers show

Commit Message

lance.lmwang@gmail.com Aug. 30, 2019, 3:37 a.m.
From: Limin Wang <lance.lmwang@gmail.com>

Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
---
 libavcodec/v210dec.c | 136 +++++++++++++++++++++++++++----------------
 libavcodec/v210dec.h |   1 +
 2 files changed, 87 insertions(+), 50 deletions(-)

Patch hide | download patch | download mbox

diff --git a/libavcodec/v210dec.c b/libavcodec/v210dec.c
index 6ce18aab5a..d3add8b45b 100644
--- a/libavcodec/v210dec.c
+++ b/libavcodec/v210dec.c
@@ -28,6 +28,7 @@ 
 #include "libavutil/internal.h"
 #include "libavutil/mem.h"
 #include "libavutil/intreadwrite.h"
+#include "thread.h"
 
 #define READ_PIXELS(a, b, c)         \
     do {                             \
@@ -37,6 +38,13 @@ 
         *c++ = (val >> 20) & 0x3FF;  \
     } while (0)
 
+#define MAX_SLICES 32
+typedef struct ThreadData { /* per-frame arguments handed to v210_decode_slice through execute2 */
+    AVFrame *frame;          /* output frame; each job derives its y/u/v row pointers from data[0..2] */
+    uint8_t *buf;            /* first packed input row; a job starts reading at buf + stride * slice_start */
+    int stride;              /* byte distance between consecutive input rows */
+} ThreadData;
+
 static void v210_planar_unpack_c(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width)
 {
     uint32_t val;
@@ -67,58 +75,32 @@  static av_cold int decode_init(AVCodecContext *avctx)
     s->aligned_input = 0;
     ff_v210dec_init(s);
 
+    s->slice_count = av_clip(avctx->thread_count, 1, MAX_SLICES);
     return 0;
 }
 
-static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
-                        AVPacket *avpkt)
+static int v210_decode_slice(AVCodecContext *avctx, void *arg, int jobnr, int nb_jobs)
 {
     V210DecContext *s = avctx->priv_data;
-
-    int h, w, ret, stride, aligned_input;
-    AVFrame *pic = data;
-    const uint8_t *psrc = avpkt->data;
+    int h, w;
+    ThreadData *td = arg;
+    AVFrame *frame = td->frame;
+    int stride = td->stride;
+    int slice_h = avctx->height / s->slice_count;
+    int slice_m = avctx->height % s->slice_count;
+    int slice_start = jobnr * slice_h;
+    int slice_end = slice_start + slice_h;
+    const uint8_t *psrc = td->buf + stride * slice_start;
     uint16_t *y, *u, *v;
 
-    if (s->custom_stride )
-        stride = s->custom_stride;
-    else {
-        int aligned_width = ((avctx->width + 47) / 48) * 48;
-        stride = aligned_width * 8 / 3;
-    }
+    /* add the remaining slice for the last job */
+    if (jobnr == s->slice_count - 1)
+        slice_end += slice_m;
 
-    if (avpkt->size < stride * avctx->height) {
-        if ((((avctx->width + 23) / 24) * 24 * 8) / 3 * avctx->height == avpkt->size) {
-            stride = avpkt->size / avctx->height;
-            if (!s->stride_warning_shown)
-                av_log(avctx, AV_LOG_WARNING, "Broken v210 with too small padding (64 byte) detected\n");
-            s->stride_warning_shown = 1;
-        } else {
-            av_log(avctx, AV_LOG_ERROR, "packet too small\n");
-            return AVERROR_INVALIDDATA;
-        }
-    }
-    if (avctx->codec_tag == MKTAG('C', '2', '1', '0')
-        && AV_RN32(psrc) == AV_RN32("INFO")
-        && avpkt->size - 64 >= stride * avctx->height)
-        psrc += 64;
-
-    aligned_input = !((uintptr_t)psrc & 0x1f) && !(stride & 0x1f);
-    if (aligned_input != s->aligned_input) {
-        s->aligned_input = aligned_input;
-        ff_v210dec_init(s);
-    }
-
-    if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
-        return ret;
-
-    y = (uint16_t*)pic->data[0];
-    u = (uint16_t*)pic->data[1];
-    v = (uint16_t*)pic->data[2];
-    pic->pict_type = AV_PICTURE_TYPE_I;
-    pic->key_frame = 1;
-
-    for (h = 0; h < avctx->height; h++) {
+    y = (uint16_t*)frame->data[0] + slice_start * frame->linesize[0] / 2;
+    u = (uint16_t*)frame->data[1] + slice_start * frame->linesize[1] / 2;
+    v = (uint16_t*)frame->data[2] + slice_start * frame->linesize[2] / 2;
+    for (h = slice_start; h < slice_end; h++) {
         const uint32_t *src = (const uint32_t*)psrc;
         uint32_t val;
 
@@ -154,16 +136,68 @@  static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         }
 
         psrc += stride;
-        y += pic->linesize[0] / 2 - avctx->width + (avctx->width & 1);
-        u += pic->linesize[1] / 2 - avctx->width / 2;
-        v += pic->linesize[2] / 2 - avctx->width / 2;
+        y += frame->linesize[0] / 2 - avctx->width + (avctx->width & 1);
+        u += frame->linesize[1] / 2 - avctx->width / 2;
+        v += frame->linesize[2] / 2 - avctx->width / 2;
+    }
+
+    return 0;
+}
+
+static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
+                        AVPacket *avpkt)
+{
+    V210DecContext *s = avctx->priv_data;
+    ThreadData td;
+    int ret, stride, aligned_input;
+    AVFrame *frame = data;
+    const uint8_t *psrc = avpkt->data;
+
+    if (s->custom_stride )
+        stride = s->custom_stride;
+    else {
+        int aligned_width = ((avctx->width + 47) / 48) * 48; /* round width up to a multiple of 48 */
+        stride = aligned_width * 8 / 3;
     }
+
+    if (avpkt->size < stride * avctx->height) {
+        if ((((avctx->width + 23) / 24) * 24 * 8) / 3 * avctx->height == avpkt->size) {
+            stride = avpkt->size / avctx->height; /* packet matches the 24-aligned layout: use its row size */
+            if (!s->stride_warning_shown)
+                av_log(avctx, AV_LOG_WARNING, "Broken v210 with too small padding (64 byte) detected\n");
+            s->stride_warning_shown = 1;
+        } else {
+            av_log(avctx, AV_LOG_ERROR, "packet too small\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+    td.stride = stride; /* store only now: the fallback above may have changed stride, and slice jobs read td.stride */
+    if (avctx->codec_tag == MKTAG('C', '2', '1', '0')
+        && AV_RN32(psrc) == AV_RN32("INFO")
+        && avpkt->size - 64 >= stride * avctx->height)
+        psrc += 64;
+
+    aligned_input = !((uintptr_t)psrc & 0x1f) && !(stride & 0x1f);
+    if (aligned_input != s->aligned_input) {
+        s->aligned_input = aligned_input;
+        ff_v210dec_init(s);
+    }
+
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+
+    frame->pict_type = AV_PICTURE_TYPE_I;
+    frame->key_frame = 1;
+
+    td.buf = (uint8_t*)psrc;
+    td.frame = frame;
+    avctx->execute2(avctx, v210_decode_slice, &td, NULL, s->slice_count);
 
     if (avctx->field_order > AV_FIELD_PROGRESSIVE) {
         /* we have interlaced material flagged in container */
-        pic->interlaced_frame = 1;
+        frame->interlaced_frame = 1;
         if (avctx->field_order == AV_FIELD_TT || avctx->field_order == AV_FIELD_TB)
-            pic->top_field_first = 1;
+            frame->top_field_first = 1;
     }
 
     *got_frame      = 1;
@@ -193,6 +227,8 @@  AVCodec ff_v210_decoder = {
     .priv_data_size = sizeof(V210DecContext),
     .init           = decode_init,
     .decode         = decode_frame,
-    .capabilities   = AV_CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS,
     .priv_class     = &v210dec_class,
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
+                      FF_CODEC_CAP_INIT_CLEANUP,
 };
diff --git a/libavcodec/v210dec.h b/libavcodec/v210dec.h
index cfdb29da09..35819437de 100644
--- a/libavcodec/v210dec.h
+++ b/libavcodec/v210dec.h
@@ -26,6 +26,7 @@ 
 typedef struct {
     AVClass *av_class;
     int custom_stride;
+    int slice_count;         // Number of slices for threaded operations
     int aligned_input;
     int stride_warning_shown;
     void (*unpack_frame)(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);