From patchwork Fri Aug 30 15:53:56 2019
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Lance Wang
X-Patchwork-Id: 14811
Return-Path:
X-Original-To: patchwork@ffaux-bg.ffmpeg.org
Delivered-To: patchwork@ffaux-bg.ffmpeg.org
Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100])
        by ffaux.localdomain (Postfix) with ESMTP id 1F75A44992E
        for ; Fri, 30 Aug 2019 19:01:33 +0300 (EEST)
Received: from [127.0.1.1] (localhost [127.0.0.1])
        by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id F0227687F34;
        Fri, 30 Aug 2019 19:01:32 +0300 (EEST)
X-Original-To: ffmpeg-devel@ffmpeg.org
Delivered-To: ffmpeg-devel@ffmpeg.org
Received: from mail-pf1-f193.google.com (mail-pf1-f193.google.com
        [209.85.210.193])
        by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id A04926808E9
        for ; Fri, 30 Aug 2019 19:01:26 +0300 (EEST)
Received: by mail-pf1-f193.google.com with SMTP id w26so4894409pfq.12
        for ; Fri, 30 Aug 2019 09:01:26 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
        d=gmail.com; s=20161025;
        h=from:to:cc:subject:date:message-id;
        bh=IZd8wAuFt4AwSFyJueybKYozGPKougvo/gKkIQVsd/k=;
        b=rDK6mYiG7faC2xeyIslnouZXtJfZ446duQSW99ua/M5AUgzlrs50osoA3pw+g4EmDJ
        KHFU/hBqRHKHtfsscvswjHL4TyjhK1t0v8Pkd8CPL36XvjlJIGKMZFBs69cTsvTXrRvw
        JWQALgni626Wrgs880+Y0iJCpJfL6X09t487HbYb0l7IFS3+Mts04c/D4qUkEMncpSpk
        PtJiNfvEVCriWaZ0+kNFzHjpuTjdNN4DEOTe9hUb37n9r3MyCx+KublIhwMVBAQar4dK
        pb9DprRsSCAN/V6T1zmPVhbqBHX+IAZRcCb3aXVqXa86z7OyNy9uH9jwi5jT9eJJUDb1
        aHYQ==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
        d=1e100.net; s=20161025;
        h=x-gm-message-state:from:to:cc:subject:date:message-id;
        bh=IZd8wAuFt4AwSFyJueybKYozGPKougvo/gKkIQVsd/k=;
        b=tyIpA6hVSJSohtQLVYEEvXWK9NM4KoiVr7xuJtor1ExdSURwBI4yjATccMCtQuE8rf
        IXFNtGJdZ2hpmVhZIrN7aF6pf4FLjmo6+2dm0MMykac0dp9sZlSr7OZq3HfyzDlBindC
        OnvU8NKbu32xBBd43X/M2cScItoj99uSm1F3na/pHB7C5Qowf2N7cuMYFPtUGoXcwBU/
        xE7/2XNacix/1EcwHjwC5pC3zHu2v+X0M9ndk5PARz+ILuUkz09EgZMM8WgDYAT0Nlvw
        0Pbzf+P7AqADrYUGnReu+BUBi8U49TMXlcA92GVZLzcdaru4UjVvOen49saae9MOwNTd
        2wOg==
X-Gm-Message-State: APjAAAWRF1DyoH3TVOGR/Fpwb1e71/MGcFHiRaLSYjLcnonYIrxmDs0Q
        N/Di9OVEixa2P7ohZLLjMn7mfoHc
X-Google-Smtp-Source: APXvYqzlq8+g5UxZjgKrE+qwDjwLxayCZEiLfLBfCt2BSl9i40hf3eKqemT5iSwiPct296UP/MvI2w==
X-Received: by 2002:a17:90a:d594:: with SMTP id v20mr3717952pju.2.1567180443866;
        Fri, 30 Aug 2019 08:54:03 -0700 (PDT)
Received: from vpn.localdomain ([47.90.99.151])
        by smtp.gmail.com with ESMTPSA id v20sm2899140pfm.63.2019.08.30.08.54.02
        (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128);
        Fri, 30 Aug 2019 08:54:03 -0700 (PDT)
From: lance.lmwang@gmail.com
To: ffmpeg-devel@ffmpeg.org
Date: Fri, 30 Aug 2019 23:53:56 +0800
Message-Id: <20190830155357.27747-1-lance.lmwang@gmail.com>
X-Mailer: git-send-email 2.9.5
Subject: [FFmpeg-devel] [PATCH v1 1/2] avcodec/v410dec: add the slice
        threading support
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.20
Precedence: list
List-Id: FFmpeg development discussions and patches
List-Unsubscribe: ,
List-Archive:
List-Post:
List-Help:
List-Subscribe: ,
Reply-To: FFmpeg development discussions and patches
Cc: Limin Wang
MIME-Version: 1.0
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel"

From: Limin Wang

Signed-off-by: Limin Wang
---
Note: the frame is split into thread_count horizontal bands. Each job
decodes only rows [slice_start, slice_end); the last job additionally
takes the (height % thread_count) remainder rows, so no two jobs write
the same output row.

 libavcodec/v410dec.c | 77 ++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 57 insertions(+), 20 deletions(-)

diff --git a/libavcodec/v410dec.c b/libavcodec/v410dec.c
index 48fab68..a9b17a0 100644
--- a/libavcodec/v410dec.c
+++ b/libavcodec/v410dec.c
@@ -24,6 +24,14 @@
 #include "libavutil/intreadwrite.h"
 #include "avcodec.h"
 #include "internal.h"
+#include "thread.h"
+
+#define MAX_SLICES 32
+typedef struct ThreadData {
+    AVFrame *frame;
+    uint8_t *buf;
+    int stride;
+} ThreadData;
 
 static av_cold int v410_decode_init(AVCodecContext *avctx)
 {
@@ -42,31 +50,30 @@ static av_cold int v410_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
-static int v410_decode_frame(AVCodecContext *avctx, void *data,
-                             int *got_frame, AVPacket *avpkt)
+static int v410_decode_slice(AVCodecContext *avctx, void *arg, int jobnr, int threadnr)
 {
-    AVFrame *pic = data;
-    uint8_t *src = avpkt->data;
+    ThreadData *td = arg;
+    AVFrame *pic = td->frame;
+    int stride = td->stride;
+    int thread_count = av_clip(avctx->thread_count, 1, MAX_SLICES);
+    int slice_h = avctx->height / thread_count;
+    int slice_m = avctx->height % thread_count;
+    int slice_start = jobnr * slice_h;
+    int slice_end = slice_start + slice_h;
+    const uint8_t *src = td->buf + stride * slice_start;
     uint16_t *y, *u, *v;
     uint32_t val;
-    int i, j, ret;
-
-    if (avpkt->size < 4 * avctx->height * avctx->width) {
-        av_log(avctx, AV_LOG_ERROR, "Insufficient input data.\n");
-        return AVERROR(EINVAL);
-    }
+    int i, j;
 
-    if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
-        return ret;
+    /* add the remaining slice for the last job */
+    if (jobnr == thread_count - 1)
+        slice_end += slice_m;
 
-    pic->key_frame = 1;
-    pic->pict_type = AV_PICTURE_TYPE_I;
+    y = (uint16_t*)pic->data[0] + slice_start * (pic->linesize[0] >> 1);
+    u = (uint16_t*)pic->data[1] + slice_start * (pic->linesize[1] >> 1);
+    v = (uint16_t*)pic->data[2] + slice_start * (pic->linesize[2] >> 1);
 
-    y = (uint16_t *)pic->data[0];
-    u = (uint16_t *)pic->data[1];
-    v = (uint16_t *)pic->data[2];
-
-    for (i = 0; i < avctx->height; i++) {
+    for (i = slice_start; i < slice_end; i++) {
         for (j = 0; j < avctx->width; j++) {
             val = AV_RL32(src);
 
@@ -82,6 +89,34 @@ static int v410_decode_frame(AVCodecContext *avctx, void *data,
         v += pic->linesize[2] >> 1;
     }
 
+    return 0;
+}
+
+static int v410_decode_frame(AVCodecContext *avctx, void *data,
+                             int *got_frame, AVPacket *avpkt)
+{
+    ThreadData td;
+    AVFrame *pic = data;
+    uint8_t *src = avpkt->data;
+    int ret;
+    int thread_count = av_clip(avctx->thread_count, 1, MAX_SLICES);
+
+    td.stride = avctx->width * 4;
+    if (avpkt->size < 4 * avctx->height * avctx->width) {
+        av_log(avctx, AV_LOG_ERROR, "Insufficient input data.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
+        return ret;
+
+    pic->key_frame = 1;
+    pic->pict_type = AV_PICTURE_TYPE_I;
+
+    td.buf = src;
+    td.frame = pic;
+    avctx->execute2(avctx, v410_decode_slice, &td, NULL, thread_count);
+
     *got_frame = 1;
 
     return avpkt->size;
@@ -94,5 +129,7 @@ AVCodec ff_v410_decoder = {
     .id           = AV_CODEC_ID_V410,
     .init         = v410_decode_init,
     .decode       = v410_decode_frame,
-    .capabilities = AV_CODEC_CAP_DR1,
+    .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS,
+    .caps_internal= FF_CODEC_CAP_INIT_THREADSAFE |
+                    FF_CODEC_CAP_INIT_CLEANUP,
 };