From patchwork Sun Oct 2 16:58:12 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Timo Rothenpieler X-Patchwork-Id: 833 Delivered-To: ffmpegpatchwork@gmail.com Received: by 10.103.140.66 with SMTP id o63csp1213831vsd; Sun, 2 Oct 2016 09:58:30 -0700 (PDT) X-Received: by 10.28.50.199 with SMTP id y190mr6439373wmy.61.1475427510573; Sun, 02 Oct 2016 09:58:30 -0700 (PDT) Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org. [79.124.17.100]) by mx.google.com with ESMTP id 20si14868713wmk.33.2016.10.02.09.58.26; Sun, 02 Oct 2016 09:58:30 -0700 (PDT) Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Authentication-Results: mx.google.com; dkim=neutral (body hash did not verify) header.i=@rothenpieler.org; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 607B8689CDD; Sun, 2 Oct 2016 19:58:11 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from btbn.de (btbn.de [5.9.118.179]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 55FDA689BD3 for ; Sun, 2 Oct 2016 19:58:05 +0300 (EEST) Received: from localhost.localdomain (ip4d173c0b.dynamic.kabel-deutschland.de [77.23.60.11]) by btbn.de (Postfix) with ESMTPSA id EA7A82A3F76; Sun, 2 Oct 2016 18:58:18 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=rothenpieler.org; s=mail; t=1475427499; bh=WnOn0PcksN8n9ZUHW/m2KLRuF2dV1p7koC9thS6soD0=; h=From:To:Cc:Subject:Date; b=BgjNsWEFRm0zlbA8L/GjOVyiiqlG/Xp360+iuEndeWxudes1+xIVG2TEmuKhPHJTz +1JoNMNi8406wkR5hhaLL1Et2i5PcElECkeQlG8/TiDutmmITNgXJpXDwJi26Zma5T rKjz5t0prPOSJ30zjKifhcsQ7WNOK86CPVQnZKgY= From: Timo Rothenpieler To: ffmpeg-devel@ffmpeg.org Date: Sun, 2 Oct 2016 18:58:12 +0200 Message-Id: <20161002165813.17366-1-timo@rothenpieler.org> X-Mailer: git-send-email 2.10.0 Subject: [FFmpeg-devel] [PATCH 1/2] avutil/hwcontext_cuda: align allocated frames X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: Timo Rothenpieler MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" --- libavutil/hwcontext_cuda.c | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c index 40d2971..706d195 100644 --- a/libavutil/hwcontext_cuda.c +++ b/libavutil/hwcontext_cuda.c @@ -16,6 +16,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavutil/intmath.h" + #include "buffer.h" #include "common.h" #include "hwcontext.h" @@ -35,6 +37,14 @@ static const enum AVPixelFormat supported_formats[] = { AV_PIX_FMT_YUV444P, }; +static unsigned int next_pow2(unsigned int a) +{ + if (a <= 1) + return 1; + a = 1 << (sizeof(a) * 8 - ff_clz(a - 1)); + return FFALIGN(a, 256); +} + static void cuda_buffer_free(void *opaque, uint8_t *data) { AVHWFramesContext *ctx = opaque; @@ -83,6 +93,7 @@ fail: static int cuda_frames_init(AVHWFramesContext *ctx) { CUDAFramesContext *priv = ctx->internal->priv; + int aligned_width = next_pow2(ctx->width); int i; for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) { @@ -103,10 +114,10 @@ static int cuda_frames_init(AVHWFramesContext *ctx) switch (ctx->sw_format) { case AV_PIX_FMT_NV12: case AV_PIX_FMT_YUV420P: - size = ctx->width * ctx->height * 3 / 2; + size = aligned_width * ctx->height * 3 / 2; break; case AV_PIX_FMT_YUV444P: - size = ctx->width * ctx->height * 3; + size = aligned_width * ctx->height * 3; break; } @@ -120,6 +131,8 @@ static int cuda_frames_init(AVHWFramesContext *ctx) static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) { + int aligned_width = next_pow2(ctx->width); + frame->buf[0] = av_buffer_pool_get(ctx->pool); if (!frame->buf[0]) return AVERROR(ENOMEM); @@ -127,25 +140,25 @@ static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) switch (ctx->sw_format) { case AV_PIX_FMT_NV12: frame->data[0] = frame->buf[0]->data; - frame->data[1] = frame->data[0] + ctx->width * ctx->height; - frame->linesize[0] = ctx->width; - frame->linesize[1] = ctx->width; + frame->data[1] = frame->data[0] + aligned_width * ctx->height; + frame->linesize[0] = aligned_width; + frame->linesize[1] = aligned_width; break; case AV_PIX_FMT_YUV420P: frame->data[0] = frame->buf[0]->data; - frame->data[2] = frame->data[0] + ctx->width * ctx->height; - frame->data[1] = frame->data[2] + ctx->width * ctx->height / 4; - frame->linesize[0] = ctx->width; - frame->linesize[1] = ctx->width / 2; - frame->linesize[2] = ctx->width / 2; + frame->data[2] = frame->data[0] + aligned_width * ctx->height; + frame->data[1] = frame->data[2] + aligned_width * ctx->height / 4; + frame->linesize[0] = aligned_width; + frame->linesize[1] = aligned_width / 2; + frame->linesize[2] = aligned_width / 2; break; case AV_PIX_FMT_YUV444P: frame->data[0] = frame->buf[0]->data; - frame->data[1] = frame->data[0] + ctx->width * ctx->height; - frame->data[2] = frame->data[1] + ctx->width * ctx->height; - frame->linesize[0] = ctx->width; - frame->linesize[1] = ctx->width; - frame->linesize[2] = ctx->width; + frame->data[1] = frame->data[0] + aligned_width * ctx->height; + frame->data[2] = frame->data[1] + aligned_width * ctx->height; + frame->linesize[0] = aligned_width; + frame->linesize[1] = aligned_width; + frame->linesize[2] = aligned_width; break; default: av_frame_unref(frame);