From patchwork Thu Feb 21 03:57:50 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Philip Langdale X-Patchwork-Id: 12125 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 4FBF04497CC for ; Thu, 21 Feb 2019 05:58:15 +0200 (EET) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 2B1EE68ABE7; Thu, 21 Feb 2019 05:58:15 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mail-ot1-f97.google.com (mail-ot1-f97.google.com [209.85.210.97]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 998BF68A538 for ; Thu, 21 Feb 2019 05:58:06 +0200 (EET) Received: by mail-ot1-f97.google.com with SMTP id c18so22190626otl.13 for ; Wed, 20 Feb 2019 19:58:06 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:dkim-signature:from:to:cc:subject:date :message-id:in-reply-to:references:mime-version :content-transfer-encoding; bh=aSWvbQnsEQaZ7G/N9GBixvLAqyROoqKQ9AaiR904XN0=; b=qRK0U9tsHb1ux4bomxART0nrFZy/1vicqn7HuPaKrOvSwRJcJSBHkgsRYsN8qLmOCP 6HodJQA87o2nTj9MtBLvPP5iapl+El02SGGHmuJy3AEImhYmCzPQjWkxJFx+xUID1gJE A6QjNiabrqBfl+XWFRKfieqKhZ33/2m+ydzSYxG9KDpjuZ6k2C2NC1TJQbX0Dr6l1Om7 D9Xy1UjPVqvoNv/6LKAumPbLe9BR2cyub3+24egY5IfkmYgrAm5/JbYa/VKzmF4WQiUq V4nB/mvEBIv3Ejuwz3m+6MAM+twjP6TG3Osk/BwJ6dXhj1abBIyyCzVs4+PZsPFfmX4S y7vQ== X-Gm-Message-State: AHQUAubVGAEQd7VdOSJO11rtSkuDfmp5NjB79kle4RYKeJJ/9nrevN/w wpJPA1uGDslRPGUrOvJWMPJkFogol3cPO1CKzbJS3LDJcZ1fWg== X-Google-Smtp-Source: AHgI3Ia6dtTJhsPVP7ANhN9HfhLHaU1yxifG1i7yLwFZrin1RAqCGrLa28jvK/wQn6IGehsjPGuoqvkbz+eT X-Received: by 2002:a9d:72c3:: with SMTP id d3mr12953316otk.71.1550721485061; Wed, 20 Feb 2019 19:58:05 -0800 (PST) Received: from 
mail.overt.org (155.208.178.107.bc.googleusercontent.com. [107.178.208.155]) by smtp-relay.gmail.com with ESMTPS id u60sm2018948otb.2.2019.02.20.19.58.04 for (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Wed, 20 Feb 2019 19:58:05 -0800 (PST) X-Relaying-Domain: gapps.overt.org Received: from authenticated-user (mail.overt.org [107.178.208.155]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by mail.overt.org (Postfix) with ESMTPSA id F32054027B; Wed, 20 Feb 2019 21:58:03 -0600 (CST) DKIM-Signature: v=1; a=rsa-sha256; c=simple/simple; d=overt.org; s=mail; t=1550721484; bh=gVFiN0TPnD964MBDGaGr/CTjYrzTl5OLLD+49NKlOk4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=JHxPmvP7LkCpv4addGMNUnwH2bzlJmIMpAbRzYf2ZszCHsfm7iai5eB9c/Jg8IJdg cyHu7rgzHG2ch8av8YQ0i9+ymJ4H57a03ZhDXEtwCHLQCz2qlchK1RYPR2n5Jqk3WS Vv8/NS47LHAnB7rWyXmXdAyAUQuNP8tmajUFzcoIgHjUaV7hIL6wGESOvDtyI0UR2R 3xdaEu2Z110MOz7+7fdi4pp23yhDflz8Ra7qqjDBRlJ/RVscV6Gp/nd0mcfHAMrIhP 9mV7+vlbY6XqTJ99wTw3dyrNcn7kaYpoWzGg72fqaHTb/4finVkF6r4JzAfuv+hWTD MjFAt+MHH9aDg== From: Philip Langdale To: ffmpeg-devel@ffmpeg.org Date: Wed, 20 Feb 2019 19:57:50 -0800 Message-Id: <20190221035753.27525-3-philipl@overt.org> In-Reply-To: <20190221035753.27525-1-philipl@overt.org> References: <20190221035753.27525-1-philipl@overt.org> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 2/5] avfilter/vf_yadif_cuda: Switch to using ffnvcodec X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: Philip Langdale Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" This change switches the vf_yadif_cuda filter from using the full CUDA SDK to using the ffnvcodec headers and loader. 
Signed-off-by: Philip Langdale --- configure | 2 +- libavfilter/vf_yadif_cuda.c | 58 ++++++++++++++++++++----------------- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/configure b/configure index 2219eb1515..a2890dc171 100755 --- a/configure +++ b/configure @@ -3526,7 +3526,7 @@ zscale_filter_deps="libzimg const_nan" scale_vaapi_filter_deps="vaapi" vpp_qsv_filter_deps="libmfx" vpp_qsv_filter_select="qsvvpp" -yadif_cuda_filter_deps="cuda_sdk" +yadif_cuda_filter_deps="ffnvcodec cuda_nvcc" # examples avio_dir_cmd_deps="avformat avutil" diff --git a/libavfilter/vf_yadif_cuda.c b/libavfilter/vf_yadif_cuda.c index 85e1aac5eb..141dcb17f7 100644 --- a/libavfilter/vf_yadif_cuda.c +++ b/libavfilter/vf_yadif_cuda.c @@ -18,9 +18,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include #include "libavutil/avassert.h" -#include "libavutil/hwcontext_cuda.h" +#include "libavutil/hwcontext_cuda_internal.h" #include "libavutil/cuda_check.h" #include "internal.h" #include "yadif.h" @@ -49,7 +48,7 @@ typedef struct DeintCUDAContext { #define BLOCKX 32 #define BLOCKY 16 -#define CHECK_CU(x) FF_CUDA_CHECK(ctx, x) +#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, CUdeviceptr prev, CUdeviceptr cur, CUdeviceptr next, @@ -64,6 +63,7 @@ static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, int parity, int tff) { DeintCUDAContext *s = ctx->priv; + CudaFunctions *cu = s->hwctx->internal->cuda_dl; CUtexObject tex_prev = 0, tex_cur = 0, tex_next = 0; int ret; int skip_spatial_check = s->yadif.mode&2; @@ -88,32 +88,32 @@ static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, }; res_desc.res.pitch2D.devPtr = (CUdeviceptr)prev; - ret = CHECK_CU(cuTexObjectCreate(&tex_prev, &res_desc, &tex_desc, NULL)); + ret = CHECK_CU(cu->cuTexObjectCreate(&tex_prev, &res_desc, &tex_desc, NULL)); if (ret < 0) goto exit; 
res_desc.res.pitch2D.devPtr = (CUdeviceptr)cur; - ret = CHECK_CU(cuTexObjectCreate(&tex_cur, &res_desc, &tex_desc, NULL)); + ret = CHECK_CU(cu->cuTexObjectCreate(&tex_cur, &res_desc, &tex_desc, NULL)); if (ret < 0) goto exit; res_desc.res.pitch2D.devPtr = (CUdeviceptr)next; - ret = CHECK_CU(cuTexObjectCreate(&tex_next, &res_desc, &tex_desc, NULL)); + ret = CHECK_CU(cu->cuTexObjectCreate(&tex_next, &res_desc, &tex_desc, NULL)); if (ret < 0) goto exit; - ret = CHECK_CU(cuLaunchKernel(func, - DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, - BLOCKX, BLOCKY, 1, - 0, s->stream, args, NULL)); + ret = CHECK_CU(cu->cuLaunchKernel(func, + DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, + BLOCKX, BLOCKY, 1, + 0, s->stream, args, NULL)); exit: if (tex_prev) - CHECK_CU(cuTexObjectDestroy(tex_prev)); + CHECK_CU(cu->cuTexObjectDestroy(tex_prev)); if (tex_cur) - CHECK_CU(cuTexObjectDestroy(tex_cur)); + CHECK_CU(cu->cuTexObjectDestroy(tex_cur)); if (tex_next) - CHECK_CU(cuTexObjectDestroy(tex_next)); + CHECK_CU(cu->cuTexObjectDestroy(tex_next)); return ret; } @@ -123,10 +123,11 @@ static void filter(AVFilterContext *ctx, AVFrame *dst, { DeintCUDAContext *s = ctx->priv; YADIFContext *y = &s->yadif; + CudaFunctions *cu = s->hwctx->internal->cuda_dl; CUcontext dummy; int i, ret; - ret = CHECK_CU(cuCtxPushCurrent(s->cu_ctx)); + ret = CHECK_CU(cu->cuCtxPushCurrent(s->cu_ctx)); if (ret < 0) return; @@ -179,10 +180,10 @@ static void filter(AVFilterContext *ctx, AVFrame *dst, parity, tff); } - CHECK_CU(cuStreamSynchronize(s->stream)); + CHECK_CU(cu->cuStreamSynchronize(s->stream)); exit: - CHECK_CU(cuCtxPopCurrent(&dummy)); + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); return; } @@ -192,10 +193,11 @@ static av_cold void deint_cuda_uninit(AVFilterContext *ctx) DeintCUDAContext *s = ctx->priv; YADIFContext *y = &s->yadif; - if (s->cu_module) { - CHECK_CU(cuCtxPushCurrent(s->cu_ctx)); - CHECK_CU(cuModuleUnload(s->cu_module)); - CHECK_CU(cuCtxPopCurrent(&dummy)); + if (s->hwctx 
&& s->cu_module) { + CudaFunctions *cu = s->hwctx->internal->cuda_dl; + CHECK_CU(cu->cuCtxPushCurrent(s->cu_ctx)); + CHECK_CU(cu->cuModuleUnload(s->cu_module)); + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); } av_frame_free(&y->prev); @@ -253,6 +255,7 @@ static int config_output(AVFilterLink *link) AVFilterContext *ctx = link->src; DeintCUDAContext *s = ctx->priv; YADIFContext *y = &s->yadif; + CudaFunctions *cu; int ret = 0; CUcontext dummy; @@ -266,6 +269,7 @@ static int config_output(AVFilterLink *link) s->hwctx = ((AVHWDeviceContext*)s->device_ref->data)->hwctx; s->cu_ctx = s->hwctx->cuda_ctx; s->stream = s->hwctx->stream; + cu = s->hwctx->internal->cuda_dl; link->hw_frames_ctx = av_hwframe_ctx_alloc(s->device_ref); if (!link->hw_frames_ctx) { @@ -313,32 +317,32 @@ static int config_output(AVFilterLink *link) y->csp = av_pix_fmt_desc_get(output_frames->sw_format); y->filter = filter; - ret = CHECK_CU(cuCtxPushCurrent(s->cu_ctx)); + ret = CHECK_CU(cu->cuCtxPushCurrent(s->cu_ctx)); if (ret < 0) goto exit; - ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_yadif_cuda_ptx)); + ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_yadif_cuda_ptx)); if (ret < 0) goto exit; - ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "yadif_uchar")); + ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "yadif_uchar")); if (ret < 0) goto exit; - ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "yadif_uchar2")); + ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "yadif_uchar2")); if (ret < 0) goto exit; - ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "yadif_ushort")); + ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "yadif_ushort")); if (ret < 0) goto exit; - ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "yadif_ushort2")); + ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "yadif_ushort2")); if (ret < 0) 
goto exit; exit: - CHECK_CU(cuCtxPopCurrent(&dummy)); + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); return ret; }