From patchwork Sun Jul 12 17:21:00 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: hanishkvc X-Patchwork-Id: 20975 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 71F0144AA22 for ; Sun, 12 Jul 2020 20:22:17 +0300 (EEST) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 45ED468A531; Sun, 12 Jul 2020 20:22:17 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mail-pg1-f170.google.com (mail-pg1-f170.google.com [209.85.215.170]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 5AB55689F05 for ; Sun, 12 Jul 2020 20:22:10 +0300 (EEST) Received: by mail-pg1-f170.google.com with SMTP id o13so4977851pgf.0 for ; Sun, 12 Jul 2020 10:22:10 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=WcZp2TMMaGX+ygkjNFqDcRhlGg4jjDGZ9j/S7xQqnRE=; b=tQmOSQpRKIQ8FI7nVYa6gDZHnB8URpk2pVK+vJ5dI+eyN9o1S2n+AUV5yVtxyMAj57 +Kp01WYDeXT+MlSMokQ9OkmVA3PK23I8I1cYHzr/f4AaWEaperRrrrLr0bO/eOarjfNJ BS5UNat8TDA2HD3k40xyQv/burqxbXE6wAoAKqrmQddAwTcTt3ujLvGbMJdw9DB/i1sH iWFlk9vGsT8/4fVKwIgC5RcHF0PZG7uEH+b8zrUOMybEP02fUduhAOPNfVEUJHBnPu+U cYE8iJt+vQDK+k5qHcNsYTtYJREncp9zRx1eR04vj0kVnLOpH8cNtnHjfsXV5w0kX1Ns Mt6w== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=WcZp2TMMaGX+ygkjNFqDcRhlGg4jjDGZ9j/S7xQqnRE=; b=ELy+0RgT1vrEHn+SK/hUR4mm1WPNuVdadhOUXLtqOz+281kQxAw8FEoeiCQV9Y3D1I Tqa8QN3aH1QyDYmEG1XC575q+5HFBAdxV/dzGLMj1CHniK3m7VSJulyY53tANsUk8tkX 
J6xl/WlhhbpxVqIztNCH1Xt8W2Jo3PGbvHARBtBlAz0IOilpKLLwGqB+glJFePrAgKJG 2iXfDa+MHa+anyFm3i+ZrYpZP4hSQKVjCxuJ4K1GkJDQxDVFnoOOXD6kJlVbqyIEYDz1 MMdQWYlxUxPhCPrAESHanY282Kj8ngEkJa90zg7gaKFW6rmBLijZuafU0GTJN9QrGDNW 3Lww== X-Gm-Message-State: AOAM532ZLRmnwlcESCz80777L4Mn0GBsyJczggCh1r8I0dfTeB0vnu3N K47w8RjkXT9RO0qn3/JfmRoVp9i9 X-Google-Smtp-Source: ABdhPJwIsggSZhM6hW33qLLNSAO88KIEZ1uYz0O6s2knG6I3zu0XULmUsCgTaz2dW1rwvCuV+OofUQ== X-Received: by 2002:a62:178e:: with SMTP id 136mr70555643pfx.180.1594574528041; Sun, 12 Jul 2020 10:22:08 -0700 (PDT) Received: from localhost.localdomain ([122.171.57.136]) by smtp.gmail.com with ESMTPSA id x3sm12244944pfn.154.2020.07.12.10.22.05 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Sun, 12 Jul 2020 10:22:07 -0700 (PDT) From: hanishkvc To: ffmpeg-devel@ffmpeg.org Date: Sun, 12 Jul 2020 22:51:00 +0530 Message-Id: <20200712172102.72406-2-hanishkvc@gmail.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20200712172102.72406-1-hanishkvc@gmail.com> References: <20200712172102.72406-1-hanishkvc@gmail.com> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH v08.01 1/3] KMSGrab: getfb2 format_modifier if user doesnt specify X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: hanishkvc Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" If user doesnt specify a format_modifier explicitly, then use GetFB2 to identify the format_modifier of the framebuffer being grabbed. This is supported on newer linux builds, where xf86drmMode.h has added support for GetFB2. 
--- Changelog | 1 + configure | 5 +++++ libavdevice/kmsgrab.c | 31 ++++++++++++++++++++++++++++++- 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/Changelog b/Changelog index 1bb9931c0d..20ba03ae8b 100644 --- a/Changelog +++ b/Changelog @@ -5,6 +5,7 @@ version : - AudioToolbox output device - MacCaption demuxer - PGX decoder +- kmsgrab GetFB2 format_modifier, if user doesnt specify version 4.3: diff --git a/configure b/configure index bdfd731602..3bbc51053c 100755 --- a/configure +++ b/configure @@ -2325,6 +2325,7 @@ HAVE_LIST=" $TYPES_LIST makeinfo makeinfo_html + drm_getfb2 opencl_d3d11 opencl_drm_arm opencl_drm_beignet @@ -6653,6 +6654,10 @@ if enabled vaapi; then check_type "va/va.h va/va_enc_vp9.h" "VAEncPictureParameterBufferVP9" fi +if enabled libdrm; then + check_pkg_config drm_getfb2 libdrm "xf86drm.h xf86drmMode.h" drmModeGetFB2 +fi + if enabled_all opencl libdrm ; then check_type "CL/cl_intel.h" "clCreateImageFromFdINTEL_fn" && enable opencl_drm_beignet diff --git a/libavdevice/kmsgrab.c b/libavdevice/kmsgrab.c index d0de774871..c7fa2343e3 100644 --- a/libavdevice/kmsgrab.c +++ b/libavdevice/kmsgrab.c @@ -239,6 +239,9 @@ static av_cold int kmsgrab_read_header(AVFormatContext *avctx) drmModePlaneRes *plane_res = NULL; drmModePlane *plane = NULL; drmModeFB *fb = NULL; +#if HAVE_DRM_GETFB2 + drmModeFB2 *fb2 = NULL; +#endif AVStream *stream; int err, i; @@ -364,6 +367,28 @@ static av_cold int kmsgrab_read_header(AVFormatContext *avctx) goto fail; } +#if HAVE_DRM_GETFB2 + fb2 = drmModeGetFB2(ctx->hwctx->fd, plane->fb_id); + if (!fb2) { + err = errno; + av_log(avctx, AV_LOG_ERROR, "Failed to get " + "framebuffer2 %"PRIu32": %s.\n", + plane->fb_id, strerror(err)); + err = AVERROR(err); + goto fail; + } + + av_log(avctx, AV_LOG_INFO, "Template framebuffer2 is %"PRIu32": " + "%"PRIu32"x%"PRIu32", pixel_format: 0x%"PRIx32", format_modifier: 0x%"PRIx64".\n", + fb2->fb_id, fb2->width, fb2->height, fb2->pixel_format, fb2->modifier); + + if 
(ctx->drm_format_modifier == DRM_FORMAT_MOD_INVALID) + ctx->drm_format_modifier = fb2->modifier; +#else + if (ctx->drm_format_modifier == DRM_FORMAT_MOD_INVALID) + ctx->drm_format_modifier = DRM_FORMAT_MOD_NONE; +#endif + stream = avformat_new_stream(avctx, NULL); if (!stream) { err = AVERROR(ENOMEM); @@ -408,6 +433,10 @@ fail: drmModeFreePlane(plane); if (fb) drmModeFreeFB(fb); +#if HAVE_DRM_GETFB2 + if (fb2) + drmModeFreeFB2(fb2); +#endif return err; } @@ -433,7 +462,7 @@ static const AVOption options[] = { { .i64 = AV_PIX_FMT_BGR0 }, 0, UINT32_MAX, FLAGS }, { "format_modifier", "DRM format modifier for framebuffer", OFFSET(drm_format_modifier), AV_OPT_TYPE_INT64, - { .i64 = DRM_FORMAT_MOD_NONE }, 0, INT64_MAX, FLAGS }, + { .i64 = DRM_FORMAT_MOD_INVALID}, 0, INT64_MAX, FLAGS }, { "crtc_id", "CRTC ID to define capture source", OFFSET(source_crtc), AV_OPT_TYPE_INT64, { .i64 = 0 }, 0, UINT32_MAX, FLAGS }, From patchwork Sun Jul 12 17:21:01 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: hanishkvc X-Patchwork-Id: 20976 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 532C144AA22 for ; Sun, 12 Jul 2020 20:22:19 +0300 (EEST) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 38B8A68A76A; Sun, 12 Jul 2020 20:22:19 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mail-pj1-f45.google.com (mail-pj1-f45.google.com [209.85.216.45]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id EF76668A687 for ; Sun, 12 Jul 2020 20:22:17 +0300 (EEST) Received: by mail-pj1-f45.google.com with SMTP id mn17so4981904pjb.4 for ; Sun, 12 Jul 2020 10:22:16 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; 
h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=4jBX3QfCEpEeOt8gjNRevw8a7CxSzA0rqtCQvMsTSJA=; b=KrTxAi4nlbMgr7gZwQiZkQNkLYXi4E/VmeJw8nOh7M5n/gYev908eZNVlRK86YWS+E pDu5BBcfkrKctFeo+PNlUC0xmBD6KfbeHoL0VPadpfbWSlv847R4f/ML2r+dpo2AX25q DbV1ih4xuAO+DP1Q2N16YJLUdscrhgrdfFmYbQPPHF7xAVIpUpwjYsVzV3n2FMf4hqTn bVqaHnm0Tk0eNIJ6XVtOByxRzoXitGMKPJdQjEdaUqP8EBW5/TrxzLsV2ZzFv0zEBXYG zzCYvIXSMP3vJYQdpxmOm9Y/RpUsTyELXMcoT9YXjL22rr1KMrsIlvzD4gERLjOdHHLe YzLw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=4jBX3QfCEpEeOt8gjNRevw8a7CxSzA0rqtCQvMsTSJA=; b=INaNS7+/alOs4EwoEJes/9TY6XSkpUMcR9pxWvTJSfak5xxJKJFhtkl7WpwL2pH/f6 phf1xfE/fChTN1aRGajLThBMxa+pFafPLPRpP73lLTodb0WeZQn1uyu88eb7/QrODGvQ uTe894X0Q0pZDupBZL40D3upiWxn20i/qI3dEBCQRTqmYvIJx2n3duQmLeGE7vBdrMNG hwwTT9LO8DmS3lkSRRIsCQfvZu16FhzNeSG7lmqkIcj9U9zk9vOj+WuzB2i43drs9/Gh yQ16wRJ6hX9b9rdnzHRZ61mV/re71mY6hHCxBtzew+n1fPreGPVAWwJFHL/uyFpNKsHu dqQw== X-Gm-Message-State: AOAM533g88EKHHWmFjQCqAkUIVDW6I1TsgpP/3sEl45nOdXNR5X71omP Na4NaaIZB1+Q1nZYOcyYFIbbJLDT X-Google-Smtp-Source: ABdhPJz2jz3e+jjngnSssoPPEnNmpBFf546gbZHO3Yb9oqFa0EV6uqpL0H/yjC/0DEPnbMUPrXJRsQ== X-Received: by 2002:a17:90a:bb84:: with SMTP id v4mr16708208pjr.162.1594574533969; Sun, 12 Jul 2020 10:22:13 -0700 (PDT) Received: from localhost.localdomain ([122.171.57.136]) by smtp.gmail.com with ESMTPSA id x3sm12244944pfn.154.2020.07.12.10.22.11 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Sun, 12 Jul 2020 10:22:13 -0700 (PDT) From: hanishkvc To: ffmpeg-devel@ffmpeg.org Date: Sun, 12 Jul 2020 22:51:01 +0530 Message-Id: <20200712172102.72406-3-hanishkvc@gmail.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20200712172102.72406-1-hanishkvc@gmail.com> References: <20200712172102.72406-1-hanishkvc@gmail.com> MIME-Version: 1.0 Subject: [FFmpeg-devel] 
[PATCH v08.01 2/3] fbtile tile/detile, hwcontext_drm detile NonLinear X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: hanishkvc Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" ** fbtile cpu based framebuffer tile/detile helpers Add helper routines which can be used to tile/detile framebuffer layouts between linear and a specified tile layout, using the cpu. Currently it supports the Legacy Intel Tile-X, Legacy Intel Tile-Y and Newer Intel Tile-Yf tiled layouts. The only pixel format currently supported is 32bit RGB. It internally contains fbtile_generic logic, which can be easily configured to support different kinds of tiling layouts, at the expense of some additional processing cycles, compared to developing custom (de)tiling logic. To support a new tile layout, one only needs to provide its tile walking parameters. Once that is done, both tiling and detiling of the new tile layout are handled automatically. Its basic functionality is exposed as a simple ff_fbtile_frame_copy function along with a few related functions. This is in turn used by hwcontext_drm. ** hwcontext_drm detile non linear layout, if possible If the framebuffer is a tiled layout, use the fbtile helper routines to try and detile it into linear layout, if supported by fbtile. It uses the format_modifier associated with the framebuffer to decide whether to apply detiling or not and in turn which specific detiling to apply. If the user is using kmsgrab, they will have to use the -format_modifier option of kmsgrab to force a specific detile logic, in case they don't want to use the original format_modifier related detiling. Or they could even use -format_modifier 0 to make hwcontext_drm bypass this detiling. 
** NOTE This depends on the related KMSGrab GetFB2 patch, for enhanced functionality, where detiling is automatically applied where possible. --- Changelog | 2 + libavutil/Makefile | 2 + libavutil/fbtile.c | 482 ++++++++++++++++++++++++++++++++++++++ libavutil/fbtile.h | 134 +++++++++++ libavutil/hwcontext_drm.c | 38 ++- 5 files changed, 657 insertions(+), 1 deletion(-) create mode 100644 libavutil/fbtile.c create mode 100644 libavutil/fbtile.h diff --git a/Changelog b/Changelog index 20ba03ae8b..0b48858da7 100644 --- a/Changelog +++ b/Changelog @@ -6,6 +6,8 @@ version : - MacCaption demuxer - PGX decoder - kmsgrab GetFB2 format_modifier, if user doesnt specify +- fbtile cpu based framebuffer tile/detile helpers (Intel TileX|Y|Yf) +- hwcontext_drm detiles non linear layouts, if possible version 4.3: diff --git a/libavutil/Makefile b/libavutil/Makefile index 9b08372eb2..9b58ac5980 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -84,6 +84,7 @@ HEADERS = adler32.h \ xtea.h \ tea.h \ tx.h \ + fbtile.h \ HEADERS-$(CONFIG_LZO) += lzo.h @@ -169,6 +170,7 @@ OBJS = adler32.o \ tx_float.o \ tx_double.o \ tx_int32.o \ + fbtile.o \ video_enc_params.o \ diff --git a/libavutil/fbtile.c b/libavutil/fbtile.c new file mode 100644 index 0000000000..f741e1aeca --- /dev/null +++ b/libavutil/fbtile.c @@ -0,0 +1,482 @@ +/* + * CPU based Framebuffer Generic Tile DeTile logic + * Copyright (c) 2020 C Hanish Menon + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "avutil.h" +#include "common.h" +#include "fbtile.h" +#if CONFIG_LIBDRM +#include +#endif + + +/** + * Ok return value + */ +#define FBT_OK 0 + + +enum FFFBTileLayout ff_fbtile_getlayoutid(enum FFFBTileFamily family, uint64_t familyTileType) +{ + enum FFFBTileLayout layout = FF_FBTILE_UNKNOWN; + + switch(family) { + case FF_FBTILE_FAMILY_DRM: +#if CONFIG_LIBDRM + switch(familyTileType) { + case DRM_FORMAT_MOD_LINEAR: + layout = FF_FBTILE_NONE; + break; + case I915_FORMAT_MOD_X_TILED: + layout = FF_FBTILE_INTEL_XGEN9; + break; + case I915_FORMAT_MOD_Y_TILED: + layout = FF_FBTILE_INTEL_YGEN9; + break; + case I915_FORMAT_MOD_Yf_TILED: + layout = FF_FBTILE_INTEL_YF; + break; + default: + layout = FF_FBTILE_UNKNOWN; + break; + } +#else + av_log(NULL, AV_LOG_WARNING, "fbtile:getlayoutid: family[%d] familyTileType[%ld]\n", family, familyTileType); +#endif + break; + default: + av_log(NULL, AV_LOG_WARNING, "fbtile:getlayoutid: unknown family[%d] familyTileType[%ld]\n", family, familyTileType); + } + av_log(NULL, AV_LOG_VERBOSE, "fbtile:getlayoutid: family[%d] familyTileType[%ld] maps to layoutid[%d]\n", family, familyTileType, layout); + return layout; +} + + +/** + * Supported pixel formats + * Currently only RGB based 32bit formats are specified + * TODO: Technically the logic is transparent to 16bit RGB formats also to a great extent + */ +const enum AVPixelFormat fbtilePixFormats[] = { + AV_PIX_FMT_RGB0, AV_PIX_FMT_0RGB, AV_PIX_FMT_BGR0, AV_PIX_FMT_0BGR, + AV_PIX_FMT_RGBA, AV_PIX_FMT_ARGB, AV_PIX_FMT_BGRA, AV_PIX_FMT_ABGR, + AV_PIX_FMT_NONE}; + +int ff_fbtile_checkpixformats(const enum AVPixelFormat srcPixFormat, const enum AVPixelFormat dstPixFormat) +{ + int errSrc = 1; + int errDst = 1; + for (int i = 0; 
fbtilePixFormats[i] != AV_PIX_FMT_NONE; i++) { + if (fbtilePixFormats[i] == srcPixFormat) + errSrc = 0; + if (fbtilePixFormats[i] == dstPixFormat) + errDst = 0; + } + return (errSrc | errDst); +} + + +/* + * Generic tile/detile logic + * The tile layout data is assumed to be tightly packed, with no gaps inbetween. + * However the logic does try to accomodate a src/dst linear layout memory, + * where there is possibly some additional bytes beyond the width in each line + * of pixel data. + */ + + +/** + * TileWalk Direction Change Entry + * Used to specify the tile walking of subtiles within a tile. + */ +struct FBTWDirChange { + int posOffset; + int xDelta; + int yDelta; +}; + + +/** + * TileWalk, Contains info required for a given tile walking. + * + * @field bytesPerPixel the bytes per pixel for the image + * @field subTileWidth the width of subtile within the tile, in pixels + * @field subTileHeight the height of subtile within the tile, in pixels + * @field tileWidth the width of the tile, in pixels + * @field tileHeight the height of the tile, in pixels + * @field numDirChanges the number of dir changes involved in tile walk + * @field dirChanges the array of dir changes for the tile walk required + */ +struct FBTileWalk { + int bytesPerPixel; + int subTileWidth, subTileHeight; + int tileWidth, tileHeight; + int numDirChanges; + struct FBTWDirChange dirChanges[]; +}; + + +/** + * Settings for Intel Tile-Yf framebuffer layout. 
+ * May need to swap the 4 pixel wide subtile, have to check doc bit more + */ +static struct FBTileWalk tyfTileWalk = { + .bytesPerPixel = 4, + .subTileWidth = 4, .subTileHeight = 8, + .tileWidth = 32, .tileHeight = 32, + .numDirChanges = 6, + .dirChanges = { {8, 4, 0}, {16, -4, 8}, {32, 4, -8}, {64, -12, 8}, {128, 4, -24}, {256, 4, -24} } + }; + +/** + * Setting for Intel Tile-X framebuffer layout + */ +static struct FBTileWalk txTileWalk = { + .bytesPerPixel = 4, + .subTileWidth = 128, .subTileHeight = 8, + .tileWidth = 128, .tileHeight = 8, + .numDirChanges = 1, + .dirChanges = { {8, 128, 0} } + }; + +/** + * Setting for Intel Tile-Y framebuffer layout + * Even thou a simple generic detiling logic doesnt require the + * dummy 256 posOffset entry. The pseudo parallel detiling based + * opti logic requires to know about the Tile boundry. + */ +static struct FBTileWalk tyTileWalk = { + .bytesPerPixel = 4, + .subTileWidth = 4, .subTileHeight = 32, + .tileWidth = 32, .tileHeight = 32, + .numDirChanges = 2, + .dirChanges = { {32, 4, 0}, {256, 4, 0} } + }; + + +/** + * Generic Logic to Tile/Detile between tiled and linear layout. 
+ * + * @param op whether to tile or detile + * @param w width of the image + * @param h height of the image + * @param dst the destination image buffer + * @param dstLineSize the size of each row in dst image, in bytes + * @param src the source image buffer + * @param srcLineSize the size of each row in src image, in bytes + * @param tw the structure which contains the tile walk parameters + * + * @return 0 if detiled, 1 if not + */ + + +/** + * _fbtile_generic_simple tile/detile layout + */ +static int _fbtile_generic_simple(enum FFFBTileOps op, + const int w, const int h, + uint8_t *dst, const int dstLineSize, + uint8_t *src, const int srcLineSize, + const int bytesPerPixel, + const int subTileWidth, const int subTileHeight, + const int tileWidth, const int tileHeight, + const int numDirChanges, const struct FBTWDirChange *dirChanges) +{ + int tO, lO; + int lX, lY; + int cSTL, nSTLines; + uint8_t *tld, *lin; + int tldLineSize, linLineSize; + const int subTileWidthBytes = subTileWidth*bytesPerPixel; + + if (op == FF_FBTILE_OPS_TILE) { + lin = src; + linLineSize = srcLineSize; + tld = dst; + tldLineSize = dstLineSize; + } else { + tld = src; + tldLineSize = srcLineSize; + lin = dst; + linLineSize = dstLineSize; + } + + // To keep things sane and simple tile layout is assumed to be tightly packed, + // so below check is a indirect logical assumption, even thou tldLineSize is not directly mappable at one level + if (w*bytesPerPixel != tldLineSize) { + av_log(NULL, AV_LOG_ERROR, "fbtile:genericsimp: w%dxh%d, tldLineSize%d, linLineSize%d\n", w, h, tldLineSize, linLineSize); + av_log(NULL, AV_LOG_ERROR, "fbtile:genericsimp: dont support tldLineSize | Pitch going beyond width\n"); + return AVERROR(EINVAL); + } + tO = 0; + lX = 0; + lY = 0; + nSTLines = (w*h)/subTileWidth; // numSubTileLines + cSTL = 0; // curSubTileLine + while (cSTL < nSTLines) { + lO = lY*linLineSize + lX*bytesPerPixel; +#ifdef DEBUG_FBTILE + av_log(NULL, AV_LOG_DEBUG, "fbtile:genericsimp: lX%d lY%d; 
lO%d, tO%d; %d/%d\n", lX, lY, lO, tO, cSTL, nSTLines); +#endif + + for (int k = 0; k < subTileHeight; k++) { + if (op == FF_FBTILE_OPS_TILE) { + memcpy(tld+tO+k*subTileWidthBytes, lin+lO+k*linLineSize, subTileWidthBytes); + } else { + memcpy(lin+lO+k*linLineSize, tld+tO+k*subTileWidthBytes, subTileWidthBytes); + } + } + tO = tO + subTileHeight*subTileWidthBytes; + + cSTL += subTileHeight; + for (int i=numDirChanges-1; i>=0; i--) { + if ((cSTL%dirChanges[i].posOffset) == 0) { + lX += dirChanges[i].xDelta; + lY += dirChanges[i].yDelta; + break; + } + } + if (lX >= w) { + lX = 0; + lY += tileHeight; + } + } + return FBT_OK; +} + + +static int fbtile_generic_simple(enum FFFBTileOps op, + const int w, const int h, + uint8_t *dst, const int dstLineSize, + uint8_t *src, const int srcLineSize, + const struct FBTileWalk *tw) +{ + return _fbtile_generic_simple(op, w, h, + dst, dstLineSize, src, srcLineSize, + tw->bytesPerPixel, + tw->subTileWidth, tw->subTileHeight, + tw->tileWidth, tw->tileHeight, + tw->numDirChanges, tw->dirChanges); +} + + +/** + * Generic tile/detile minimal optimised version. 
+ */ +static int _fbtile_generic_opti(enum FFFBTileOps op, + const int w, const int h, + uint8_t *dst, const int dstLineSize, + uint8_t *src, const int srcLineSize, + const int bytesPerPixel, + const int subTileWidth, const int subTileHeight, + const int tileWidth, const int tileHeight, + const int numDirChanges, const struct FBTWDirChange *dirChanges) +{ + int tO, lO, tOPrev; + int lX, lY; + int cSTL, nSTLines; + int curTileInRow, nTilesInARow; + uint8_t *tld, *lin; + int tldLineSize, linLineSize; + const int subTileWidthBytes = subTileWidth*bytesPerPixel; + int parallel = 1; + + if (op == FF_FBTILE_OPS_TILE) { + lin = src; + linLineSize = srcLineSize; + tld = dst; + tldLineSize = dstLineSize; + } else { + tld = src; + tldLineSize = srcLineSize; + lin = dst; + linLineSize = dstLineSize; + } + + if (w*bytesPerPixel != tldLineSize) { + av_log(NULL, AV_LOG_ERROR, "fbtile:genericopti: w%dxh%d, linLineSize%d, tldLineSize%d\n", w, h, linLineSize, tldLineSize); + av_log(NULL, AV_LOG_ERROR, "fbtile:genericopti: dont support tldLineSize | Pitch going beyond width\n"); + return AVERROR(EINVAL); + } + if (w%tileWidth != 0) { + av_log(NULL, AV_LOG_ERROR, "fbtile:genericopti:NotSupported:Width being non-mult Of TileWidth: width%d, tileWidth%d\n", w, tileWidth); + return AVERROR(EINVAL); + } + tO = 0; + tOPrev = 0; + lX = 0; + lY = 0; + nTilesInARow = w/tileWidth; + for (parallel=8; parallel>0; parallel--) { + if (nTilesInARow%parallel == 0) + break; + } + nSTLines = (w*h)/subTileWidth; // numSubTileLines + cSTL = 0; // curSubTileLine + curTileInRow = 0; + while (cSTL < nSTLines) { + lO = lY*linLineSize + lX*bytesPerPixel; +#ifdef DEBUG_FBTILE + av_log(NULL, AV_LOG_DEBUG, "fbtile:genericopti: lX%d lY%d; tO%d, lO%d; %d/%d\n", lX, lY, tO, lO, cSTL, nSTLines); +#endif + + // As many tiling layouts have subtile and walk sizes which are multiples of 4, + // so this loop has been unrolled to be multiples of 4, and speed up a bit. 
+ // If this condition is not satisfied, esp along vert dir, then use fbtile_generic_simple. + // (De)tile parallely and gain some speed by allowing reuse of some calcs and parallelism. + if (op == FF_FBTILE_OPS_DETILE) { + for (int k = 0; k < subTileHeight; k+=4) { + for (int p = 0; p < parallel; p++) { + int pTldOffset = p*tileWidth*tileHeight*bytesPerPixel; + int pLinOffset = p*tileWidth*bytesPerPixel; + memcpy(lin+lO+(k+0)*linLineSize+pLinOffset, tld+tO+(k+0)*subTileWidthBytes+pTldOffset, subTileWidthBytes); + memcpy(lin+lO+(k+1)*linLineSize+pLinOffset, tld+tO+(k+1)*subTileWidthBytes+pTldOffset, subTileWidthBytes); + memcpy(lin+lO+(k+2)*linLineSize+pLinOffset, tld+tO+(k+2)*subTileWidthBytes+pTldOffset, subTileWidthBytes); + memcpy(lin+lO+(k+3)*linLineSize+pLinOffset, tld+tO+(k+3)*subTileWidthBytes+pTldOffset, subTileWidthBytes); + } + } + } else { + for (int k = 0; k < subTileHeight; k+=4) { + for (int p = 0; p < parallel; p++) { + int pTldOffset = p*tileWidth*tileHeight*bytesPerPixel; + int pLinOffset = p*tileWidth*bytesPerPixel; + memcpy(tld+tO+(k+0)*subTileWidthBytes+pTldOffset, lin+lO+(k+0)*linLineSize+pLinOffset, subTileWidthBytes); + memcpy(tld+tO+(k+1)*subTileWidthBytes+pTldOffset, lin+lO+(k+1)*linLineSize+pLinOffset, subTileWidthBytes); + memcpy(tld+tO+(k+2)*subTileWidthBytes+pTldOffset, lin+lO+(k+2)*linLineSize+pLinOffset, subTileWidthBytes); + memcpy(tld+tO+(k+3)*subTileWidthBytes+pTldOffset, lin+lO+(k+3)*linLineSize+pLinOffset, subTileWidthBytes); + } + } + } + + tO = tO + subTileHeight*subTileWidthBytes; + cSTL += subTileHeight; + + for (int i=numDirChanges-1; i>=0; i--) { + if ((cSTL%dirChanges[i].posOffset) == 0) { + if (i == numDirChanges-1) { + curTileInRow += parallel; + lX = curTileInRow*tileWidth; + tO = tOPrev + tileWidth*tileHeight*bytesPerPixel*(parallel); + tOPrev = tO; + } else { + lX += dirChanges[i].xDelta; + } + lY += dirChanges[i].yDelta; + break; + } + } + if (lX >= w) { + lX = 0; + curTileInRow = 0; + lY += tileHeight; + if (lY >= 
h) { + break; + } + } + } + return FBT_OK; +} + + +static int fbtile_generic_opti(enum FFFBTileOps op, + const int w, const int h, + uint8_t *dst, const int dstLineSize, + uint8_t *src, const int srcLineSize, + const struct FBTileWalk *tw) +{ + return _fbtile_generic_opti(op, w, h, + dst, dstLineSize, src, srcLineSize, + tw->bytesPerPixel, + tw->subTileWidth, tw->subTileHeight, + tw->tileWidth, tw->tileHeight, + tw->numDirChanges, tw->dirChanges); +} + + +static int fbtile_conv(enum FFFBTileOps op, enum FFFBTileLayout layout, + int w, int h, + uint8_t *dst, int dstLineSize, + uint8_t *src, int srcLineSize, + int bytesPerPixel) +{ + static int logStateNone = 0; + static int logStateUnknown = 0; + + switch(layout) { + case FF_FBTILE_NONE: + av_log_once(NULL, AV_LOG_WARNING, AV_LOG_VERBOSE, &logStateNone, "fbtile:conv:FF_FBTILE_NONE: not (de)tiling\n"); + return AVERROR(EALREADY); + case FF_FBTILE_INTEL_XGEN9: + return fbtile_generic_opti(op, w, h, dst, dstLineSize, src, srcLineSize, &txTileWalk); + case FF_FBTILE_INTEL_YGEN9: + return fbtile_generic_opti(op, w, h, dst, dstLineSize, src, srcLineSize, &tyTileWalk); + case FF_FBTILE_INTEL_YF: + return fbtile_generic_opti(op, w, h, dst, dstLineSize, src, srcLineSize, &tyfTileWalk); + default: + av_log_once(NULL, AV_LOG_WARNING, AV_LOG_VERBOSE, &logStateUnknown, "fbtile:conv: unknown layout [%d] specified, not (de)tiling\n", layout); + return AVERROR(EINVAL); + } +} + + +/* + * Copy one AVFrame into another, in the process tiling or detiling as required, if possible. + * NOTE: Either the Source or the Destination AVFrame (i.e one of them) should be linear. + * NOTE: If the tiling layout is not understood, it falls back to av_frame_copy. 
+ */ +int ff_fbtile_frame_copy(AVFrame *dst, enum FFFBTileLayout dstTileLayout, AVFrame *src, enum FFFBTileLayout srcTileLayout, + enum FFFBTileFrameCopyStatus *status) +{ + int err; + + if (dstTileLayout == FF_FBTILE_NONE) { // i.e DeTile + err = ff_fbtile_checkpixformats(src->format, dst->format); + if (!err) { + err = fbtile_conv(FF_FBTILE_OPS_DETILE, srcTileLayout, + dst->width, dst->height, + dst->data[0], dst->linesize[0], + src->data[0], src->linesize[0], 4); + if (!err) { + *status = FF_FBTILE_FRAMECOPY_TILECOPY; + return FBT_OK; + } + } + } else if (srcTileLayout == FF_FBTILE_NONE) { // i.e Tile + err = ff_fbtile_checkpixformats(src->format, dst->format); + if (!err) { + err = fbtile_conv(FF_FBTILE_OPS_TILE, dstTileLayout, + src->width, src->height, + dst->data[0], dst->linesize[0], + src->data[0], src->linesize[0], 4); + if (!err) { + *status = FF_FBTILE_FRAMECOPY_TILECOPY; + return FBT_OK; + } + } + } else { + av_log(NULL, AV_LOG_WARNING, "fbtile:framecopy: both src [%d] and dst [%d] layouts cant be tiled\n", srcTileLayout, dstTileLayout); + } + *status = FF_FBTILE_FRAMECOPY_COPYONLY; + return av_frame_copy(dst, src); +} + + +// vim: set expandtab sts=4: // diff --git a/libavutil/fbtile.h b/libavutil/fbtile.h new file mode 100644 index 0000000000..83360952b1 --- /dev/null +++ b/libavutil/fbtile.h @@ -0,0 +1,134 @@ +/* + * CPU based Framebuffer Generic Tile DeTile logic + * Copyright (c) 2020 C Hanish Menon + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_FBTILE_H +#define AVUTIL_FBTILE_H + +#include +#include "libavutil/pixfmt.h" +#include "libavutil/frame.h" + +/** + * @file + * @brief CPU based Framebuffer tiler detiler + * @author C Hanish Menon + * @{ + */ + + +/** + * Enable printing of the tile walk + */ +//#define DEBUG_FBTILE 1 + + +/** + * The FBTile related operations + */ +enum FFFBTileOps { + FF_FBTILE_OPS_NONE, + FF_FBTILE_OPS_TILE, + FF_FBTILE_OPS_DETILE, + FF_FBTILE_OPS_UNKNOWN, +}; + +/** + * The FBTile layout families + * Used to help map from an external subsystem like say drm + * to fbtile's internal tile layout id. + */ +enum FFFBTileFamily { + FF_FBTILE_FAMILY_DRM, + FF_FBTILE_FAMILY_UNKNOWN, +}; + +/** + * The FBTile related Layouts + * This identifies the supported tile layouts + */ +enum FFFBTileLayout { + FF_FBTILE_NONE, // This also corresponds to linear layout + FF_FBTILE_INTEL_XGEN9, + FF_FBTILE_INTEL_YGEN9, + FF_FBTILE_INTEL_YF, + FF_FBTILE_UNKNOWN, +}; + +/** + * FBTile FrameCopy additional status + */ +enum FFFBTileFrameCopyStatus { + FF_FBTILE_FRAMECOPY_TILECOPY, + FF_FBTILE_FRAMECOPY_COPYONLY +}; + + +/** + * Identify equivalent fbtile tile layout id given an external subsystem's tile layout id. + * + * @param family identifies the subsystem + * @param familyTileType the tile layout id as defined by the subsystem + * + * @return the fbtile's equivalent tile layout id + */ +enum FFFBTileLayout ff_fbtile_getlayoutid(enum FFFBTileFamily family, uint64_t familyTileType); + + +/** + * Supported pixel formats by the fbtile logics + */ +extern const enum AVPixelFormat fbtilePixFormats[]; + +/** + * Check if the given pixel formats are supported by fbtile logic. 
+ * + * @param srcPixFormat pixel format of source image + * @param dstPixFormat pixel format of destination image + * + * @return 0 if supported, 1 if not + */ +int ff_fbtile_checkpixformats(const enum AVPixelFormat srcPixFormat, const enum AVPixelFormat dstPixFormat); + + +/** + * Copy one AVFrame into another, in the process tiling or detiling as required, if possible. + * NOTE: Either the Source or the Destination AVFrame (i.e one of them) should be linear. + * NOTE: If the tiling layout is not understood, it falls back to av_frame_copy. + * + * @param dst the destination avframe + * @param dstTileLayout the framebuffer tiling layout expected for the destination avframe + * @param src the source avframe + * @param srcTileLayout the framebuffer tiling layout of the source avframe + * @param status helps identify if only copy was done or (de)tile+copy was done + * + * @return 0 if copied. + */ +int ff_fbtile_frame_copy(AVFrame *dst, enum FFFBTileLayout dstTileLayout, + AVFrame *src, enum FFFBTileLayout srcTileLayout, + enum FFFBTileFrameCopyStatus *status); + + +/** + * @} + */ + +#endif /* AVUTIL_FBTILE_H */ +// vim: set expandtab sts=4: // diff --git a/libavutil/hwcontext_drm.c b/libavutil/hwcontext_drm.c index 32cbde82eb..c72ea405ca 100644 --- a/libavutil/hwcontext_drm.c +++ b/libavutil/hwcontext_drm.c @@ -21,6 +21,7 @@ #include #include +#include #include #include "avassert.h" @@ -28,6 +29,7 @@ #include "hwcontext_drm.h" #include "hwcontext_internal.h" #include "imgutils.h" +#include "fbtile.h" static void drm_device_free(AVHWDeviceContext *hwdev) @@ -185,6 +187,40 @@ static int drm_transfer_get_formats(AVHWFramesContext *ctx, return 0; } +/** + * As AVFrame doesnt support tile layout natively, so if detile is successful + * the same is notified to any other users by updating the corresponding + * hardware AVFrame's tile layout info. 
+ * If this is not needed, #define HWCTXDRM_SYNCRELATED_FORMATMODIFIER 0 + */ +#ifndef HWCTXDRM_SYNCRELATED_FORMATMODIFIER +#define HWCTXDRM_SYNCRELATED_FORMATMODIFIER 1 +#endif +static int drm_transfer_with_detile(const AVFrame *hwAVFrame, AVFrame *dst, AVFrame *src) +{ + int err; + uint64_t formatModifier; + enum FFFBTileLayout srcFBTileLayout, dstFBTileLayout; + enum FFFBTileFrameCopyStatus status; + AVDRMFrameDescriptor *drmFrame = NULL; + + srcFBTileLayout = FF_FBTILE_NONE; + dstFBTileLayout = FF_FBTILE_NONE; + if (hwAVFrame->format == AV_PIX_FMT_DRM_PRIME) { + drmFrame = (AVDRMFrameDescriptor*)hwAVFrame->data[0]; + formatModifier = drmFrame->objects[0].format_modifier; + srcFBTileLayout = ff_fbtile_getlayoutid(FF_FBTILE_FAMILY_DRM, formatModifier); + } + err = ff_fbtile_frame_copy(dst, dstFBTileLayout, src, srcFBTileLayout, &status); +#if HWCTXDRM_SYNCRELATED_FORMATMODIFIER + if (!err && (status == FF_FBTILE_FRAMECOPY_TILECOPY)) { + if (drmFrame != NULL) + drmFrame->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; + } +#endif + return err; +} + static int drm_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst, const AVFrame *src) { @@ -206,7 +242,7 @@ static int drm_transfer_data_from(AVHWFramesContext *hwfc, map->width = dst->width; map->height = dst->height; - err = av_frame_copy(dst, map); + err = drm_transfer_with_detile(src, dst, map); if (err) goto fail; From patchwork Sun Jul 12 17:21:02 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: hanishkvc X-Patchwork-Id: 20977 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 1C7D444AA22 for ; Sun, 12 Jul 2020 20:22:29 +0300 (EEST) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 05FF468A8F8; Sun, 12 Jul 2020 
20:22:29 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mail-pf1-f172.google.com (mail-pf1-f172.google.com [209.85.210.172]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 820B168A616 for ; Sun, 12 Jul 2020 20:22:22 +0300 (EEST) Received: by mail-pf1-f172.google.com with SMTP id x72so4931566pfc.6 for ; Sun, 12 Jul 2020 10:22:22 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=+D2qfcKM77kRojgzf5s+aIvKvwy+1ajZelOD6gw0xqw=; b=GMpnCAN52Y/GXnQrlgxQHHmo6ghT5tGqnGFkeQYlRDuqqlCeQs0h7gunsZhEqvYCh9 XejWRmBisQN7UwWsrumBLxQyBhpuGhxk4Liseo9nGORRaTLekngxxHbDmkEX36Lt7jr3 63mmIeh5I1MKyeE+70/A/VyBDlFY5rgqFo7zfHqYoG9mqTojO+uL2RPv3vwQYByaRr35 325JY7TJpLXbXbjmbieKwpoq0wJSvgpfh4u//E20GCQZS2ggDHu35UIxZ0IhThpChF2i yQQEZJRJPKsXwc5MFg3Y5zI2+n0sND6VN5mQcXSnLgXHaKEtYBOmjdEnsNBVkyzGXS8W 4xFg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=+D2qfcKM77kRojgzf5s+aIvKvwy+1ajZelOD6gw0xqw=; b=FHLIzobsZWhAukpqOlcOYRIUOG6R8AU4zl8WJC4Zrunt0NX+1X2KVI/WEjDv7nSlKA wKUdlhkyLFz4s42oMkKzlGfZ4dwzhf3GTXvTYPLLoHwLuD1uExHJ6oWsHiBtGKk2Vffq +0klNG5Pu4MeK1iS2muHrbOAWndcuWynOp9bqhgz/8ZCl3fOlUdats04l7VCMs1IPISv uQLE6s3/zVIpLB17E3Wdemf8hZdIa+iVTZ6FNS4mxLsLqqYhGO7EUCRzixFM0e1W+20s BHvSTGd0fczccKMVM3z67ZN0nSS1kLy5Kz8v4cfSObFzRjoJndHj7bjwzkppbLCq8l7Q 9I4Q== X-Gm-Message-State: AOAM532THcI4hFCLA9oA+g8aYvbS0+x0rjSCv6iQcZ7CZgYRcMWaswXS QUye0IxEfWWFUrXmB/Ij+1TofigM X-Google-Smtp-Source: ABdhPJx9au5rx6OHNDIh9ONDqZMA4rrXPRXlF2KbAHW3QJglhORUtKtgTPhLfDs0kaQfY9a5T23dYg== X-Received: by 2002:a63:935c:: with SMTP id w28mr36865218pgm.174.1594574540004; Sun, 12 Jul 2020 10:22:20 -0700 (PDT) Received: from localhost.localdomain ([122.171.57.136]) by smtp.gmail.com with 
ESMTPSA id x3sm12244944pfn.154.2020.07.12.10.22.17 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Sun, 12 Jul 2020 10:22:19 -0700 (PDT) From: hanishkvc To: ffmpeg-devel@ffmpeg.org Date: Sun, 12 Jul 2020 22:51:02 +0530 Message-Id: <20200712172102.72406-4-hanishkvc@gmail.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20200712172102.72406-1-hanishkvc@gmail.com> References: <20200712172102.72406-1-hanishkvc@gmail.com> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH v08.01 3/3] VF fbtiler (tile/detile) and hwdownload (detile) X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: hanishkvc Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" fbtiler videofilter cpu based framebuffer tiling/detiling This adds a video filter called fbtiler, which allows the user to tile/detile framebuffer layout between linear and tiled layouts, if required. It uses the fbtile helper routines to achieve its functionality. This is useful, if a) the user doesnt want to apply detiling when capturing some content/framebuffer which is tiled. OR b) the user already has tiled content with them. OR c) a developer wants to experiment with tiled data. OR d) user/developer wants to tile a linear layout. hwdownload detile framebuffer, if requested by user Added logic to support detiling of framebuffer. By default this is disabled. Only if requested by the user, the logic will be triggered. It uses the fbtile helper routines to do the detiling. Currently 32bit RGB pixel format based framebuffers are supported. If the underlying hardware context provides linear layouts, then nothing is done. However if underlying hardware context generates tiled layout, then user can use this to detile, where possible. 
./ffmpeg -f kmsgrab -i - -vf hwdownload=1,format=bgr0 out.mp4 NOTE: This is an optional patch, provided for reference, if one wants to experiment using the fbtiler video filter in this. This depends on the fbtile patch being applied. --- Changelog | 2 + doc/filters.texi | 103 +++++++++++++++ libavfilter/Makefile | 1 + libavfilter/allfilters.c | 1 + libavfilter/vf_fbtiler.c | 245 ++++++++++++++++++++++++++++++++++++ libavfilter/vf_hwdownload.c | 60 ++++++++- 6 files changed, 410 insertions(+), 2 deletions(-) create mode 100644 libavfilter/vf_fbtiler.c diff --git a/Changelog b/Changelog index 0b48858da7..affc852641 100644 --- a/Changelog +++ b/Changelog @@ -8,6 +8,8 @@ version : - kmsgrab GetFB2 format_modifier, if user doesnt specify - fbtile cpu based framebuffer tile/detile helpers (Intel TileX|Y|Yf) - hwcontext_drm detiles non linear layouts, if possible +- hwdownload framebuffer layout detiling, if requested +- fbtiler cpu based framebuffer layout tile/detile video filter version 4.3: diff --git a/doc/filters.texi b/doc/filters.texi index ad2448acb2..e242304e3d 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -12105,6 +12105,23 @@ Not all formats will be supported on the output - it may be necessary to insert an additional @option{format} filter immediately following in the graph to get the output in a supported format. +It supports the following optional parameters: + +@table @option +@item fbdetile +Specify the type of CPU based framebuffer layout detiling to apply. The supported values are +@table @var +@item 0 +Don't do software detiling (the default). +@item 1 +Intel tile-x to linear conversion. +@item 2 +Intel tile-y to linear conversion. +@item 3 +Intel tile-yf to linear conversion. +@end table +@end table + @section hwmap Map hardware frames to system memory or to another device. 
@@ -12218,6 +12235,92 @@ It accepts the following optional parameters: The number of the CUDA device to use @end table +@anchor{fbtiler} +@section fbtiler + +Tile/Detile the Framebuffer between tile layout and linear layout using CPU. + +Currently supports conversion to|from Intel legacy tile-x|tile-y as well as +the newer Intel tile-yf layouts and the linear layout. This is useful if +one is using kmsgrab and hwdownload to capture a screen which is using one +of these non-linear layouts. It can also be used to generate a tiled layout. + +It provides a generic tiling|detiling logic, which can be easily configured +to tile|detile many different tiling schemes if required in future. One is +only required to specify the tile walk parameters for the new tiling layout. + +Currently it expects the data to be a 32bit RGB based pixel format. However, +the logic does not do any pixel format conversion. Support for 16bit RGB +data may be enabled later, as the logic is transparent to it at one level. + +One could either insert this into the filter chain while capturing itself, +or else, if it is slowing things down, then one could instead insert +it into the filter chain during playback or transcoding. + +It supports the following parameters: + +@table @option +@item op +Specify whether to apply tiling or detiling. The supported values are +@table @var +@item 0 +Don't do any operation, just pass through (the default). +@item 1 +Apply tiling operation. +@item 2 +Apply detiling operation. +@end table +@item layout +Specify which framebuffer layout to work with for conversion. The supported values are +@table @var +@item 0 +Don't do any tiling/detiling. +@item 1 +Conversion between Intel tile-x and linear (the default). +@item 2 +Conversion between Intel tile-y and linear. +@item 3 +Conversion between Intel tile-yf and linear. 
+@end table +@end table + +If one wants to convert during capture itself, one could do +@example +ffmpeg -f kmsgrab -i - -vf "hwdownload,format=bgr0,fbtiler=op=2:layout=1" OUTPUT +@end example + +However, if one wants to convert after the tiled data has already been captured +@example +ffmpeg -i INPUT -vf "fbtiler=op=2" OUTPUT +@end example +@example +ffplay -i INPUT -vf "fbtiler=op=2" +@end example + +NOTE: While transcoding a test 1080p h264 stream, with 276 frames, below were +the average times taken by the different detile logics. +@example +rm out.mp4; time ./ffmpeg -i input.mp4 out.mp4 +rm out.mp4; time ./ffmpeg -i input.mp4 -vf fbtiler=op=2:layout=1 out.mp4 +rm out.mp4; time ./ffmpeg -i input.mp4 -vf fbtiler=op=2:layout=2 out.mp4 +rm out.mp4; time ./ffmpeg -i input.mp4 -vf fbtiler=op=2:layout=3 out.mp4 +@end example +@table @option +@item with no filters +it took ~07.28 secs, i5-8th Gen +it took ~09.95 secs, i7-7th Gen +@item with fbtiler=op=0:layout=0 filter, pass through +it took ~12.70 secs, i7-7th Gen +@item with fbtiler=op=2:layout=1 filter, Intel Tile-X +it took ~08.69 secs, i5-8th Gen +it took ~13.35 secs, i7-7th Gen +@item with fbtiler=op=2:layout=2 filter, Intel Tile-Y +it took ~09.20 secs, i5-8th Gen +it took ~13.65 secs, i7-7th Gen +@item with fbtiler=op=2:layout=3 filter, Intel Tile-Yf +it took ~13.75 secs, i7-7th Gen +@end table + @section hqx Apply a high-quality magnification filter designed for pixel art. 
This filter diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 5123540653..e9ac9b1c28 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -280,6 +280,7 @@ OBJS-$(CONFIG_HWDOWNLOAD_FILTER) += vf_hwdownload.o OBJS-$(CONFIG_HWMAP_FILTER) += vf_hwmap.o OBJS-$(CONFIG_HWUPLOAD_CUDA_FILTER) += vf_hwupload_cuda.o OBJS-$(CONFIG_HWUPLOAD_FILTER) += vf_hwupload.o +OBJS-$(CONFIG_FBTILER_FILTER) += vf_fbtiler.o OBJS-$(CONFIG_HYSTERESIS_FILTER) += vf_hysteresis.o framesync.o OBJS-$(CONFIG_IDET_FILTER) += vf_idet.o OBJS-$(CONFIG_IL_FILTER) += vf_il.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 1183e40267..4ec80e77f1 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -265,6 +265,7 @@ extern AVFilter ff_vf_hwdownload; extern AVFilter ff_vf_hwmap; extern AVFilter ff_vf_hwupload; extern AVFilter ff_vf_hwupload_cuda; +extern AVFilter ff_vf_fbtiler; extern AVFilter ff_vf_hysteresis; extern AVFilter ff_vf_idet; extern AVFilter ff_vf_il; diff --git a/libavfilter/vf_fbtiler.c b/libavfilter/vf_fbtiler.c new file mode 100644 index 0000000000..1cf39ef81f --- /dev/null +++ b/libavfilter/vf_fbtiler.c @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2020 HanishKVC + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Tile or Detile the Frame buffer using cpu + * Currently it supports the following layouts + * legacy Intel Tile-X + * legacy Intel Tile-Y + * newer Intel Tile-Yf + * It uses the fbtile helper library to do its job. + * More tiling layouts can be easily supported by adding configuration data + * for tile walking into fbtile library or its tile|detile_generic function. + * + */ + +/* + * ToThink|Check: Optimisations + * + * Does gcc setting used by ffmpeg allows memcpy | stringops inlining, + * loop unrolling, better native matching instructions, additional + * optimisations, ... + * + * Does gcc map to optimal memcpy logic, based on the situation it is + * used in i.e like + * based on size of transfer, alignment, architecture, etc + * a suitable combination of inlining and or rep movsb and or + * simd load/store and or unrolling and or ... + * + * If not, may be look at vector_size or intrinsics or appropriate arch + * and cpu specific inline asm or ... 
+ * + */ + +/* + * Performance check results on i7-7500u + * + * Run Type : Layout : Seconds Min, Max : TSCCnt Min, Max + * Non filter run: : 10.04s, 09.97s : 00.00M, 00.00M + * fbdetile=0 run: PasThro: 12.70s, 13.20s : 00.00M, 00.00M + * fbdetile=1 run: TileX : 13.34s, 13.52s : 06.13M, 06.20M ; Opti generic + * fbdetile=2 run: TileY : 13.59s, 13.68s : 08.60M, 08.97M ; Opti generic + * fbdetile=3 run: TileYf : 13.73s, 13.83s : 09.82M, 09.92M ; Opti generic + * The Older logics + * fbdetile=2 run: TileX : 12.45s, 13.41s : 05.95M, 06.05M ; prev custom + * fbdetile=3 run: TileY : 13.47s, 13.89s : 06.31M, 06.38M ; prev custom + * fbdetile=4 run: TileYf : 13.73s, 13.83s : 11.41M, 11.83M ; Simple generic + */ + +#include "libavutil/avassert.h" +#include "libavutil/imgutils.h" +#include "libavutil/opt.h" +#include "libavutil/fbtile.h" +#include "avfilter.h" +#include "formats.h" +#include "internal.h" +#include "video.h" + + +// Print time taken by tile/detile using performance counter +#if ARCH_X86 +#define DEBUG_PERF 1 +#else +#undef DEBUG_PERF +#endif + +#ifdef DEBUG_PERF +#include +uint64_t perfTime = 0; +int perfCnt = 0; +#endif + +typedef struct FBTilerContext { + const AVClass *class; + int width, height; + int layout; + int op; +} FBTilerContext; + +#define OFFSET(x) offsetof(FBTilerContext, x) +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM +static const AVOption fbtiler_options[] = { + { "layout", "set framebuffer tile|format_modifier layout", OFFSET(layout), AV_OPT_TYPE_INT, {.i64=FF_FBTILE_INTEL_XGEN9}, 0, FF_FBTILE_UNKNOWN-1, FLAGS, "layout" }, + { "None", "Linear layout", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_NONE}, INT_MIN, INT_MAX, FLAGS, "layout" }, + { "intelx", "Intel Tile-X layout", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_INTEL_XGEN9}, INT_MIN, INT_MAX, FLAGS, "layout" }, + { "intely", "Intel Tile-Y layout", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_INTEL_YGEN9}, INT_MIN, INT_MAX, FLAGS, "layout" }, + { "intelyf", "Intel Tile-Yf layout", 
0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_INTEL_YF}, INT_MIN, INT_MAX, FLAGS, "layout" }, + { "op", "select framebuffer tiling operations i.e tile|detile", OFFSET(op), AV_OPT_TYPE_INT, {.i64=FF_FBTILE_OPS_NONE}, 0, FF_FBTILE_OPS_UNKNOWN-1, FLAGS, "op" }, + { "None", "Nop", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_OPS_NONE}, INT_MIN, INT_MAX, FLAGS, "op" }, + { "tile", "Apply tiling operation", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_OPS_TILE}, INT_MIN, INT_MAX, FLAGS, "op" }, + { "detile", "Apply detiling operation", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_OPS_DETILE}, INT_MIN, INT_MAX, FLAGS, "op" }, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(fbtiler); + +static av_cold int init(AVFilterContext *ctx) +{ + FBTilerContext *fbtiler = ctx->priv; + + if (fbtiler->op == FF_FBTILE_OPS_NONE) { + av_log(ctx, AV_LOG_INFO, "init:Op: None, Pass through\n"); + } else if (fbtiler->op == FF_FBTILE_OPS_TILE) { + av_log(ctx, AV_LOG_INFO, "init:Op: Apply tiling\n"); + } else if (fbtiler->op == FF_FBTILE_OPS_DETILE) { + av_log(ctx, AV_LOG_INFO, "init:Op: Apply detiling\n"); + } else { + av_log(ctx, AV_LOG_ERROR, "init:Op: Unknown, shouldnt reach here\n"); + } + + if (fbtiler->layout == FF_FBTILE_NONE) { + av_log(ctx, AV_LOG_INFO, "init:Layout: pass through\n"); + } else if (fbtiler->layout == FF_FBTILE_INTEL_XGEN9) { + av_log(ctx, AV_LOG_INFO, "init:Layout: Intel tile-x\n"); + } else if (fbtiler->layout == FF_FBTILE_INTEL_YGEN9) { + av_log(ctx, AV_LOG_INFO, "init:Layout: Intel tile-y\n"); + } else if (fbtiler->layout == FF_FBTILE_INTEL_YF) { + av_log(ctx, AV_LOG_INFO, "init:Layout: Intel tile-yf\n"); + } else { + av_log(ctx, AV_LOG_ERROR, "init: Unknown Tile format specified, shouldnt reach here\n"); + } + fbtiler->width = 1920; + fbtiler->height = 1088; + return 0; +} + +static int query_formats(AVFilterContext *ctx) +{ + AVFilterFormats *fmts_list; + + fmts_list = ff_make_format_list(fbtilePixFormats); + if (!fmts_list) + return AVERROR(ENOMEM); + return ff_set_common_formats(ctx, fmts_list); 
+} + +static int config_props(AVFilterLink *inlink) +{ + AVFilterContext *ctx = inlink->dst; + FBTilerContext *fbtiler = ctx->priv; + + fbtiler->width = inlink->w; + fbtiler->height = inlink->h; + av_log(ctx, AV_LOG_INFO, "config_props: %d x %d\n", fbtiler->width, fbtiler->height); + + return 0; +} + + +static int filter_frame(AVFilterLink *inlink, AVFrame *in) +{ + AVFilterContext *ctx = inlink->dst; + FBTilerContext *fbtiler = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + AVFrame *out; + enum FFFBTileFrameCopyStatus status; + + if ((fbtiler->op == FF_FBTILE_OPS_NONE) || (fbtiler->layout == FF_FBTILE_NONE)) + return ff_filter_frame(outlink, in); + + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + av_frame_free(&in); + return AVERROR(ENOMEM); + } + av_frame_copy_props(out, in); + +#ifdef DEBUG_PERF + unsigned int tscArg; + uint64_t perfStart = __rdtscp(&tscArg); +#endif + + if (fbtiler->op == FF_FBTILE_OPS_DETILE) + ff_fbtile_frame_copy(out, FF_FBTILE_NONE, in, fbtiler->layout, &status); + else + ff_fbtile_frame_copy(out, fbtiler->layout, in, FF_FBTILE_NONE, &status); + +#ifdef DEBUG_PERF + uint64_t perfEnd = __rdtscp(&tscArg); + perfTime += (perfEnd - perfStart); + perfCnt += 1; +#endif + + av_frame_free(&in); + return ff_filter_frame(outlink, out); +} + +static av_cold void uninit(AVFilterContext *ctx) +{ +#ifdef DEBUG_PERF + if (perfCnt == 0) + perfCnt = 1; + av_log(ctx, AV_LOG_INFO, "uninit:perf: AvgTSCCnt %ld\n", perfTime/perfCnt); +#endif +} + +static const AVFilterPad fbtiler_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_props, + .filter_frame = filter_frame, + }, + { NULL } +}; + +static const AVFilterPad fbtiler_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + }, + { NULL } +}; + +AVFilter ff_vf_fbtiler = { + .name = "fbtiler", + .description = NULL_IF_CONFIG_SMALL("Tile|Detile Framebuffer using CPU"), + .priv_size = sizeof(FBTilerContext), + .init = 
init, + .uninit = uninit, + .query_formats = query_formats, + .inputs = fbtiler_inputs, + .outputs = fbtiler_outputs, + .priv_class = &fbtiler_class, + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, +}; + +// vim: set expandtab sts=4: // diff --git a/libavfilter/vf_hwdownload.c b/libavfilter/vf_hwdownload.c index 33af30cf40..ee057bc101 100644 --- a/libavfilter/vf_hwdownload.c +++ b/libavfilter/vf_hwdownload.c @@ -22,6 +22,7 @@ #include "libavutil/mem.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" +#include "libavutil/fbtile.h" #include "avfilter.h" #include "formats.h" @@ -33,8 +34,20 @@ typedef struct HWDownloadContext { AVBufferRef *hwframes_ref; AVHWFramesContext *hwframes; + int fbdetile; } HWDownloadContext; +#define OFFSET(x) offsetof(HWDownloadContext, x) +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM +static const AVOption hwdownload_options[] = { + { "fbdetile", "set framebuffer detile layout info", OFFSET(fbdetile), AV_OPT_TYPE_INT, {.i64=FF_FBTILE_NONE}, 0, FF_FBTILE_UNKNOWN-1, FLAGS, "fbdetile" }, + { "none", "Pass through", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_NONE}, INT_MIN, INT_MAX, FLAGS, "fbdetile" }, + { "intelx", "Intel Tile-X layout", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_INTEL_XGEN9}, INT_MIN, INT_MAX, FLAGS, "fbdetile" }, + { "intely", "Intel Tile-Y layout", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_INTEL_YGEN9}, INT_MIN, INT_MAX, FLAGS, "fbdetile" }, + { "intelyf", "Intel Tile-Yf layout", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_INTEL_YF}, INT_MIN, INT_MAX, FLAGS, "fbdetile" }, + { NULL } +}; + static int hwdownload_query_formats(AVFilterContext *avctx) { AVFilterFormats *infmts = NULL; @@ -64,6 +77,7 @@ static int hwdownload_query_formats(AVFilterContext *avctx) static int hwdownload_config_input(AVFilterLink *inlink) { + int err; AVFilterContext *avctx = inlink->dst; HWDownloadContext *ctx = avctx->priv; @@ -81,6 +95,15 @@ static int hwdownload_config_input(AVFilterLink *inlink) ctx->hwframes = 
(AVHWFramesContext*)ctx->hwframes_ref->data; + if (ctx->fbdetile != 0) { + err = ff_fbtile_checkpixformats(ctx->hwframes->sw_format, fbtilePixFormats[0]); + if (err) { + av_log(ctx, AV_LOG_ERROR, "Invalid input format %s for fbdetile.\n", + av_get_pix_fmt_name(ctx->hwframes->sw_format)); + return AVERROR(EINVAL); + } + } + return 0; } @@ -116,6 +139,15 @@ static int hwdownload_config_output(AVFilterLink *outlink) return AVERROR(EINVAL); } + if (ctx->fbdetile != 0) { + err = ff_fbtile_checkpixformats(outlink->format, fbtilePixFormats[0]); + if (err) { + av_log(ctx, AV_LOG_ERROR, "Invalid output format %s for fbdetile.\n", + av_get_pix_fmt_name(outlink->format)); + return AVERROR(EINVAL); + } + } + outlink->w = inlink->w; outlink->h = inlink->h; @@ -128,7 +160,9 @@ static int hwdownload_filter_frame(AVFilterLink *link, AVFrame *input) AVFilterLink *outlink = avctx->outputs[0]; HWDownloadContext *ctx = avctx->priv; AVFrame *output = NULL; + AVFrame *output2 = NULL; int err; + enum FFFBTileFrameCopyStatus status; if (!ctx->hwframes_ref || !input->hw_frames_ctx) { av_log(ctx, AV_LOG_ERROR, "Input frames must have hardware context.\n"); @@ -162,13 +196,35 @@ static int hwdownload_filter_frame(AVFilterLink *link, AVFrame *input) if (err < 0) goto fail; + if (ctx->fbdetile == 0) { + av_frame_free(&input); + return ff_filter_frame(avctx->outputs[0], output); + } + + output2 = ff_get_video_buffer(outlink, ctx->hwframes->width, + ctx->hwframes->height); + if (!output2) { + err = AVERROR(ENOMEM); + goto fail; + } + + output2->width = outlink->w; + output2->height = outlink->h; + ff_fbtile_frame_copy(output2, FF_FBTILE_NONE, output, ctx->fbdetile, &status); + + err = av_frame_copy_props(output2, input); + if (err < 0) + goto fail; + av_frame_free(&input); + av_frame_free(&output); - return ff_filter_frame(avctx->outputs[0], output); + return ff_filter_frame(avctx->outputs[0], output2); fail: av_frame_free(&input); av_frame_free(&output); + av_frame_free(&output2); return err; } 
@@ -182,7 +238,7 @@ static av_cold void hwdownload_uninit(AVFilterContext *avctx) static const AVClass hwdownload_class = { .class_name = "hwdownload", .item_name = av_default_item_name, - .option = NULL, + .option = hwdownload_options, .version = LIBAVUTIL_VERSION_INT, };