@@ -8,6 +8,8 @@ version <next>:
- kmsgrab GetFB2 format_modifier, if user doesnt specify
- fbtile cpu based framebuffer tile/detile helpers (Intel TileX|Y|Yf)
- hwcontext_drm detiles non linear layouts, if possible
+- hwdownload framebuffer layout detiling, if requested
+- fbtiler cpu based framebuffer layout tile/detile video filter
version 4.3:
@@ -12105,6 +12105,23 @@ Not all formats will be supported on the output - it may be necessary to insert
an additional @option{format} filter immediately following in the graph to get
the output in a supported format.
+It supports the following optional parameters
+
+@table @option
+@item fbdetile
+Specify type of CPU based FrameBuffer layout detiling to apply. The supported values are
+@table @var
+@item 0
+Don't do software detiling (the default).
+@item 1
+intel tile-x to linear conversion.
+@item 2
+intel tile-y to linear conversion.
+@item 3
+intel tile-yf to linear conversion.
+@end table
+@end table
+
@section hwmap
Map hardware frames to system memory or to another device.
@@ -12218,6 +12235,92 @@ It accepts the following optional parameters:
The number of the CUDA device to use
@end table
+@anchor{fbtiler}
+@section fbtiler
+
+Tile/Detile the Framebuffer between tile layout and linear layout using CPU.
+
+Currently supports conversion to|from Intel legacy tile-x|tile-y as well as
+the newer Intel tile-yf layouts and the linear layout. This is useful if
+one is using kmsgrab and hwdownload to capture a screen which is using one
+of these non-linear layouts. It can also be used to generate a tiled layout.
+
+It provides a generic tiling|detiling logic, which can be easily configured
+to tile|detile many different tiling schemes if required, in future. One is
+only required to specify the tile walk parameters for the new tiling layout.
+
+Currently it expects the data to be in a 32bit RGB based pixel format. However
+the logic doesn't do any pixel format conversion. Support for 16bit RGB data
+may be enabled later, as the logic is transparent to it at one level.
+
+One could either insert this into the filter chain while capturing itself,
+or else, if it is slowing things down or so, then one could instead insert
+it into the filter chain during playback or transcoding or so.
+
+It supports the following parameters
+
+@table @option
+@item op
+Specify whether to apply tiling or detiling. The supported values are
+@table @var
+@item 0
+Don't do any operation, just pass through (the default).
+@item 1
+Apply tiling operation.
+@item 2
+Apply detiling operation.
+@end table
+@item layout
+Specify which frame buffer layout to work with for conversion. The supported values are
+@table @var
+@item 0
+Don't do any tiling/detiling.
+@item 1
+Between intel tile-x and linear conversion (the default).
+@item 2
+Between intel tile-y and linear conversion.
+@item 3
+Between intel tile-yf and linear conversion.
+@end table
+@end table
+
+If one wants to convert during capture itself, one could do
+@example
+ffmpeg -f kmsgrab -i - -vf "hwdownload,format=bgr0,fbtiler=op=2:layout=1" OUTPUT
+@end example
+
+However if one wants to convert after the tiled data has been already captured
+@example
+ffmpeg -i INPUT -vf "fbtiler=op=2" OUTPUT
+@end example
+@example
+ffplay -i INPUT -vf "fbtiler=op=2"
+@end example
+
+NOTE: While transcoding a test 1080p h264 stream with 276 frames, below are
+the average times taken by the different detile logics.
+@example
+rm out.mp4; time ./ffmpeg -i input.mp4 out.mp4
+rm out.mp4; time ./ffmpeg -i input.mp4 -vf fbtiler=op=2:layout=1 out.mp4
+rm out.mp4; time ./ffmpeg -i input.mp4 -vf fbtiler=op=2:layout=2 out.mp4
+rm out.mp4; time ./ffmpeg -i input.mp4 -vf fbtiler=op=2:layout=3 out.mp4
+@end example
+@table @option
+@item with no filters
+it took ~07.28 secs, i5-8th Gen
+it took ~09.95 secs, i7-7th Gen
+@item with fbtiler=op=0:layout=0 filter, pass through
+it took ~12.70 secs. i7-7th Gen
+@item with fbtiler=op=2:layout=1 filter, Intel Tile-X
+it took ~08.69 secs, i5-8th Gen
+it took ~13.35 secs, i7-7th Gen
+@item with fbtiler=op=2:layout=2 filter, Intel Tile-Y
+it took ~09.20 secs. i5-8th Gen
+it took ~13.65 secs. i7-7th Gen
+@item with fbtiler=op=2:layout=3 filter, Intel Tile-Yf
+it took ~13.75 secs. i7-7th Gen
+@end table
+
@section hqx
Apply a high-quality magnification filter designed for pixel art. This filter
@@ -280,6 +280,7 @@ OBJS-$(CONFIG_HWDOWNLOAD_FILTER) += vf_hwdownload.o
OBJS-$(CONFIG_HWMAP_FILTER) += vf_hwmap.o
OBJS-$(CONFIG_HWUPLOAD_CUDA_FILTER) += vf_hwupload_cuda.o
OBJS-$(CONFIG_HWUPLOAD_FILTER) += vf_hwupload.o
+OBJS-$(CONFIG_FBTILER_FILTER) += vf_fbtiler.o
OBJS-$(CONFIG_HYSTERESIS_FILTER) += vf_hysteresis.o framesync.o
OBJS-$(CONFIG_IDET_FILTER) += vf_idet.o
OBJS-$(CONFIG_IL_FILTER) += vf_il.o
@@ -265,6 +265,7 @@ extern AVFilter ff_vf_hwdownload;
extern AVFilter ff_vf_hwmap;
extern AVFilter ff_vf_hwupload;
extern AVFilter ff_vf_hwupload_cuda;
+extern AVFilter ff_vf_fbtiler;
extern AVFilter ff_vf_hysteresis;
extern AVFilter ff_vf_idet;
extern AVFilter ff_vf_il;
new file mode 100644
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2020 HanishKVC
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Tile or Detile the Frame buffer using cpu
+ * Currently it supports the following layouts
+ * legacy Intel Tile-X
+ * legacy Intel Tile-Y
+ * newer Intel Tile-Yf
+ * It uses the fbtile helper library to do its job.
+ * More tiling layouts can be easily supported by adding configuration data
+ * for tile walking into fbtile library or its tile|detile_generic function.
+ *
+ */
+
+/*
+ * ToThink|Check: Optimisations
+ *
+ * Do the gcc settings used by ffmpeg allow memcpy | stringops inlining,
+ * loop unrolling, better native matching instructions, additional
+ * optimisations, ...
+ *
+ * Does gcc map to optimal memcpy logic, based on the situation it is
+ * used in i.e like
+ * based on size of transfer, alignment, architecture, etc
+ * a suitable combination of inlining and or rep movsb and or
+ * simd load/store and or unrolling and or ...
+ *
+ * If not, may be look at vector_size or intrinsics or appropriate arch
+ * and cpu specific inline asm or ...
+ *
+ */
+
+/*
+ * Performance check results on i7-7500u
+ *
+ * Run Type : Layout : Seconds Min, Max : TSCCnt Min, Max
+ * Non filter run: : 10.04s, 09.97s : 00.00M, 00.00M
+ * fbdetile=0 run: PasThro: 12.70s, 13.20s : 00.00M, 00.00M
+ * fbdetile=1 run: TileX : 13.34s, 13.52s : 06.13M, 06.20M ; Opti generic
+ * fbdetile=2 run: TileY : 13.59s, 13.68s : 08.60M, 08.97M ; Opti generic
+ * fbdetile=3 run: TileYf : 13.73s, 13.83s : 09.82M, 09.92M ; Opti generic
+ * The Older logics
+ * fbdetile=2 run: TileX : 12.45s, 13.41s : 05.95M, 06.05M ; prev custom
+ * fbdetile=3 run: TileY : 13.47s, 13.89s : 06.31M, 06.38M ; prev custom
+ * fbdetile=4 run: TileYf : 13.73s, 13.83s : 11.41M, 11.83M ; Simple generic
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/fbtile.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+
+// Time the tile/detile calls with the x86 time stamp counter (debug aid, x86 only)
+#if ARCH_X86
+#define DEBUG_PERF 1
+#else
+#undef DEBUG_PERF
+#endif
+
+#ifdef DEBUG_PERF
+#include <x86intrin.h>
+static uint64_t perfTime = 0; // summed TSC ticks; file-local so the symbol is not exported
+static int perfCnt = 0;       // number of timed calls; shared by every instance of this filter
+#endif
+
+typedef struct FBTilerContext { // private state of one fbtiler filter instance
+ const AVClass *class;
+ int width, height; // frame size; set in init() and config_props(), only read for logging in this file
+ int layout; // value of the "layout" option
+ int op; // value of the "op" option
+} FBTilerContext;
+
+#define OFFSET(x) offsetof(FBTilerContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+static const AVOption fbtiler_options[] = { // "layout" defaults to FF_FBTILE_INTEL_XGEN9, "op" to FF_FBTILE_OPS_NONE
+ { "layout", "set framebuffer tile|format_modifier layout", OFFSET(layout), AV_OPT_TYPE_INT, {.i64=FF_FBTILE_INTEL_XGEN9}, 0, FF_FBTILE_UNKNOWN-1, FLAGS, "layout" },
+ { "None", "Linear layout", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_NONE}, INT_MIN, INT_MAX, FLAGS, "layout" },
+ { "intelx", "Intel Tile-X layout", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_INTEL_XGEN9}, INT_MIN, INT_MAX, FLAGS, "layout" },
+ { "intely", "Intel Tile-Y layout", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_INTEL_YGEN9}, INT_MIN, INT_MAX, FLAGS, "layout" },
+ { "intelyf", "Intel Tile-Yf layout", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_INTEL_YF}, INT_MIN, INT_MAX, FLAGS, "layout" },
+ { "op", "select framebuffer tiling operations i.e tile|detile", OFFSET(op), AV_OPT_TYPE_INT, {.i64=FF_FBTILE_OPS_NONE}, 0, FF_FBTILE_OPS_UNKNOWN-1, FLAGS, "op" },
+ { "None", "Nop", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_OPS_NONE}, INT_MIN, INT_MAX, FLAGS, "op" },
+ { "tile", "Apply tiling operation", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_OPS_TILE}, INT_MIN, INT_MAX, FLAGS, "op" },
+ { "detile", "Apply detiling operation", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_OPS_DETILE}, INT_MIN, INT_MAX, FLAGS, "op" },
+ { NULL } // terminating entry
+};
+
+AVFILTER_DEFINE_CLASS(fbtiler); // presumably provides the fbtiler_class referenced by ff_vf_fbtiler
+
+/* Log the selected op and layout; returns 0, or AVERROR(EINVAL) for a value this filter does not know. */
+static av_cold int init(AVFilterContext *ctx)
+{
+    FBTilerContext *fbtiler = ctx->priv;
+
+    switch (fbtiler->op) {
+    case FF_FBTILE_OPS_NONE:   av_log(ctx, AV_LOG_INFO, "init:Op: None, Pass through\n"); break;
+    case FF_FBTILE_OPS_TILE:   av_log(ctx, AV_LOG_INFO, "init:Op: Apply tiling\n");       break;
+    case FF_FBTILE_OPS_DETILE: av_log(ctx, AV_LOG_INFO, "init:Op: Apply detiling\n");     break;
+    default: // the option range should already exclude this; fail rather than report success
+        av_log(ctx, AV_LOG_ERROR, "init:Op: Unknown, shouldnt reach here\n");
+        return AVERROR(EINVAL);
+    }
+
+    switch (fbtiler->layout) {
+    case FF_FBTILE_NONE:        av_log(ctx, AV_LOG_INFO, "init:Layout: pass through\n");  break;
+    case FF_FBTILE_INTEL_XGEN9: av_log(ctx, AV_LOG_INFO, "init:Layout: Intel tile-x\n");  break;
+    case FF_FBTILE_INTEL_YGEN9: av_log(ctx, AV_LOG_INFO, "init:Layout: Intel tile-y\n");  break;
+    case FF_FBTILE_INTEL_YF:    av_log(ctx, AV_LOG_INFO, "init:Layout: Intel tile-yf\n"); break;
+    default: // same reasoning as for the op above
+        av_log(ctx, AV_LOG_ERROR, "init: Unknown Tile format specified, shouldnt reach here\n");
+        return AVERROR(EINVAL);
+    }
+
+    // Placeholder size only; config_props() overwrites it with the input link dimensions.
+    fbtiler->width  = 1920;
+    fbtiler->height = 1088;
+    return 0;
+}
+
+/* Build a format list from fbtilePixFormats and hand it to ff_set_common_formats(). */
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *supported = ff_make_format_list(fbtilePixFormats);
+
+    if (supported == NULL)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, supported);
+}
+
+/* Input pad config callback: store the input link's frame size in the context and log it. */
+static int config_props(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    FBTilerContext *s    = ctx->priv;
+
+    s->height = inlink->h;
+    s->width  = inlink->w;
+    av_log(ctx, AV_LOG_INFO, "config_props: %d x %d\n", s->width, s->height);
+    return 0;
+}
+
+
+/* Tile or detile 'in' into a new frame; 'in' is always forwarded or freed. Returns <0 on alloc/props failure. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    FBTilerContext *fbtiler = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out;
+    enum FFFBTileFrameCopyStatus status; // NOTE(review): written by ff_fbtile_frame_copy() but never inspected
+    int err;
+#ifdef DEBUG_PERF
+    unsigned int tscArg;
+    uint64_t perfStart;
+#endif
+
+    if ((fbtiler->op == FF_FBTILE_OPS_NONE) || (fbtiler->layout == FF_FBTILE_NONE))
+        return ff_filter_frame(outlink, in);
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    err = out ? av_frame_copy_props(out, in) : AVERROR(ENOMEM);
+    if (err < 0) { // allocation or metadata copy failed: drop both frames and report it
+        av_frame_free(&out);
+        av_frame_free(&in);
+        return err;
+    }
+#ifdef DEBUG_PERF
+    perfStart = __rdtscp(&tscArg);
+#endif
+    if (fbtiler->op == FF_FBTILE_OPS_DETILE)
+        ff_fbtile_frame_copy(out, FF_FBTILE_NONE, in, fbtiler->layout, &status);
+    else
+        ff_fbtile_frame_copy(out, fbtiler->layout, in, FF_FBTILE_NONE, &status);
+#ifdef DEBUG_PERF
+    perfTime += __rdtscp(&tscArg) - perfStart;
+    perfCnt  += 1;
+#endif
+    av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+/* Log the average TSC ticks per timed call; does nothing unless DEBUG_PERF is defined. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+#ifdef DEBUG_PERF
+    av_log(ctx, AV_LOG_INFO, "uninit:perf: AvgTSCCnt %llu\n", // uint64_t is not 'long' on every ABI
+           (unsigned long long)(perfTime / (perfCnt ? perfCnt : 1))); // avoid /0 without touching the counter
+#endif
+}
+
+static const AVFilterPad fbtiler_inputs[] = { // one video input pad
+    {
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .name         = "default",
+        .filter_frame = filter_frame,
+        .config_props = config_props,
+    },
+    { NULL }
+};
+
+static const AVFilterPad fbtiler_outputs[] = { // one video output pad, no callbacks set
+    {
+        .type = AVMEDIA_TYPE_VIDEO,
+        .name = "default",
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_fbtiler = { // filter definition, registered under the name "fbtiler"
+    .name          = "fbtiler",
+    .description   = NULL_IF_CONFIG_SMALL("Tile|Detile Framebuffer using CPU"),
+    .priv_class    = &fbtiler_class,
+    .priv_size     = sizeof(FBTilerContext),
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+    .query_formats = query_formats,
+    .init          = init,
+    .uninit        = uninit,
+    .inputs        = fbtiler_inputs,
+    .outputs       = fbtiler_outputs,
+};
+
+// vim: set expandtab sts=4: //
@@ -22,6 +22,7 @@
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
+#include "libavutil/fbtile.h"
#include "avfilter.h"
#include "formats.h"
@@ -33,8 +34,20 @@ typedef struct HWDownloadContext {
AVBufferRef *hwframes_ref;
AVHWFramesContext *hwframes;
+ int fbdetile;
} HWDownloadContext;
+#define OFFSET(x) offsetof(HWDownloadContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+static const AVOption hwdownload_options[] = { // single "fbdetile" option, default FF_FBTILE_NONE
+ { "fbdetile", "set framebuffer detile layout info", OFFSET(fbdetile), AV_OPT_TYPE_INT, {.i64=FF_FBTILE_NONE}, 0, FF_FBTILE_UNKNOWN-1, FLAGS, "fbdetile" },
+ { "none", "Pass through", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_NONE}, INT_MIN, INT_MAX, FLAGS, "fbdetile" },
+ { "intelx", "Intel Tile-X layout", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_INTEL_XGEN9}, INT_MIN, INT_MAX, FLAGS, "fbdetile" },
+ { "intely", "Intel Tile-Y layout", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_INTEL_YGEN9}, INT_MIN, INT_MAX, FLAGS, "fbdetile" },
+ { "intelyf", "Intel Tile-Yf layout", 0, AV_OPT_TYPE_CONST, {.i64=FF_FBTILE_INTEL_YF}, INT_MIN, INT_MAX, FLAGS, "fbdetile" },
+ { NULL } // terminating entry
+};
+
static int hwdownload_query_formats(AVFilterContext *avctx)
{
AVFilterFormats *infmts = NULL;
@@ -64,6 +77,7 @@ static int hwdownload_query_formats(AVFilterContext *avctx)
static int hwdownload_config_input(AVFilterLink *inlink)
{
+ int err;
AVFilterContext *avctx = inlink->dst;
HWDownloadContext *ctx = avctx->priv;
@@ -81,6 +95,15 @@ static int hwdownload_config_input(AVFilterLink *inlink)
ctx->hwframes = (AVHWFramesContext*)ctx->hwframes_ref->data;
+ if (ctx->fbdetile != 0) {
+ err = ff_fbtile_checkpixformats(ctx->hwframes->sw_format, fbtilePixFormats[0]);
+ if (err) {
+ av_log(ctx, AV_LOG_ERROR, "Invalid input format %s for fbdetile.\n",
+ av_get_pix_fmt_name(ctx->hwframes->sw_format));
+ return AVERROR(EINVAL);
+ }
+ }
+
return 0;
}
@@ -116,6 +139,15 @@ static int hwdownload_config_output(AVFilterLink *outlink)
return AVERROR(EINVAL);
}
+ if (ctx->fbdetile != 0) {
+ err = ff_fbtile_checkpixformats(outlink->format, fbtilePixFormats[0]);
+ if (err) {
+ av_log(ctx, AV_LOG_ERROR, "Invalid output format %s for fbdetile.\n",
+ av_get_pix_fmt_name(outlink->format));
+ return AVERROR(EINVAL);
+ }
+ }
+
outlink->w = inlink->w;
outlink->h = inlink->h;
@@ -128,7 +160,9 @@ static int hwdownload_filter_frame(AVFilterLink *link, AVFrame *input)
AVFilterLink *outlink = avctx->outputs[0];
HWDownloadContext *ctx = avctx->priv;
AVFrame *output = NULL;
+ AVFrame *output2 = NULL;
int err;
+ enum FFFBTileFrameCopyStatus status;
if (!ctx->hwframes_ref || !input->hw_frames_ctx) {
av_log(ctx, AV_LOG_ERROR, "Input frames must have hardware context.\n");
@@ -162,13 +196,35 @@ static int hwdownload_filter_frame(AVFilterLink *link, AVFrame *input)
if (err < 0)
goto fail;
+ if (ctx->fbdetile == 0) {
+ av_frame_free(&input);
+ return ff_filter_frame(avctx->outputs[0], output);
+ }
+
+ output2 = ff_get_video_buffer(outlink, ctx->hwframes->width,
+ ctx->hwframes->height);
+ if (!output2) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ output2->width = outlink->w;
+ output2->height = outlink->h;
+ ff_fbtile_frame_copy(output2, FF_FBTILE_NONE, output, ctx->fbdetile, &status);
+
+ err = av_frame_copy_props(output2, input);
+ if (err < 0)
+ goto fail;
+
av_frame_free(&input);
+ av_frame_free(&output);
- return ff_filter_frame(avctx->outputs[0], output);
+ return ff_filter_frame(avctx->outputs[0], output2);
fail:
av_frame_free(&input);
av_frame_free(&output);
+ av_frame_free(&output2);
return err;
}
@@ -182,7 +238,7 @@ static av_cold void hwdownload_uninit(AVFilterContext *avctx)
static const AVClass hwdownload_class = {
.class_name = "hwdownload",
.item_name = av_default_item_name,
- .option = NULL,
+ .option = hwdownload_options,
.version = LIBAVUTIL_VERSION_INT,
};