@@ -2,6 +2,8 @@ Entries are sorted chronologically from oldest to youngest within each release,
releases are sorted from youngest to oldest.
version <next>:
+- hwdownload support framebuffer layout detiling (certain Intel tile layouts)
+- fbdetile cpu based framebuffer layout detiling video filter
- AudioToolbox output device
- MacCaption demuxer
@@ -12097,6 +12097,25 @@ Not all formats will be supported on the output - it may be necessary to insert
an additional @option{format} filter immediately following in the graph to get
the output in a supported format.
+It supports the following optional parameters:
+
+@table @option
+@item fbdetile
+Specify type of CPU based FrameBuffer layout detiling to apply. The supported values are
+@table @var
+@item 0
+Do not apply software detiling (the default).
+@item 1
+Automatically detect the detiling logic to apply (for hwcontext_drm).
+@item 2
+Intel Tile-X to linear conversion.
+@item 3
+Intel Tile-Y to linear conversion.
+@item 4
+Intel Tile-Yf to linear conversion.
+@end table
+@end table
+
@section hwmap
Map hardware frames to system memory or to another device.
@@ -12210,6 +12229,84 @@ It accepts the following optional parameters:
The number of the CUDA device to use
@end table
+@anchor{fbdetile}
+@section fbdetile
+
+Detiles the Framebuffer tile layout into a linear layout using CPU.
+
+It currently supports conversion from Intel legacy tile-x and tile-y as well
+as the newer Intel tile-yf layouts into a linear layout. This is useful if
+one is using kmsgrab and hwdownload to capture a screen which is using one
+of these non-linear layouts.
+
+NOTE: It also provides generic detiling logic, which can easily be configured
+to detile many other tiling schemes in the future, if required. This generic
+logic is what handles the Intel Tile-Yf layout. Sample configurations for
+handling Intel Tile-X and Tile-Y with the generic detile logic are also
+included in the code, for reference.
+
+Currently it expects the data to be in a 32-bit RGB based pixel format. The
+logic does not perform any pixel format conversion. Support for 16-bit RGB
+data may be enabled later, as the logic is largely transparent to it.
+
+One can either insert this filter into the filter chain while capturing, or,
+if that slows the capture down too much, insert it into the filter chain
+later, during playback or transcoding.
+
+It supports the following optional parameters:
+
+@table @option
+@item type
+Specify which detiling conversion to apply. The supported values are
+@table @var
+@item 0
+Do not detile.
+@item 1
+Automatically detect the detiling logic to apply (supported in vf_hwdownload, not in vf_fbdetile).
+@item 2
+Intel Tile-X to linear conversion (the default).
+@item 3
+Intel Tile-Y to linear conversion.
+@item 4
+Intel Tile-Yf to linear conversion.
+@end table
+@end table
+
+If one wants to convert during capture itself, one could do
+@example
+ffmpeg -f kmsgrab -i - -vf "hwdownload,format=bgr0,fbdetile" OUTPUT
+@end example
+
+However if one wants to convert after the tiled data has been already captured
+@example
+ffmpeg -i INPUT -vf "fbdetile" OUTPUT
+@end example
+@example
+ffplay -i INPUT -vf "fbdetile"
+@end example
+
+NOTE: While transcoding a test 1080p h264 stream with 276 frames, the average
+times taken by the different detile logics were as shown below.
+@example
+rm out.mp4; time ./ffmpeg -i input.mp4 out.mp4
+rm out.mp4; time ./ffmpeg -i input.mp4 -vf fbdetile=2 out.mp4
+rm out.mp4; time ./ffmpeg -i input.mp4 -vf fbdetile=3 out.mp4
+rm out.mp4; time ./ffmpeg -i input.mp4 -vf fbdetile=4 out.mp4
+@end example
+@table @option
+@item with no fbdetile filter
+it took ~7.28 secs, i5-8th Gen
+it took ~10.1 secs, i7-7th Gen
+@item with fbdetile=2 filter, Intel Tile-X
+it took ~8.69 secs, i5-8th Gen
+it took ~13.3 secs, i7-7th Gen
+@item with fbdetile=3 filter, Intel Tile-Y
+it took ~9.20 secs, i5-8th Gen
+it took ~13.5 secs, i7-7th Gen
+@item with fbdetile=4 filter, Intel Tile-Yf
+it took ~13.8 secs, i7-7th Gen
+@end table
+
@section hqx
Apply a high-quality magnification filter designed for pixel art. This filter
@@ -239,6 +239,7 @@ static av_cold int kmsgrab_read_header(AVFormatContext *avctx)
drmModePlaneRes *plane_res = NULL;
drmModePlane *plane = NULL;
drmModeFB *fb = NULL;
+ drmModeFB2 *fb2 = NULL;
AVStream *stream;
int err, i;
@@ -364,6 +365,22 @@ static av_cold int kmsgrab_read_header(AVFormatContext *avctx)
goto fail;
}
+ fb2 = drmModeGetFB2(ctx->hwctx->fd, plane->fb_id);
+ if (!fb2) {
+ err = errno;
+ av_log(avctx, AV_LOG_ERROR, "Failed to get "
+ "framebuffer2 %"PRIu32": %s.\n",
+ plane->fb_id, strerror(err));
+ err = AVERROR(err);
+ goto fail;
+ }
+
+ av_log(avctx, AV_LOG_INFO, "Template framebuffer2 is %"PRIu32": "
+ "%"PRIu32"x%"PRIu32", pixel_format: 0x%"PRIx32", format_modifier: 0x%"PRIx64".\n",
+ fb2->fb_id, fb2->width, fb2->height, fb2->pixel_format, fb2->modifier);
+
+ ctx->drm_format_modifier = fb2->modifier;
+
stream = avformat_new_stream(avctx, NULL);
if (!stream) {
err = AVERROR(ENOMEM);
@@ -408,6 +425,8 @@ fail:
drmModeFreePlane(plane);
if (fb)
drmModeFreeFB(fb);
+ if (fb2)
+ drmModeFreeFB2(fb2);
return err;
}
@@ -280,6 +280,7 @@ OBJS-$(CONFIG_HWDOWNLOAD_FILTER) += vf_hwdownload.o
OBJS-$(CONFIG_HWMAP_FILTER) += vf_hwmap.o
OBJS-$(CONFIG_HWUPLOAD_CUDA_FILTER) += vf_hwupload_cuda.o
OBJS-$(CONFIG_HWUPLOAD_FILTER) += vf_hwupload.o
+OBJS-$(CONFIG_FBDETILE_FILTER) += vf_fbdetile.o
OBJS-$(CONFIG_HYSTERESIS_FILTER) += vf_hysteresis.o framesync.o
OBJS-$(CONFIG_IDET_FILTER) += vf_idet.o
OBJS-$(CONFIG_IL_FILTER) += vf_il.o
@@ -265,6 +265,7 @@ extern AVFilter ff_vf_hwdownload;
extern AVFilter ff_vf_hwmap;
extern AVFilter ff_vf_hwupload;
extern AVFilter ff_vf_hwupload_cuda;
+extern AVFilter ff_vf_fbdetile;
extern AVFilter ff_vf_hysteresis;
extern AVFilter ff_vf_idet;
extern AVFilter ff_vf_il;
new file mode 100644
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2020 HanishKVC
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Detile the Frame buffer's tile layout using the cpu
+ * Currently it supports detiling of following layouts
+ * legacy Intel Tile-X
+ * legacy Intel Tile-Y
+ * newer Intel Tile-Yf
+ * More tiling layouts can be easily supported by adding configuration data
+ * for the generic detile logic, wrt the required tiling schemes.
+ *
+ */
+
+/*
+ * ToThink|Check: Optimisations
+ *
+ * Does gcc setting used by ffmpeg allows memcpy | stringops inlining,
+ * loop unrolling, better native matching instructions, additional
+ * optimisations, ...
+ *
+ * Does gcc map to optimal memcpy logic, based on the situation it is
+ * used in i.e like
+ * based on size of transfer, alignment, architecture, etc
+ * a suitable combination of inlining and or rep movsb and or
+ * simd load/store and or unrolling and or ...
+ *
+ * If not, may be look at vector_size or intrinsics or appropriate arch
+ * and cpu specific inline asm or ...
+ *
+ */
+
+/*
+ * Performance check results on i7-7500u
+ * TileYf, TileGX, TileGY using detile_generic_opti
+ * This mainly impacts TileYf, due to its deeper subtiling
+ * Without opti, its TSCCnt rises to around 11.XYM
+ * Run Type : Type : Seconds Max, Min : TSCCnt Min, Max
+ * Non filter run: : 10.11s, 09.96s :
+ * fbdetile=2 run: TileX : 13.45s, 13.20s : 05.95M, 06.10M
+ * fbdetile=3 run: TileY : 13.50s, 13.39s : 06.22M, 06.39M
+ * fbdetile=4 run: TileYf : 13.75s, 13.63s : 09.82M, 09.90M
+ * fbdetile=5 run: TileGX : 13.70s, 13.32s : 06.15M, 06.24M
+ * fbdetile=6 run: TileGY : 14.12s, 13.57s : 08.75M, 09.10M
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/fbtile.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+// Use Optimised detile_generic or the Simpler but more fine grained one
+#define DETILE_GENERIC_OPTI 1
+// Enable printing of the tile walk
+#undef DEBUG_FBTILE
+// Print time taken by detile using performance counter.
+// FFmpeg's configure normally defines ARCH_X86 as 0 or 1, so it has to be
+// tested by value: #ifdef would also be true on non-x86 builds and pull in
+// <x86intrin.h> there.
+#if ARCH_X86
+#define DEBUG_PERF 1
+#else
+#undef DEBUG_PERF
+#endif
+
+#ifdef DEBUG_PERF
+#include <x86intrin.h>
+// File-local accumulators: total TSC ticks spent detiling and number of
+// detiled frames. Kept static so they do not leak into the library's
+// global symbol namespace.
+static uint64_t perfTime = 0;
+static int perfCnt = 0;
+#endif
+
+/**
+ * Private context of the fbdetile filter.
+ */
+typedef struct FBDetileContext {
+ const AVClass *class;
+ int width, height; ///< frame size in pixels; preset in init() and overwritten from the input link in config_props()
+ int type; ///< detile mode selected through the "type" option (an FBTileMode value)
+} FBDetileContext;
+
+#define OFFSET(x) offsetof(FBDetileContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+/*
+ * User-visible options of the filter. "type" selects the detile mode; it
+ * defaults to TILE_INTELX and accepts values from 0 to TILE_NONE_END-1.
+ * The entries after it are named constants for that option.
+ */
+static const AVOption fbdetile_options[] = {
+ { "type", "set framebuffer tile|format_modifier conversion type", OFFSET(type), AV_OPT_TYPE_INT, {.i64=TILE_INTELX}, 0, TILE_NONE_END-1, FLAGS, "type" },
+ { "None", "Dont detile", 0, AV_OPT_TYPE_CONST, {.i64=TILE_NONE}, INT_MIN, INT_MAX, FLAGS, "type" },
+ { "Auto", "Auto detect tile conversion type, NotImplemented", 0, AV_OPT_TYPE_CONST, {.i64=TILE_AUTO}, INT_MIN, INT_MAX, FLAGS, "type" },
+ { "intelx", "Intel Tile-X layout", 0, AV_OPT_TYPE_CONST, {.i64=TILE_INTELX}, INT_MIN, INT_MAX, FLAGS, "type" },
+ { "intely", "Intel Tile-Y layout", 0, AV_OPT_TYPE_CONST, {.i64=TILE_INTELY}, INT_MIN, INT_MAX, FLAGS, "type" },
+ { "intelyf", "Intel Tile-Yf layout", 0, AV_OPT_TYPE_CONST, {.i64=TILE_INTELYF}, INT_MIN, INT_MAX, FLAGS, "type" },
+ { "intelgx", "Intel Tile-X layout, GenericDetile", 0, AV_OPT_TYPE_CONST, {.i64=TILE_INTELGX}, INT_MIN, INT_MAX, FLAGS, "type" },
+ { "intelgy", "Intel Tile-Y layout, GenericDetile", 0, AV_OPT_TYPE_CONST, {.i64=TILE_INTELGY}, INT_MIN, INT_MAX, FLAGS, "type" },
+ { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(fbdetile);
+
+/**
+ * Filter init callback: report the selected detile mode and normalise the
+ * modes this filter cannot act on.
+ *
+ * TILE_AUTO is downgraded to TILE_NONE (pass-through) with a warning:
+ * filter_frame() has no format modifier to hand to the detiler, so auto
+ * detection could never select a layout and the output frame would be
+ * left unwritten.
+ *
+ * @param ctx the filter context
+ * @return 0 on success, AVERROR(EINVAL) for a mode value outside FBTileMode
+ */
+static av_cold int init(AVFilterContext *ctx)
+{
+    FBDetileContext *fbdetile = ctx->priv;
+
+    switch (fbdetile->type) {
+    case TILE_NONE:
+        av_log(ctx, AV_LOG_INFO, "No detiling, frames are passed through\n");
+        break;
+    case TILE_AUTO:
+        av_log(ctx, AV_LOG_WARNING, "Auto detection of the tile layout is not "
+               "supported by this filter, frames are passed through\n");
+        fbdetile->type = TILE_NONE;
+        break;
+    case TILE_INTELX:
+        av_log(ctx, AV_LOG_INFO, "Intel tile-x to linear\n");
+        break;
+    case TILE_INTELY:
+        av_log(ctx, AV_LOG_INFO, "Intel tile-y to linear\n");
+        break;
+    case TILE_INTELYF:
+        av_log(ctx, AV_LOG_INFO, "Intel tile-yf to linear\n");
+        break;
+    case TILE_INTELGX:
+        av_log(ctx, AV_LOG_INFO, "Intel tile-x to linear, using generic detile\n");
+        break;
+    case TILE_INTELGY:
+        av_log(ctx, AV_LOG_INFO, "Intel tile-y to linear, using generic detile\n");
+        break;
+    default:
+        // The option range (0..TILE_NONE_END-1) should make this unreachable
+        av_log(ctx, AV_LOG_ERROR, "Unknown tile format %d specified\n", fbdetile->type);
+        return AVERROR(EINVAL);
+    }
+    // Placeholder size only; config_props() overwrites it with the link size
+    fbdetile->width = 1920;
+    fbdetile->height = 1080;
+    return 0;
+}
+
+/**
+ * Advertise the pixel formats the filter accepts: the packed 32-bit
+ * RGB/BGR variants, with or without alpha.
+ *
+ * @return the result of ff_set_common_formats(), or AVERROR(ENOMEM) if the
+ *         format list could not be allocated
+ */
+static int query_formats(AVFilterContext *ctx)
+{
+    // Currently only RGB based 32bit formats are specified
+    // TODO: Technically the logic is transparent to 16bit RGB formats also to a great extent
+    static const enum AVPixelFormat supported_fmts[] = {
+        AV_PIX_FMT_RGB0, AV_PIX_FMT_0RGB, AV_PIX_FMT_BGR0, AV_PIX_FMT_0BGR,
+        AV_PIX_FMT_RGBA, AV_PIX_FMT_ARGB, AV_PIX_FMT_BGRA, AV_PIX_FMT_ABGR,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *formats = ff_make_format_list(supported_fmts);
+
+    return formats ? ff_set_common_formats(ctx, formats) : AVERROR(ENOMEM);
+}
+
+/**
+ * Input link configuration callback: record the frame size of the input
+ * link in the filter context.
+ *
+ * @param inlink the input link being configured
+ * @return always 0
+ */
+static int config_props(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    FBDetileContext *fbdetile = ctx->priv;
+
+    fbdetile->width  = inlink->w;
+    fbdetile->height = inlink->h;
+    // Go through av_log so the message honours the log level and callback
+    av_log(ctx, AV_LOG_VERBOSE, "config_props: %d x %d\n", fbdetile->width, fbdetile->height);
+
+    return 0;
+}
+
+
+
+/**
+ * Detile one input frame into a newly allocated output frame.
+ *
+ * Frames are passed through untouched for TILE_NONE and for TILE_AUTO; the
+ * latter because no format modifier is available here, so the detiler
+ * could not pick a layout and the output buffer would stay unwritten.
+ *
+ * @param inlink the input link the frame arrived on
+ * @param in     the tiled input frame; ownership is taken
+ * @return the result of ff_filter_frame(), or a negative AVERROR code
+ */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    FBDetileContext *fbdetile = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out;
+    int ret;
+#ifdef DEBUG_PERF
+    uint64_t perfStart, perfEnd;
+#endif
+
+    if (fbdetile->type == TILE_NONE || fbdetile->type == TILE_AUTO)
+        return ff_filter_frame(outlink, in);
+
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+    ret = av_frame_copy_props(out, in);
+    if (ret < 0) {
+        av_frame_free(&in);
+        av_frame_free(&out);
+        return ret;
+    }
+
+#ifdef DEBUG_PERF
+    perfStart = __rdtsc();
+#endif
+
+    // Only plane 0 is handled and 4 bytes per pixel are assumed, matching
+    // the 32-bit packed formats listed in query_formats()
+    detile_this(fbdetile->type, 0, fbdetile->width, fbdetile->height,
+                out->data[0], out->linesize[0],
+                in->data[0], in->linesize[0], 4);
+
+#ifdef DEBUG_PERF
+    perfEnd = __rdtsc();
+    perfTime += (perfEnd - perfStart);
+    perfCnt += 1;
+#endif
+
+    av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+/**
+ * Filter uninit callback: when DEBUG_PERF is enabled, report the average
+ * TSC count spent per detiled frame.
+ */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+#ifdef DEBUG_PERF
+    // perfCnt is still 0 when no frame was detiled (pass-through mode or an
+    // empty stream); skip the report rather than divide by zero
+    if (perfCnt > 0)
+        av_log(ctx, AV_LOG_INFO, "perf: AvgTSCCnt %"PRIu64"\n", perfTime / perfCnt);
+#endif
+}
+
+// Single video input: config_props() records the frame size and
+// filter_frame() performs the detiling
+static const AVFilterPad fbdetile_inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = config_props,
+ .filter_frame = filter_frame,
+ },
+ { NULL }
+};
+
+// Single video output with no callbacks of its own
+static const AVFilterPad fbdetile_outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ },
+ { NULL }
+};
+
+// Filter definition tying together the callbacks, pads and option class
+// declared above
+AVFilter ff_vf_fbdetile = {
+ .name = "fbdetile",
+ .description = NULL_IF_CONFIG_SMALL("Detile Framebuffer using CPU"),
+ .priv_size = sizeof(FBDetileContext),
+ .init = init,
+ .uninit = uninit,
+ .query_formats = query_formats,
+ .inputs = fbdetile_inputs,
+ .outputs = fbdetile_outputs,
+ .priv_class = &fbdetile_class,
+ .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+};
+
+// vim: set expandtab sts=4: //
@@ -22,6 +22,10 @@
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
+#include "libavutil/fbtile.h"
+#ifdef CONFIG_LIBDRM
+#include "libavutil/hwcontext_drm.h"
+#endif
#include "avfilter.h"
#include "formats.h"
@@ -33,8 +37,23 @@ typedef struct HWDownloadContext {
AVBufferRef *hwframes_ref;
AVHWFramesContext *hwframes;
+ int fbdetile;
} HWDownloadContext;
+#define OFFSET(x) offsetof(HWDownloadContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+/*
+ * Options of the hwdownload filter. "fbdetile" selects the software detile
+ * mode applied to the downloaded frame; it defaults to TILE_NONE and accepts
+ * values from 0 to TILE_NONE_END-1. The entries after it are named constants
+ * for that option.
+ */
+static const AVOption hwdownload_options[] = {
+ { "fbdetile", "set framebuffer detile mode", OFFSET(fbdetile), AV_OPT_TYPE_INT, {.i64=TILE_NONE}, 0, TILE_NONE_END-1, FLAGS, "fbdetile" },
+ { "none", "No SW detiling", 0, AV_OPT_TYPE_CONST, {.i64=TILE_NONE}, INT_MIN, INT_MAX, FLAGS, "fbdetile" },
+ { "auto", "auto select based on format_modifier", 0, AV_OPT_TYPE_CONST, {.i64=TILE_AUTO}, INT_MIN, INT_MAX, FLAGS, "fbdetile" },
+ { "intelx", "Intel Tile-X layout", 0, AV_OPT_TYPE_CONST, {.i64=TILE_INTELX}, INT_MIN, INT_MAX, FLAGS, "fbdetile" },
+ { "intely", "Intel Tile-Y layout", 0, AV_OPT_TYPE_CONST, {.i64=TILE_INTELY}, INT_MIN, INT_MAX, FLAGS, "fbdetile" },
+ { "intelyf", "Intel Tile-Yf layout", 0, AV_OPT_TYPE_CONST, {.i64=TILE_INTELYF}, INT_MIN, INT_MAX, FLAGS, "fbdetile" },
+ { "intelgx", "Intel Tile-X layout, GenericDetile", 0, AV_OPT_TYPE_CONST, {.i64=TILE_INTELGX}, INT_MIN, INT_MAX, FLAGS, "fbdetile" },
+ { "intelgy", "Intel Tile-Y layout, GenericDetile", 0, AV_OPT_TYPE_CONST, {.i64=TILE_INTELGY}, INT_MIN, INT_MAX, FLAGS, "fbdetile" },
+ { NULL }
+};
+
static int hwdownload_query_formats(AVFilterContext *avctx)
{
AVFilterFormats *infmts = NULL;
@@ -128,6 +147,7 @@ static int hwdownload_filter_frame(AVFilterLink *link, AVFrame *input)
AVFilterLink *outlink = avctx->outputs[0];
HWDownloadContext *ctx = avctx->priv;
AVFrame *output = NULL;
+ AVFrame *output2 = NULL;
int err;
if (!ctx->hwframes_ref || !input->hw_frames_ctx) {
@@ -162,13 +182,39 @@ static int hwdownload_filter_frame(AVFilterLink *link, AVFrame *input)
if (err < 0)
goto fail;
+ output2 = ff_get_video_buffer(outlink, ctx->hwframes->width,
+ ctx->hwframes->height);
+ if (!output2) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ output2->width = outlink->w;
+ output2->height = outlink->h;
+ uint64_t formatModifier = 0;
+#ifdef CONFIG_LIBDRM
+ if (input->format == AV_PIX_FMT_DRM_PRIME) {
+ AVDRMFrameDescriptor *drmFrame = input->data[0];
+ formatModifier = drmFrame->objects[0].format_modifier;
+ }
+#endif
+ detile_this(ctx->fbdetile, formatModifier, output2->width, output2->height,
+ output2->data[0], output2->linesize[0],
+ output->data[0], output->linesize[0], 4);
+
+ err = av_frame_copy_props(output2, input);
+ if (err < 0)
+ goto fail;
+
av_frame_free(&input);
+ av_frame_free(&output);
- return ff_filter_frame(avctx->outputs[0], output);
+ return ff_filter_frame(avctx->outputs[0], output2);
fail:
av_frame_free(&input);
av_frame_free(&output);
+ av_frame_free(&output2);
return err;
}
@@ -182,7 +228,7 @@ static av_cold void hwdownload_uninit(AVFilterContext *avctx)
static const AVClass hwdownload_class = {
.class_name = "hwdownload",
.item_name = av_default_item_name,
- .option = NULL,
+ .option = hwdownload_options,
.version = LIBAVUTIL_VERSION_INT,
};
@@ -84,6 +84,7 @@ HEADERS = adler32.h \
xtea.h \
tea.h \
tx.h \
+ fbtile.h \
HEADERS-$(CONFIG_LZO) += lzo.h
@@ -169,6 +170,7 @@ OBJS = adler32.o \
tx_float.o \
tx_double.o \
tx_int32.o \
+ fbtile.o \
video_enc_params.o \
new file mode 100644
@@ -0,0 +1,411 @@
+/*
+ * CPU based Framebuffer Tile DeTile logic
+ * Copyright (c) 2020 C Hanish Menon <HanishKVC>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avutil.h"
+#include "common.h"
+#include "fbtile.h"
+// FFmpeg's configure normally defines CONFIG_LIBDRM as 0 or 1, so it has to
+// be tested by value; #ifdef would also be true in builds without libdrm.
+#if CONFIG_LIBDRM
+#include <drm_fourcc.h>
+#endif
+
+
+/**
+ * Map a DRM format modifier to the matching FBTileMode.
+ *
+ * Without libdrm support no modifier is recognised and TILE_NONE_END is
+ * returned for every input.
+ *
+ * @param formatModifier the DRM format modifier of the framebuffer
+ * @return TILE_NONE for a linear layout, TILE_INTELX / TILE_INTELY /
+ *         TILE_INTELYF for the Intel X, Y and Yf tilings, and TILE_NONE_END
+ *         for anything else
+ */
+int fbtilemode_from_formatmodifier(uint64_t formatModifier)
+{
+    int mode = TILE_NONE_END;
+
+#if CONFIG_LIBDRM
+    switch(formatModifier) {
+    case DRM_FORMAT_MOD_LINEAR:
+        mode = TILE_NONE;
+        break;
+    case I915_FORMAT_MOD_X_TILED:
+        mode = TILE_INTELX;
+        break;
+    case I915_FORMAT_MOD_Y_TILED:
+        mode = TILE_INTELY;
+        break;
+    case I915_FORMAT_MOD_Yf_TILED:
+        mode = TILE_INTELYF;
+        break;
+    default:
+        mode = TILE_NONE_END;
+        break;
+    }
+#endif
+#ifdef DEBUG_FBTILE_FORMATMODIFIER_MAPPING
+    // PRIx64 matches uint64_t on every platform, unlike %lx
+    fprintf(stderr,"DBUG:fbtile:formatmodifier[%"PRIx64"] mapped to mode[%d]\n", formatModifier, mode);
+#endif
+    return mode;
+}
+
+
+/**
+ * Detile a legacy Intel Tile-X image (4 bytes per pixel) into a linear one.
+ *
+ * The source is walked as consecutive tiles of 128x8 pixels, laid out left
+ * to right and then top to bottom. Tiles that straddle the right or bottom
+ * edge of the image are clipped, so nothing is written outside the w x h
+ * area of dst and nothing is read beyond the first w*h pixels of src.
+ *
+ * @param w           width of the image, in pixels
+ * @param h           height of the image, in pixel lines
+ * @param dst         the destination (linear) image buffer
+ * @param dstLineSize the size of each row in dst, in bytes
+ * @param src         the source (tiled) image buffer
+ * @param srcLineSize the size of each row in src, in bytes; only
+ *                    w*4 is supported, anything else is reported on stderr
+ */
+void detile_intelx(int w, int h,
+                   uint8_t *dst, int dstLineSize,
+                   const uint8_t *src, int srcLineSize)
+{
+    // Offsets and LineSize are in bytes
+    const int pixBytes = 4;                // bytes per pixel
+    const int tileW = 128;                 // tileWidth inPixels, 512/4, For a 32Bits/Pixel framebuffer
+    const int tileH = 8;                   // tileHeight inPixelLines
+    const int tileWBytes = tileW*pixBytes; // tileWidth inBytes
+    int nTLines;                           // numTileLines; One TileLine = One TileWidth
+    int sO = 0;                            // srcOffset inBytes
+    int dX = 0;                            // destX inPixels
+    int dY = 0;                            // destY inPixels
+    int cTL;                               // curTileLine
+
+    if (w <= 0 || h <= 0)
+        return;
+    if (w*pixBytes != srcLineSize) {
+        fprintf(stderr,"DBUG:fbdetile:intelx: w%dxh%d, dL%d, sL%d\n", w, h, dstLineSize, srcLineSize);
+        fprintf(stderr,"ERRR:fbdetile:intelx: dont support LineSize | Pitch going beyond width\n");
+    }
+    nTLines = (w*h)/tileW;
+    for (cTL = 0; cTL < nTLines && dY < h; cTL += tileH) {
+        int dO = dY*dstLineSize + dX*pixBytes;
+        int rows = tileH; // tile lines of this tile that are really copied
+        int cols = tileW; // pixels per tile line that are really copied
+
+        // Clip the tile against the bottom edge, the end of the source data
+        // and the right edge of the image
+        if (rows > h - dY)
+            rows = h - dY;
+        if (rows > nTLines - cTL)
+            rows = nTLines - cTL;
+        if (cols > w - dX)
+            cols = w - dX;
+#ifdef DEBUG_FBTILE
+        fprintf(stderr,"DBUG:fbdetile:intelx: dX%d dY%d, sO%d, dO%d\n", dX, dY, sO, dO);
+#endif
+        if (rows == tileH && cols == tileW) {
+            // Full tile: constant trip count and size, which the compiler
+            // is free to unroll and inline
+            for (int k = 0; k < tileH; k++)
+                memcpy(dst+dO+k*dstLineSize, src+sO+k*tileWBytes, tileWBytes);
+        } else {
+            for (int k = 0; k < rows; k++)
+                memcpy(dst+dO+k*dstLineSize, src+sO+k*tileWBytes, cols*pixBytes);
+        }
+        dX += tileW;
+        if (dX >= w) {
+            dX = 0;
+            dY += tileH;
+        }
+        sO = sO + tileW*tileH*pixBytes;
+    }
+}
+
+
+/*
+ * Intel Legacy Tile-Y layout conversion support
+ *
+ * currently done in a simple dumb way. Two low hanging optimisations
+ * that could be readily applied are
+ *
+ * a) unrolling the inner for loop
+ * --- Given small size memcpy, should help, DONE
+ *
+ * b) using simd based 128bit loading and storing along with prefetch
+ * hinting.
+ *
+ * TOTHINK|CHECK: Does memcpy already do this, and more, if the situation
+ * is right?!
+ *
+ * As code (or even intrinsics) would be specific to each architecture,
+ * avoiding for now. Later have to check if vector_size attribute and
+ * corresponding implementation by gcc can handle different architectures
+ * properly, such that it wont become worse than memcpy provided for that
+ * architecture.
+ *
+ * Or maybe I could even merge the two intel detiling logics into one, as
+ * the semantic and flow is almost same for both logics.
+ *
+ */
+/**
+ * Detile a legacy Intel Tile-Y image (4 bytes per pixel) into a linear one.
+ *
+ * The source is walked as consecutive sub-tile columns of 4x32 pixels
+ * (16 bytes wide), laid out left to right and then top to bottom. Columns
+ * that straddle the right or bottom edge of the image are clipped, so
+ * nothing is written outside the w x h area of dst (for example when h is
+ * not a multiple of 32) and nothing is read beyond the first w*h pixels of
+ * src.
+ *
+ * @param w           width of the image, in pixels
+ * @param h           height of the image, in pixel lines
+ * @param dst         the destination (linear) image buffer
+ * @param dstLineSize the size of each row in dst, in bytes
+ * @param src         the source (tiled) image buffer
+ * @param srcLineSize the size of each row in src, in bytes; only
+ *                    w*4 is supported, anything else is reported on stderr
+ */
+void detile_intely(int w, int h,
+                   uint8_t *dst, int dstLineSize,
+                   const uint8_t *src, int srcLineSize)
+{
+    // Offsets and LineSize are in bytes
+    const int pixBytes = 4;                // bytesPerPixel
+    // tileW represents subTileWidth here, as it can be repeated to fill a tile
+    const int tileW = 4;                   // tileWidth inPixels, 16/4, For a 32Bits/Pixel framebuffer
+    const int tileH = 32;                  // tileHeight inPixelLines
+    const int tileWBytes = tileW*pixBytes; // tileWidth inBytes
+    int nTLines;                           // numTileLines; One TileLine = One (sub)TileWidth
+    int sO = 0;                            // srcOffset inBytes
+    int dX = 0;                            // destX inPixels
+    int dY = 0;                            // destY inPixels
+    int cTL;                               // curTileLine
+
+    if (w <= 0 || h <= 0)
+        return;
+    if (w*pixBytes != srcLineSize) {
+        fprintf(stderr,"DBUG:fbdetile:intely: w%dxh%d, dL%d, sL%d\n", w, h, dstLineSize, srcLineSize);
+        fprintf(stderr,"ERRR:fbdetile:intely: dont support LineSize | Pitch going beyond width\n");
+    }
+    nTLines = (w*h)/tileW;
+    for (cTL = 0; cTL < nTLines && dY < h; cTL += tileH) {
+        int dO = dY*dstLineSize + dX*pixBytes;
+        int rows = tileH; // lines of this column that are really copied
+        int cols = tileW; // pixels per line that are really copied
+
+        // Clip the column against the bottom edge, the end of the source
+        // data and the right edge of the image
+        if (rows > h - dY)
+            rows = h - dY;
+        if (rows > nTLines - cTL)
+            rows = nTLines - cTL;
+        if (cols > w - dX)
+            cols = w - dX;
+#ifdef DEBUG_FBTILE
+        fprintf(stderr,"DBUG:fbdetile:intely: dX%d dY%d, sO%d, dO%d\n", dX, dY, sO, dO);
+#endif
+
+        if (rows == tileH && cols == tileW) {
+            // Full column: constant trip count and size, which the compiler
+            // is free to unroll and inline
+            for (int k = 0; k < tileH; k++)
+                memcpy(dst+dO+k*dstLineSize, src+sO+k*tileWBytes, tileWBytes);
+        } else {
+            for (int k = 0; k < rows; k++)
+                memcpy(dst+dO+k*dstLineSize, src+sO+k*tileWBytes, cols*pixBytes);
+        }
+
+        dX += tileW;
+        if (dX >= w) {
+            dX = 0;
+            dY += tileH;
+        }
+        sO = sO + tileW*tileH*pixBytes;
+    }
+}
+
+
+/*
+ * Generic detile logic
+ */
+
+/*
+ * Direction Change Entry
+ * Used to specify the tile walking of subtiles within a tile.
+ */
+/**
+ * Settings for Intel Tile-Yf framebuffer layout.
+ * May need to swap the 4 pixel wide subtile, have to check doc bit more
+ */
+/*
+ * NOTE: the *SubTileWidthBytes values are written as literals. In ISO C an
+ * object with static storage duration needs a constant-expression
+ * initialiser, and another const int object is not one, so the product
+ * form is only accepted as a compiler extension.
+ * Each dirChange initialiser is presumably {posOffset, xDelta, yDelta};
+ * the struct is declared in fbtile.h - TODO confirm the member order there.
+ */
+const int tyfBytesPerPixel = 4;
+const int tyfSubTileWidth = 4;
+const int tyfSubTileHeight = 8;
+const int tyfSubTileWidthBytes = 16; // tyfSubTileWidth*tyfBytesPerPixel
+const int tyfTileWidth = 32;
+const int tyfTileHeight = 32;
+const int tyfNumDirChanges = 6;
+struct dirChange tyfDirChanges[] = { {8, 4, 0}, {16, -4, 8}, {32, 4, -8}, {64, -12, 8 }, {128, 4, -24}, {256, 4, -24} };
+
+/**
+ * Setting for Intel Tile-X framebuffer layout
+ */
+const int txBytesPerPixel = 4;
+const int txSubTileWidth = 128;
+const int txSubTileHeight = 8;
+const int txSubTileWidthBytes = 512; // txSubTileWidth*txBytesPerPixel
+const int txTileWidth = 128;
+const int txTileHeight = 8;
+const int txNumDirChanges = 1;
+struct dirChange txDirChanges[] = { {8, 128, 0} };
+
+/**
+ * Setting for Intel Tile-Y framebuffer layout
+ * Even though a simple generic detiling logic doesn't require the
+ * dummy 256 posOffset entry, the pseudo parallel detiling based
+ * opti logic needs it to know about the tile boundary.
+ */
+const int tyBytesPerPixel = 4;
+const int tySubTileWidth = 4;
+const int tySubTileHeight = 32;
+const int tySubTileWidthBytes = 16; // tySubTileWidth*tyBytesPerPixel
+const int tyTileWidth = 32;
+const int tyTileHeight = 32;
+const int tyNumDirChanges = 2;
+struct dirChange tyDirChanges[] = { {32, 4, 0}, {256, 4, 0} };
+
+
+/*
+ * Generic detiler, simple variant: copies one sub-tile at a time from src
+ * (read sequentially) into dst, and uses the dirChanges table to decide
+ * where the next sub-tile lands in dst.
+ *
+ * w, h: image size in pixels; dstLineSize, srcLineSize: row sizes in bytes.
+ * subTileWidth / subTileHeight / subTileWidthBytes describe one sub-tile,
+ * tileHeight the height of a full tile. tileWidth is not used in this
+ * variant. dirChanges holds numDirChanges entries.
+ * NOTE(review): no clipping is done against w or h, so the image size
+ * presumably has to be a multiple of the tile size - confirm with callers.
+ */
+void detile_generic_simple(int w, int h,
+ uint8_t *dst, int dstLineSize,
+ const uint8_t *src, int srcLineSize,
+ int bytesPerPixel,
+ int subTileWidth, int subTileHeight, int subTileWidthBytes,
+ int tileWidth, int tileHeight,
+ int numDirChanges, struct dirChange *dirChanges)
+{
+
+ // A source pitch different from w*bytesPerPixel is only reported; the walk below still runs
+ if (w*bytesPerPixel != srcLineSize) {
+ fprintf(stderr,"DBUG:fbdetile:generic: w%dxh%d, dL%d, sL%d\n", w, h, dstLineSize, srcLineSize);
+ fprintf(stderr,"ERRR:fbdetile:generic: dont support LineSize | Pitch going beyond width\n");
+ }
+ int sO = 0; // srcOffset in bytes
+ int dX = 0; // destX in pixels
+ int dY = 0; // destY in pixel lines
+ int nSTLines = (w*h)/subTileWidth; // numSubTileLines
+ int cSTL = 0; // curSubTileLine
+ while (cSTL < nSTLines) {
+ int dO = dY*dstLineSize + dX*bytesPerPixel;
+#ifdef DEBUG_FBTILE
+ fprintf(stderr,"DBUG:fbdetile:generic: dX%d dY%d, sO%d, dO%d\n", dX, dY, sO, dO);
+#endif
+
+ // Copy one sub-tile: consecutive source lines go to consecutive dst rows
+ for (int k = 0; k < subTileHeight; k++) {
+ memcpy(dst+dO+k*dstLineSize, src+sO+k*subTileWidthBytes, subTileWidthBytes);
+ }
+ sO = sO + subTileHeight*subTileWidthBytes;
+
+ cSTL += subTileHeight;
+ // Scan the table from its last entry backwards and apply the first
+ // entry whose posOffset divides the number of sub-tile lines done so far
+ for (int i=numDirChanges-1; i>=0; i--) {
+ if ((cSTL%dirChanges[i].posOffset) == 0) {
+ dX += dirChanges[i].xDelta;
+ dY += dirChanges[i].yDelta;
+ break;
+ }
+ }
+ // Past the right edge: start the next row of tiles
+ if (dX >= w) {
+ dX = 0;
+ dY += tileHeight;
+ }
+ }
+}
+
+
+/*
+ * Generic detiler, optimised variant: same table driven walk as
+ * detile_generic_simple, but each step copies the same sub-tile position of
+ * up to 8 horizontally adjacent tiles ("parallel"), with the inner copy
+ * unrolled four lines at a time.
+ *
+ * Parameters are as for detile_generic_simple. The last dirChanges entry is
+ * treated as the tile boundary marker: when it matches, the walk jumps to
+ * the next group of tiles instead of applying its xDelta.
+ */
+void detile_generic_opti(int w, int h,
+ uint8_t *dst, int dstLineSize,
+ const uint8_t *src, int srcLineSize,
+ int bytesPerPixel,
+ int subTileWidth, int subTileHeight, int subTileWidthBytes,
+ int tileWidth, int tileHeight,
+ int numDirChanges, struct dirChange *dirChanges)
+{
+ int parallel = 1;
+
+ // Both checks below only report the problem; the walk still runs
+ if (w*bytesPerPixel != srcLineSize) {
+ fprintf(stderr,"DBUG:fbdetile:generic: w%dxh%d, dL%d, sL%d\n", w, h, dstLineSize, srcLineSize);
+ fprintf(stderr,"ERRR:fbdetile:generic: dont support LineSize | Pitch going beyond width\n");
+ }
+ if (w%tileWidth != 0) {
+ fprintf(stderr,"DBUG:fbdetile:generic:NotSupported:NonMultWidth: width%d, tileWidth%d\n", w, tileWidth);
+ }
+ int sO = 0; // srcOffset in bytes, within the current group of tiles
+ int sOPrev = 0; // srcOffset at which the current group of tiles started
+ int dX = 0;
+ int dY = 0;
+ int nSTLines = (w*h)/subTileWidth;
+ //int nSTLinesInATile = (tileWidth*tileHeight)/subTileWidth;
+ int nTilesInARow = w/tileWidth;
+ // Pick the largest group size in 8..1 that evenly divides the tiles per row
+ for (parallel=8; parallel>0; parallel--) {
+ if (nTilesInARow%parallel == 0)
+ break;
+ }
+ int cSTL = 0;
+ int curTileInRow = 0;
+ while (cSTL < nSTLines) {
+ int dO = dY*dstLineSize + dX*bytesPerPixel;
+#ifdef DEBUG_FBTILE
+ fprintf(stderr,"DBUG:fbdetile:generic: dX%d dY%d, sO%d, dO%d\n", dX, dY, sO, dO);
+#endif
+
+ // As most tiling layouts have a minimum subtile of 4x4, if I remember correctly,
+ // so this loop has been unrolled to be multiples of 4, and speed up a bit.
+ // However tiling involving 3x3 or 2x2 wont be handlable. Use detile_generic_simple
+ // for such tile layouts.
+ // Detile parallely to a limited extent. To avoid any cache set-associativity and or
+ // limited cache based thrashing, keep it spacially and inturn temporaly small at one level.
+ for (int k = 0; k < subTileHeight; k+=4) {
+ for (int p = 0; p < parallel; p++) {
+ // Tile p of the group: one whole tile further in src, one tile width further right in dst
+ int pSrcOffset = p*tileWidth*tileHeight*bytesPerPixel;
+ int pDstOffset = p*tileWidth*bytesPerPixel;
+ memcpy(dst+dO+k*dstLineSize+pDstOffset, src+sO+k*subTileWidthBytes+pSrcOffset, subTileWidthBytes);
+ memcpy(dst+dO+(k+1)*dstLineSize+pDstOffset, src+sO+(k+1)*subTileWidthBytes+pSrcOffset, subTileWidthBytes);
+ memcpy(dst+dO+(k+2)*dstLineSize+pDstOffset, src+sO+(k+2)*subTileWidthBytes+pSrcOffset, subTileWidthBytes);
+ memcpy(dst+dO+(k+3)*dstLineSize+pDstOffset, src+sO+(k+3)*subTileWidthBytes+pSrcOffset, subTileWidthBytes);
+ }
+ }
+ sO = sO + subTileHeight*subTileWidthBytes;
+
+ cSTL += subTileHeight;
+ for (int i=numDirChanges-1; i>=0; i--) {
+ if ((cSTL%dirChanges[i].posOffset) == 0) {
+ if (i == numDirChanges-1) {
+ // Tile boundary: skip over the whole group of tiles just handled
+ curTileInRow += parallel;
+ dX = curTileInRow*tileWidth;
+ sO = sOPrev + tileWidth*tileHeight*bytesPerPixel*(parallel);
+ sOPrev = sO;
+ } else {
+ dX += dirChanges[i].xDelta;
+ }
+ dY += dirChanges[i].yDelta;
+ break;
+ }
+ }
+ // Past the right edge: start the next row of tiles, stop once below the image
+ if (dX >= w) {
+ dX = 0;
+ curTileInRow = 0;
+ dY += tileHeight;
+ if (dY >= h) {
+ break;
+ }
+ }
+ }
+}
+
+
+/**
+ * Detile src into dst using the layout selected by mode.
+ *
+ * Nothing is written to dst for TILE_NONE, for TILE_AUTO resolving to a
+ * linear or unsupported layout, or for an unknown mode value; callers
+ * that need valid output in those cases must not rely on dst.
+ *
+ * @param mode          an FBTileMode value; TILE_AUTO is resolved from arg1
+ * @param arg1          the format modifier, only used for TILE_AUTO (and in
+ *                      the warning printed when it is not supported)
+ * @param w             width of the image, in pixels
+ * @param h             height of the image, in pixel lines
+ * @param dst           the destination (linear) image buffer
+ * @param dstLineSize   the size of each row in dst, in bytes
+ * @param src           the source (tiled) image buffer
+ * @param srcLineSize   the size of each row in src, in bytes
+ * @param bytesPerPixel currently unused; every layout handled here is
+ *                      configured for 4 bytes per pixel
+ */
+void detile_this(int mode, uint64_t arg1,
+                 int w, int h,
+                 uint8_t *dst, int dstLineSize,
+                 uint8_t *src, int srcLineSize,
+                 int bytesPerPixel)
+{
+    if (mode == TILE_NONE)
+        return;
+    if (mode == TILE_AUTO)
+        mode = fbtilemode_from_formatmodifier(arg1);
+
+    switch (mode) {
+    case TILE_INTELX:
+        detile_intelx(w, h, dst, dstLineSize, src, srcLineSize);
+        break;
+    case TILE_INTELY:
+        detile_intely(w, h, dst, dstLineSize, src, srcLineSize);
+        break;
+    case TILE_INTELYF:
+        detile_generic(w, h, dst, dstLineSize, src, srcLineSize,
+                       tyfBytesPerPixel, tyfSubTileWidth, tyfSubTileHeight, tyfSubTileWidthBytes,
+                       tyfTileWidth, tyfTileHeight,
+                       tyfNumDirChanges, tyfDirChanges);
+        break;
+    case TILE_INTELGX:
+        detile_generic(w, h, dst, dstLineSize, src, srcLineSize,
+                       txBytesPerPixel, txSubTileWidth, txSubTileHeight, txSubTileWidthBytes,
+                       txTileWidth, txTileHeight,
+                       txNumDirChanges, txDirChanges);
+        break;
+    case TILE_INTELGY:
+        detile_generic(w, h, dst, dstLineSize, src, srcLineSize,
+                       tyBytesPerPixel, tySubTileWidth, tySubTileHeight, tySubTileWidthBytes,
+                       tyTileWidth, tyTileHeight,
+                       tyNumDirChanges, tyDirChanges);
+        break;
+    case TILE_NONE_END:
+        // PRIx64 matches uint64_t on every platform, unlike %llx
+        fprintf(stderr, "WARN:fbtile:detile_this:TILE_AUTO: invalid or unsupported format_modifier:%"PRIx64"\n",arg1);
+        break;
+    default:
+        break;
+    }
+}
+
+
+// vim: set expandtab sts=4: //
new file mode 100644
@@ -0,0 +1,213 @@
+/*
+ * CPU based Framebuffer Tile DeTile logic
+ * Copyright (c) 2020 C Hanish Menon <HanishKVC>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_FBTILE_H
+#define AVUTIL_FBTILE_H
+
+#include <stdio.h>
+#include <stdint.h>
+
+/**
+ * @file
+ * @brief CPU based Framebuffer tiler detiler
+ * @author C Hanish Menon <HanishKVC>
+ * @{
+ */
+
+
+enum FBTileMode {
+    TILE_NONE     = 0, /* detile_this() returns without doing anything */
+    TILE_AUTO     = 1, /* mode is looked up from the format_modifier */
+    TILE_INTELX   = 2, /* intel tile-x, handled by detile_intelx() */
+    TILE_INTELY   = 3, /* intel tile-y, handled by detile_intely() */
+    TILE_INTELYF  = 4, /* intel tile-yf, handled by detile_generic() */
+    TILE_INTELGX  = 5, /* intel tile-x through detile_generic() */
+    TILE_INTELGY  = 6, /* intel tile-y through detile_generic() */
+    TILE_NONE_END = 7, /* reported as invalid or unsupported by detile_this() */
+};
+
+
+/**
+ * Map a format_modifier to fbtile's internal mode (see enum FBTileMode).
+ * detile_this() reports a TILE_NONE_END result as invalid or unsupported.
+ * @param formatModifier the format_modifier to map
+ * @return the fbtile's equivalent internal mode
+ */
+#undef DEBUG_FBTILE_FORMATMODIFIER_MAPPING
+int fbtilemode_from_formatmodifier(uint64_t formatModifier);
+
+
+/**
+ * Detile legacy intel tile-x layout into linear layout.
+ *
+ * @param w width of the image
+ * @param h height of the image
+ * @param dst the destination image buffer (written; receives the linear layout)
+ * @param dstLineSize the size of each row in dst image, in bytes
+ * @param src the source image buffer (read only; holds the tile-x layout)
+ * @param srcLineSize the size of each row in src image, in bytes
+ */
+void detile_intelx(int w, int h,
+ uint8_t *dst, int dstLineSize,
+ const uint8_t *src, int srcLineSize);
+
+
+/**
+ * Detile legacy intel tile-y layout into linear layout.
+ *
+ * @param w width of the image
+ * @param h height of the image
+ * @param dst the destination image buffer (written; receives the linear layout)
+ * @param dstLineSize the size of each row in dst image, in bytes
+ * @param src the source image buffer (read only; holds the tile-y layout)
+ * @param srcLineSize the size of each row in src image, in bytes
+ */
+void detile_intely(int w, int h,
+ uint8_t *dst, int dstLineSize,
+ const uint8_t *src, int srcLineSize);
+
+
+/**
+ * Generic Logic.
+ */
+
+/*
+ * Direction Change Entry: one rule for walking the subtiles within a tile.
+ * detile_generic_opti() keeps a counter that grows by subTileHeight per subtile and applies the highest-index entry whose posOffset divides it.
+ */
+struct dirChange {
+ int posOffset; /* the entry applies when that counter is a multiple of this value */
+ int xDelta; /* added to the x position; not used for the last entry, which moves on to the next tile */
+ int yDelta; /* added to the y position */
+};
+/**
+ * Settings for Intel Tile-Yf framebuffer layout (passed to detile_generic() for TILE_INTELYF).
+ * TODO: the 4 pixel wide subtile may need to be swapped; recheck the documentation.
+ */
+extern const int tyfBytesPerPixel;
+extern const int tyfSubTileWidth;
+extern const int tyfSubTileHeight;
+extern const int tyfSubTileWidthBytes;
+extern const int tyfTileWidth;
+extern const int tyfTileHeight;
+extern const int tyfNumDirChanges;
+extern struct dirChange tyfDirChanges[];
+/**
+ * Settings for Intel Tile-X framebuffer layout (passed to detile_generic() for TILE_INTELGX).
+ */
+extern const int txBytesPerPixel;
+extern const int txSubTileWidth;
+extern const int txSubTileHeight;
+extern const int txSubTileWidthBytes;
+extern const int txTileWidth;
+extern const int txTileHeight;
+extern const int txNumDirChanges;
+extern struct dirChange txDirChanges[];
+/**
+ * Settings for Intel Tile-Y framebuffer layout (passed to detile_generic() for TILE_INTELGY).
+ * Even though the simple generic detiling logic doesn't require the dummy
+ * 256 posOffset entry, the pseudo parallel detiling based optimised logic
+ * needs it in order to know about the tile boundary.
+ */
+extern const int tyBytesPerPixel;
+extern const int tySubTileWidth;
+extern const int tySubTileHeight;
+extern const int tySubTileWidthBytes;
+extern const int tyTileWidth;
+extern const int tyTileHeight;
+extern const int tyNumDirChanges;
+extern struct dirChange tyDirChanges[];
+
+/**
+ * Generic Logic to Detile into linear layout.
+ * Parameters shared by detile_generic_simple() and detile_generic_opti():
+ * @param w width of the image
+ * @param h height of the image
+ * @param dst the destination image buffer
+ * @param dstLineSize the size of each row in dst image, in bytes
+ * @param src the source image buffer
+ * @param srcLineSize the size of each row in src image, in bytes
+ * @param bytesPerPixel the bytes per pixel for the image
+ * @param subTileWidth the width of subtile within the tile, in pixels
+ * @param subTileHeight the height of subtile within the tile, in pixels
+ * @param subTileWidthBytes the width of subtile within the tile, in bytes
+ * @param tileWidth the width of the tile, in pixels
+ * @param tileHeight the height of the tile, in pixels
+ * @param numDirChanges the number of entries in dirChanges
+ * @param dirChanges the direction change entries describing the subtile walk
+ */
+
+
+/** Generic detile simple version, which is fine-grained. */
+void detile_generic_simple(int w, int h,
+ uint8_t *dst, int dstLineSize,
+ const uint8_t *src, int srcLineSize,
+ int bytesPerPixel,
+ int subTileWidth, int subTileHeight, int subTileWidthBytes,
+ int tileWidth, int tileHeight,
+ int numDirChanges, struct dirChange *dirChanges);
+
+
+/**
+ * Generic detile optimised version, minimum subtile supported 4x4; the last dirChanges entry is treated as the move to the next tile.
+ */
+void detile_generic_opti(int w, int h,
+ uint8_t *dst, int dstLineSize,
+ const uint8_t *src, int srcLineSize,
+ int bytesPerPixel,
+ int subTileWidth, int subTileHeight, int subTileWidthBytes,
+ int tileWidth, int tileHeight,
+ int numDirChanges, struct dirChange *dirChanges);
+
+
+#ifdef DETILE_GENERIC_OPTI /* build-time choice of the detile_generic implementation */
+#define detile_generic detile_generic_opti
+#else
+#define detile_generic detile_generic_simple
+#endif /* DETILE_GENERIC_OPTI */
+
+
+/**
+ * Detile dispatcher: runs the detiling routine selected by mode.
+ *
+ * @param mode the fbtile mode (enum FBTileMode) based detiling to call; TILE_NONE does nothing
+ * @param arg1 the format_modifier, in case mode is TILE_AUTO
+ * @param w width of the image
+ * @param h height of the image
+ * @param dst the destination image buffer
+ * @param dstLineSize the size of each row in dst image, in bytes
+ * @param src the source image buffer
+ * @param srcLineSize the size of each row in src image, in bytes
+ * @param bytesPerPixel the bytes per pixel for the image (currently not used by detile_this)
+ */
+void detile_this(int mode, uint64_t arg1,
+ int w, int h,
+ uint8_t *dst, int dstLineSize,
+ uint8_t *src, int srcLineSize,
+ int bytesPerPixel);
+
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_FBTILE_H */
+// vim: set expandtab sts=4: //