From 663987b6391301b963714eb3a660642d46656ed9 Mon Sep 17 00:00:00 2001
From: Ulf Zibis <Ulf.Zibis@CoSoCo.de>
Date: 14.03.2019, 23:27:43
avfilter/fillborders: enhance performance by
- less calculations in inner for loops
- more use of memcpy()
side effect: again enhanced readability;
@@ -18,6 +18,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include <sys/param.h>
#include "libavutil/colorspace.h"
#include "libavutil/common.h"
#include "libavutil/opt.h"
@@ -84,226 +85,176 @@
static void smear_borders8(FillBordersContext *s, AVFrame *frame)
{
- int p, y;
-
- for (p = 0; p < s->nb_planes; p++) {
+ for (int p = 0; p < s->nb_planes; p++) {
uint8_t *data = frame->data[p];
- int linesize = frame->linesize[p];
+ int lz = frame->linesize[p];
int width = s->planewidth[p];
- int height = s->planeheight[p];
+ int height = s->planeheight[p] * lz;
int left = s->borders[p].left;
- int right = s->borders[p].right;
- int top = s->borders[p].top;
- int bottom = s->borders[p].bottom;
+ int right = width - s->borders[p].right;
+ int top = s->borders[p].top * lz;
+ int bottom = height - s->borders[p].bottom * lz;
/* fill left and right borders from top to bottom border */
if (left != 0 || right != width) // in case skip for performance
- for (y = top; y < height - bottom; y++) {
- memset(data + y * linesize,
- *(data + y * linesize + left), left);
- memset(data + y * linesize + width - right,
- *(data + y * linesize + width - right - 1), right);
+ for (int y = top; y < bottom; y += lz) {
+ memset(data + y, *(data + y + left), left);
+ memset(data + y + right, *(data + y + right - 1), width - right);
}
/* fill top and bottom borders */
- for (y = 0; y < top; y++) {
- memcpy(data + y * linesize,
- data + top * linesize, width);
- }
- for (y = height - bottom; y < height; y++) {
- memcpy(data + y * linesize,
- data + (height - bottom - 1) * linesize, width);
- }
+ for (uint8_t *y = data + top; y > data; )
+ memcpy(y -= lz, data + top, width);
+ for (uint8_t *y = data + bottom; y < data + height; y += lz)
+ memcpy(y, data + bottom - lz, width);
}
}
static void smear_borders16(FillBordersContext *s, AVFrame *frame)
{
- int p, y, x;
-
- for (p = 0; p < s->nb_planes; p++) {
+ for (int p = 0; p < s->nb_planes; p++) {
uint16_t *data = (uint16_t *)frame->data[p];
- int linesize = frame->linesize[p] / sizeof(uint16_t);
+ int lz = frame->linesize[p] / sizeof(uint16_t);
int width = s->planewidth[p];
- int height = s->planeheight[p];
+ int height = s->planeheight[p] * lz;
int left = s->borders[p].left;
- int right = s->borders[p].right;
- int top = s->borders[p].top;
- int bottom = s->borders[p].bottom;
+ int right = width - s->borders[p].right;
+ int top = s->borders[p].top * lz;
+ int bottom = height - s->borders[p].bottom * lz;
/* fill left and right borders from top to bottom border */
if (left != 0 || right != width) // in case skip for performance
- for (y = top; y < height - bottom; y++) {
- for (x = 0; x < left; x++) {
- data[y * linesize + x] = *(data + y * linesize + left);
- }
- for (x = 0; x < right; x++) {
- data[y * linesize + width - right + x] =
- *(data + y * linesize + width - right - 1);
- }
+ for (int y = top; y < bottom; y += lz) {
+ for (int x = left; x >= 0; x--)
+ data[y + x] = data[y + left];
+ for (int x = right; x < width; x++)
+ data[y + x] = data[y + right - 1];
}
/* fill top and bottom borders */
- for (y = 0; y < top; y++) {
- memcpy(data + y * linesize,
- data + top * linesize, width * sizeof(uint16_t));
- }
- for (y = height - bottom; y < height; y++) {
- memcpy(data + y * linesize,
- data + (height - bottom - 1) * linesize,
- width * sizeof(uint16_t));
- }
+ for (uint16_t *y = data + top; y > data; )
+ memcpy(y -= lz, data + top, width * sizeof(uint16_t));
+ for (uint16_t *y = data + bottom; y < data + height; y += lz)
+ memcpy(y, data + bottom - lz, width * sizeof(uint16_t));
}
}
static void mirror_borders8(FillBordersContext *s, AVFrame *frame)
{
- int p, y, x;
-
- for (p = 0; p < s->nb_planes; p++) {
+ for (int p = 0; p < s->nb_planes; p++) {
uint8_t *data = frame->data[p];
- int linesize = frame->linesize[p];
+ int lz = frame->linesize[p];
int width = s->planewidth[p];
- int height = s->planeheight[p];
+ int height = s->planeheight[p] * lz;
int left = s->borders[p].left;
- int right = s->borders[p].right;
- int top = s->borders[p].top;
- int bottom = s->borders[p].bottom;
+ int right = width - s->borders[p].right;
+ int top = s->borders[p].top * lz;
+ int bottom = height - s->borders[p].bottom * lz;
/* fill left and right borders from top to bottom border */
if (left != 0 || right != width) // in case skip for performance
- for (y = top; y < height - bottom; y++) {
- for (x = 0; x < left; x++) {
- data[y * linesize + x] = data[y * linesize + left * 2 - 1 - x];
- }
- for (x = 0; x < right; x++) {
- data[y * linesize + width - right + x] =
- data[y * linesize + width - right - 1 - x];
- }
+ for (int y = top; y < bottom; y += lz)
+ for (int x = left, x2 = x; x >= 0; x--) {
+ data[y + x] = data[y + x2++];
+ for (int x = right, x2 = x; x < width; x++)
+ data[y + x] = data[y + --x2];
}
/* fill top and bottom borders */
- for (y = 0; y < top; y++) {
- memcpy(data + y * linesize,
- data + (top * 2 - 1 - y) * linesize, width);
- }
- for (y = 0; y < bottom; y++) {
- memcpy(data + (height - bottom + y) * linesize,
- data + (height - bottom - 1 - y) * linesize, width);
- }
+ for (uint8_t *y = data + top, *y2 = y; y > data; y2 += lz)
+ memcpy(y -= lz, y2, width);
+ for (uint8_t *y = data + bottom, *y2 = y; y < data + height; y += lz)
+ memcpy(y, y2 -= lz, width);
}
}
static void mirror_borders16(FillBordersContext *s, AVFrame *frame)
{
- int p, y, x;
-
- for (p = 0; p < s->nb_planes; p++) {
+ for (int p = 0; p < s->nb_planes; p++) {
uint16_t *data = (uint16_t *)frame->data[p];
- int linesize = frame->linesize[p] / sizeof(uint16_t);
+ int lz = frame->linesize[p] / sizeof(uint16_t);
int width = s->planewidth[p];
- int height = s->planeheight[p];
+ int height = s->planeheight[p] * lz;
int left = s->borders[p].left;
- int right = s->borders[p].right;
- int top = s->borders[p].top;
- int bottom = s->borders[p].bottom;
+ int right = width - s->borders[p].right;
+ int top = s->borders[p].top * lz;
+ int bottom = height - s->borders[p].bottom * lz;
/* fill left and right borders from top to bottom border */
if (left != 0 || right != width) // in case skip for performance
- for (y = top; y < height - bottom; y++) {
- for (x = 0; x < left; x++) {
- data[y * linesize + x] = data[y * linesize + left * 2 - 1 - x];
- }
-
- for (x = 0; x < right; x++) {
- data[y * linesize + width - right + x] =
- data[y * linesize + width - right - 1 - x];
- }
+ for (int y = top; y < bottom; y += lz) {
+ for (int x = left, x2 = x; x >= 0; x--)
+ data[y + x] = data[y + x2++];
+ for (int x = right, x2 = x; x < width; x++)
+ data[y + x] = data[y + --x2];
}
/* fill top and bottom borders */
- for (y = 0; y < top; y++) {
- memcpy(data + y * linesize,
- data + (top * 2 - 1 - y) * linesize,
- width * sizeof(uint16_t));
- }
- for (y = 0; y < bottom; y++) {
- memcpy(data + (height - bottom + y) * linesize,
- data + (height - bottom - 1 - y) * linesize,
- width * sizeof(uint16_t));
- }
+ for (uint16_t *y = data + top, *y2 = y; y > data; y2 += lz)
+ memcpy(y -= lz, y2, width * sizeof(uint16_t));
+ for (uint16_t *y = data + bottom, *y2 = y; y < data + height; y += lz)
+ memcpy(y, y2 -= lz, width * sizeof(uint16_t));
}
}
static void fixed_borders8(FillBordersContext *s, AVFrame *frame)
{
- int p, y;
-
- for (p = 0; p < s->nb_planes; p++) {
+ for (int p = 0; p < s->nb_planes; p++) {
uint8_t *data = frame->data[p];
- int linesize = frame->linesize[p];
+ int lz = frame->linesize[p];
int width = s->planewidth[p];
- int height = s->planeheight[p];
+ int height = s->planeheight[p] * lz;
int left = s->borders[p].left;
- int right = s->borders[p].right;
- int top = s->borders[p].top;
- int bottom = s->borders[p].bottom;
+ int right = width - s->borders[p].right;
+ int top = s->borders[p].top * lz;
+ int bottom = height - s->borders[p].bottom * lz;
uint8_t fill = s->fill[p];
/* fill left and right borders from top to bottom border */
if (left != 0 || right != width) // in case skip for performance
- for (y = top; y < height - bottom; y++) {
- memset(data + y * linesize, fill, left);
- memset(data + y * linesize + width - right, fill, right);
+ for (int y = top; y < bottom; y += lz) {
+ memset(data + y, fill, left);
+ memset(data + y + right, fill, width - right);
}
/* fill top and bottom borders */
- for (y = 0; y < top; y++) {
- memset(data + y * linesize, fill, width);
- }
- for (y = height - bottom; y < height; y++) {
- memset(data + y * linesize, fill, width);
- }
+ for (uint8_t *y = data + top; y > data; )
+ memset(y -= lz, fill, width);
+ for (uint8_t *y = data + bottom; y < data + height; y += lz)
+ memset(y, fill, width);
}
}
static void fixed_borders16(FillBordersContext *s, AVFrame *frame)
{
- int p, y, x;
-
- for (p = 0; p < s->nb_planes; p++) {
+ for (int p = 0; p < s->nb_planes; p++) {
uint16_t *data = (uint16_t *)frame->data[p];
- int linesize = frame->linesize[p] / sizeof(uint16_t);
+ int lz = frame->linesize[p] / sizeof(uint16_t);
int width = s->planewidth[p];
- int height = s->planeheight[p];
+ int height = s->planeheight[p] * lz;
int left = s->borders[p].left;
- int right = s->borders[p].right;
- int top = s->borders[p].top;
- int bottom = s->borders[p].bottom;
- uint16_t fill = s->fill[p] << (s->depth - 8);
+ int right = width - s->borders[p].right;
+ int top = s->borders[p].top * lz;
+ int bottom = height - s->borders[p].bottom * lz;
+ int fill_sz = MAX(MAX(left, right), top!=0 || height-bottom!=0 ? width : 0);
+ uint16_t *fill = av_malloc(fill_sz * sizeof(uint16_t));
+ for (int i = 0; i < fill_sz; i++)
+ fill[i] = s->fill[p] << (s->depth - 8);
/* fill left and right borders from top to bottom border */
if (left != 0 || right != width) // in case skip for performance
- for (y = top; y < height - bottom; y++) {
- for (x = 0; x < left; x++) {
- data[y * linesize + x] = fill;
- }
- for (x = 0; x < right; x++) {
- data[y * linesize + width - right + x] = fill;
- }
+ for (int y = top; y < bottom; y += lz) {
+ memcpy(data + y, fill, left * sizeof(uint16_t));
+ memcpy(data + y + right, fill, (width - right) * sizeof(uint16_t));
}
/* fill top and bottom borders */
- for (y = 0; y < top; y++) {
- for (x = 0; x < width; x++) {
- data[y * linesize + x] = fill;
- }
- }
- for (y = height - bottom; y < height; y++) {
- for (x = 0; x < width; x++) {
- data[y * linesize + x] = fill;
- }
- }
+ for (uint16_t *y = data + top; y > data; )
+ memcpy(y -= lz, fill, width * sizeof(uint16_t));
+ for (uint16_t *y = data + bottom; y < data + height; y += lz)
+ memcpy(y, fill, width * sizeof(uint16_t));
+
+ av_free(fill);
}
}