diff mbox

[FFmpeg-devel] beautified + accelerated vf_fillborders – Please review

Message ID 60c92734-b68a-1cac-6140-4ba5b09017f7@CoSoCo.de
State Superseded
Headers show

Commit Message

Ulf Zibis March 15, 2019, 12:08 a.m. UTC
Am 11.03.19 um 23:29 schrieb Lou Logan:
> Commit message title prefix for filter patches are usually in the form
> of:
>
> avfilter/fillborders:
> or
> lavfi/fillborders: 
>
> Trailing whitespace. This should always be avoided.
>
> Use av_malloc.

I now have separted the changes into 4 patches, and mergerd your hints.

So you can clearly see, which calculations I have skipped or moved out
of inner for loops.
Can you give a rating if a performance win could be expected compaired
to the original code from your experienced knowledge without a benchmark?

-Ulf

Comments

Carl Eugen Hoyos March 15, 2019, 12:21 a.m. UTC | #1
2019-03-15 1:08 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:

> Can you give a rating if a performance win could be expected compaired
> to the original code from your experienced knowledge without a benchmark?

Just post the numbers that your tests produced.

Carl Eugen
Michael Niedermayer March 15, 2019, 9:09 p.m. UTC | #2
On Fri, Mar 15, 2019 at 01:08:33AM +0100, Ulf Zibis wrote:
[...]
>  static void fixed_borders16(FillBordersContext *s, AVFrame *frame)
>  {
> -    int p, y, x;
> -
> -    for (p = 0; p < s->nb_planes; p++) {
> +    for (int p = 0; p < s->nb_planes; p++) {
>          uint16_t *data = (uint16_t *)frame->data[p];
> -        int linesize = frame->linesize[p] / sizeof(uint16_t);
> +        int lz = frame->linesize[p] / sizeof(uint16_t);
>          int width = s->planewidth[p];
> -        int height = s->planeheight[p];
> +        int height = s->planeheight[p] * lz;
>          int left = s->borders[p].left;
> -        int right = s->borders[p].right;
> -        int top = s->borders[p].top;
> -        int bottom = s->borders[p].bottom;
> -        uint16_t fill = s->fill[p] << (s->depth - 8);
> +        int right = width - s->borders[p].right;
> +        int top = s->borders[p].top * lz;
> +        int bottom = height - s->borders[p].bottom * lz;
> +        int fill_sz = MAX(MAX(left, right), top!=0 || height-bottom!=0 ? width : 0);

this would be FFMAX

src/libavfilter/vf_fillborders.c: In function ‘fixed_borders16’:
src/libavfilter/vf_fillborders.c:239:9: error: implicit declaration of function ‘MAX’ [-Werror=implicit-function-declaration]
         int fill_sz = MAX(MAX(left, right), top!=0 || height-bottom!=0 ? width : 0);

         
[...]
diff mbox

Patch

From 663987b6391301b963714eb3a660642d46656ed9 Mon Sep 17 00:00:00 2001
From: Ulf Zibis <Ulf.Zibis@CoSoCo.de>
Date: 14.03.2019, 23:27:43

avfilter/fillborders: enhance performance by
- less calculations in inner for loops
- more use of memcpy()
side effect: again enhanced readability;

diff --git a/libavfilter/vf_fillborders.c b/libavfilter/vf_fillborders.c
index 393ad7d..3d58f9e 100644
--- a/libavfilter/vf_fillborders.c
+++ b/libavfilter/vf_fillborders.c
@@ -18,6 +18,7 @@ 
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <sys/param.h>
 #include "libavutil/colorspace.h"
 #include "libavutil/common.h"
 #include "libavutil/opt.h"
@@ -84,226 +85,176 @@ 
 
 static void smear_borders8(FillBordersContext *s, AVFrame *frame)
 {
-    int p, y;
-
-    for (p = 0; p < s->nb_planes; p++) {
+    for (int p = 0; p < s->nb_planes; p++) {
         uint8_t *data = frame->data[p];
-        int linesize = frame->linesize[p];
+        int lz = frame->linesize[p];
         int width = s->planewidth[p];
-        int height = s->planeheight[p];
+        int height = s->planeheight[p] * lz;
         int left = s->borders[p].left;
-        int right = s->borders[p].right;
-        int top = s->borders[p].top;
-        int bottom = s->borders[p].bottom;
+        int right = width - s->borders[p].right;
+        int top = s->borders[p].top * lz;
+        int bottom = height - s->borders[p].bottom * lz;
 
         /* fill left and right borders from top to bottom border */
         if (left != 0 || right != width) // in case skip for performance
-            for (y = top; y < height - bottom; y++) {
-                memset(data + y * linesize,
-                       *(data + y * linesize + left), left);
-                memset(data + y * linesize + width - right,
-                       *(data + y * linesize + width - right - 1), right);
+            for (int y = top; y < bottom; y += lz) {
+                memset(data + y, *(data + y + left), left);
+                memset(data + y + right, *(data + y + right - 1), width - right);
             }
 
         /* fill top and bottom borders */
-        for (y = 0; y < top; y++) {
-            memcpy(data + y * linesize,
-                   data + top * linesize, width);
-        }
-        for (y = height - bottom; y < height; y++) {
-            memcpy(data + y * linesize,
-                   data + (height - bottom - 1) * linesize, width);
-        }
+        for (uint8_t *y = data + top; y > data; )
+            memcpy(y -= lz, data + top, width);
+        for (uint8_t *y = data + bottom; y < data + height; y += lz)
+            memcpy(y, data + bottom - lz, width);
     }
 }
 
 static void smear_borders16(FillBordersContext *s, AVFrame *frame)
 {
-    int p, y, x;
-
-    for (p = 0; p < s->nb_planes; p++) {
+    for (int p = 0; p < s->nb_planes; p++) {
         uint16_t *data = (uint16_t *)frame->data[p];
-        int linesize = frame->linesize[p] / sizeof(uint16_t);
+        int lz = frame->linesize[p] / sizeof(uint16_t);
         int width = s->planewidth[p];
-        int height = s->planeheight[p];
+        int height = s->planeheight[p] * lz;
         int left = s->borders[p].left;
-        int right = s->borders[p].right;
-        int top = s->borders[p].top;
-        int bottom = s->borders[p].bottom;
+        int right = width - s->borders[p].right;
+        int top = s->borders[p].top * lz;
+        int bottom = height - s->borders[p].bottom * lz;
 
         /* fill left and right borders from top to bottom border */
         if (left != 0 || right != width) // in case skip for performance
-            for (y = top; y < height - bottom; y++) {
-                for (x = 0; x < left; x++) {
-                    data[y * linesize + x] =  *(data + y * linesize + left);
-                }
-                for (x = 0; x < right; x++) {
-                    data[y * linesize + width - right + x] =
-                            *(data + y * linesize + width - right - 1);
-                }
+            for (int y = top; y < bottom; y += lz) {
+                for (int x = left; x >= 0; x--)
+                    data[y + x] =  data[y + left];
+                for (int x = right; x < width; x++)
+                    data[y + x] = data[y + right - 1];
             }
 
         /* fill top and bottom borders */
-        for (y = 0; y < top; y++) {
-            memcpy(data + y * linesize,
-                   data + top * linesize, width * sizeof(uint16_t));
-        }
-        for (y = height - bottom; y < height; y++) {
-            memcpy(data + y * linesize,
-                   data + (height - bottom - 1) * linesize,
-                   width * sizeof(uint16_t));
-        }
+        for (uint16_t *y = data + top; y > data; )
+            memcpy(y -= lz, data + top, width * sizeof(uint16_t));
+        for (uint16_t *y = data + bottom; y < data + height; y += lz)
+            memcpy(y, data + bottom - lz, width * sizeof(uint16_t));
     }
 }
 
 static void mirror_borders8(FillBordersContext *s, AVFrame *frame)
 {
-    int p, y, x;
-
-    for (p = 0; p < s->nb_planes; p++) {
+    for (int p = 0; p < s->nb_planes; p++) {
         uint8_t *data = frame->data[p];
-        int linesize = frame->linesize[p];
+        int lz = frame->linesize[p];
         int width = s->planewidth[p];
-        int height = s->planeheight[p];
+        int height = s->planeheight[p] * lz;
         int left = s->borders[p].left;
-        int right = s->borders[p].right;
-        int top = s->borders[p].top;
-        int bottom = s->borders[p].bottom;
+        int right = width - s->borders[p].right;
+        int top = s->borders[p].top * lz;
+        int bottom = height - s->borders[p].bottom * lz;
 
         /* fill left and right borders from top to bottom border */
         if (left != 0 || right != width) // in case skip for performance
-            for (y = top; y < height - bottom; y++) {
-                for (x = 0; x < left; x++) {
-                    data[y * linesize + x] = data[y * linesize + left * 2 - 1 - x];
-                }
-                for (x = 0; x < right; x++) {
-                    data[y * linesize + width - right + x] =
-                            data[y * linesize + width - right - 1 - x];
-                }
+            for (int y = top; y < bottom; y += lz)
+                for (int x = left, x2 = x; x >= 0; x--) {
+                    data[y + x] = data[y + x2++];
+                for (int x = right, x2 = x; x < width; x++)
+                    data[y + x] = data[y + --x2];
             }
 
         /* fill top and bottom borders */
-        for (y = 0; y < top; y++) {
-            memcpy(data + y * linesize,
-                   data + (top * 2 - 1 - y) * linesize, width);
-        }
-        for (y = 0; y < bottom; y++) {
-            memcpy(data + (height - bottom + y) * linesize,
-                   data + (height - bottom - 1 - y) * linesize, width);
-        }
+        for (uint8_t *y = data + top, *y2 = y; y > data; y2 += lz)
+            memcpy(y -= lz, y2, width);
+        for (uint8_t *y = data + bottom, *y2 = y; y < data + height; y += lz)
+            memcpy(y, y2 -= lz, width);
     }
 }
 
 static void mirror_borders16(FillBordersContext *s, AVFrame *frame)
 {
-    int p, y, x;
-
-    for (p = 0; p < s->nb_planes; p++) {
+    for (int p = 0; p < s->nb_planes; p++) {
         uint16_t *data = (uint16_t *)frame->data[p];
-        int linesize = frame->linesize[p] / sizeof(uint16_t);
+        int lz = frame->linesize[p] / sizeof(uint16_t);
         int width = s->planewidth[p];
-        int height = s->planeheight[p];
+        int height = s->planeheight[p] * lz;
         int left = s->borders[p].left;
-        int right = s->borders[p].right;
-        int top = s->borders[p].top;
-        int bottom = s->borders[p].bottom;
+        int right = width - s->borders[p].right;
+        int top = s->borders[p].top * lz;
+        int bottom = height - s->borders[p].bottom * lz;
 
         /* fill left and right borders from top to bottom border */
         if (left != 0 || right != width) // in case skip for performance
-            for (y = top; y < height - bottom; y++) {
-                for (x = 0; x < left; x++) {
-                    data[y * linesize + x] = data[y * linesize + left * 2 - 1 - x];
-                }
-
-                for (x = 0; x < right; x++) {
-                    data[y * linesize + width - right + x] =
-                            data[y * linesize + width - right - 1 - x];
-                }
+            for (int y = top; y < bottom; y += lz) {
+                for (int x = left, x2 = x; x >= 0; x--)
+                    data[y + x] = data[y + x2++];
+                for (int x = right, x2 = x; x < width; x++)
+                    data[y + x] = data[y + --x2];
             }
 
         /* fill top and bottom borders */
-        for (y = 0; y < top; y++) {
-            memcpy(data + y * linesize,
-                   data + (top * 2 - 1 - y) * linesize,
-                   width * sizeof(uint16_t));
-        }
-        for (y = 0; y < bottom; y++) {
-            memcpy(data + (height - bottom + y) * linesize,
-                   data + (height - bottom - 1 - y) * linesize,
-                   width * sizeof(uint16_t));
-        }
+        for (uint16_t *y = data + top, *y2 = y; y > data; y2 += lz)
+            memcpy(y -= lz, y2, width * sizeof(uint16_t));
+        for (uint16_t *y = data + bottom, *y2 = y; y < data + height; y += lz)
+            memcpy(y, y2 -= lz, width * sizeof(uint16_t));
     }
 }
 
 static void fixed_borders8(FillBordersContext *s, AVFrame *frame)
 {
-    int p, y;
-
-    for (p = 0; p < s->nb_planes; p++) {
+    for (int p = 0; p < s->nb_planes; p++) {
         uint8_t *data = frame->data[p];
-        int linesize = frame->linesize[p];
+        int lz = frame->linesize[p];
         int width = s->planewidth[p];
-        int height = s->planeheight[p];
+        int height = s->planeheight[p] * lz;
         int left = s->borders[p].left;
-        int right = s->borders[p].right;
-        int top = s->borders[p].top;
-        int bottom = s->borders[p].bottom;
+        int right = width - s->borders[p].right;
+        int top = s->borders[p].top * lz;
+        int bottom = height - s->borders[p].bottom * lz;
         uint8_t fill = s->fill[p];
 
         /* fill left and right borders from top to bottom border */
         if (left != 0 || right != width) // in case skip for performance
-            for (y = top; y < height - bottom; y++) {
-                memset(data + y * linesize, fill, left);
-                memset(data + y * linesize + width - right, fill, right);
+            for (int y = top; y < bottom; y += lz) {
+                memset(data + y, fill, left);
+                memset(data + y + right, fill, width - right);
             }
 
         /* fill top and bottom borders */
-        for (y = 0; y < top; y++) {
-            memset(data + y * linesize, fill, width);
-        }
-        for (y = height - bottom; y < height; y++) {
-            memset(data + y * linesize, fill, width);
-        }
+        for (uint8_t *y = data + top; y > data; )
+            memset(y -= lz, fill, width);
+        for (uint8_t *y = data + bottom; y < data + height; y += lz)
+            memset(y, fill, width);
     }
 }
 
 static void fixed_borders16(FillBordersContext *s, AVFrame *frame)
 {
-    int p, y, x;
-
-    for (p = 0; p < s->nb_planes; p++) {
+    for (int p = 0; p < s->nb_planes; p++) {
         uint16_t *data = (uint16_t *)frame->data[p];
-        int linesize = frame->linesize[p] / sizeof(uint16_t);
+        int lz = frame->linesize[p] / sizeof(uint16_t);
         int width = s->planewidth[p];
-        int height = s->planeheight[p];
+        int height = s->planeheight[p] * lz;
         int left = s->borders[p].left;
-        int right = s->borders[p].right;
-        int top = s->borders[p].top;
-        int bottom = s->borders[p].bottom;
-        uint16_t fill = s->fill[p] << (s->depth - 8);
+        int right = width - s->borders[p].right;
+        int top = s->borders[p].top * lz;
+        int bottom = height - s->borders[p].bottom * lz;
+        int fill_sz = MAX(MAX(left, right), top!=0 || height-bottom!=0 ? width : 0);
+        uint16_t *fill = av_malloc(fill_sz * sizeof(uint16_t));
+        for (int i = 0; i < fill_sz; i++)
+            fill[i] = s->fill[p] << (s->depth - 8);
 
         /* fill left and right borders from top to bottom border */
         if (left != 0 || right != width) // in case skip for performance
-            for (y = top; y < height - bottom; y++) {
-                for (x = 0; x < left; x++) {
-                    data[y * linesize + x] = fill;
-                }
-                for (x = 0; x < right; x++) {
-                    data[y * linesize + width - right + x] = fill;
-                }
+            for (int y = top; y < bottom; y += lz) {
+                memcpy(data + y, fill, left * sizeof(uint16_t));
+                memcpy(data + y + right, fill, (width - right) * sizeof(uint16_t));
             }
 
         /* fill top and bottom borders */
-        for (y = 0; y < top; y++) {
-            for (x = 0; x < width; x++) {
-                data[y * linesize + x] = fill;
-            }
-        }
-        for (y = height - bottom; y < height; y++) {
-            for (x = 0; x < width; x++) {
-                data[y * linesize + x] = fill;
-            }
-        }
+        for (uint16_t *y = data + top; y > data; )
+            memcpy(y -= lz, fill, width * sizeof(uint16_t));
+        for (uint16_t *y = data + bottom; y < data + height; y += lz)
+            memcpy(y, fill, width * sizeof(uint16_t));
+
+        av_free(fill);
     }
 }