diff mbox series

[FFmpeg-devel,v2,1/2] lavfi/edge_common: Add 16bit versions of gaussian_blur and sobel

Message ID 9bb7f1eb-b927-f2bc-7b4a-292f46ef1a9e@mail.de
State New
Headers show
Series [FFmpeg-devel,v2,1/2] lavfi/edge_common: Add 16bit versions of gaussian_blur and sobel | expand

Checks

Context Check Description
yinshiyou/configure_loongarch64 warning Failed to apply patch
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Thilo Borgmann July 11, 2022, 8:53 a.m. UTC
Hi,

1/2 adds 16-bit versions of ff_gaussian_blur and ff_sobel.

2/2 adds a new mode to cropdetect.

Thanks,
Thilo
From fc8c179e2de4dee3d32d2e02684f3e3215af63c6 Mon Sep 17 00:00:00 2001
From: Thilo Borgmann <thilo.borgmann@mail.de>
Date: Sun, 10 Jul 2022 12:40:27 +0200
Subject: [PATCH v2 1/2] lavfi/edge_common: Add 16bit versions of gaussian_blur
 and sobel

---
 libavfilter/edge_common.c   | 134 ++++++++++++++++++++++++++++--------
 libavfilter/edge_common.h   |  14 +++-
 libavfilter/vf_blurdetect.c |   4 +-
 libavfilter/vf_edgedetect.c |   4 +-
 4 files changed, 121 insertions(+), 35 deletions(-)

Comments

Thilo Borgmann July 16, 2022, 9:07 p.m. UTC | #1
Hi,

> 1/2 adds 16 bit versions of ff_gaussian_blur and ff_sobel.
> 2/2 adds new mode to cropdetect.

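v3 reworks 1/2 to use a template, as requested on IRC: ff_gaussian_blur and ff_sobel now live in edge_template.c, which edge_common.c includes once with DEPTH 8 and once with DEPTH 16.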

-Thilo
From 5453c0e27cd2c54931b012d663178a7c0b5a9f5f Mon Sep 17 00:00:00 2001
From: Thilo Borgmann <thilo.borgmann@mail.de>
Date: Sat, 16 Jul 2022 22:59:57 +0200
Subject: [PATCH v3 1/2] lavfi/edge_common: Templatify ff_gaussian_blur and
 ff_sobel

---
 libavfilter/edge_common.c   |  74 ++--------------------
 libavfilter/edge_common.h   |  22 ++++---
 libavfilter/edge_template.c | 120 ++++++++++++++++++++++++++++++++++++
 libavfilter/vf_blurdetect.c |   8 +--
 libavfilter/vf_edgedetect.c |  14 ++---
 5 files changed, 152 insertions(+), 86 deletions(-)
 create mode 100644 libavfilter/edge_template.c

diff --git a/libavfilter/edge_common.c b/libavfilter/edge_common.c
index d72e8521cd..ebd47d7c53 100644
--- a/libavfilter/edge_common.c
+++ b/libavfilter/edge_common.c
@@ -46,33 +46,13 @@ static int get_rounded_direction(int gx, int gy)
     return DIRECTION_VERTICAL;
 }
 
-// Simple sobel operator to get rounded gradients
-void ff_sobel(int w, int h,
-                    uint16_t *dst, int dst_linesize,
-                    int8_t *dir, int dir_linesize,
-                    const uint8_t *src, int src_linesize)
-{
-    int i, j;
-
-    for (j = 1; j < h - 1; j++) {
-        dst += dst_linesize;
-        dir += dir_linesize;
-        src += src_linesize;
-        for (i = 1; i < w - 1; i++) {
-            const int gx =
-                -1*src[-src_linesize + i-1] + 1*src[-src_linesize + i+1]
-                -2*src[                i-1] + 2*src[                i+1]
-                -1*src[ src_linesize + i-1] + 1*src[ src_linesize + i+1];
-            const int gy =
-                -1*src[-src_linesize + i-1] + 1*src[ src_linesize + i-1]
-                -2*src[-src_linesize + i  ] + 2*src[ src_linesize + i  ]
-                -1*src[-src_linesize + i+1] + 1*src[ src_linesize + i+1];
+#undef DEPTH
+#define DEPTH 8
+#include "edge_template.c"
 
-            dst[i] = FFABS(gx) + FFABS(gy);
-            dir[i] = get_rounded_direction(gx, gy);
-        }
-    }
-}
+#undef DEPTH
+#define DEPTH 16
+#include "edge_template.c"
 
 // Filters rounded gradients to drop all non-maxima
 // Expects gradients generated by ff_sobel()
@@ -137,45 +117,3 @@ void ff_double_threshold(int low, int high, int w, int h,
         src += src_linesize;
     }
 }
-
-// Applies gaussian blur, using 5x5 kernels, sigma = 1.4
-void ff_gaussian_blur(int w, int h,
-                      uint8_t *dst, int dst_linesize,
-                      const uint8_t *src, int src_linesize)
-{
-    int i, j;
-
-    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
-    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
-    for (j = 2; j < h - 2; j++) {
-        dst[0] = src[0];
-        dst[1] = src[1];
-        for (i = 2; i < w - 2; i++) {
-            /* Gaussian mask of size 5x5 with sigma = 1.4 */
-            dst[i] = ((src[-2*src_linesize + i-2] + src[2*src_linesize + i-2]) * 2
-                    + (src[-2*src_linesize + i-1] + src[2*src_linesize + i-1]) * 4
-                    + (src[-2*src_linesize + i  ] + src[2*src_linesize + i  ]) * 5
-                    + (src[-2*src_linesize + i+1] + src[2*src_linesize + i+1]) * 4
-                    + (src[-2*src_linesize + i+2] + src[2*src_linesize + i+2]) * 2
-
-                    + (src[  -src_linesize + i-2] + src[  src_linesize + i-2]) *  4
-                    + (src[  -src_linesize + i-1] + src[  src_linesize + i-1]) *  9
-                    + (src[  -src_linesize + i  ] + src[  src_linesize + i  ]) * 12
-                    + (src[  -src_linesize + i+1] + src[  src_linesize + i+1]) *  9
-                    + (src[  -src_linesize + i+2] + src[  src_linesize + i+2]) *  4
-
-                    + src[i-2] *  5
-                    + src[i-1] * 12
-                    + src[i  ] * 15
-                    + src[i+1] * 12
-                    + src[i+2] *  5) / 159;
-        }
-        dst[i    ] = src[i    ];
-        dst[i + 1] = src[i + 1];
-
-        dst += dst_linesize;
-        src += src_linesize;
-    }
-    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
-    memcpy(dst, src, w);
-}
diff --git a/libavfilter/edge_common.h b/libavfilter/edge_common.h
index 87c143f2b8..cff4febd70 100644
--- a/libavfilter/edge_common.h
+++ b/libavfilter/edge_common.h
@@ -48,10 +48,14 @@ enum AVRoundedDirection {
  * @param src           data pointers to source image
  * @param src_linesize  linesizes for the source image
  */
-void ff_sobel(int w, int h,
-              uint16_t *dst, int dst_linesize,
-              int8_t *dir, int dir_linesize,
-              const uint8_t *src, int src_linesize);
+#define PROTO_SOBEL(depth) \
+void ff_sobel_##depth(int w, int h,                                          \
+                      uint16_t *dst, int dst_linesize,                       \
+                      int8_t *dir, int dir_linesize,                         \
+                      const uint8_t *src, int src_linesize, int src_stride);
+
+PROTO_SOBEL(8)
+PROTO_SOBEL(16)
 
 /**
  * Filters rounded gradients to drop all non-maxima pixels in the magnitude image
@@ -100,8 +104,12 @@ void ff_double_threshold(int low, int high, int w, int h,
  * @param src           data pointers to source image
  * @param src_linesize  linesizes for the source image
  */
-void ff_gaussian_blur(int w, int h,
-                      uint8_t *dst, int dst_linesize,
-                      const uint8_t *src, int src_linesize);
+#define PROTO_GAUSSIAN_BLUR(depth)                                                   \
+void ff_gaussian_blur_##depth(int w, int h,                                          \
+                              uint8_t *dst, int dst_linesize,                        \
+                              const uint8_t *src, int src_linesize, int src_stride);
+
+PROTO_GAUSSIAN_BLUR(8)
+PROTO_GAUSSIAN_BLUR(16)
 
 #endif
diff --git a/libavfilter/edge_template.c b/libavfilter/edge_template.c
new file mode 100644
index 0000000000..d3cf8221a4
--- /dev/null
+++ b/libavfilter/edge_template.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2022 Thilo Borgmann <thilo.borgmann _at_ mail.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ */
+
+#include "libavutil/avassert.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+#undef pixel
+#if DEPTH == 8
+#define pixel uint8_t
+#else
+#define pixel uint16_t
+#endif
+
+#undef fn
+#undef fn2
+#undef fn3
+#define fn3(a,b)   ff_##a##_##b
+#define fn2(a,b)   fn3(a,b)
+#define fn(a)      fn2(a, DEPTH)
+
+void fn(sobel)(int w, int h,
+               uint16_t *dst, int dst_linesize,
+               int8_t *dir, int dir_linesize,
+               const uint8_t *src, int src_linesize, int src_stride)
+{
+    int i, j;
+    pixel *srcp = (pixel *)src;
+
+    src_stride   /= sizeof(pixel);
+    src_linesize /= sizeof(pixel);
+    dst_linesize /= sizeof(pixel);
+
+    for (j = 1; j < h - 1; j++) {
+        dst  += dst_linesize;
+        dir  += dir_linesize;
+        srcp += src_linesize;
+        for (i = 1; i < w - 1; i++) {
+            const int gx =
+                -1*srcp[-src_linesize + (i-1)*src_stride] + 1*srcp[-src_linesize + (i+1)*src_stride]
+                -2*srcp[                (i-1)*src_stride] + 2*srcp[                (i+1)*src_stride]
+                -1*srcp[ src_linesize + (i-1)*src_stride] + 1*srcp[ src_linesize + (i+1)*src_stride];
+            const int gy =
+                -1*srcp[-src_linesize + (i-1)*src_stride] + 1*srcp[ src_linesize + (i-1)*src_stride]
+                -2*srcp[-src_linesize + (i  )*src_stride] + 2*srcp[ src_linesize + (i  )*src_stride]
+                -1*srcp[-src_linesize + (i+1)*src_stride] + 1*srcp[ src_linesize + (i+1)*src_stride];
+
+            dst[i] = FFABS(gx) + FFABS(gy);
+            dir[i] = get_rounded_direction(gx, gy);
+        }
+    }
+}
+
+void fn(gaussian_blur)(int w, int h,
+                       uint8_t *dst, int dst_linesize,
+                       const uint8_t *src, int src_linesize, int src_stride)
+{
+    int i, j;
+    pixel *srcp = (pixel *)src;
+    pixel *dstp = (pixel *)dst;
+
+    src_stride   /= sizeof(pixel);
+    src_linesize /= sizeof(pixel);
+    dst_linesize /= sizeof(pixel);
+
+    memcpy(dstp, srcp, w*2); dstp += dst_linesize; srcp += src_linesize;
+    memcpy(dstp, srcp, w*2); dstp += dst_linesize; srcp += src_linesize;
+    for (j = 2; j < h - 2; j++) {
+        dstp[0] = srcp[(0)*src_stride];
+        dstp[1] = srcp[(1)*src_stride];
+        for (i = 2; i < w - 2; i++) {
+            /* Gaussian mask of size 5x5 with sigma = 1.4 */
+            dstp[i] = ((srcp[-2*src_linesize + (i-2)*src_stride] + srcp[2*src_linesize + (i-2)*src_stride]) * 2
+                     + (srcp[-2*src_linesize + (i-1)*src_stride] + srcp[2*src_linesize + (i-1)*src_stride]) * 4
+                     + (srcp[-2*src_linesize + (i  )*src_stride] + srcp[2*src_linesize + (i  )*src_stride]) * 5
+                     + (srcp[-2*src_linesize + (i+1)*src_stride] + srcp[2*src_linesize + (i+1)*src_stride]) * 4
+                     + (srcp[-2*src_linesize + (i+2)*src_stride] + srcp[2*src_linesize + (i+2)*src_stride]) * 2
+
+                     + (srcp[  -src_linesize + (i-2)*src_stride] + srcp[  src_linesize + (i-2)*src_stride]) *  4
+                     + (srcp[  -src_linesize + (i-1)*src_stride] + srcp[  src_linesize + (i-1)*src_stride]) *  9
+                     + (srcp[  -src_linesize + (i  )*src_stride] + srcp[  src_linesize + (i  )*src_stride]) * 12
+                     + (srcp[  -src_linesize + (i+1)*src_stride] + srcp[  src_linesize + (i+1)*src_stride]) *  9
+                     + (srcp[  -src_linesize + (i+2)*src_stride] + srcp[  src_linesize + (i+2)*src_stride]) *  4
+
+                     + srcp[(i-2)*src_stride] *  5
+                     + srcp[(i-1)*src_stride] * 12
+                     + srcp[(i  )*src_stride] * 15
+                     + srcp[(i+1)*src_stride] * 12
+                     + srcp[(i+2)*src_stride] *  5) / 159;
+        }
+        dstp[i    ] = srcp[(i    )*src_stride];
+        dstp[i + 1] = srcp[(i + 1)*src_stride];
+
+        dstp += dst_linesize;
+        srcp += src_linesize;
+    }
+    memcpy(dstp, srcp, w*2); dstp += dst_linesize; srcp += src_linesize;
+    memcpy(dstp, srcp, w*2);
+}
diff --git a/libavfilter/vf_blurdetect.c b/libavfilter/vf_blurdetect.c
index 0e08ba96de..db06efcce7 100644
--- a/libavfilter/vf_blurdetect.c
+++ b/libavfilter/vf_blurdetect.c
@@ -283,12 +283,12 @@ static int blurdetect_filter_frame(AVFilterLink *inlink, AVFrame *in)
         nplanes++;
 
         // gaussian filter to reduce noise
-        ff_gaussian_blur(w, h,
-                         filterbuf,  w,
-                         in->data[plane], in->linesize[plane]);
+        ff_gaussian_blur_8(w, h,
+                           filterbuf,  w,
+                           in->data[plane], in->linesize[plane], 1);
 
         // compute the 16-bits gradients and directions for the next step
-        ff_sobel(w, h, gradients, w, directions, w, filterbuf, w);
+        ff_sobel_8(w, h, gradients, w, directions, w, filterbuf, w, 1);
 
         // non_maximum_suppression() will actually keep & clip what's necessary and
         // ignore the rest, so we need a clean output buffer
diff --git a/libavfilter/vf_edgedetect.c b/libavfilter/vf_edgedetect.c
index 90390ceb3e..603f06f141 100644
--- a/libavfilter/vf_edgedetect.c
+++ b/libavfilter/vf_edgedetect.c
@@ -191,15 +191,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         }
 
         /* gaussian filter to reduce noise  */
-        ff_gaussian_blur(width, height,
-                         tmpbuf,      width,
-                         in->data[p], in->linesize[p]);
+        ff_gaussian_blur_8(width, height,
+                           tmpbuf,      width,
+                           in->data[p], in->linesize[p], 1);
 
         /* compute the 16-bits gradients and directions for the next step */
-        ff_sobel(width, height,
-              gradients, width,
-              directions,width,
-              tmpbuf,    width);
+        ff_sobel_8(width, height,
+                   gradients, width,
+                   directions,width,
+                   tmpbuf,    width, 1);
 
         /* non_maximum_suppression() will actually keep & clip what's necessary and
          * ignore the rest, so we need a clean output buffer */
Thilo Borgmann July 17, 2022, 7:54 a.m. UTC | #2
Am 16.07.22 um 23:07 schrieb Thilo Borgmann:
> Hi,
> 
>> 1/2 adds 16 bit versions of ff_gaussian_blur and ff_sobel.
>> 2/2 adds new mode to cropdetect.
> 
> v3 does it the template way for 1/2 as requested on IRC.

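v4 fixes a bug in gaussian_blur: the memcpy() calls for the top and bottom border rows copied a hard-coded w*2 bytes, which is wrong for the 8-bit variant; they now copy w*sizeof(pixel) bytes. Otherwise identical to v3.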

-Thilo
From 2cced42f8053c647384fe020cdb2e12f8b7b3d0a Mon Sep 17 00:00:00 2001
From: Thilo Borgmann <thilo.borgmann@mail.de>
Date: Sun, 17 Jul 2022 09:51:33 +0200
Subject: [PATCH v4 1/2] lavfi/edge_common: Templatify ff_gaussian_blur and
 ff_sobel

---
 libavfilter/edge_common.c   |  74 ++--------------------
 libavfilter/edge_common.h   |  22 ++++---
 libavfilter/edge_template.c | 120 ++++++++++++++++++++++++++++++++++++
 libavfilter/vf_blurdetect.c |   8 +--
 libavfilter/vf_edgedetect.c |  14 ++---
 5 files changed, 152 insertions(+), 86 deletions(-)
 create mode 100644 libavfilter/edge_template.c

diff --git a/libavfilter/edge_common.c b/libavfilter/edge_common.c
index d72e8521cd..ebd47d7c53 100644
--- a/libavfilter/edge_common.c
+++ b/libavfilter/edge_common.c
@@ -46,33 +46,13 @@ static int get_rounded_direction(int gx, int gy)
     return DIRECTION_VERTICAL;
 }
 
-// Simple sobel operator to get rounded gradients
-void ff_sobel(int w, int h,
-                    uint16_t *dst, int dst_linesize,
-                    int8_t *dir, int dir_linesize,
-                    const uint8_t *src, int src_linesize)
-{
-    int i, j;
-
-    for (j = 1; j < h - 1; j++) {
-        dst += dst_linesize;
-        dir += dir_linesize;
-        src += src_linesize;
-        for (i = 1; i < w - 1; i++) {
-            const int gx =
-                -1*src[-src_linesize + i-1] + 1*src[-src_linesize + i+1]
-                -2*src[                i-1] + 2*src[                i+1]
-                -1*src[ src_linesize + i-1] + 1*src[ src_linesize + i+1];
-            const int gy =
-                -1*src[-src_linesize + i-1] + 1*src[ src_linesize + i-1]
-                -2*src[-src_linesize + i  ] + 2*src[ src_linesize + i  ]
-                -1*src[-src_linesize + i+1] + 1*src[ src_linesize + i+1];
+#undef DEPTH
+#define DEPTH 8
+#include "edge_template.c"
 
-            dst[i] = FFABS(gx) + FFABS(gy);
-            dir[i] = get_rounded_direction(gx, gy);
-        }
-    }
-}
+#undef DEPTH
+#define DEPTH 16
+#include "edge_template.c"
 
 // Filters rounded gradients to drop all non-maxima
 // Expects gradients generated by ff_sobel()
@@ -137,45 +117,3 @@ void ff_double_threshold(int low, int high, int w, int h,
         src += src_linesize;
     }
 }
-
-// Applies gaussian blur, using 5x5 kernels, sigma = 1.4
-void ff_gaussian_blur(int w, int h,
-                      uint8_t *dst, int dst_linesize,
-                      const uint8_t *src, int src_linesize)
-{
-    int i, j;
-
-    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
-    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
-    for (j = 2; j < h - 2; j++) {
-        dst[0] = src[0];
-        dst[1] = src[1];
-        for (i = 2; i < w - 2; i++) {
-            /* Gaussian mask of size 5x5 with sigma = 1.4 */
-            dst[i] = ((src[-2*src_linesize + i-2] + src[2*src_linesize + i-2]) * 2
-                    + (src[-2*src_linesize + i-1] + src[2*src_linesize + i-1]) * 4
-                    + (src[-2*src_linesize + i  ] + src[2*src_linesize + i  ]) * 5
-                    + (src[-2*src_linesize + i+1] + src[2*src_linesize + i+1]) * 4
-                    + (src[-2*src_linesize + i+2] + src[2*src_linesize + i+2]) * 2
-
-                    + (src[  -src_linesize + i-2] + src[  src_linesize + i-2]) *  4
-                    + (src[  -src_linesize + i-1] + src[  src_linesize + i-1]) *  9
-                    + (src[  -src_linesize + i  ] + src[  src_linesize + i  ]) * 12
-                    + (src[  -src_linesize + i+1] + src[  src_linesize + i+1]) *  9
-                    + (src[  -src_linesize + i+2] + src[  src_linesize + i+2]) *  4
-
-                    + src[i-2] *  5
-                    + src[i-1] * 12
-                    + src[i  ] * 15
-                    + src[i+1] * 12
-                    + src[i+2] *  5) / 159;
-        }
-        dst[i    ] = src[i    ];
-        dst[i + 1] = src[i + 1];
-
-        dst += dst_linesize;
-        src += src_linesize;
-    }
-    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
-    memcpy(dst, src, w);
-}
diff --git a/libavfilter/edge_common.h b/libavfilter/edge_common.h
index 87c143f2b8..cff4febd70 100644
--- a/libavfilter/edge_common.h
+++ b/libavfilter/edge_common.h
@@ -48,10 +48,14 @@ enum AVRoundedDirection {
  * @param src           data pointers to source image
  * @param src_linesize  linesizes for the source image
  */
-void ff_sobel(int w, int h,
-              uint16_t *dst, int dst_linesize,
-              int8_t *dir, int dir_linesize,
-              const uint8_t *src, int src_linesize);
+#define PROTO_SOBEL(depth) \
+void ff_sobel_##depth(int w, int h,                                          \
+                      uint16_t *dst, int dst_linesize,                       \
+                      int8_t *dir, int dir_linesize,                         \
+                      const uint8_t *src, int src_linesize, int src_stride);
+
+PROTO_SOBEL(8)
+PROTO_SOBEL(16)
 
 /**
  * Filters rounded gradients to drop all non-maxima pixels in the magnitude image
@@ -100,8 +104,12 @@ void ff_double_threshold(int low, int high, int w, int h,
  * @param src           data pointers to source image
  * @param src_linesize  linesizes for the source image
  */
-void ff_gaussian_blur(int w, int h,
-                      uint8_t *dst, int dst_linesize,
-                      const uint8_t *src, int src_linesize);
+#define PROTO_GAUSSIAN_BLUR(depth)                                                   \
+void ff_gaussian_blur_##depth(int w, int h,                                          \
+                              uint8_t *dst, int dst_linesize,                        \
+                              const uint8_t *src, int src_linesize, int src_stride);
+
+PROTO_GAUSSIAN_BLUR(8)
+PROTO_GAUSSIAN_BLUR(16)
 
 #endif
diff --git a/libavfilter/edge_template.c b/libavfilter/edge_template.c
new file mode 100644
index 0000000000..de8b318d91
--- /dev/null
+++ b/libavfilter/edge_template.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2022 Thilo Borgmann <thilo.borgmann _at_ mail.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ */
+
+#include "libavutil/avassert.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+#undef pixel
+#if DEPTH == 8
+#define pixel uint8_t
+#else
+#define pixel uint16_t
+#endif
+
+#undef fn
+#undef fn2
+#undef fn3
+#define fn3(a,b)   ff_##a##_##b
+#define fn2(a,b)   fn3(a,b)
+#define fn(a)      fn2(a, DEPTH)
+
+void fn(sobel)(int w, int h,
+               uint16_t *dst, int dst_linesize,
+               int8_t *dir, int dir_linesize,
+               const uint8_t *src, int src_linesize, int src_stride)
+{
+    int i, j;
+    pixel *srcp = (pixel *)src;
+
+    src_stride   /= sizeof(pixel);
+    src_linesize /= sizeof(pixel);
+    dst_linesize /= sizeof(pixel);
+
+    for (j = 1; j < h - 1; j++) {
+        dst  += dst_linesize;
+        dir  += dir_linesize;
+        srcp += src_linesize;
+        for (i = 1; i < w - 1; i++) {
+            const int gx =
+                -1*srcp[-src_linesize + (i-1)*src_stride] + 1*srcp[-src_linesize + (i+1)*src_stride]
+                -2*srcp[                (i-1)*src_stride] + 2*srcp[                (i+1)*src_stride]
+                -1*srcp[ src_linesize + (i-1)*src_stride] + 1*srcp[ src_linesize + (i+1)*src_stride];
+            const int gy =
+                -1*srcp[-src_linesize + (i-1)*src_stride] + 1*srcp[ src_linesize + (i-1)*src_stride]
+                -2*srcp[-src_linesize + (i  )*src_stride] + 2*srcp[ src_linesize + (i  )*src_stride]
+                -1*srcp[-src_linesize + (i+1)*src_stride] + 1*srcp[ src_linesize + (i+1)*src_stride];
+
+            dst[i] = FFABS(gx) + FFABS(gy);
+            dir[i] = get_rounded_direction(gx, gy);
+        }
+    }
+}
+
+void fn(gaussian_blur)(int w, int h,
+                       uint8_t *dst, int dst_linesize,
+                       const uint8_t *src, int src_linesize, int src_stride)
+{
+    int i, j;
+    pixel *srcp = (pixel *)src;
+    pixel *dstp = (pixel *)dst;
+
+    src_stride   /= sizeof(pixel);
+    src_linesize /= sizeof(pixel);
+    dst_linesize /= sizeof(pixel);
+
+    memcpy(dstp, srcp, w*sizeof(pixel)); dstp += dst_linesize; srcp += src_linesize;
+    memcpy(dstp, srcp, w*sizeof(pixel)); dstp += dst_linesize; srcp += src_linesize;
+    for (j = 2; j < h - 2; j++) {
+        dstp[0] = srcp[(0)*src_stride];
+        dstp[1] = srcp[(1)*src_stride];
+        for (i = 2; i < w - 2; i++) {
+            /* Gaussian mask of size 5x5 with sigma = 1.4 */
+            dstp[i] = ((srcp[-2*src_linesize + (i-2)*src_stride] + srcp[2*src_linesize + (i-2)*src_stride]) * 2
+                     + (srcp[-2*src_linesize + (i-1)*src_stride] + srcp[2*src_linesize + (i-1)*src_stride]) * 4
+                     + (srcp[-2*src_linesize + (i  )*src_stride] + srcp[2*src_linesize + (i  )*src_stride]) * 5
+                     + (srcp[-2*src_linesize + (i+1)*src_stride] + srcp[2*src_linesize + (i+1)*src_stride]) * 4
+                     + (srcp[-2*src_linesize + (i+2)*src_stride] + srcp[2*src_linesize + (i+2)*src_stride]) * 2
+
+                     + (srcp[  -src_linesize + (i-2)*src_stride] + srcp[  src_linesize + (i-2)*src_stride]) *  4
+                     + (srcp[  -src_linesize + (i-1)*src_stride] + srcp[  src_linesize + (i-1)*src_stride]) *  9
+                     + (srcp[  -src_linesize + (i  )*src_stride] + srcp[  src_linesize + (i  )*src_stride]) * 12
+                     + (srcp[  -src_linesize + (i+1)*src_stride] + srcp[  src_linesize + (i+1)*src_stride]) *  9
+                     + (srcp[  -src_linesize + (i+2)*src_stride] + srcp[  src_linesize + (i+2)*src_stride]) *  4
+
+                     + srcp[(i-2)*src_stride] *  5
+                     + srcp[(i-1)*src_stride] * 12
+                     + srcp[(i  )*src_stride] * 15
+                     + srcp[(i+1)*src_stride] * 12
+                     + srcp[(i+2)*src_stride] *  5) / 159;
+        }
+        dstp[i    ] = srcp[(i    )*src_stride];
+        dstp[i + 1] = srcp[(i + 1)*src_stride];
+
+        dstp += dst_linesize;
+        srcp += src_linesize;
+    }
+    memcpy(dstp, srcp, w*sizeof(pixel)); dstp += dst_linesize; srcp += src_linesize;
+    memcpy(dstp, srcp, w*sizeof(pixel));
+}
diff --git a/libavfilter/vf_blurdetect.c b/libavfilter/vf_blurdetect.c
index 0e08ba96de..db06efcce7 100644
--- a/libavfilter/vf_blurdetect.c
+++ b/libavfilter/vf_blurdetect.c
@@ -283,12 +283,12 @@ static int blurdetect_filter_frame(AVFilterLink *inlink, AVFrame *in)
         nplanes++;
 
         // gaussian filter to reduce noise
-        ff_gaussian_blur(w, h,
-                         filterbuf,  w,
-                         in->data[plane], in->linesize[plane]);
+        ff_gaussian_blur_8(w, h,
+                           filterbuf,  w,
+                           in->data[plane], in->linesize[plane], 1);
 
         // compute the 16-bits gradients and directions for the next step
-        ff_sobel(w, h, gradients, w, directions, w, filterbuf, w);
+        ff_sobel_8(w, h, gradients, w, directions, w, filterbuf, w, 1);
 
         // non_maximum_suppression() will actually keep & clip what's necessary and
         // ignore the rest, so we need a clean output buffer
diff --git a/libavfilter/vf_edgedetect.c b/libavfilter/vf_edgedetect.c
index 90390ceb3e..603f06f141 100644
--- a/libavfilter/vf_edgedetect.c
+++ b/libavfilter/vf_edgedetect.c
@@ -191,15 +191,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         }
 
         /* gaussian filter to reduce noise  */
-        ff_gaussian_blur(width, height,
-                         tmpbuf,      width,
-                         in->data[p], in->linesize[p]);
+        ff_gaussian_blur_8(width, height,
+                           tmpbuf,      width,
+                           in->data[p], in->linesize[p], 1);
 
         /* compute the 16-bits gradients and directions for the next step */
-        ff_sobel(width, height,
-              gradients, width,
-              directions,width,
-              tmpbuf,    width);
+        ff_sobel_8(width, height,
+                   gradients, width,
+                   directions,width,
+                   tmpbuf,    width, 1);
 
         /* non_maximum_suppression() will actually keep & clip what's necessary and
          * ignore the rest, so we need a clean output buffer */
Thilo Borgmann July 18, 2022, 2:15 p.m. UTC | #3
Am 17.07.22 um 09:54 schrieb Thilo Borgmann:
> Am 16.07.22 um 23:07 schrieb Thilo Borgmann:
>> Hi,
>>
>>> 1/2 adds 16 bit versions of ff_gaussian_blur and ff_sobel.
>>> 2/2 adds new mode to cropdetect.
>>
>> v3 does it the template way for 1/2 as requested on IRC.
> 
> v4 fixed bug in gaussian_blur. Otherwise identical.

v5 fixes a few minor things mentioned on IRC and another bug found along the way.

-Thilo
From 74ed982d46acb980d97ec8ba969036504fdbe777 Mon Sep 17 00:00:00 2001
From: Thilo Borgmann <thilo.borgmann@mail.de>
Date: Mon, 18 Jul 2022 16:09:46 +0200
Subject: [PATCH v5 1/2] lavfi/edge_common: Templatify ff_gaussian_blur and
 ff_sobel

---
 libavfilter/edge_common.c   |  74 ++--------------------
 libavfilter/edge_common.h   |  22 ++++---
 libavfilter/edge_template.c | 118 ++++++++++++++++++++++++++++++++++++
 libavfilter/vf_blurdetect.c |   8 +--
 libavfilter/vf_edgedetect.c |  14 ++---
 5 files changed, 150 insertions(+), 86 deletions(-)
 create mode 100644 libavfilter/edge_template.c

diff --git a/libavfilter/edge_common.c b/libavfilter/edge_common.c
index d72e8521cd..ebd47d7c53 100644
--- a/libavfilter/edge_common.c
+++ b/libavfilter/edge_common.c
@@ -46,33 +46,13 @@ static int get_rounded_direction(int gx, int gy)
     return DIRECTION_VERTICAL;
 }
 
-// Simple sobel operator to get rounded gradients
-void ff_sobel(int w, int h,
-                    uint16_t *dst, int dst_linesize,
-                    int8_t *dir, int dir_linesize,
-                    const uint8_t *src, int src_linesize)
-{
-    int i, j;
-
-    for (j = 1; j < h - 1; j++) {
-        dst += dst_linesize;
-        dir += dir_linesize;
-        src += src_linesize;
-        for (i = 1; i < w - 1; i++) {
-            const int gx =
-                -1*src[-src_linesize + i-1] + 1*src[-src_linesize + i+1]
-                -2*src[                i-1] + 2*src[                i+1]
-                -1*src[ src_linesize + i-1] + 1*src[ src_linesize + i+1];
-            const int gy =
-                -1*src[-src_linesize + i-1] + 1*src[ src_linesize + i-1]
-                -2*src[-src_linesize + i  ] + 2*src[ src_linesize + i  ]
-                -1*src[-src_linesize + i+1] + 1*src[ src_linesize + i+1];
+#undef DEPTH
+#define DEPTH 8
+#include "edge_template.c"
 
-            dst[i] = FFABS(gx) + FFABS(gy);
-            dir[i] = get_rounded_direction(gx, gy);
-        }
-    }
-}
+#undef DEPTH
+#define DEPTH 16
+#include "edge_template.c"
 
 // Filters rounded gradients to drop all non-maxima
 // Expects gradients generated by ff_sobel()
@@ -137,45 +117,3 @@ void ff_double_threshold(int low, int high, int w, int h,
         src += src_linesize;
     }
 }
-
-// Applies gaussian blur, using 5x5 kernels, sigma = 1.4
-void ff_gaussian_blur(int w, int h,
-                      uint8_t *dst, int dst_linesize,
-                      const uint8_t *src, int src_linesize)
-{
-    int i, j;
-
-    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
-    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
-    for (j = 2; j < h - 2; j++) {
-        dst[0] = src[0];
-        dst[1] = src[1];
-        for (i = 2; i < w - 2; i++) {
-            /* Gaussian mask of size 5x5 with sigma = 1.4 */
-            dst[i] = ((src[-2*src_linesize + i-2] + src[2*src_linesize + i-2]) * 2
-                    + (src[-2*src_linesize + i-1] + src[2*src_linesize + i-1]) * 4
-                    + (src[-2*src_linesize + i  ] + src[2*src_linesize + i  ]) * 5
-                    + (src[-2*src_linesize + i+1] + src[2*src_linesize + i+1]) * 4
-                    + (src[-2*src_linesize + i+2] + src[2*src_linesize + i+2]) * 2
-
-                    + (src[  -src_linesize + i-2] + src[  src_linesize + i-2]) *  4
-                    + (src[  -src_linesize + i-1] + src[  src_linesize + i-1]) *  9
-                    + (src[  -src_linesize + i  ] + src[  src_linesize + i  ]) * 12
-                    + (src[  -src_linesize + i+1] + src[  src_linesize + i+1]) *  9
-                    + (src[  -src_linesize + i+2] + src[  src_linesize + i+2]) *  4
-
-                    + src[i-2] *  5
-                    + src[i-1] * 12
-                    + src[i  ] * 15
-                    + src[i+1] * 12
-                    + src[i+2] *  5) / 159;
-        }
-        dst[i    ] = src[i    ];
-        dst[i + 1] = src[i + 1];
-
-        dst += dst_linesize;
-        src += src_linesize;
-    }
-    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
-    memcpy(dst, src, w);
-}
diff --git a/libavfilter/edge_common.h b/libavfilter/edge_common.h
index 87c143f2b8..cff4febd70 100644
--- a/libavfilter/edge_common.h
+++ b/libavfilter/edge_common.h
@@ -48,10 +48,14 @@ enum AVRoundedDirection {
  * @param src           data pointers to source image
  * @param src_linesize  linesizes for the source image
  */
-void ff_sobel(int w, int h,
-              uint16_t *dst, int dst_linesize,
-              int8_t *dir, int dir_linesize,
-              const uint8_t *src, int src_linesize);
+#define PROTO_SOBEL(depth) \
+void ff_sobel_##depth(int w, int h,                                          \
+                      uint16_t *dst, int dst_linesize,                       \
+                      int8_t *dir, int dir_linesize,                         \
+                      const uint8_t *src, int src_linesize, int src_stride);
+
+PROTO_SOBEL(8)
+PROTO_SOBEL(16)
 
 /**
  * Filters rounded gradients to drop all non-maxima pixels in the magnitude image
@@ -100,8 +104,12 @@ void ff_double_threshold(int low, int high, int w, int h,
  * @param src           data pointers to source image
  * @param src_linesize  linesizes for the source image
  */
-void ff_gaussian_blur(int w, int h,
-                      uint8_t *dst, int dst_linesize,
-                      const uint8_t *src, int src_linesize);
+#define PROTO_GAUSSIAN_BLUR(depth)                                                   \
+void ff_gaussian_blur_##depth(int w, int h,                                          \
+                              uint8_t *dst, int dst_linesize,                        \
+                              const uint8_t *src, int src_linesize, int src_stride);
+
+PROTO_GAUSSIAN_BLUR(8)
+PROTO_GAUSSIAN_BLUR(16)
 
 #endif
diff --git a/libavfilter/edge_template.c b/libavfilter/edge_template.c
new file mode 100644
index 0000000000..af33c178af
--- /dev/null
+++ b/libavfilter/edge_template.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2022 Thilo Borgmann <thilo.borgmann _at_ mail.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ */
+
+#include "libavutil/avassert.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+#undef pixel
+#if DEPTH == 8
+#define pixel uint8_t
+#else
+#define pixel uint16_t
+#endif
+
+#undef fn
+#undef fn2
+#undef fn3
+#define fn3(a,b)   ff_##a##_##b
+#define fn2(a,b)   fn3(a,b)
+#define fn(a)      fn2(a, DEPTH)
+
+void fn(sobel)(int w, int h,
+               uint16_t *dst, int dst_linesize,
+               int8_t *dir, int dir_linesize,
+               const uint8_t *src, int src_linesize, int src_stride)
+{
+    pixel *srcp = (pixel *)src;
+
+    src_stride   /= sizeof(pixel);
+    src_linesize /= sizeof(pixel);
+    dst_linesize /= sizeof(pixel);
+
+    for (int j = 1; j < h - 1; j++) {
+        dst  += dst_linesize;
+        dir  += dir_linesize;
+        srcp += src_linesize;
+        for (int i = 1; i < w - 1; i++) {
+            const int gx =
+                -1*srcp[-src_linesize + (i-1)*src_stride] + 1*srcp[-src_linesize + (i+1)*src_stride]
+                -2*srcp[                (i-1)*src_stride] + 2*srcp[                (i+1)*src_stride]
+                -1*srcp[ src_linesize + (i-1)*src_stride] + 1*srcp[ src_linesize + (i+1)*src_stride];
+            const int gy =
+                -1*srcp[-src_linesize + (i-1)*src_stride] + 1*srcp[ src_linesize + (i-1)*src_stride]
+                -2*srcp[-src_linesize + (i  )*src_stride] + 2*srcp[ src_linesize + (i  )*src_stride]
+                -1*srcp[-src_linesize + (i+1)*src_stride] + 1*srcp[ src_linesize + (i+1)*src_stride];
+
+            dst[i] = FFABS(gx) + FFABS(gy);
+            dir[i] = get_rounded_direction(gx, gy);
+        }
+    }
+}
+
+void fn(gaussian_blur)(int w, int h,
+                       uint8_t *dst, int dst_linesize,
+                       const uint8_t *src, int src_linesize, int src_stride)
+{
+    pixel *srcp = (pixel *)src;
+    pixel *dstp = (pixel *)dst;
+
+    src_stride   /= sizeof(pixel);
+    src_linesize /= sizeof(pixel);
+    dst_linesize /= sizeof(pixel);
+
+    memcpy(dstp, srcp, w*sizeof(pixel)); dstp += dst_linesize; srcp += src_linesize;
+    memcpy(dstp, srcp, w*sizeof(pixel)); dstp += dst_linesize; srcp += src_linesize;
+    for (int j = 2; j < h - 2; j++) {
+        dstp[0] = srcp[(0)*src_stride];
+        dstp[1] = srcp[(1)*src_stride];
+        for (int i = 2; i < w - 2; i++) {
+            /* Gaussian mask of size 5x5 with sigma = 1.4 */
+            dstp[i] = ((srcp[-2*src_linesize + (i-2)*src_stride] + srcp[2*src_linesize + (i-2)*src_stride]) * 2
+                     + (srcp[-2*src_linesize + (i-1)*src_stride] + srcp[2*src_linesize + (i-1)*src_stride]) * 4
+                     + (srcp[-2*src_linesize + (i  )*src_stride] + srcp[2*src_linesize + (i  )*src_stride]) * 5
+                     + (srcp[-2*src_linesize + (i+1)*src_stride] + srcp[2*src_linesize + (i+1)*src_stride]) * 4
+                     + (srcp[-2*src_linesize + (i+2)*src_stride] + srcp[2*src_linesize + (i+2)*src_stride]) * 2
+
+                     + (srcp[  -src_linesize + (i-2)*src_stride] + srcp[  src_linesize + (i-2)*src_stride]) *  4
+                     + (srcp[  -src_linesize + (i-1)*src_stride] + srcp[  src_linesize + (i-1)*src_stride]) *  9
+                     + (srcp[  -src_linesize + (i  )*src_stride] + srcp[  src_linesize + (i  )*src_stride]) * 12
+                     + (srcp[  -src_linesize + (i+1)*src_stride] + srcp[  src_linesize + (i+1)*src_stride]) *  9
+                     + (srcp[  -src_linesize + (i+2)*src_stride] + srcp[  src_linesize + (i+2)*src_stride]) *  4
+
+                     + srcp[(i-2)*src_stride] *  5
+                     + srcp[(i-1)*src_stride] * 12
+                     + srcp[(i  )*src_stride] * 15
+                     + srcp[(i+1)*src_stride] * 12
+                     + srcp[(i+2)*src_stride] *  5) / 159;
+        }
+        dstp[w - 2] = srcp[(w - 2)*src_stride];
+        dstp[w - 1] = srcp[(w - 1)*src_stride];
+
+        dstp += dst_linesize;
+        srcp += src_linesize;
+    }
+    memcpy(dstp, srcp, w*sizeof(pixel)); dstp += dst_linesize; srcp += src_linesize;
+    memcpy(dstp, srcp, w*sizeof(pixel));
+}
diff --git a/libavfilter/vf_blurdetect.c b/libavfilter/vf_blurdetect.c
index 0e08ba96de..db06efcce7 100644
--- a/libavfilter/vf_blurdetect.c
+++ b/libavfilter/vf_blurdetect.c
@@ -283,12 +283,12 @@ static int blurdetect_filter_frame(AVFilterLink *inlink, AVFrame *in)
         nplanes++;
 
         // gaussian filter to reduce noise
-        ff_gaussian_blur(w, h,
-                         filterbuf,  w,
-                         in->data[plane], in->linesize[plane]);
+        ff_gaussian_blur_8(w, h,
+                           filterbuf,  w,
+                           in->data[plane], in->linesize[plane], 1);
 
         // compute the 16-bits gradients and directions for the next step
-        ff_sobel(w, h, gradients, w, directions, w, filterbuf, w);
+        ff_sobel_8(w, h, gradients, w, directions, w, filterbuf, w, 1);
 
         // non_maximum_suppression() will actually keep & clip what's necessary and
         // ignore the rest, so we need a clean output buffer
diff --git a/libavfilter/vf_edgedetect.c b/libavfilter/vf_edgedetect.c
index 90390ceb3e..603f06f141 100644
--- a/libavfilter/vf_edgedetect.c
+++ b/libavfilter/vf_edgedetect.c
@@ -191,15 +191,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         }
 
         /* gaussian filter to reduce noise  */
-        ff_gaussian_blur(width, height,
-                         tmpbuf,      width,
-                         in->data[p], in->linesize[p]);
+        ff_gaussian_blur_8(width, height,
+                           tmpbuf,      width,
+                           in->data[p], in->linesize[p], 1);
 
         /* compute the 16-bits gradients and directions for the next step */
-        ff_sobel(width, height,
-              gradients, width,
-              directions,width,
-              tmpbuf,    width);
+        ff_sobel_8(width, height,
+                   gradients, width,
+                   directions,width,
+                   tmpbuf,    width, 1);
 
         /* non_maximum_suppression() will actually keep & clip what's necessary and
          * ignore the rest, so we need a clean output buffer */
Thilo Borgmann July 29, 2022, 1:11 p.m. UTC | #4
Hi,

>>>> 1/2 adds 16 bit versions of ff_gaussian_blur and ff_sobel.
>>>> 2/2 adds new mode to cropdetect.
>>>
>>> v3 does it the template way for 1/2 as requested on IRC.
>>
>> v4 fixed bug in gaussian_blur. Otherwise identical.
>
> v5 fixes minor things mentioned on IRC and another bug found on the way.

will push v5 patchset soon if there are no more comments.

-Thilo
Thilo Borgmann July 30, 2022, 11:22 a.m. UTC | #5
Am 29.07.22 um 15:11 schrieb Thilo Borgmann:
> Hi,
> 
>>>>> 1/2 adds 16 bit versions of ff_gaussian_blur and ff_sobel.
>>>>> 2/2 adds new mode to cropdetect.
>>>>
>>>> v3 does it the template way for 1/2 as requested on IRC.
>>>
>>> v4 fixed bug in gaussian_blur. Otherwise identical.
>>
>> v5 fixes minor things mentioned on IRC and another bug found on the way.
> 
> will push v5 patchset soon if there are no more comments.

Patchset OK'd on IRC and pushed.

Thanks!
-Thilo
diff mbox series

Patch

diff --git a/libavfilter/edge_common.c b/libavfilter/edge_common.c
index d72e8521cd..f0bf273b84 100644
--- a/libavfilter/edge_common.c
+++ b/libavfilter/edge_common.c
@@ -50,7 +50,7 @@  static int get_rounded_direction(int gx, int gy)
 void ff_sobel(int w, int h,
                     uint16_t *dst, int dst_linesize,
                     int8_t *dir, int dir_linesize,
-                    const uint8_t *src, int src_linesize)
+                    const uint8_t *src, int src_linesize, int src_stride)
 {
     int i, j;
 
@@ -60,13 +60,43 @@  void ff_sobel(int w, int h,
         src += src_linesize;
         for (i = 1; i < w - 1; i++) {
             const int gx =
-                -1*src[-src_linesize + i-1] + 1*src[-src_linesize + i+1]
-                -2*src[                i-1] + 2*src[                i+1]
-                -1*src[ src_linesize + i-1] + 1*src[ src_linesize + i+1];
+                -1*src[-src_linesize + (i-1)*src_stride] + 1*src[-src_linesize + (i+1)*src_stride]
+                -2*src[                (i-1)*src_stride] + 2*src[                (i+1)*src_stride]
+                -1*src[ src_linesize + (i-1)*src_stride] + 1*src[ src_linesize + (i+1)*src_stride];
             const int gy =
-                -1*src[-src_linesize + i-1] + 1*src[ src_linesize + i-1]
-                -2*src[-src_linesize + i  ] + 2*src[ src_linesize + i  ]
-                -1*src[-src_linesize + i+1] + 1*src[ src_linesize + i+1];
+                -1*src[-src_linesize + (i-1)*src_stride] + 1*src[ src_linesize + (i-1)*src_stride]
+                -2*src[-src_linesize + (i  )*src_stride] + 2*src[ src_linesize + (i  )*src_stride]
+                -1*src[-src_linesize + (i+1)*src_stride] + 1*src[ src_linesize + (i+1)*src_stride];
+
+            dst[i] = FFABS(gx) + FFABS(gy);
+            dir[i] = get_rounded_direction(gx, gy);
+        }
+    }
+}
+
+void ff_sobel16(int w, int h,
+                    uint16_t *dst, int dst_linesize,
+                    int8_t *dir, int dir_linesize,
+                    const uint8_t *src, int src_linesize, int src_stride)
+{
+    int i, j;
+    uint16_t *src16 = (uint16_t *)src;
+    int src16_stride   = src_stride   / 2;
+    int src16_linesize = src_linesize / 2;
+
+    for (j = 1; j < h - 1; j++) {
+        dst += dst_linesize;
+        dir += dir_linesize;
+        src16 += src16_linesize;
+        for (i = 1; i < w - 1; i++) {
+            const int gx =
+                -1*src16[-src16_linesize + (i-1)*src16_stride] + 1*src16[-src16_linesize + (i+1)*src16_stride]
+                -2*src16[                  (i-1)*src16_stride] + 2*src16[                  (i+1)*src16_stride]
+                -1*src16[ src16_linesize + (i-1)*src16_stride] + 1*src16[ src16_linesize + (i+1)*src16_stride];
+            const int gy =
+                -1*src16[-src16_linesize + (i-1)*src16_stride] + 1*src16[ src16_linesize + (i-1)*src16_stride]
+                -2*src16[-src16_linesize + (i  )*src16_stride] + 2*src16[ src16_linesize + (i  )*src16_stride]
+                -1*src16[-src16_linesize + (i+1)*src16_stride] + 1*src16[ src16_linesize + (i+1)*src16_stride];
 
             dst[i] = FFABS(gx) + FFABS(gy);
             dir[i] = get_rounded_direction(gx, gy);
@@ -141,37 +171,37 @@  void ff_double_threshold(int low, int high, int w, int h,
 // Applies gaussian blur, using 5x5 kernels, sigma = 1.4
 void ff_gaussian_blur(int w, int h,
                       uint8_t *dst, int dst_linesize,
-                      const uint8_t *src, int src_linesize)
+                      const uint8_t *src, int src_linesize, int src_stride)
 {
     int i, j;
 
     memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
     memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
     for (j = 2; j < h - 2; j++) {
-        dst[0] = src[0];
-        dst[1] = src[1];
+        dst[0] = src[(0)*src_stride];
+        dst[1] = src[(1)*src_stride];
         for (i = 2; i < w - 2; i++) {
             /* Gaussian mask of size 5x5 with sigma = 1.4 */
-            dst[i] = ((src[-2*src_linesize + i-2] + src[2*src_linesize + i-2]) * 2
-                    + (src[-2*src_linesize + i-1] + src[2*src_linesize + i-1]) * 4
-                    + (src[-2*src_linesize + i  ] + src[2*src_linesize + i  ]) * 5
-                    + (src[-2*src_linesize + i+1] + src[2*src_linesize + i+1]) * 4
-                    + (src[-2*src_linesize + i+2] + src[2*src_linesize + i+2]) * 2
-
-                    + (src[  -src_linesize + i-2] + src[  src_linesize + i-2]) *  4
-                    + (src[  -src_linesize + i-1] + src[  src_linesize + i-1]) *  9
-                    + (src[  -src_linesize + i  ] + src[  src_linesize + i  ]) * 12
-                    + (src[  -src_linesize + i+1] + src[  src_linesize + i+1]) *  9
-                    + (src[  -src_linesize + i+2] + src[  src_linesize + i+2]) *  4
-
-                    + src[i-2] *  5
-                    + src[i-1] * 12
-                    + src[i  ] * 15
-                    + src[i+1] * 12
-                    + src[i+2] *  5) / 159;
+            dst[i] = ((src[-2*src_linesize + (i-2)*src_stride] + src[2*src_linesize + (i-2)*src_stride]) * 2
+                    + (src[-2*src_linesize + (i-1)*src_stride] + src[2*src_linesize + (i-1)*src_stride]) * 4
+                    + (src[-2*src_linesize + (i  )*src_stride] + src[2*src_linesize + (i  )*src_stride]) * 5
+                    + (src[-2*src_linesize + (i+1)*src_stride] + src[2*src_linesize + (i+1)*src_stride]) * 4
+                    + (src[-2*src_linesize + (i+2)*src_stride] + src[2*src_linesize + (i+2)*src_stride]) * 2
+
+                    + (src[  -src_linesize + (i-2)*src_stride] + src[  src_linesize + (i-2)*src_stride]) *  4
+                    + (src[  -src_linesize + (i-1)*src_stride] + src[  src_linesize + (i-1)*src_stride]) *  9
+                    + (src[  -src_linesize + (i  )*src_stride] + src[  src_linesize + (i  )*src_stride]) * 12
+                    + (src[  -src_linesize + (i+1)*src_stride] + src[  src_linesize + (i+1)*src_stride]) *  9
+                    + (src[  -src_linesize + (i+2)*src_stride] + src[  src_linesize + (i+2)*src_stride]) *  4
+
+                    + src[(i-2)*src_stride] *  5
+                    + src[(i-1)*src_stride] * 12
+                    + src[(i  )*src_stride] * 15
+                    + src[(i+1)*src_stride] * 12
+                    + src[(i+2)*src_stride] *  5) / 159;
         }
-        dst[i    ] = src[i    ];
-        dst[i + 1] = src[i + 1];
+        dst[i    ] = src[(i    )*src_stride];
+        dst[i + 1] = src[(i + 1)*src_stride];
 
         dst += dst_linesize;
         src += src_linesize;
@@ -179,3 +209,49 @@  void ff_gaussian_blur(int w, int h,
     memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
     memcpy(dst, src, w);
 }
+
+void ff_gaussian_blur16(int w, int h,
+                      uint8_t *dst, int dst_linesize,
+                      const uint8_t *src, int src_linesize, int src_stride)
+{
+    int i, j;
+    uint16_t *src16    = (uint16_t *)src;
+    uint16_t *dst16    = (uint16_t *)dst;
+    int src16_stride   = src_stride   / 2;
+    int src16_linesize = src_linesize / 2;
+    int dst16_linesize = dst_linesize / 2;
+
+    memcpy(dst16, src16, w*2); dst16 += dst16_linesize; src16 += src16_linesize;
+    memcpy(dst16, src16, w*2); dst16 += dst16_linesize; src16 += src16_linesize;
+    for (j = 2; j < h - 2; j++) {
+        dst16[0] = src16[(0)*src16_stride];
+        dst16[1] = src16[(1)*src16_stride];
+        for (i = 2; i < w - 2; i++) {
+            /* Gaussian mask of size 5x5 with sigma = 1.4 */
+            dst16[i] = ((src16[-2*src16_linesize + (i-2)*src16_stride] + src16[2*src16_linesize + (i-2)*src16_stride]) * 2
+                      + (src16[-2*src16_linesize + (i-1)*src16_stride] + src16[2*src16_linesize + (i-1)*src16_stride]) * 4
+                      + (src16[-2*src16_linesize + (i  )*src16_stride] + src16[2*src16_linesize + (i  )*src16_stride]) * 5
+                      + (src16[-2*src16_linesize + (i+1)*src16_stride] + src16[2*src16_linesize + (i+1)*src16_stride]) * 4
+                      + (src16[-2*src16_linesize + (i+2)*src16_stride] + src16[2*src16_linesize + (i+2)*src16_stride]) * 2
+
+                      + (src16[  -src16_linesize + (i-2)*src16_stride] + src16[  src16_linesize + (i-2)*src16_stride]) *  4
+                      + (src16[  -src16_linesize + (i-1)*src16_stride] + src16[  src16_linesize + (i-1)*src16_stride]) *  9
+                      + (src16[  -src16_linesize + (i  )*src16_stride] + src16[  src16_linesize + (i  )*src16_stride]) * 12
+                      + (src16[  -src16_linesize + (i+1)*src16_stride] + src16[  src16_linesize + (i+1)*src16_stride]) *  9
+                      + (src16[  -src16_linesize + (i+2)*src16_stride] + src16[  src16_linesize + (i+2)*src16_stride]) *  4
+
+                      + src16[(i-2)*src16_stride] *  5
+                      + src16[(i-1)*src16_stride] * 12
+                      + src16[(i  )*src16_stride] * 15
+                      + src16[(i+1)*src16_stride] * 12
+                      + src16[(i+2)*src16_stride] *  5) / 159;
+        }
+        dst16[i    ] = src16[(i    )*src16_stride];
+        dst16[i + 1] = src16[(i + 1)*src16_stride];
+
+        dst16 += dst16_linesize;
+        src16 += src16_linesize;
+    }
+    memcpy(dst16, src16, w*2); dst16 += dst16_linesize; src16 += src16_linesize;
+    memcpy(dst16, src16, w*2);
+}
diff --git a/libavfilter/edge_common.h b/libavfilter/edge_common.h
index 87c143f2b8..310d92a388 100644
--- a/libavfilter/edge_common.h
+++ b/libavfilter/edge_common.h
@@ -51,7 +51,13 @@  enum AVRoundedDirection {
 void ff_sobel(int w, int h,
               uint16_t *dst, int dst_linesize,
               int8_t *dir, int dir_linesize,
-              const uint8_t *src, int src_linesize);
+              const uint8_t *src, int src_linesize, int src_stride);
+
+void ff_sobel16(int w, int h,
+                uint16_t *dst, int dst_linesize,
+                int8_t *dir, int dir_linesize,
+                const uint8_t *src, int src_linesize, int src_stride);
+
 
 /**
  * Filters rounded gradients to drop all non-maxima pixels in the magnitude image
@@ -102,6 +108,10 @@  void ff_double_threshold(int low, int high, int w, int h,
  */
 void ff_gaussian_blur(int w, int h,
                       uint8_t *dst, int dst_linesize,
-                      const uint8_t *src, int src_linesize);
+                      const uint8_t *src, int src_linesize, int src_stride);
+
+void ff_gaussian_blur16(int w, int h,
+                        uint8_t *dst, int dst_linesize,
+                        const uint8_t *src, int src_linesize, int src_stride);
 
 #endif
diff --git a/libavfilter/vf_blurdetect.c b/libavfilter/vf_blurdetect.c
index 0e08ba96de..ed4fb29b31 100644
--- a/libavfilter/vf_blurdetect.c
+++ b/libavfilter/vf_blurdetect.c
@@ -285,10 +285,10 @@  static int blurdetect_filter_frame(AVFilterLink *inlink, AVFrame *in)
         // gaussian filter to reduce noise
         ff_gaussian_blur(w, h,
                          filterbuf,  w,
-                         in->data[plane], in->linesize[plane]);
+                         in->data[plane], in->linesize[plane], 1);
 
         // compute the 16-bits gradients and directions for the next step
-        ff_sobel(w, h, gradients, w, directions, w, filterbuf, w);
+        ff_sobel(w, h, gradients, w, directions, w, filterbuf, w, 1);
 
         // non_maximum_suppression() will actually keep & clip what's necessary and
         // ignore the rest, so we need a clean output buffer
diff --git a/libavfilter/vf_edgedetect.c b/libavfilter/vf_edgedetect.c
index 90390ceb3e..10397fb8dc 100644
--- a/libavfilter/vf_edgedetect.c
+++ b/libavfilter/vf_edgedetect.c
@@ -193,13 +193,13 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         /* gaussian filter to reduce noise  */
         ff_gaussian_blur(width, height,
                          tmpbuf,      width,
-                         in->data[p], in->linesize[p]);
+                         in->data[p], in->linesize[p], 1);
 
         /* compute the 16-bits gradients and directions for the next step */
         ff_sobel(width, height,
               gradients, width,
               directions,width,
-              tmpbuf,    width);
+              tmpbuf,    width, 1);
 
         /* non_maximum_suppression() will actually keep & clip what's necessary and
          * ignore the rest, so we need a clean output buffer */