[FFmpeg-devel,v3,4/6] libavfilter/vf_overlay.c: use nbits and depth to support both 8-bit and 10-bit formats

Submitted by lance.lmwang@gmail.com on June 6, 2019, 7:09 a.m.

Details

Message ID 20190606070959.9608-4-lance.lmwang@gmail.com
State New
Headers show

Commit Message

lance.lmwang@gmail.com June 6, 2019, 7:09 a.m.
From: Limin Wang <lance.lmwang@gmail.com>

Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
---
 libavfilter/vf_overlay.c | 79 ++++++++++++++++++++++++----------------
 1 file changed, 47 insertions(+), 32 deletions(-)

Patch hide | download patch | download mbox

diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c
index ee51a54659..70961befa5 100644
--- a/libavfilter/vf_overlay.c
+++ b/libavfilter/vf_overlay.c
@@ -464,22 +464,26 @@  static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext
     int dst_hp = AV_CEIL_RSHIFT(dst_h, vsub);                                                                          \
     int yp = y>>vsub;                                                                                                  \
     int xp = x>>hsub;                                                                                                  \
-    uint8_t *s, *sp, *d, *dp, *dap, *a, *da, *ap;                                                                      \
+    uint##depth##_t *s, *sp, *d, *dp, *dap, *a, *da, *ap;                                                              \
     int jmax, j, k, kmax;                                                                                              \
     int slice_start, slice_end;                                                                                        \
+    const uint##depth##_t max = (1 << nbits) - 1;                                                                      \
+    const uint##depth##_t mid = (1 << (nbits -1)) ;                                                                    \
+    int bytes = depth / 8;                                                                                             \
                                                                                                                        \
+    dst_step /= bytes;                                                                                                 \
     j = FFMAX(-yp, 0);                                                                                                 \
     jmax = FFMIN3(-yp + dst_hp, FFMIN(src_hp, dst_hp), yp + src_hp);                                                   \
                                                                                                                        \
     slice_start = j + (jmax * jobnr) / nb_jobs;                                                                        \
     slice_end = j + (jmax * (jobnr+1)) / nb_jobs;                                                                      \
                                                                                                                        \
-    sp = src->data[i] + (slice_start) * src->linesize[i];                                                              \
-    dp = dst->data[dst_plane]                                                                                          \
+    sp = (uint##depth##_t *)(src->data[i] + (slice_start) * src->linesize[i]);                                         \
+    dp = (uint##depth##_t *)(dst->data[dst_plane]                                                                      \
                       + (yp + slice_start) * dst->linesize[dst_plane]                                                  \
-                      + dst_offset;                                                                                    \
-    ap = src->data[3] + (slice_start << vsub) * src->linesize[3];                                                      \
-    dap = dst->data[3] + ((yp + slice_start) << vsub) * dst->linesize[3];                                              \
+                      + dst_offset);                                                                                   \
+    ap = (uint##depth##_t *)(src->data[3] + (slice_start << vsub) * src->linesize[3]);                                 \
+    dap = (uint##depth##_t *)(dst->data[3] + ((yp + slice_start) << vsub) * dst->linesize[3]);                         \
                                                                                                                        \
     for (j = slice_start; j < slice_end; j++) {                                                                        \
         k = FFMAX(-xp, 0);                                                                                             \
@@ -489,8 +493,8 @@  static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext
         da = dap + ((xp+k) << hsub);                                                                                   \
         kmax = FFMIN(-xp + dst_wp, src_wp);                                                                            \
                                                                                                                        \
-        if (((vsub && j+1 < src_hp) || !vsub) && octx->blend_row[i]) {                                                 \
-            int c = octx->blend_row[i](d, da, s, a, kmax - k, src->linesize[3]);                                       \
+        if (nbits == 8 && ((vsub && j+1 < src_hp) || !vsub) && octx->blend_row[i]) {                                   \
+            int c = octx->blend_row[i]((uint8_t*)d, (uint8_t*)da, (uint8_t*)s, (uint8_t*)a, kmax - k, src->linesize[3]); \
                                                                                                                        \
             s += c;                                                                                                    \
             d += dst_step * c;                                                                                         \
@@ -515,7 +519,7 @@  static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext
                 alpha = a[0];                                                                                          \
             /* if the main channel has an alpha channel, alpha has to be calculated */                                 \
             /* to create an un-premultiplied (straight) alpha value */                                                 \
-            if (main_has_alpha && alpha != 0 && alpha != 255) {                                                        \
+            if (main_has_alpha && alpha != 0 && alpha != max) {                                                        \
                 /* average alpha for color components, improve quality */                                              \
                 uint8_t alpha_d;                                                                                       \
                 if (hsub && vsub && j+1 < src_hp && k+1 < src_wp) {                                                    \
@@ -532,22 +536,32 @@  static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext
                 alpha = UNPREMULTIPLY_ALPHA(alpha, alpha_d);                                                           \
             }                                                                                                          \
             if (straight) {                                                                                            \
-                *d = FAST_DIV255(*d * (255 - alpha) + *s * alpha);                                                     \
-            } else {                                                                                                   \
-                if (i && yuv)                                                                                          \
-                    *d = av_clip(FAST_DIV255((*d - 128) * (255 - alpha)) + *s - 128, -128, 128) + 128;                 \
+                if (nbits > 8)                                                                                         \
+                   *d = (*d * (max - alpha) + *s * alpha) / max;                                                       \
                 else                                                                                                   \
-                    *d = FFMIN(FAST_DIV255(*d * (255 - alpha)) + *s, 255);                                             \
+                    *d = FAST_DIV255(*d * (255 - alpha) + *s * alpha);                                                 \
+            } else {                                                                                                   \
+                if (nbits > 8) {                                                                                       \
+                    if (i && yuv)                                                                                      \
+                        *d = av_clip((*d * (max - alpha) + *s * alpha) / max + *s - mid, -mid, mid) + mid;             \
+                    else                                                                                               \
+                        *d = FFMIN((*d * (max - alpha) + *s * alpha) / max + *s, max);                                 \
+                } else {                                                                                               \
+                    if (i && yuv)                                                                                      \
+                        *d = av_clip(FAST_DIV255((*d - mid) * (max - alpha)) + *s - mid, -mid, mid) + mid;             \
+                    else                                                                                               \
+                        *d = FFMIN(FAST_DIV255(*d * (max - alpha)) + *s, max);                                         \
+                }                                                                                                      \
             }                                                                                                          \
             s++;                                                                                                       \
             d += dst_step;                                                                                             \
             da += 1 << hsub;                                                                                           \
             a += 1 << hsub;                                                                                            \
         }                                                                                                              \
-        dp += dst->linesize[dst_plane];                                                                                \
-        sp += src->linesize[i];                                                                                        \
-        ap += (1 << vsub) * src->linesize[3];                                                                          \
-        dap += (1 << vsub) * dst->linesize[3];                                                                         \
+        dp += dst->linesize[dst_plane] / bytes;                                                                        \
+        sp += src->linesize[i] / bytes;                                                                                \
+        ap += (1 << vsub) * src->linesize[3] / bytes;                                                                  \
+        dap += (1 << vsub) * dst->linesize[3] / bytes;                                                                 \
     }                                                                                                                  \
 }
 DEFINE_BLEND_PLANE(8, 8);
@@ -559,18 +573,20 @@  static inline void alpha_composite_##depth##_##nbits##bits(const AVFrame *src, c
                                    int x, int y,                                                                       \
                                    int jobnr, int nb_jobs)                                                             \
 {                                                                                                                      \
-    uint8_t alpha;          /* the amount of overlay to blend on to main */                                            \
-    uint8_t *s, *sa, *d, *da;                                                                                          \
+    uint##depth##_t alpha;          /* the amount of overlay to blend on to main */                                    \
+    uint##depth##_t *s, *sa, *d, *da;                                                                                  \
     int i, imax, j, jmax;                                                                                              \
     int slice_start, slice_end;                                                                                        \
+    const uint##depth##_t max = (1 << nbits) - 1;                                                                      \
+    int bytes = depth / 8;                                                                                             \
                                                                                                                        \
     imax = FFMIN(-y + dst_h, src_h);                                                                                   \
     slice_start = (imax * jobnr) / nb_jobs;                                                                            \
     slice_end = ((imax * (jobnr+1)) / nb_jobs);                                                                        \
                                                                                                                        \
     i = FFMAX(-y, 0);                                                                                                  \
-    sa = src->data[3] + (i + slice_start) * src->linesize[3];                                                          \
-    da = dst->data[3] + (y + i + slice_start) * dst->linesize[3];                                                      \
+    sa = (uint##depth##_t *)(src->data[3] + (i + slice_start) * src->linesize[3]);                                     \
+    da = (uint##depth##_t *)(dst->data[3] + (y + i + slice_start) * dst->linesize[3]);                                 \
                                                                                                                        \
     for (i = i + slice_start; i < slice_end; i++) {                                                                    \
         j = FFMAX(-x, 0);                                                                                              \
@@ -579,25 +595,24 @@  static inline void alpha_composite_##depth##_##nbits##bits(const AVFrame *src, c
                                                                                                                        \
         for (jmax = FFMIN(-x + dst_w, src_w); j < jmax; j++) {                                                         \
             alpha = *s;                                                                                                \
-            if (alpha != 0 && alpha != 255) {                                                                          \
+            if (alpha != 0 && alpha != max) {                                                                          \
                 uint8_t alpha_d = *d;                                                                                  \
                 alpha = UNPREMULTIPLY_ALPHA(alpha, alpha_d);                                                           \
             }                                                                                                          \
-            switch (alpha) {                                                                                           \
-            case 0:                                                                                                    \
-                break;                                                                                                 \
-            case 255:                                                                                                  \
+            if (alpha == max)                                                                                          \
                 *d = *s;                                                                                               \
-                break;                                                                                                 \
-            default:                                                                                                   \
+            else if (alpha > 0) {                                                                                      \
                 /* apply alpha compositing: main_alpha += (1-main_alpha) * overlay_alpha */                            \
-                *d += FAST_DIV255((255 - *d) * *s);                                                                    \
+                if (nbits > 8)                                                                                         \
+                    *d += (max - *d) * *s / max;                                                                       \
+                else                                                                                                   \
+                    *d += FAST_DIV255((max - *d) * *s);                                                                \
             }                                                                                                          \
             d += 1;                                                                                                    \
             s += 1;                                                                                                    \
         }                                                                                                              \
-        da += dst->linesize[3];                                                                                        \
-        sa += src->linesize[3];                                                                                        \
+        da += dst->linesize[3] / bytes;                                                                                \
+        sa += src->linesize[3] / bytes;                                                                                \
     }                                                                                                                  \
 }
 DEFINE_ALPHA_COMPOSITE(8, 8);