From patchwork Mon Mar 11 21:59:07 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ulf Zibis X-Patchwork-Id: 12292 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id EA1DB448883 for ; Mon, 11 Mar 2019 23:59:14 +0200 (EET) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id CECAA688338; Mon, 11 Mar 2019 23:59:14 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from wp215.webpack.hosteurope.de (wp215.webpack.hosteurope.de [80.237.132.222]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 5D57768829F for ; Mon, 11 Mar 2019 23:59:08 +0200 (EET) Received: from dslb-088-078-201-202.088.078.pools.vodafone-ip.de ([88.78.201.202] helo=[192.168.178.140]); authenticated by wp215.webpack.hosteurope.de running ExIM with esmtpsa (TLS1.2:ECDHE_RSA_AES_128_GCM_SHA256:128) id 1h3Swt-0006bd-LY; Mon, 11 Mar 2019 22:59:07 +0100 From: Ulf Zibis To: FFmpeg development discussions and patches Message-ID: Date: Mon, 11 Mar 2019 22:59:07 +0100 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.5.1 MIME-Version: 1.0 Content-Language: en-GB X-bounce-key: webpack.hosteurope.de; ulf.zibis@cosoco.de; 1552341553; 49e48b0e; X-HE-SMSGID: 1h3Swt-0006bd-LY Subject: [FFmpeg-devel] =?utf-8?q?=5BPatch=5D_beautified_+_accelerated_vf?= =?utf-8?q?=5Ffillborders_=E2=80=93_Please_review?= X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Hi, I have made some refactoring to vf_fillborders.c. My motivation came from using this filter as a template for a new filter. Refactoring the code was a good exercise to understand FFmpeg's data models. I think, the code is now much better readable and I believe, it's faster because of: - more use of memset() and memcpy() - less calculations in the inner for loops - skip the looping for right/left filling when there is nothing to do I would be appreciated, if one would review my proposed changes. Hopefully I've used the right format for the patch. Please honour, that I'm new in FFmpeg development. -Ulf diff --git a/libavfilter/vf_fillborders.c b/libavfilter/vf_fillborders.c index 1344587..a7a074b 100644 --- a/libavfilter/vf_fillborders.c +++ b/libavfilter/vf_fillborders.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2017 Paul B Mahol + * + * Contributors: Ulf Zibis * * This file is part of FFmpeg. * @@ -18,6 +20,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include #include "libavutil/colorspace.h" #include "libavutil/common.h" #include "libavutil/opt.h" @@ -84,187 +87,176 @@ static void smear_borders8(FillBordersContext *s, AVFrame *frame) { - int p, y; + for (int p = 0; p < s->nb_planes; p++) { + uint8_t *data = frame->data[p]; + int lz = frame->linesize[p]; + int width = s->planewidth[p]; + int height = s->planeheight[p] * lz; + int left = s->borders[p].left; + int right = width - s->borders[p].right; + int top = s->borders[p].top * lz; + int bottom = height - s->borders[p].bottom * lz; - for (p = 0; p < s->nb_planes; p++) { - uint8_t *ptr = frame->data[p]; - int linesize = frame->linesize[p]; + /* fill left and right borders from top to bottom border */ + if (left != 0 || right != width) // in case skip for performance + for (int y = top; y < bottom; y += lz) { + memset(data + y, *(data + y + left), left); + memset(data + y + right, *(data + y + right - 1), width - right); + } - for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - memset(ptr + y * linesize, - *(ptr + y * linesize + s->borders[p].left), - s->borders[p].left); - memset(ptr + y * linesize + s->planewidth[p] - s->borders[p].right, - *(ptr + y * linesize + s->planewidth[p] - s->borders[p].right - 1), - s->borders[p].right); - } - - for (y = 0; y < s->borders[p].top; y++) { - memcpy(ptr + y * linesize, - ptr + s->borders[p].top * linesize, s->planewidth[p]); - } - - for (y = s->planeheight[p] - s->borders[p].bottom; y < s->planeheight[p]; y++) { - memcpy(ptr + y * linesize, - ptr + (s->planeheight[p] - s->borders[p].bottom - 1) * linesize, - s->planewidth[p]); - } + /* fill top and bottom borders */ + for (uint8_t *y = data + top; y > data; ) + memcpy(y -= lz, data + top, width); + for (uint8_t *y = data + bottom; y < data + height; y += lz) + memcpy(y, data + bottom - lz, width); } } static void smear_borders16(FillBordersContext *s, AVFrame *frame) { - int p, y, x; + for (int p = 0; p < s->nb_planes; p++) { + uint16_t *data = (uint16_t *)frame->data[p]; + int lz = frame->linesize[p] / sizeof(uint16_t); + int width = s->planewidth[p]; + int height = s->planeheight[p] * lz; + int left = s->borders[p].left; + int right = width - s->borders[p].right; + int top = s->borders[p].top * lz; + int bottom = height - s->borders[p].bottom * lz; - for (p = 0; p < s->nb_planes; p++) { - uint16_t *ptr = (uint16_t *)frame->data[p]; - int linesize = frame->linesize[p] / 2; - - for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - for (x = 0; x < s->borders[p].left; x++) { - ptr[y * linesize + x] = *(ptr + y * linesize + s->borders[p].left); + /* fill left and right borders from top to bottom border */ + if (left != 0 || right != width) // in case skip for performance + for (int y = top; y < bottom; y += lz) { + for (int x = left; x >= 0; x--) + data[y + x] = data[y + left]; + for (int x = right; x < width; x++) + data[y + x] = data[y + right - 1]; } - for (x = 0; x < s->borders[p].right; x++) { - ptr[y * linesize + s->planewidth[p] - s->borders[p].right + x] = - *(ptr + y * linesize + s->planewidth[p] - s->borders[p].right - 1); - } - } - - for (y = 0; y < s->borders[p].top; y++) { - memcpy(ptr + y * linesize, - ptr + s->borders[p].top * linesize, s->planewidth[p] * 2); - } - - for (y = s->planeheight[p] - s->borders[p].bottom; y < s->planeheight[p]; y++) { - memcpy(ptr + y * linesize, - ptr + (s->planeheight[p] - s->borders[p].bottom - 1) * linesize, - s->planewidth[p] * 2); - } + /* fill top and bottom borders */ + for (uint16_t *y = data + top; y > data; ) + memcpy(y -= lz, data + top, width * sizeof(uint16_t)); + for (uint16_t *y = data + bottom; y < data + height; y += lz) + memcpy(y, data + bottom - lz, width * sizeof(uint16_t)); } } static void mirror_borders8(FillBordersContext *s, AVFrame *frame) { - int p, y, x; + for (int p = 0; p < s->nb_planes; p++) { + uint8_t *data = frame->data[p]; + int lz = frame->linesize[p]; + int width = s->planewidth[p]; + int height = s->planeheight[p] * lz; + int left = s->borders[p].left; + int right = width - s->borders[p].right; + int top = s->borders[p].top * lz; + int bottom = height - s->borders[p].bottom * lz; - for (p = 0; p < s->nb_planes; p++) { - uint8_t *ptr = frame->data[p]; - int linesize = frame->linesize[p]; - - for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - for (x = 0; x < s->borders[p].left; x++) { - ptr[y * linesize + x] = ptr[y * linesize + s->borders[p].left * 2 - 1 - x]; + /* fill left and right borders from top to bottom border */ + if (left != 0 || right != width) // in case skip for performance + for (int y = top; y < bottom; y += lz) + for (int x = left, x2 = x; x >= 0; x--) { + data[y + x] = data[y + x2++]; + for (int x = right, x2 = x; x < width; x++) + data[y + x] = data[y + --x2]; } - for (x = 0; x < s->borders[p].right; x++) { - ptr[y * linesize + s->planewidth[p] - s->borders[p].right + x] = - ptr[y * linesize + s->planewidth[p] - s->borders[p].right - 1 - x]; - } - } - - for (y = 0; y < s->borders[p].top; y++) { - memcpy(ptr + y * linesize, - ptr + (s->borders[p].top * 2 - 1 - y) * linesize, - s->planewidth[p]); - } - - for (y = 0; y < s->borders[p].bottom; y++) { - memcpy(ptr + (s->planeheight[p] - s->borders[p].bottom + y) * linesize, - ptr + (s->planeheight[p] - s->borders[p].bottom - 1 - y) * linesize, - s->planewidth[p]); - } + /* fill top and bottom borders */ + for (uint8_t *y = data + top, *y2 = y; y > data; y2 += lz) + memcpy(y -= lz, y2, width); + for (uint8_t *y = data + bottom, *y2 = y; y < data + height; y += lz) + memcpy(y, y2 -= lz, width); } } static void mirror_borders16(FillBordersContext *s, AVFrame *frame) { - int p, y, x; + for (int p = 0; p < s->nb_planes; p++) { + uint16_t *data = (uint16_t *)frame->data[p]; + int lz = frame->linesize[p] / sizeof(uint16_t); + int width = s->planewidth[p]; + int height = s->planeheight[p] * lz; + int left = s->borders[p].left; + int right = width - s->borders[p].right; + int top = s->borders[p].top * lz; + int bottom = height - s->borders[p].bottom * lz; - for (p = 0; p < s->nb_planes; p++) { - uint16_t *ptr = (uint16_t *)frame->data[p]; - int linesize = frame->linesize[p] / 2; - - for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - for (x = 0; x < s->borders[p].left; x++) { - ptr[y * linesize + x] = ptr[y * linesize + s->borders[p].left * 2 - 1 - x]; + /* fill left and right borders from top to bottom border */ + if (left != 0 || right != width) // in case skip for performance + for (int y = top; y < bottom; y += lz) { + for (int x = left, x2 = x; x >= 0; x--) + data[y + x] = data[y + x2++]; + for (int x = right, x2 = x; x < width; x++) + data[y + x] = data[y + --x2]; } - for (x = 0; x < s->borders[p].right; x++) { - ptr[y * linesize + s->planewidth[p] - s->borders[p].right + x] = - ptr[y * linesize + s->planewidth[p] - s->borders[p].right - 1 - x]; - } - } - - for (y = 0; y < s->borders[p].top; y++) { - memcpy(ptr + y * linesize, - ptr + (s->borders[p].top * 2 - 1 - y) * linesize, - s->planewidth[p] * 2); - } - - for (y = 0; y < s->borders[p].bottom; y++) { - memcpy(ptr + (s->planeheight[p] - s->borders[p].bottom + y) * linesize, - ptr + (s->planeheight[p] - s->borders[p].bottom - 1 - y) * linesize, - s->planewidth[p] * 2); - } + /* fill top and bottom borders */ + for (uint16_t *y = data + top, *y2 = y; y > data; y2 += lz) + memcpy(y -= lz, y2, width * sizeof(uint16_t)); + for (uint16_t *y = data + bottom, *y2 = y; y < data + height; y += lz) + memcpy(y, y2 -= lz, width * sizeof(uint16_t)); } } static void fixed_borders8(FillBordersContext *s, AVFrame *frame) { - int p, y; - - for (p = 0; p < s->nb_planes; p++) { - uint8_t *ptr = frame->data[p]; + for (int p = 0; p < s->nb_planes; p++) { + uint8_t *data = frame->data[p]; + int lz = frame->linesize[p]; + int width = s->planewidth[p]; + int height = s->planeheight[p] * lz; + int left = s->borders[p].left; + int right = width - s->borders[p].right; + int top = s->borders[p].top * lz; + int bottom = height - s->borders[p].bottom * lz; uint8_t fill = s->fill[p]; - int linesize = frame->linesize[p]; - for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - memset(ptr + y * linesize, fill, s->borders[p].left); - memset(ptr + y * linesize + s->planewidth[p] - s->borders[p].right, fill, - s->borders[p].right); - } + /* fill left and right borders from top to bottom border */ + if (left != 0 || right != width) // in case skip for performance + for (int y = top; y < bottom; y += lz) { + memset(data + y, fill, left); + memset(data + y + right, fill, width - right); + } - for (y = 0; y < s->borders[p].top; y++) { - memset(ptr + y * linesize, fill, s->planewidth[p]); - } - - for (y = s->planeheight[p] - s->borders[p].bottom; y < s->planeheight[p]; y++) { - memset(ptr + y * linesize, fill, s->planewidth[p]); - } + /* fill top and bottom borders */ + for (uint8_t *y = data + top; y > data; ) + memset(y -= lz, fill, width); + for (uint8_t *y = data + bottom; y < data + height; y += lz) + memset(y, fill, width); } } static void fixed_borders16(FillBordersContext *s, AVFrame *frame) { - int p, y, x; + for (int p = 0; p < s->nb_planes; p++) { + uint16_t *data = (uint16_t *)frame->data[p]; + int lz = frame->linesize[p] / sizeof(uint16_t); + int width = s->planewidth[p]; + int height = s->planeheight[p] * lz; + int left = s->borders[p].left; + int right = width - s->borders[p].right; + int top = s->borders[p].top * lz; + int bottom = height - s->borders[p].bottom * lz; + int fill_sz = MAX(MAX(left, right), top!=0 || height-bottom!=0 ? width : 0); + uint16_t *fill = malloc(fill_sz * sizeof(uint16_t)); + for (int i = 0; i < fill_sz; i++) + fill[i] = s->fill[p] << (s->depth - 8); - for (p = 0; p < s->nb_planes; p++) { - uint16_t *ptr = (uint16_t *)frame->data[p]; - uint16_t fill = s->fill[p] << (s->depth - 8); - int linesize = frame->linesize[p] / 2; - - for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - for (x = 0; x < s->borders[p].left; x++) { - ptr[y * linesize + x] = fill; + /* fill left and right borders from top to bottom border */ + if (left != 0 || right != width) // in case skip for performance + for (int y = top; y < bottom; y += lz) { + memcpy(data + y, fill, left * sizeof(uint16_t)); + memcpy(data + y + right, fill, (width - right) * sizeof(uint16_t)); } - for (x = 0; x < s->borders[p].right; x++) { - ptr[y * linesize + s->planewidth[p] - s->borders[p].right + x] = fill; - } - } + /* fill top and bottom borders */ + for (uint16_t *y = data + top; y > data; ) + memcpy(y -= lz, fill, width * sizeof(uint16_t)); + for (uint16_t *y = data + bottom; y < data + height; y += lz) + memcpy(y, fill, width * sizeof(uint16_t)); - for (y = 0; y < s->borders[p].top; y++) { - for (x = 0; x < s->planewidth[p]; x++) { - ptr[y * linesize + x] = fill; - } - } - - for (y = s->planeheight[p] - s->borders[p].bottom; y < s->planeheight[p]; y++) { - for (x = 0; x < s->planewidth[p]; x++) { - ptr[y * linesize + x] = fill; - } - } + free(fill); } } @@ -282,6 +274,20 @@ AVFilterContext *ctx = inlink->dst; FillBordersContext *s = ctx->priv; const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); + + if (inlink->w < s->left + s->right || + inlink->w <= s->left || + inlink->w <= s->right || + inlink->h < s->top + s->bottom || + inlink->h <= s->top || + inlink->h <= s->bottom || + inlink->w < s->left * 2 || + inlink->w < s->right * 2 || + inlink->h < s->top * 2 || + inlink->h < s->bottom * 2) { + av_log(ctx, AV_LOG_ERROR, "Borders are bigger than input frame size.\n"); + return AVERROR(EINVAL); + } s->nb_planes = desc->nb_components; s->depth = desc->comp[0].depth; @@ -306,40 +312,24 @@ s->borders[2].top = s->top >> desc->log2_chroma_h; s->borders[2].bottom = s->bottom >> desc->log2_chroma_h; - if (inlink->w < s->left + s->right || - inlink->w <= s->left || - inlink->w <= s->right || - inlink->h < s->top + s->bottom || - inlink->h <= s->top || - inlink->h <= s->bottom || - inlink->w < s->left * 2 || - inlink->w < s->right * 2 || - inlink->h < s->top * 2 || - inlink->h < s->bottom * 2) { - av_log(ctx, AV_LOG_ERROR, "Borders are bigger than input frame size.\n"); - return AVERROR(EINVAL); - } - switch (s->mode) { - case FM_SMEAR: s->fillborders = s->depth <= 8 ? smear_borders8 : smear_borders16; break; - case FM_MIRROR: s->fillborders = s->depth <= 8 ? mirror_borders8 : mirror_borders16; break; - case FM_FIXED: s->fillborders = s->depth <= 8 ? fixed_borders8 : fixed_borders16; break; - } + case FM_SMEAR: s->fillborders = s->depth <= 8 ? smear_borders8 : smear_borders16; break; + case FM_MIRROR: s->fillborders = s->depth <= 8 ? mirror_borders8 : mirror_borders16; break; + case FM_FIXED: s->fillborders = s->depth <= 8 ? fixed_borders8 : fixed_borders16; + if (desc->flags & AV_PIX_FMT_FLAG_RGB) { + uint8_t rgba_map[4]; + int i; - s->yuv_color[Y] = RGB_TO_Y_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B]); - s->yuv_color[U] = RGB_TO_U_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B], 0); - s->yuv_color[V] = RGB_TO_V_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B], 0); - s->yuv_color[A] = s->rgba_color[A]; - - if (desc->flags & AV_PIX_FMT_FLAG_RGB) { - uint8_t rgba_map[4]; - int i; - - ff_fill_rgba_map(rgba_map, inlink->format); - for (i = 0; i < 4; i++) - s->fill[rgba_map[i]] = s->rgba_color[i]; - } else { - memcpy(s->fill, s->yuv_color, sizeof(s->yuv_color)); + ff_fill_rgba_map(rgba_map, inlink->format); + for (i = 0; i < sizeof(rgba_map); i++) + s->fill[rgba_map[i]] = s->rgba_color[i]; + } else { + s->yuv_color[Y] = RGB_TO_Y_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B]); + s->yuv_color[U] = RGB_TO_U_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B], 0); + s->yuv_color[V] = RGB_TO_V_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B], 0); + s->yuv_color[A] = s->rgba_color[A]; + memcpy(s->fill, s->yuv_color, sizeof(s->yuv_color)); + } break; } return 0;