From patchwork Wed Nov 27 14:55:46 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xu Jun X-Patchwork-Id: 16459 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id E72E6447200 for ; Wed, 27 Nov 2019 16:56:03 +0200 (EET) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id C05C068B0E9; Wed, 27 Nov 2019 16:56:03 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from smtp180.sjtu.edu.cn (smtp180.sjtu.edu.cn [202.120.2.180]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 816BB68B0B5 for ; Wed, 27 Nov 2019 16:55:54 +0200 (EET) Received: from proxy01.sjtu.edu.cn (unknown [202.112.26.54]) by smtp180.sjtu.edu.cn (Postfix) with ESMTPS id 012E41008CBC1 for ; Wed, 27 Nov 2019 22:55:49 +0800 (CST) Received: from localhost (localhost [127.0.0.1]) by proxy01.sjtu.edu.cn (Postfix) with ESMTP id E4D2F2018A54E; Wed, 27 Nov 2019 22:55:49 +0800 (CST) X-Virus-Scanned: amavisd-new at proxy01.sjtu.edu.cn Received: from proxy01.sjtu.edu.cn ([127.0.0.1]) by localhost (proxy01.sjtu.edu.cn [127.0.0.1]) (amavisd-new, port 10026) with ESMTP id EiQ1QD6oKkz4; Wed, 27 Nov 2019 22:55:49 +0800 (CST) Received: from localhost.localdomain (unknown [59.78.63.241]) (Authenticated sender: xujunzz@sjtu.edu.cn) by proxy01.sjtu.edu.cn (Postfix) with ESMTPSA id B5A472019573E; Wed, 27 Nov 2019 22:55:47 +0800 (CST) From: xujunzz@sjtu.edu.cn To: ffmpeg-devel@ffmpeg.org Date: Wed, 27 Nov 2019 22:55:46 +0800 Message-Id: <20191127145546.6873-1-xujunzz@sjtu.edu.cn> X-Mailer: git-send-email 2.17.1 Subject: [FFmpeg-devel] [PATCH] avfilter/vf_convolution: add 16-column operation for filter_column() to prepare for x86 SIMD. 
X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: xujunzz@sjtu.edu.cn MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" From: Xu Jun In order to add x86 SIMD for filter_column(), I wrote a C function that processes 16 columns at a time. Signed-off-by: Xu Jun --- libavfilter/vf_convolution.c | 56 +++++++++++++++++++++++++++ libavfilter/x86/vf_convolution_init.c | 23 +++++++++++ 2 files changed, 79 insertions(+) diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c index d022f1a04a..5291415d48 100644 --- a/libavfilter/vf_convolution.c +++ b/libavfilter/vf_convolution.c @@ -520,6 +520,61 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) continue; } + if (mode == MATRIX_COLUMN && s->filter[plane] != filter_column){ + for (y = slice_start; y < slice_end - 16; y+=16) { + const int xoff = (y - slice_start) * bpc; + const int yoff = radius * stride; + for (x = 0; x < radius; x++) { + const int xoff = (y - slice_start) * bpc; + const int yoff = x * stride; + + s->setup[plane](radius, c, src, stride, x, width, y, height, bpc); + s->filter[plane](dst + yoff + xoff, 1, rdiv, + bias, matrix, c, 16, radius, + dstride, stride); + } + s->setup[plane](radius, c, src, stride, radius, width, y, height, bpc); + s->filter[plane](dst + yoff + xoff, sizew - 2 * radius, + rdiv, bias, matrix, c, 16, radius, + dstride, stride); + for (x = sizew - radius; x < sizew; x++) { + const int xoff = (y - slice_start) * bpc; + const int yoff = x * stride; + + s->setup[plane](radius, c, src, stride, x, width, y, height, bpc); + s->filter[plane](dst + yoff + xoff, 1, rdiv, + bias, matrix, c, 16, radius, + dstride, stride); + } + } + if (y < slice_end){ + const int xoff = (y - slice_start) * bpc; + const int 
yoff = radius * stride; + for (x = 0; x < radius; x++) { + const int xoff = (y - slice_start) * bpc; + const int yoff = x * stride; + + s->setup[plane](radius, c, src, stride, x, width, y, height, bpc); + s->filter[plane](dst + yoff + xoff, 1, rdiv, + bias, matrix, c, slice_end - y, radius, + dstride, stride); + } + s->setup[plane](radius, c, src, stride, radius, width, y, height, bpc); + s->filter[plane](dst + yoff + xoff, sizew - 2 * radius, + rdiv, bias, matrix, c, slice_end - y, radius, + dstride, stride); + for (x = sizew - radius; x < sizew; x++) { + const int xoff = (y - slice_start) * bpc; + const int yoff = x * stride; + + s->setup[plane](radius, c, src, stride, x, width, y, height, bpc); + s->filter[plane](dst + yoff + xoff, 1, rdiv, + bias, matrix, c, slice_end - y, radius, + dstride, stride); + } + } + } + else { for (y = slice_start; y < slice_end; y++) { const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc : radius * bpc; const int yoff = mode == MATRIX_COLUMN ? 
radius * stride : 0; @@ -550,6 +605,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) dst += dstride; } } + } return 0; } diff --git a/libavfilter/x86/vf_convolution_init.c b/libavfilter/x86/vf_convolution_init.c index d1e8c90ceb..6b1c2f0e9f 100644 --- a/libavfilter/x86/vf_convolution_init.c +++ b/libavfilter/x86/vf_convolution_init.c @@ -34,6 +34,27 @@ void ff_filter_row_sse4(uint8_t *dst, int width, const uint8_t *c[], int peak, int radius, int dstride, int stride); +static void filter_column16(uint8_t *dst, int height, + float rdiv, float bias, const int *const matrix, + const uint8_t *c[], int length, int radius, + int dstride, int stride) +{ + int y, off16; + + for (y = 0; y < height; y++) { + for (off16 = 0; off16 < length; off16++){ + int i, sum = 0; + + for (i = 0; i < 2 * radius + 1; i++) + sum += c[i][0 + y * stride + off16] * matrix[i]; + + sum = (int)(sum * rdiv + bias + 0.5f); + dst[off16] = av_clip_uint8(sum); + } + dst += dstride; + } + +} av_cold void ff_convolution_init_x86(ConvolutionContext *s) { @@ -51,6 +72,8 @@ av_cold void ff_convolution_init_x86(ConvolutionContext *s) if (EXTERNAL_SSE4(cpu_flags)) s->filter[i] = ff_filter_row_sse4; } + if (s->mode[i] == MATRIX_COLUMN) + s->filter[i] = filter_column16; } #endif }