From patchwork Sun Jan 13 20:01:53 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Peter Ross X-Patchwork-Id: 11727 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 9C13B44D987 for ; Sun, 13 Jan 2019 22:02:14 +0200 (EET) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id B6A5C68A601; Sun, 13 Jan 2019 22:02:02 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mx.sdf.org (ol.sdf.org [205.166.94.20]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 54F0E68A590 for ; Sun, 13 Jan 2019 22:01:56 +0200 (EET) Received: from 25e1e8d5a8db22a306756ba74a06449b (pa49-199-107-96.pa.vic.optusnet.com.au [49.199.107.96]) (authenticated (128 bits)) by mx.sdf.org (8.15.2/8.14.5) with ESMTPSA id x0DK1vh3021692 (using TLSv1.2 with cipher AES256-GCM-SHA384 (256 bits) verified NO) for ; Sun, 13 Jan 2019 20:02:06 GMT Authentication-Results: mx.sdf.org; dkim=none Date: Mon, 14 Jan 2019 07:01:53 +1100 From: Peter Ross To: ffmpeg-devel@ffmpeg.org Message-ID: References: MIME-Version: 1.0 In-Reply-To: User-Agent: Mutt/1.9.4 (2018-02-28) Subject: [FFmpeg-devel] [PATCH 2/6] avcodec/vp3dsp: add 10 coefficient version of the vp3 idct X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" --- libavcodec/vp3dsp.c | 152 ++++++++++++++++++++++++++++++++++++++++++++ libavcodec/vp3dsp.h | 3 + 2 files changed, 155 insertions(+) diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c index 
f049953356..8204188aa8 100644 --- a/libavcodec/vp3dsp.c +++ b/libavcodec/vp3dsp.c @@ -195,6 +195,158 @@ static av_always_inline void idct(uint8_t *dst, ptrdiff_t stride, } } +static av_always_inline void idct10(uint8_t *dst, ptrdiff_t stride, + int16_t *input, int type) +{ + /* idct10: two-pass inverse transform over the coefficient block at input. Pass 1 reads input[c + 8 * k] for c, k in 0..3 and, when any of the four values is non-zero, rewrites input[c + 8 * k] for k in 0..7. Pass 2 reads input[8 * r + k] for r in 0..7, k in 0..3 and produces eight values at dst[k * stride], k in 0..7, for each r. type == 1 stores the clipped values with a bias of 128; any other type adds them to the bytes already in dst. NOTE(review): entries that pass 1 skips are read as-is by pass 2, so this presumably relies on the caller having only the low coefficients non-zero - confirm against the caller. */ int16_t *ip = input; + + int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H; + int Ed, Gd, Add, Bdd, Fd, Hd; + + int i; + + /* Inverse DCT on the rows now */ + for (i = 0; i < 4; i++) { + /* Only four iterations: ip advances by one element each time. */ /* Check for non-zero values */ + if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8]) { + A = M(xC1S7, ip[1 * 8]); + B = M(xC7S1, ip[1 * 8]); + C = M(xC3S5, ip[3 * 8]); + D = -M(xC5S3, ip[3 * 8]); + + Ad = M(xC4S4, (A - C)); + Bd = M(xC4S4, (B - D)); + + Cd = A + C; + Dd = B + D; + + E = M(xC4S4, ip[0 * 8]); + F = E; + + G = M(xC2S6, ip[2 * 8]); + H = M(xC6S2, ip[2 * 8]); + + Ed = E - G; + Gd = E + G; + + Add = F + Ad; + Bdd = Bd - H; + + Fd = F - Ad; + Hd = Bd + H; + + /* Final sequence of operations over-write original inputs */ + ip[0 * 8] = Gd + Cd; + ip[7 * 8] = Gd - Cd; + + ip[1 * 8] = Add + Hd; + ip[2 * 8] = Add - Hd; + + ip[3 * 8] = Ed + Dd; + ip[4 * 8] = Ed - Dd; + + ip[5 * 8] = Fd + Bdd; + ip[6 * 8] = Fd - Bdd; + + } + + ip += 1; + } + + ip = input; /* rewind to the start of the block for the second pass */ + + for (i = 0; i < 8; i++) { + /* Check for non-zero values (bitwise or faster than ||) */ + if (ip[0] | ip[1] | ip[2] | ip[3]) { + A = M(xC1S7, ip[1]); + B = M(xC7S1, ip[1]); + C = M(xC3S5, ip[3]); + D = -M(xC5S3, ip[3]); + + Ad = M(xC4S4, (A - C)); + Bd = M(xC4S4, (B - D)); + + Cd = A + C; + Dd = B + D; + + E = M(xC4S4, ip[0]); + if (type == 1) + E += 16 * 128; /* turns into a bias of 128 once the outputs are shifted right by 4 */ + F = E; + + G = M(xC2S6, ip[2]); + H = M(xC6S2, ip[2]); + + Ed = E - G; + Gd = E + G; + + Add = F + Ad; + Bdd = Bd - H; + + Fd = F - Ad; + Hd = Bd + H; + + Gd += 8; /* 8 is half of 16: rounds the >> 4 below; each output uses exactly one of Gd, Add, Ed, Fd */ + Add += 8; + Ed += 8; + Fd += 8; + + /* Final sequence of operations over-write original inputs. 
*/ + if (type == 1) { + dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4); + dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4); + + dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4); + dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4); + + dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4); + dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4); + + dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4); + dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4); + } else { + /* Any other type: add the shifted result to the byte already in dst, then clip. */ dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4)); + dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4)); + + dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4)); + dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4)); + + dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4)); + dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4)); + + dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4)); + dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4)); + } + } else { + /* All four inputs are zero: type 1 writes 128 to the eight outputs, any other type leaves dst untouched. */ if (type == 1) { + dst[0*stride] = + dst[1*stride] = + dst[2*stride] = + dst[3*stride] = + dst[4*stride] = + dst[5*stride] = + dst[6*stride] = + dst[7*stride] = 128; + } + } + + ip += 8; /* next group of eight coefficients */ + dst++; /* next output byte; the eight results of one iteration are stride bytes apart */ + } +} + +void ff_vp3dsp_idct10_put_c(uint8_t *dest, ptrdiff_t stride, int16_t *block) +{ + /* Put variant: runs idct10 with type 1, then clears all 64 coefficients of block. */ idct10(dest, stride, block, 1); + memset(block, 0, sizeof(*block) * 64); +} + +void ff_vp3dsp_idct10_add_c(uint8_t *dest, ptrdiff_t stride, int16_t *block) +{ + /* Add variant: runs idct10 with type 2, which accumulates into dest, then clears all 64 coefficients of block. */ idct10(dest, stride, block, 2); + memset(block, 0, sizeof(*block) * 64); +} + static void vp3_idct_put_c(uint8_t *dest /* align 8 */, ptrdiff_t stride, int16_t *block /* align 16 */) { diff --git a/libavcodec/vp3dsp.h b/libavcodec/vp3dsp.h index fe77d69ebf..ee5662ad6d 100644 --- a/libavcodec/vp3dsp.h +++ b/libavcodec/vp3dsp.h @@ -47,6 +47,9 @@ typedef struct VP3DSPContext { void (*h_loop_filter_12)(uint8_t *src, ptrdiff_t stride, int *bounding_values); } VP3DSPContext; +void 
ff_vp3dsp_idct10_put_c(uint8_t *dest, ptrdiff_t stride, int16_t *block); +void ff_vp3dsp_idct10_add_c(uint8_t *dest, ptrdiff_t stride, int16_t *block); + /* The two declarations above expose the idct10 put/add functions that the vp3dsp.c part of this patch defines without static. */ void ff_vp4_v_loop_filter_12_c(uint8_t *first_pixel, ptrdiff_t stride, int *bounding_values); void ff_vp4_h_loop_filter_12_c(uint8_t *first_pixel, ptrdiff_t stride, int *bounding_values);