From patchwork Mon Aug 1 01:27:46 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: James Almer X-Patchwork-Id: 44 Delivered-To: ffmpegpatchwork@gmail.com Received: by 10.103.140.67 with SMTP id o64csp2463067vsd; Sun, 31 Jul 2016 18:28:54 -0700 (PDT) X-Received: by 10.194.18.35 with SMTP id t3mr48037851wjd.174.1470014934660; Sun, 31 Jul 2016 18:28:54 -0700 (PDT) Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org. [79.124.17.100]) by mx.google.com with ESMTP id l11si13732640wmg.37.2016.07.31.18.28.54; Sun, 31 Jul 2016 18:28:54 -0700 (PDT) Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Authentication-Results: mx.google.com; dkim=neutral (body hash did not verify) header.i=@gmail.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org; dmarc=fail (p=NONE dis=NONE) header.from=gmail.com Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id C81D3689C45; Mon, 1 Aug 2016 04:28:42 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mail-oi0-f67.google.com (mail-oi0-f67.google.com [209.85.218.67]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 310B3689C48 for ; Mon, 1 Aug 2016 04:28:33 +0300 (EEST) Received: by mail-oi0-f67.google.com with SMTP id w143so12697438oiw.2 for ; Sun, 31 Jul 2016 18:28:38 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=from:to:subject:date:message-id; bh=gHLrrz3KcW9XGQlgDTBf+RiRO4AdVdegQhB/2k2ujns=; b=bH+i/J1va9uHj2KVObJKk4pCnxu94Ru2/iEJHI+dSbxy5EqSvbTKpnd2motUaKPfbo ctGpBNY7S4ZUDgRD1Y+LlB9bCdKO2S91PboCjCC9k/OfSY+s4+RUPJa5mHQ8utXI16fz RUG516OH2S/aqPhmuJH5HdzEbBFOY9rFEtVM6Ig0PSNkGztjsHHS8cU234l+MbDSEQWZ 7gRIcMfCBXJjov6HE4yjFJJhj1rDEaJYHSHHj0jahvLPiiG8zDwbOeRLYWiHb2DFy3I9 NH/e6tQPDL2PVnMj1Xgd3tOic3IAFw7/o3VOCU+9wDnf5LZqrndMzJIdZsYV8mwE76VH KQcA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:from:to:subject:date:message-id; bh=gHLrrz3KcW9XGQlgDTBf+RiRO4AdVdegQhB/2k2ujns=; b=JPLpj4Xn6n5UDOcQoBNhNLROlRHC5V/Kps5Wv/IRePyHlnGyqWALqgAG55HC82fuJS Vbhzq9jMsA1Ii47fUcrn/JZtrI63OZIyfbZm6h6RpQQRMtRZy4oa7ptMPRG58rvJ0AOU Hzb3goZ5PIbkbYsYd8FtFCBbYmqjgnJRKbFJxqPjySsdyrhghoOVJBF5Qm3bcGTqSW9E oiJVyVLgaBERcGSpvZywKTdPGpTP+KuQhPWCu/UWyO14PKeI90mQ4vtTnXgZEm4fRsNO xPskoXtfbYPHKdwcgiJHn5D9FHFLMFQ3fr6uD8pkaCkSEUBgLv2l13UZQ3+dO6lHP1xU N05Q== X-Gm-Message-State: AEkoouuL1S8eOKcntGm6AvOE+BtngBD0cl2hIDMcLCsfYDe5bnS9LhY7xtfIaTceJvZ0iQ== X-Received: by 10.202.60.3 with SMTP id j3mr32418136oia.92.1470014917025; Sun, 31 Jul 2016 18:28:37 -0700 (PDT) Received: from localhost.localdomain ([181.22.17.94]) by smtp.gmail.com with ESMTPSA id 33sm11891710otb.14.2016.07.31.18.28.35 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-SHA bits=128/128); Sun, 31 Jul 2016 18:28:36 -0700 (PDT) From: James Almer To: ffmpeg-devel@ffmpeg.org Date: Sun, 31 Jul 2016 22:27:46 -0300 Message-Id: <20160801012746.4688-1-jamrial@gmail.com> X-Mailer: git-send-email 2.9.1 Subject: [FFmpeg-devel] [PATCH] x86/ttadsp: add ff_ttafilter_process_enc_{ssse3, sse4} X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Signed-off-by: James Almer --- libavcodec/Makefile | 2 +- libavcodec/ttadsp.c | 41 ++++++++++++++++++++++++++++++++++++----- libavcodec/ttadsp.h | 3 +++ libavcodec/ttaenc.c | 38 ++++++-------------------------------- libavcodec/x86/Makefile | 2 ++ libavcodec/x86/ttadsp.asm | 24 ++++++++++++++++-------- libavcodec/x86/ttadsp_init.c | 25 +++++++++++++++++++------ 7 files changed, 83 insertions(+), 52 deletions(-) diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 33ac2b3..4355c13 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -552,7 +552,7 @@ OBJS-$(CONFIG_TRUESPEECH_DECODER) += truespeech.o OBJS-$(CONFIG_TSCC_DECODER) += tscc.o msrledec.o OBJS-$(CONFIG_TSCC2_DECODER) += tscc2.o OBJS-$(CONFIG_TTA_DECODER) += tta.o ttadata.o ttadsp.o -OBJS-$(CONFIG_TTA_ENCODER) += ttaenc.o ttadata.o +OBJS-$(CONFIG_TTA_ENCODER) += ttaenc.o ttadata.o ttadsp.o OBJS-$(CONFIG_TWINVQ_DECODER) += twinvqdec.o twinvq.o OBJS-$(CONFIG_TXD_DECODER) += txd.o OBJS-$(CONFIG_ULTI_DECODER) += ulti.o diff --git a/libavcodec/ttadsp.c b/libavcodec/ttadsp.c index 30b7ab9..32a87b2 100644 --- a/libavcodec/ttadsp.c +++ b/libavcodec/ttadsp.c @@ -18,9 +18,10 @@ #include "ttadsp.h" -static void ttafilter_process_dec_c(int32_t *qm, int32_t *dx, int32_t *dl, - int32_t *error, int32_t *in, int32_t shift, - int32_t round) { +static inline void ttafilter_process(int32_t *qm, int32_t *dx, int32_t *dl, + int32_t *error, int32_t *in, int32_t shift, + int32_t round, int enc) +{ if (*error < 0) { qm[0] -= dx[0]; qm[1] -= dx[1]; qm[2] -= dx[2]; qm[3] -= dx[3]; qm[4] -= dx[4]; qm[5] -= dx[5]; qm[6] -= dx[6]; qm[7] -= dx[7]; @@ -40,17 +41,47 @@ static void ttafilter_process_dec_c(int32_t *qm, int32_t *dx, int32_t *dl, dx[6] = ((dl[6] >> 30) | 2) & ~1; dx[7] = ((dl[7] >> 30) | 4) & ~3; - *error = *in; - *in += (round >> shift); + if (!enc) { + *error = *in; + *in += (round >> shift); + } dl[4] = -dl[5]; dl[5] = -dl[6]; dl[6] = *in - dl[7]; dl[7] = *in; dl[5] += dl[6]; dl[4] += dl[5]; + + if (enc) { + *in -= (round >> shift); + *error = *in; + } +} + +#if CONFIG_TTA_DECODER +static void ttafilter_process_dec_c(int32_t *qm, int32_t *dx, int32_t *dl, + int32_t *error, int32_t *in, int32_t shift, + int32_t round) +{ + ttafilter_process(qm, dx, dl, error, in, shift, round, 0); +} +#endif + +#if CONFIG_TTA_ENCODER +static void ttafilter_process_enc_c(int32_t *qm, int32_t *dx, int32_t *dl, + int32_t *error, int32_t *in, int32_t shift, + int32_t round) +{ + ttafilter_process(qm, dx, dl, error, in, shift, round, 1); } +#endif av_cold void ff_ttadsp_init(TTADSPContext *c) { +#if CONFIG_TTA_DECODER c->ttafilter_process_dec = ttafilter_process_dec_c; +#endif +#if CONFIG_TTA_ENCODER + c->ttafilter_process_enc = ttafilter_process_enc_c; +#endif if (ARCH_X86) ff_ttadsp_init_x86(c); diff --git a/libavcodec/ttadsp.h b/libavcodec/ttadsp.h index 56930f1..df73998 100644 --- a/libavcodec/ttadsp.h +++ b/libavcodec/ttadsp.h @@ -26,6 +26,9 @@ typedef struct TTADSPContext { void (*ttafilter_process_dec)(int32_t *qm, int32_t *dx, int32_t *dl, int32_t *error, int32_t *in, int32_t shift, int32_t round); + void (*ttafilter_process_enc)(int32_t *qm, int32_t *dx, int32_t *dl, + int32_t *error, int32_t *in, int32_t shift, + int32_t round); } TTADSPContext; void ff_ttadsp_init(TTADSPContext *c); diff --git a/libavcodec/ttaenc.c b/libavcodec/ttaenc.c index 2f1c8db..5ccf98b 100644 --- a/libavcodec/ttaenc.c +++ b/libavcodec/ttaenc.c @@ -20,6 +20,7 @@ #define BITSTREAM_WRITER_LE #include "ttadata.h" +#include "ttadsp.h" #include "avcodec.h" #include "put_bits.h" #include "internal.h" @@ -29,6 +30,7 @@ typedef struct TTAEncContext { const AVCRC *crc_table; int bps; TTAChannel *ch_ctx; + TTADSPContext dsp; } TTAEncContext; static av_cold int tta_encode_init(AVCodecContext *avctx) @@ -57,38 +59,9 @@ static av_cold int tta_encode_init(AVCodecContext *avctx) if (!s->ch_ctx) return AVERROR(ENOMEM); - return 0; -} - -static inline void ttafilter_process(TTAFilter *c, int32_t *in) -{ - register int32_t *dl = c->dl, *qm = c->qm, *dx = c->dx, sum = c->round; - - if (c->error < 0) { - qm[0] -= dx[0]; qm[1] -= dx[1]; qm[2] -= dx[2]; qm[3] -= dx[3]; - qm[4] -= dx[4]; qm[5] -= dx[5]; qm[6] -= dx[6]; qm[7] -= dx[7]; - } else if (c->error > 0) { - qm[0] += dx[0]; qm[1] += dx[1]; qm[2] += dx[2]; qm[3] += dx[3]; - qm[4] += dx[4]; qm[5] += dx[5]; qm[6] += dx[6]; qm[7] += dx[7]; - } + ff_ttadsp_init(&s->dsp); - sum += dl[0] * qm[0] + dl[1] * qm[1] + dl[2] * qm[2] + dl[3] * qm[3] + - dl[4] * qm[4] + dl[5] * qm[5] + dl[6] * qm[6] + dl[7] * qm[7]; - - dx[0] = dx[1]; dx[1] = dx[2]; dx[2] = dx[3]; dx[3] = dx[4]; - dl[0] = dl[1]; dl[1] = dl[2]; dl[2] = dl[3]; dl[3] = dl[4]; - - dx[4] = ((dl[4] >> 30) | 1); - dx[5] = ((dl[5] >> 30) | 2) & ~1; - dx[6] = ((dl[6] >> 30) | 2) & ~1; - dx[7] = ((dl[7] >> 30) | 4) & ~3; - - dl[4] = -dl[5]; dl[5] = -dl[6]; - dl[6] = *in - dl[7]; dl[7] = *in; - dl[5] += dl[6]; dl[4] += dl[5]; - - *in -= (sum >> c->shift); - c->error = *in; + return 0; } static int32_t get_sample(const AVFrame *frame, int sample, @@ -155,7 +128,8 @@ pkt_alloc: } c->predictor = temp; - ttafilter_process(filter, &value); + s->dsp.ttafilter_process_enc(filter->qm, filter->dx, filter->dl, &filter->error, &value, + filter->shift, filter->round); outval = (value > 0) ? (value << 1) - 1: -value << 1; k = rice->k0; diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 839b5bc..cc2b3c4 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -61,6 +61,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o +OBJS-$(CONFIG_TTA_ENCODER) += x86/ttadsp_init.o OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o @@ -160,6 +161,7 @@ YASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o YASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o YASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o +YASM-OBJS-$(CONFIG_TTA_ENCODER) += x86/ttadsp.o YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o YASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o diff --git a/libavcodec/x86/ttadsp.asm b/libavcodec/x86/ttadsp.asm index 8f48949..1c664f2 100644 --- a/libavcodec/x86/ttadsp.asm +++ b/libavcodec/x86/ttadsp.asm @@ -29,9 +29,9 @@ pd_1224: dd 1, 2, 2, 4 SECTION .text -%macro TTA_FILTER 2 +%macro TTA_FILTER 3 INIT_XMM %1 -cglobal ttafilter_process_dec, 5,5,%2, qm, dx, dl, error, in, shift, round +cglobal ttafilter_process_%2, 5,5,%3, qm, dx, dl, error, in, shift, round mova m2, [qmq ] mova m3, [qmq + 0x10] mova m4, [dxq ] @@ -94,13 +94,19 @@ cglobal ttafilter_process_dec, 5,5,%2, qm, dx, dl, error, in, shift, round mova [dlq ], m2 mova [dxq ], m5 mova [dxq + 0x10], m4 - movd m0, [inq] ; filter->error = *in; - movd [errorq], m0 ; - movd m2, shiftm ; *in += (sum >> filter->shift); + movd m2, shiftm ; + movd m0, [inq] psrad m6, m2 ; - paddd m0, m6 ; +%ifidn %2, dec + movd [errorq], m0 ; filter->error = *in; + paddd m0, m6 ; *in += (sum >> filter->shift); movd [inq], m0 ; +%else + psubd m3, m0, m6 ; + movd [inq], m3 ; *in -= (sum >> filter->shift); + movd [errorq], m3 ; filter->error = *in; +%endif psrldq m1, 4 ; pslldq m0, 12 ; filter->dl[4] = -filter->dl[5]; @@ -115,5 +121,7 @@ cglobal ttafilter_process_dec, 5,5,%2, qm, dx, dl, error, in, shift, round RET %endmacro -TTA_FILTER ssse3, 8 -TTA_FILTER sse4, 7 +TTA_FILTER ssse3, dec, 8 +TTA_FILTER sse4, dec, 7 +TTA_FILTER ssse3, enc, 8 +TTA_FILTER sse4, enc, 7 diff --git a/libavcodec/x86/ttadsp_init.c b/libavcodec/x86/ttadsp_init.c index 47dc87f..75c444c 100644 --- a/libavcodec/x86/ttadsp_init.c +++ b/libavcodec/x86/ttadsp_init.c @@ -22,21 +22,34 @@ #include "libavutil/x86/cpu.h" #include "config.h" -void ff_ttafilter_process_dec_ssse3(int32_t *qm, int32_t *dx, int32_t *dl, - int32_t *error, int32_t *in, int32_t shift, - int32_t round); -void ff_ttafilter_process_dec_sse4(int32_t *qm, int32_t *dx, int32_t *dl, - int32_t *error, int32_t *in, int32_t shift, - int32_t round); +#define TTAFILTER_PROCESS(opt) \ +void ff_ttafilter_process_dec_##opt(int32_t *qm, int32_t *dx, int32_t *dl, \ + int32_t *error, int32_t *in, int32_t shift, \ + int32_t round); \ +void ff_ttafilter_process_enc_##opt(int32_t *qm, int32_t *dx, int32_t *dl, \ + int32_t *error, int32_t *in, int32_t shift, \ + int32_t round) + +TTAFILTER_PROCESS(ssse3); +TTAFILTER_PROCESS(sse4); av_cold void ff_ttadsp_init_x86(TTADSPContext *c) { #if HAVE_YASM int cpu_flags = av_get_cpu_flags(); +#if CONFIG_TTA_DECODER if (EXTERNAL_SSSE3(cpu_flags)) c->ttafilter_process_dec = ff_ttafilter_process_dec_ssse3; if (EXTERNAL_SSE4(cpu_flags)) c->ttafilter_process_dec = ff_ttafilter_process_dec_sse4; #endif + +#if CONFIG_TTA_ENCODER + if (EXTERNAL_SSSE3(cpu_flags)) + c->ttafilter_process_enc = ff_ttafilter_process_enc_ssse3; + if (EXTERNAL_SSE4(cpu_flags)) + c->ttafilter_process_enc = ff_ttafilter_process_enc_sse4; +#endif +#endif }