From patchwork Tue Jan 31 20:09:50 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Michael Niedermayer X-Patchwork-Id: 2384 Delivered-To: ffmpegpatchwork@gmail.com Received: by 10.103.89.21 with SMTP id n21csp2111056vsb; Tue, 31 Jan 2017 12:10:03 -0800 (PST) X-Received: by 10.223.148.230 with SMTP id 93mr25642251wrr.13.1485893403248; Tue, 31 Jan 2017 12:10:03 -0800 (PST) Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org. [79.124.17.100]) by mx.google.com with ESMTP id c21si22023783wrc.301.2017.01.31.12.10.02; Tue, 31 Jan 2017 12:10:03 -0800 (PST) Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Authentication-Results: mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 52DD568A545; Tue, 31 Jan 2017 22:09:57 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from vie01a-dmta-pe04-2.mx.upcmail.net (vie01a-dmta-pe04-2.mx.upcmail.net [62.179.121.164]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 9D37D689824 for ; Tue, 31 Jan 2017 22:09:50 +0200 (EET) Received: from [172.31.216.43] (helo=vie01a-pemc-psmtp-pe01) by vie01a-dmta-pe04.mx.upcmail.net with esmtp (Exim 4.87) (envelope-from ) id 1cYekS-0000Ih-Tt for ffmpeg-devel@ffmpeg.org; Tue, 31 Jan 2017 21:09:52 +0100 Received: from localhost ([213.47.41.20]) by vie01a-pemc-psmtp-pe01 with SMTP @ mailcloud.upcmail.net id f89r1u01c0S5wYM0189sG2; Tue, 31 Jan 2017 21:09:52 +0100 X-SourceIP: 213.47.41.20 From: Michael Niedermayer To: FFmpeg development discussions and patches Date: Tue, 31 Jan 2017 21:09:50 +0100 Message-Id: <20170131200950.19023-1-michael@niedermayer.cc> 
X-Mailer: git-send-email 2.11.0 Subject: [FFmpeg-devel] [PATCH] Revert "Merge commit '1dfc3cf89d0eb026af28be46294b85d79499ffb5'" X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" The code is not just used by VP3; the optimizations have primarily an effect on VP3 but are used by other codecs too when available. This reverts commit ca8a3978e57c7c8f6abab8547f47483e407469b7, reversing changes made to 481884080977f15854fe06e1c742a7741e49555c. --- libavcodec/x86/Makefile | 2 - libavcodec/x86/hpeldsp.asm | 89 ++++++++++++++++++++++++++++++ libavcodec/x86/hpeldsp.h | 4 -- libavcodec/x86/hpeldsp_init.c | 25 +++++++-- libavcodec/x86/hpeldsp_vp3.asm | 111 -------------------------------------- libavcodec/x86/hpeldsp_vp3_init.c | 56 ------------------- 6 files changed, 111 insertions(+), 176 deletions(-) delete mode 100644 libavcodec/x86/hpeldsp_vp3.asm delete mode 100644 libavcodec/x86/hpeldsp_vp3_init.c diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 28649522ff..2f0354a2c8 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -67,7 +67,6 @@ OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp_init.o OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o -OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3_init.o OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp_init.o OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o \ x86/vp9dsp_init_10bpp.o \ @@ -170,7 +169,6 @@ YASM-OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp.o YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o YASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o 
-YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3.o YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o YASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9intrapred.o \ x86/vp9intrapred_16bpp.o \ diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm index ce5d7a4e28..82fb8934af 100644 --- a/libavcodec/x86/hpeldsp.asm +++ b/libavcodec/x86/hpeldsp.asm @@ -175,6 +175,53 @@ INIT_MMX 3dnow PUT_NO_RND_PIXELS8_X2 +; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +%macro PUT_NO_RND_PIXELS8_X2_EXACT 0 +cglobal put_no_rnd_pixels8_x2_exact, 4,5 + lea r4, [r2*3] + pcmpeqb m6, m6 +.loop: + mova m0, [r1] + mova m2, [r1+r2] + mova m1, [r1+1] + mova m3, [r1+r2+1] + pxor m0, m6 + pxor m2, m6 + pxor m1, m6 + pxor m3, m6 + PAVGB m0, m1 + PAVGB m2, m3 + pxor m0, m6 + pxor m2, m6 + mova [r0], m0 + mova [r0+r2], m2 + mova m0, [r1+r2*2] + mova m1, [r1+r2*2+1] + mova m2, [r1+r4] + mova m3, [r1+r4+1] + pxor m0, m6 + pxor m1, m6 + pxor m2, m6 + pxor m3, m6 + PAVGB m0, m1 + PAVGB m2, m3 + pxor m0, m6 + pxor m2, m6 + mova [r0+r2*2], m0 + mova [r0+r4], m2 + lea r1, [r1+r2*4] + lea r0, [r0+r2*4] + sub r3d, 4 + jg .loop + REP_RET +%endmacro + +INIT_MMX mmxext +PUT_NO_RND_PIXELS8_X2_EXACT +INIT_MMX 3dnow +PUT_NO_RND_PIXELS8_X2_EXACT + + ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) %macro PUT_PIXELS8_Y2 0 %if cpuflag(sse2) @@ -253,6 +300,48 @@ INIT_MMX 3dnow PUT_NO_RND_PIXELS8_Y2 +; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0 +cglobal put_no_rnd_pixels8_y2_exact, 4,5 + lea r4, [r2*3] + mova m0, [r1] + pcmpeqb m6, m6 + add r1, r2 + pxor m0, m6 +.loop: + mova m1, [r1] + mova m2, [r1+r2] + pxor m1, m6 + pxor m2, m6 + PAVGB m0, m1 + PAVGB m1, m2 + pxor m0, m6 + pxor m1, m6 + mova [r0], m0 + mova [r0+r2], m1 + mova m1, [r1+r2*2] + mova m0, [r1+r4] + pxor m1, m6 + pxor m0, m6 + PAVGB m2, m1 + PAVGB m1, 
m0 + pxor m2, m6 + pxor m1, m6 + mova [r0+r2*2], m2 + mova [r0+r4], m1 + lea r1, [r1+r2*4] + lea r0, [r0+r2*4] + sub r3d, 4 + jg .loop + REP_RET +%endmacro + +INIT_MMX mmxext +PUT_NO_RND_PIXELS8_Y2_EXACT +INIT_MMX 3dnow +PUT_NO_RND_PIXELS8_Y2_EXACT + + ; void ff_avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) %macro AVG_PIXELS8 0 cglobal avg_pixels8, 4,5 diff --git a/libavcodec/x86/hpeldsp.h b/libavcodec/x86/hpeldsp.h index bf97029b57..5fae990a4f 100644 --- a/libavcodec/x86/hpeldsp.h +++ b/libavcodec/x86/hpeldsp.h @@ -22,8 +22,6 @@ #include #include -#include "libavcodec/hpeldsp.h" - void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); @@ -52,6 +50,4 @@ void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels, void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags); - #endif /* AVCODEC_X86_HPELDSP_H */ diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c index 58e27e3542..f463d73498 100644 --- a/libavcodec/x86/hpeldsp_init.c +++ b/libavcodec/x86/hpeldsp_init.c @@ -51,6 +51,12 @@ void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); +void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h); void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, @@ -59,6 +65,12 @@ void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, 
const uint8_t *pixels, ptrdiff_t line_size, int h); +void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block, + const uint8_t *pixels, + ptrdiff_t line_size, int h); void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, @@ -230,6 +242,11 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags) c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_mmxext; c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_mmxext; } + + if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) { + c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext; + c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext; + } #endif /* HAVE_MMXEXT_EXTERNAL */ } @@ -261,6 +278,11 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags) c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_3dnow; c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_3dnow; } + + if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) { + c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow; + c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow; + } #endif /* HAVE_AMD3DNOW_EXTERNAL */ } @@ -307,7 +329,4 @@ av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags) if (EXTERNAL_SSSE3(cpu_flags)) hpeldsp_init_ssse3(c, flags); - - if (CONFIG_VP3_DECODER) - ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags); } diff --git a/libavcodec/x86/hpeldsp_vp3.asm b/libavcodec/x86/hpeldsp_vp3.asm deleted file mode 100644 index cba96d06cb..0000000000 --- a/libavcodec/x86/hpeldsp_vp3.asm +++ /dev/null @@ -1,111 +0,0 @@ -;****************************************************************************** -;* SIMD-optimized halfpel functions for VP3 -;* -;* This file is part of FFmpeg. 
-;* -;* FFmpeg is free software; you can redistribute it and/or -;* modify it under the terms of the GNU Lesser General Public -;* License as published by the Free Software Foundation; either -;* version 2.1 of the License, or (at your option) any later version. -;* -;* FFmpeg is distributed in the hope that it will be useful, -;* but WITHOUT ANY WARRANTY; without even the implied warranty of -;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;* Lesser General Public License for more details. -;* -;* You should have received a copy of the GNU Lesser General Public -;* License along with FFmpeg; if not, write to the Free Software -;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -;****************************************************************************** - -%include "libavutil/x86/x86util.asm" - -SECTION .text - -; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -%macro PUT_NO_RND_PIXELS8_X2_EXACT 0 -cglobal put_no_rnd_pixels8_x2_exact, 4,5 - lea r4, [r2*3] - pcmpeqb m6, m6 -.loop: - mova m0, [r1] - mova m2, [r1+r2] - mova m1, [r1+1] - mova m3, [r1+r2+1] - pxor m0, m6 - pxor m2, m6 - pxor m1, m6 - pxor m3, m6 - PAVGB m0, m1 - PAVGB m2, m3 - pxor m0, m6 - pxor m2, m6 - mova [r0], m0 - mova [r0+r2], m2 - mova m0, [r1+r2*2] - mova m1, [r1+r2*2+1] - mova m2, [r1+r4] - mova m3, [r1+r4+1] - pxor m0, m6 - pxor m1, m6 - pxor m2, m6 - pxor m3, m6 - PAVGB m0, m1 - PAVGB m2, m3 - pxor m0, m6 - pxor m2, m6 - mova [r0+r2*2], m0 - mova [r0+r4], m2 - lea r1, [r1+r2*4] - lea r0, [r0+r2*4] - sub r3d, 4 - jg .loop - REP_RET -%endmacro - -INIT_MMX mmxext -PUT_NO_RND_PIXELS8_X2_EXACT -INIT_MMX 3dnow -PUT_NO_RND_PIXELS8_X2_EXACT - - -; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0 -cglobal put_no_rnd_pixels8_y2_exact, 4,5 - lea r4, [r2*3] - mova m0, [r1] - pcmpeqb m6, m6 - add r1, r2 - 
pxor m0, m6 -.loop: - mova m1, [r1] - mova m2, [r1+r2] - pxor m1, m6 - pxor m2, m6 - PAVGB m0, m1 - PAVGB m1, m2 - pxor m0, m6 - pxor m1, m6 - mova [r0], m0 - mova [r0+r2], m1 - mova m1, [r1+r2*2] - mova m0, [r1+r4] - pxor m1, m6 - pxor m0, m6 - PAVGB m2, m1 - PAVGB m1, m0 - pxor m2, m6 - pxor m1, m6 - mova [r0+r2*2], m2 - mova [r0+r4], m1 - lea r1, [r1+r2*4] - lea r0, [r0+r2*4] - sub r3d, 4 - jg .loop - REP_RET -%endmacro - -INIT_MMX mmxext -PUT_NO_RND_PIXELS8_Y2_EXACT -INIT_MMX 3dnow -PUT_NO_RND_PIXELS8_Y2_EXACT diff --git a/libavcodec/x86/hpeldsp_vp3_init.c b/libavcodec/x86/hpeldsp_vp3_init.c deleted file mode 100644 index 5979f4123c..0000000000 --- a/libavcodec/x86/hpeldsp_vp3_init.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/attributes.h" -#include "libavutil/cpu.h" -#include "libavutil/x86/cpu.h" - -#include "libavcodec/avcodec.h" -#include "libavcodec/hpeldsp.h" - -#include "hpeldsp.h" - -void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block, - const uint8_t *pixels, - ptrdiff_t line_size, int h); -void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block, - const uint8_t *pixels, - ptrdiff_t line_size, int h); -void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block, - const uint8_t *pixels, - ptrdiff_t line_size, int h); -void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block, - const uint8_t *pixels, - ptrdiff_t line_size, int h); - -av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags) -{ - if (EXTERNAL_AMD3DNOW(cpu_flags)) { - if (flags & AV_CODEC_FLAG_BITEXACT) { - c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow; - c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow; - } - } - - if (EXTERNAL_MMXEXT(cpu_flags)) { - if (flags & AV_CODEC_FLAG_BITEXACT) { - c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext; - c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext; - } - } -}