[FFmpeg-devel,v5,1/2,GSoC,2024] libavcodec/x86/vvc: Add AVX2 DMVR SAD functions for VVC

Message ID	20240522000039.34913-2-chen.stonechen@gmail.com
State	New
Headers	show Delivered-To: ffmpegpatchwork2@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; From: Stone Chen <chen.stonechen@gmail.com> To: ffmpeg-devel@ffmpeg.org Date: Tue, 21 May 2024 20:00:32 -0400 Message-ID: <20240522000039.34913-2-chen.stonechen@gmail.com> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH v5 1/2][GSoC 2024] libavcodec/x86/vvc: Add AVX2 DMVR SAD functions for VVC Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Cc: Stone Chen <chen.stonechen@gmail.com> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Series	[FFmpeg-devel,v5,1/2,GSoC,2024] libavcodec/x86/vvc: Add AVX2 DMVR SAD functions for VVC \| expand [FFmpeg-devel,v5,1/2,GSoC,2024] libavcodec/x86/vvc: Add AVX2 DMVR SAD functions for VVC [FFmpeg-devel,v5,2/2,GSoC,2024] tests/checkasm: Add check_vvc_sad to vvc_mc.c

Context	Check	Description
yinshiyou/make_loongarch64	success	Make finished
yinshiyou/make_fate_loongarch64	success	Make fate finished

diff --git a/libavcodec/vvc/dsp.c b/libavcodec/vvc/dsp.c
index 41e830a98a..aded1a2f9f 100644
--- a/libavcodec/vvc/dsp.c
+++ b/libavcodec/vvc/dsp.c
@@ -46,7 +46,7 @@ static void av_always_inline pad_int16(int16_t *_dst, const ptrdiff_t dst_stride
     memcpy(_dst, _dst - dst_stride, padded_width * sizeof(int16_t));
 }
 
-static int vvc_sad(const int16_t *src0, const int16_t *src1, int dx, int dy,
+static int vvc_sad(const int16_t *src0, const int16_t *src1, intptr_t dx, intptr_t dy,
     const int block_w, const int block_h)
 {
     int sad = 0;
diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h
index 9810ac314c..213337358b 100644
--- a/libavcodec/vvc/dsp.h
+++ b/libavcodec/vvc/dsp.h
@@ -86,7 +86,7 @@ typedef struct VVCInterDSPContext {
 
     void (*apply_bdof)(uint8_t *dst, ptrdiff_t dst_stride, int16_t *src0, int16_t *src1, int block_w, int block_h);
 
-    int (*sad)(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h);
+    int (*sad)(const int16_t *src0, const int16_t *src1, intptr_t dx, intptr_t dy, int block_w, int block_h);
     void (*dmvr[2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int height,
         intptr_t mx, intptr_t my, int width);
 } VVCInterDSPContext;
diff --git a/libavcodec/x86/vvc/Makefile b/libavcodec/x86/vvc/Makefile
index d6a66f860a..7b2438ce17 100644
--- a/libavcodec/x86/vvc/Makefile
+++ b/libavcodec/x86/vvc/Makefile
@@ -5,4 +5,5 @@ OBJS-$(CONFIG_VVC_DECODER) += x86/vvc/vvcdsp_init.o \
                                       x86/h26x/h2656dsp.o
 X86ASM-OBJS-$(CONFIG_VVC_DECODER) += x86/vvc/vvc_alf.o \
                                       x86/vvc/vvc_mc.o \
-                                      x86/h26x/h2656_inter.o
+                                      x86/vvc/vvc_sad.o \
+                                      x86/h26x/h2656_inter.o
diff --git a/libavcodec/x86/vvc/vvc_sad.asm b/libavcodec/x86/vvc/vvc_sad.asm
new file mode 100644
index 0000000000..9766446b11
--- /dev/null
+++ b/libavcodec/x86/vvc/vvc_sad.asm
@@ -0,0 +1,130 @@
+; /*
+; * Provide SIMD DMVR SAD functions for VVC decoding
+; *
+; * Copyright (c) 2024 Stone Chen
+; *
+; * This file is part of FFmpeg.
+; *
+; * FFmpeg is free software; you can redistribute it and/or
+; * modify it under the terms of the GNU Lesser General Public
+; * License as published by the Free Software Foundation; either
+; * version 2.1 of the License, or (at your option) any later version.
+; *
+; * FFmpeg is distributed in the hope that it will be useful,
+; * but WITHOUT ANY WARRANTY; without even the implied warranty of
+; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+; * Lesser General Public License for more details.
+; *
+; * You should have received a copy of the GNU Lesser General Public
+; * License along with FFmpeg; if not, write to the Free Software
+; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+; */
+
+%include "libavutil/x86/x86util.asm"
+%define MAX_PB_SIZE 128
+%define ROWS 2
+
+SECTION_RODATA
+
+pw_1: times 2 dw 1
+
+; DMVR SAD is only calculated on even rows to reduce complexity
+SECTION .text
+
+%macro MIN_MAX_SAD 3 ; %1 = |%1 - %2| per unsigned word (max - min); %3 is scratch
+    pminuw          %3, %2, %1
+    pmaxuw          %1, %2, %1
+    psubusw         %1, %1, %3
+%endmacro
+
+%macro HORIZ_ADD 3 ; %1 = result xmm, %2 = xmm (clobbered), %3 = ymm; invoked below as xm0, xm3, m3
+    vextracti128    %1, %3, q0001  ;              3        2        1        0
+    paddd           %1, %2         ; %1 (7 + 3) (6 + 2) (5 + 1)   (4 + 0)
+    pshufd          %2, %1, q0032  ; %2    -      - (7 + 3)   (6 + 2)
+    paddd           %1, %1, %2     ; %1    _      _ (5 1 7 3) (4 0 6 2)
+    pshufd          %2, %1, q0001  ; %2    _      _ (5 1 7 3) (5 1 7 3)
+    paddd           %1, %1, %2     ;               (01234567)
+%endmacro
+
+%if ARCH_X86_64
+%if HAVE_AVX2_EXTERNAL
+
+INIT_YMM avx2
+
+cglobal vvc_sad, 6, 9, 5, src1, src2, dx, dy, block_w, block_h, off1, off2, row_idx
+    sub             dxq, 2
+    sub             dyq, 2
+
+    mov             off1q, 2
+    mov             off2q, 2
+
+    add             off1q, dyq
+    sub             off2q, dyq
+
+    shl             off1q, 7 ; * 128 (== MAX_PB_SIZE)
+    shl             off2q, 7
+
+    add             off1q, dxq
+    sub             off2q, dxq
+
+    lea             src1q, [src1q + off1q * 2 + 2 * 2] ; * 2: int16_t samples -> bytes
+    lea             src2q, [src2q + off2q * 2 + 2 * 2]
+
+    pxor            m3, m3 ; m3 = dword SAD accumulator
+    vpbroadcastd    m4, [pw_1] ; m4 = 1 in every word
+
+    cmp             block_wd, 16
+    jge             vvc_sad_16_128 ; widths >= 16 use whole 32-byte loads
+
+    vvc_sad_8:
+    .loop_height:
+        movu            xm0, [src1q]
+        vinserti128     m0, [src1q + MAX_PB_SIZE * ROWS * 2], 1 ; high lane: ROWS * MAX_PB_SIZE samples further on
+        movu            xm1, [src2q]
+        vinserti128     m1, [src2q + MAX_PB_SIZE * ROWS * 2], 1
+
+        MIN_MAX_SAD     m1, m0, m2
+        pmaddwd         m1, m4 ; add adjacent word pairs into dwords
+        paddd           m3, m1
+
+        add             src1q, 2 * MAX_PB_SIZE * ROWS * 2
+        add             src2q, 2 * MAX_PB_SIZE * ROWS * 2
+
+        sub             block_hd, 4 ; pointers advanced by 2 * ROWS rows per iteration
+        jg              .loop_height
+
+        HORIZ_ADD       xm0, xm3, m3
+        movd            eax, xm0 ; total goes back in eax
+        RET
+
+    vvc_sad_16_128:
+        sar             block_wd, 4 ; block_w / 16 = 32-byte loads per row
+    .loop_height:
+        mov             off1q, src1q ; remember the row start for the lea below
+        mov             off2q, src2q
+        mov             row_idxd, block_wd
+
+        .loop_width:
+            movu            m0, [src1q]
+            movu            m1, [src2q]
+            MIN_MAX_SAD     m1, m0, m2
+            pmaddwd         m1, m4 ; add adjacent word pairs into dwords
+            paddd           m3, m1
+
+            add             src1q, 32
+            add             src2q, 32
+            dec             row_idxd
+            jg              .loop_width
+
+        lea             src1q, [off1q + ROWS * MAX_PB_SIZE * 2]
+        lea             src2q, [off2q + ROWS * MAX_PB_SIZE * 2]
+
+        sub             block_hd, 2
+        jg              .loop_height
+
+        HORIZ_ADD       xm0, xm3, m3
+        movd            eax, xm0 ; total goes back in eax
+        RET
+
+%endif
+%endif
diff --git a/libavcodec/x86/vvc/vvcdsp_init.c b/libavcodec/x86/vvc/vvcdsp_init.c
index 0e68971b2c..aa6c916760 100644
--- a/libavcodec/x86/vvc/vvcdsp_init.c
+++ b/libavcodec/x86/vvc/vvcdsp_init.c
@@ -311,6 +311,9 @@ ALF_FUNCS(16, 12, avx2)
     c->alf.filter[CHROMA] = ff_vvc_alf_filter_chroma_##bd##_avx2; \
     c->alf.classify = ff_vvc_alf_classify_##bd##_avx2; \
 } while (0)
+
+int ff_vvc_sad_avx2(const int16_t *src0, const int16_t *src1, intptr_t dx, intptr_t dy, int block_w, int block_h);
+#define SAD_INIT() c->inter.sad = ff_vvc_sad_avx2
 #endif
 
 void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
@@ -327,6 +330,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
             ALF_INIT(8);
             AVG_INIT(8, avx2);
             MC_LINKS_AVX2(8);
+            SAD_INIT();
         }
         break;
     case 10:
@@ -338,6 +342,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
             AVG_INIT(10, avx2);
             MC_LINKS_AVX2(10);
             MC_LINKS_16BPC_AVX2(10);
+            SAD_INIT();
         }
         break;
     case 12:
@@ -349,6 +354,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
             AVG_INIT(12, avx2);
             MC_LINKS_AVX2(12);
             MC_LINKS_16BPC_AVX2(12);
+            SAD_INIT();
         }
         break;
     default:

[FFmpeg-devel,v5,1/2,GSoC,2024] libavcodec/x86/vvc: Add AVX2 DMVR SAD functions for VVC

Checks

Commit Message

Comments

Patch