[FFmpeg-devel,3/3] swscale: [LA] Optimize swscale funcs in input.c

Message ID	20240316030333.31269-4-yinshiyou-hf@loongson.cn
State	Accepted
Commit	2a7d622ddd0394f20de06b5f1da2f3c3cbc90f6f
Headers	show Delivered-To: ffmpegpatchwork2@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; From: Shiyou Yin <yinshiyou-hf@loongson.cn> To: ffmpeg-devel@ffmpeg.org Date: Sat, 16 Mar 2024 11:03:33 +0800 Message-Id: <20240316030333.31269-4-yinshiyou-hf@loongson.cn> In-Reply-To: <20240316030333.31269-1-yinshiyou-hf@loongson.cn> References: <20240316030333.31269-1-yinshiyou-hf@loongson.cn> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 3/3] swscale: [LA] Optimize swscale funcs in input.c Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Series	[FFmpeg-devel,1/3] swscale: [LA] Optimize range convert for yuvj420p. \| expand [FFmpeg-devel,1/3] swscale: [LA] Optimize range convert for yuvj420p. [FFmpeg-devel,2/3] swscale: [LA] Optimize yuv2plane1_8_c. [FFmpeg-devel,3/3] swscale: [LA] Optimize swscale funcs in input.c

Context	Check	Description
yinshiyou/make_loongarch64	success	Make finished
yinshiyou/make_fate_loongarch64	success	Make fate finished
andriy/make_x86	success	Make finished
andriy/make_fate_x86	success	Make fate finished

diff --git a/libswscale/loongarch/Makefile b/libswscale/loongarch/Makefile
index c35ba309a4..7ba11d492e 100644
--- a/libswscale/loongarch/Makefile
+++ b/libswscale/loongarch/Makefile
@@ -9,4 +9,5 @@ LSX-OBJS-$(CONFIG_SWSCALE) += loongarch/swscale.o \
                                loongarch/input.o   \
                                loongarch/output.o  \
                                loongarch/output_lsx.o  \
+                               loongarch/input_lsx.o   \
                                loongarch/yuv2rgb_lsx.o
diff --git a/libswscale/loongarch/input.S b/libswscale/loongarch/input.S
index d01f7384b1..717592b004 100644
--- a/libswscale/loongarch/input.S
+++ b/libswscale/loongarch/input.S
@@ -283,3 +283,498 @@ function planar_rgb_to_uv_lsx
     ld.d            s3,     sp,     16
     addi.d          sp,     sp,     24
 endfunc
+
+/*
+ * void yuy2ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ *                   const uint8_t *src2, int width, uint32_t *unused, void *opq)
+ */
+function yuy2ToUV_lsx
+    andi            t0,     a5,     7       /* t0 = width % 8, handled by the scalar tail */
+    srli.d          a5,     a5,     3       /* a5 = number of 8-sample vector iterations */
+    beqz            a5,     2f
+1:
+    vld             vr0,    a3,     1
+    vld             vr1,    a3,     17
+    addi.d          a5,     a5,     -1
+    addi.d          a3,     a3,     32
+    vpickev.b       vr2,    vr1,    vr0
+    vpickev.b       vr0,    vr2,    vr2
+    vpickod.b       vr1,    vr2,    vr2
+    fst.d           f0,     a0,     0
+    fst.d           f1,     a1,     0
+    addi.d          a0,     a0,     8
+    addi.d          a1,     a1,     8
+    bnez            a5,     1b
+2:
+    beqz            t0,     4f
+3:
+    ld.b            t1,     a3,     1
+    ld.b            t2,     a3,     3
+    addi.d          a3,     a3,     4
+    addi.d          t0,     t0,     -1
+    st.b            t1,     a0,     0
+    st.b            t2,     a1,     0
+    addi.d          a0,     a0,     1
+    addi.d          a1,     a1,     1
+    bnez            t0,     3b
+4:
+endfunc
+
+function yuy2ToUV_lasx
+    andi            t0,     a5,     15      /* t0 = width % 16, handled by the scalar tail */
+    srli.d          a5,     a5,     4       /* a5 = number of 16-sample vector iterations */
+    beqz            a5,     2f
+1:
+    xvld            xr0,    a3,     1
+    xvld            xr1,    a3,     33
+    addi.d          a5,     a5,     -1
+    addi.d          a3,     a3,     64
+    xvpickev.b      xr2,    xr1,    xr0
+    xvpermi.d       xr2,    xr2,    0xd8
+    xvpickev.b      xr0,    xr2,    xr2
+    xvpermi.d       xr0,    xr0,    0xd8
+    xvpickod.b      xr1,    xr2,    xr2
+    xvpermi.d       xr1,    xr1,    0xd8
+    vst             vr0,    a0,     0
+    vst             vr1,    a1,     0
+    addi.d          a0,     a0,     16
+    addi.d          a1,     a1,     16
+    bnez            a5,     1b
+2:
+    beqz            t0,     4f
+3:
+    ld.b            t1,     a3,     1
+    ld.b            t2,     a3,     3
+    addi.d          a3,     a3,     4
+    addi.d          t0,     t0,     -1
+    st.b            t1,     a0,     0
+    st.b            t2,     a1,     0
+    addi.d          a0,     a0,     1
+    addi.d          a1,     a1,     1
+    bnez            t0,     3b
+4:
+endfunc
+
+/*
+ * void yvy2ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ *                   const uint8_t *src2, int width, uint32_t *unused, void *opq)
+ */
+function yvy2ToUV_lsx
+    andi            t0,     a5,     7
+    srli.d          a5,     a5,     3
+    beqz            a5,     2f
+1:
+    vld             vr0,    a3,     1
+    vld             vr1,    a3,     17
+    addi.d          a5,     a5,     -1
+    addi.d          a3,     a3,     32
+    vpickev.b       vr2,    vr1,    vr0
+    vpickev.b       vr0,    vr2,    vr2
+    vpickod.b       vr1,    vr2,    vr2
+    fst.d           f0,     a1,     0       /* same extraction as yuy2ToUV, destinations swapped */
+    fst.d           f1,     a0,     0
+    addi.d          a0,     a0,     8
+    addi.d          a1,     a1,     8
+    bnez            a5,     1b
+2:
+    beqz            t0,     4f
+3:
+    ld.b            t1,     a3,     1
+    ld.b            t2,     a3,     3
+    addi.d          a3,     a3,     4
+    addi.d          t0,     t0,     -1
+    st.b            t1,     a1,     0
+    st.b            t2,     a0,     0
+    addi.d          a0,     a0,     1
+    addi.d          a1,     a1,     1
+    bnez            t0,     3b
+4:
+endfunc
+
+function yvy2ToUV_lasx
+    andi            t0,     a5,     15
+    srli.d          a5,     a5,     4
+    beqz            a5,     2f
+1:
+    xvld            xr0,    a3,     1
+    xvld            xr1,    a3,     33
+    addi.d          a5,     a5,     -1
+    addi.d          a3,     a3,     64
+    xvpickev.b      xr2,    xr1,    xr0
+    xvpermi.d       xr2,    xr2,    0xd8
+    xvpickev.b      xr0,    xr2,    xr2
+    xvpermi.d       xr0,    xr0,    0xd8
+    xvpickod.b      xr1,    xr2,    xr2
+    xvpermi.d       xr1,    xr1,    0xd8
+    vst             vr0,    a1,     0       /* same extraction as yuy2ToUV, destinations swapped */
+    vst             vr1,    a0,     0
+    addi.d          a0,     a0,     16
+    addi.d          a1,     a1,     16
+    bnez            a5,     1b
+2:
+    beqz            t0,     4f
+3:
+    ld.b            t1,     a3,     1
+    ld.b            t2,     a3,     3
+    addi.d          a3,     a3,     4
+    addi.d          t0,     t0,     -1
+    st.b            t1,     a1,     0
+    st.b            t2,     a0,     0
+    addi.d          a0,     a0,     1
+    addi.d          a1,     a1,     1
+    bnez            t0,     3b
+4:
+endfunc
+
+/*
+ * void uyvyToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ *                   const uint8_t *src2, int width, uint32_t *unused, void *opq)
+ */
+function uyvyToUV_lsx
+    andi            t0,     a5,     7
+    srli.d          a5,     a5,     3
+    beqz            a5,     2f
+1:
+    vld             vr0,    a3,     0       /* vector path starts at byte 0 of each 4-byte group */
+    vld             vr1,    a3,     16
+    addi.d          a5,     a5,     -1
+    addi.d          a3,     a3,     32
+    vpickev.b       vr2,    vr1,    vr0
+    vpickev.b       vr0,    vr2,    vr2
+    vpickod.b       vr1,    vr2,    vr2
+    fst.d           f0,     a0,     0
+    fst.d           f1,     a1,     0
+    addi.d          a0,     a0,     8
+    addi.d          a1,     a1,     8
+    bnez            a5,     1b
+2:
+    beqz            t0,     4f
+3:
+    ld.b            t1,     a3,     0       /* byte 0 -> dstU, matching the vector path */
+    ld.b            t2,     a3,     2       /* byte 2 -> dstV, matching the vector path */
+    addi.d          a3,     a3,     4
+    addi.d          t0,     t0,     -1
+    st.b            t1,     a0,     0
+    st.b            t2,     a1,     0
+    addi.d          a0,     a0,     1
+    addi.d          a1,     a1,     1
+    bnez            t0,     3b
+4:
+endfunc
+
+function uyvyToUV_lasx
+    andi            t0,     a5,     15
+    srli.d          a5,     a5,     4
+    beqz            a5,     2f
+1:
+    xvld            xr0,    a3,     0       /* vector path starts at byte 0 of each 4-byte group */
+    xvld            xr1,    a3,     32
+    addi.d          a5,     a5,     -1
+    addi.d          a3,     a3,     64
+    xvpickev.b      xr2,    xr1,    xr0
+    xvpermi.d       xr2,    xr2,    0xd8
+    xvpickev.b      xr0,    xr2,    xr2
+    xvpermi.d       xr0,    xr0,    0xd8
+    xvpickod.b      xr1,    xr2,    xr2
+    xvpermi.d       xr1,    xr1,    0xd8
+    vst             vr0,    a0,     0
+    vst             vr1,    a1,     0
+    addi.d          a0,     a0,     16
+    addi.d          a1,     a1,     16
+    bnez            a5,     1b
+2:
+    beqz            t0,     4f
+3:
+    ld.b            t1,     a3,     0       /* byte 0 -> dstU, matching the vector path */
+    ld.b            t2,     a3,     2       /* byte 2 -> dstV, matching the vector path */
+    addi.d          a3,     a3,     4
+    addi.d          t0,     t0,     -1
+    st.b            t1,     a0,     0
+    st.b            t2,     a1,     0
+    addi.d          a0,     a0,     1
+    addi.d          a1,     a1,     1
+    bnez            t0,     3b
+4:
+endfunc
+
+/*
+ * void nv12ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ *                   const uint8_t *src2, int width, uint32_t *unused, void *opq)
+ */
+function nv12ToUV_lsx
+    andi            t0,     a5,     15
+    srli.d          a5,     a5,     4
+    beqz            a5,     2f
+1:
+    vld             vr0,    a3,     0
+    vld             vr1,    a3,     16
+    addi.d          a5,     a5,     -1
+    addi.d          a3,     a3,     32
+    vpickev.b       vr2,    vr1,    vr0     /* even source bytes -> dstU */
+    vpickod.b       vr3,    vr1,    vr0     /* odd source bytes  -> dstV */
+    vst             vr2,    a0,     0
+    vst             vr3,    a1,     0
+    addi.d          a0,     a0,     16
+    addi.d          a1,     a1,     16
+    bnez            a5,     1b
+2:
+    beqz            t0,     4f
+3:
+    ld.b            t1,     a3,     0
+    ld.b            t2,     a3,     1
+    addi.d          a3,     a3,     2
+    addi.d          t0,     t0,     -1
+    st.b            t1,     a0,     0
+    st.b            t2,     a1,     0
+    addi.d          a0,     a0,     1
+    addi.d          a1,     a1,     1
+    bnez            t0,     3b
+4:
+endfunc
+
+function nv12ToUV_lasx
+    andi            t0,     a5,     31
+    srli.d          a5,     a5,     5
+    beqz            a5,     2f
+1:
+    xvld            xr0,    a3,     0
+    xvld            xr1,    a3,     32
+    addi.d          a5,     a5,     -1
+    addi.d          a3,     a3,     64
+    xvpickev.b      xr2,    xr1,    xr0
+    xvpickod.b      xr3,    xr1,    xr0
+    xvpermi.d       xr2,    xr2,    0xd8
+    xvpermi.d       xr3,    xr3,    0xd8
+    xvst            xr2,    a0,     0
+    xvst            xr3,    a1,     0
+    addi.d          a0,     a0,     32
+    addi.d          a1,     a1,     32
+    bnez            a5,     1b
+2:
+    beqz            t0,     4f
+3:
+    ld.b            t1,     a3,     0
+    ld.b            t2,     a3,     1
+    addi.d          a3,     a3,     2
+    addi.d          t0,     t0,     -1
+    st.b            t1,     a0,     0
+    st.b            t2,     a1,     0
+    addi.d          a0,     a0,     1
+    addi.d          a1,     a1,     1
+    bnez            t0,     3b
+4:
+endfunc
+
+/*
+ * void nv21ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ *                   const uint8_t *src2, int width, uint32_t *unused, void *opq)
+ */
+function nv21ToUV_lsx
+    andi            t0,     a5,     15
+    srli.d          a5,     a5,     4
+    beqz            a5,     2f
+1:
+    vld             vr0,    a3,     0
+    vld             vr1,    a3,     16
+    addi.d          a5,     a5,     -1
+    addi.d          a3,     a3,     32
+    vpickev.b       vr2,    vr1,    vr0     /* even source bytes -> dstV */
+    vpickod.b       vr3,    vr1,    vr0     /* odd source bytes  -> dstU */
+    vst             vr2,    a1,     0
+    vst             vr3,    a0,     0
+    addi.d          a0,     a0,     16
+    addi.d          a1,     a1,     16
+    bnez            a5,     1b
+2:
+    beqz            t0,     4f
+3:
+    ld.b            t1,     a3,     0
+    ld.b            t2,     a3,     1
+    addi.d          a3,     a3,     2
+    addi.d          t0,     t0,     -1
+    st.b            t1,     a1,     0
+    st.b            t2,     a0,     0
+    addi.d          a0,     a0,     1
+    addi.d          a1,     a1,     1
+    bnez            t0,     3b
+4:
+endfunc
+
+function nv21ToUV_lasx
+    andi            t0,     a5,     31
+    srli.d          a5,     a5,     5
+    beqz            a5,     2f
+1:
+    xvld            xr0,    a3,     0
+    xvld            xr1,    a3,     32
+    addi.d          a5,     a5,     -1
+    addi.d          a3,     a3,     64
+    xvpickev.b      xr2,    xr1,    xr0
+    xvpickod.b      xr3,    xr1,    xr0
+    xvpermi.d       xr2,    xr2,    0xd8
+    xvpermi.d       xr3,    xr3,    0xd8
+    xvst            xr2,    a1,     0
+    xvst            xr3,    a0,     0
+    addi.d          a0,     a0,     32
+    addi.d          a1,     a1,     32
+    bnez            a5,     1b
+2:
+    beqz            t0,     4f
+3:
+    ld.b            t1,     a3,     0
+    ld.b            t2,     a3,     1
+    addi.d          a3,     a3,     2
+    addi.d          t0,     t0,     -1
+    st.b            t1,     a1,     0
+    st.b            t2,     a0,     0
+    addi.d          a0,     a0,     1
+    addi.d          a1,     a1,     1
+    bnez            t0,     3b
+4:
+endfunc
+
+/*
+ * void abgrToA_lsx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+ *                  const uint8_t *unused2, int width, uint32_t *unused, void *opq)
+ */
+function abgrToA_lsx
+    andi            t0,     a4,     7
+    srli.d          a4,     a4,     3
+    vxor.v          vr0,    vr0,    vr0     /* zero vector used to widen bytes to halfwords */
+    beqz            a4,     2f
+1:
+    vld             vr1,    a1,     0       /* vector path reads byte 0 of each 4-byte pixel */
+    vld             vr2,    a1,     16
+    addi.d          a4,     a4,     -1
+    addi.d          a1,     a1,     32
+    vpickev.b       vr3,    vr2,    vr1
+    vpackev.b       vr3,    vr0,    vr3
+    vslli.h         vr1,    vr3,    6
+    vsrli.h         vr2,    vr3,    2
+    vor.v           vr3,    vr2,    vr1     /* (a << 6) | (a >> 2) */
+    vst             vr3,    a0,     0
+    addi.d          a0,     a0,     16
+    bnez            a4,     1b
+2:
+    beqz            t0,     4f
+3:
+    ld.b            t1,     a1,     0       /* byte 0 of the pixel, matching the vector path */
+    addi.d          t0,     t0,     -1
+    addi.d          a1,     a1,     4
+    andi            t1,     t1,     0xff    /* ld.b sign-extends; keep only the low 8 bits */
+    slli.w          t2,     t1,     6
+    srli.w          t3,     t1,     2
+    or              t1,     t2,     t3
+    st.h            t1,     a0,     0
+    addi.d          a0,     a0,     2
+    bnez            t0,     3b
+4:
+endfunc
+
+function abgrToA_lasx
+    andi            t0,     a4,     15
+    srli.d          a4,     a4,     4
+    xvxor.v         xr0,    xr0,    xr0     /* zero vector used to widen bytes to halfwords */
+    beqz            a4,     2f
+1:
+    xvld            xr1,    a1,     0       /* vector path reads byte 0 of each 4-byte pixel */
+    xvld            xr2,    a1,     32
+    addi.d          a4,     a4,     -1
+    addi.d          a1,     a1,     64
+    xvpickev.b      xr3,    xr2,    xr1
+    xvpermi.d       xr3,    xr3,    0xd8
+    xvpackev.b      xr3,    xr0,    xr3
+    xvslli.h        xr1,    xr3,    6
+    xvsrli.h        xr2,    xr3,    2
+    xvor.v          xr3,    xr2,    xr1     /* (a << 6) | (a >> 2) */
+    xvst            xr3,    a0,     0
+    addi.d          a0,     a0,     32
+    bnez            a4,     1b
+2:
+    beqz            t0,     4f
+3:
+    ld.b            t1,     a1,     0       /* byte 0 of the pixel, matching the vector path */
+    addi.d          t0,     t0,     -1
+    addi.d          a1,     a1,     4
+    andi            t1,     t1,     0xff    /* ld.b sign-extends; keep only the low 8 bits */
+    slli.w          t2,     t1,     6
+    srli.w          t3,     t1,     2
+    or              t1,     t2,     t3
+    st.h            t1,     a0,     0
+    addi.d          a0,     a0,     2
+    bnez            t0,     3b
+4:
+endfunc
+
+/*
+ * void rgbaToA_lsx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+ *                  const uint8_t *unused2, int width, uint32_t *unused, void *opq)
+ */
+function rgbaToA_lsx
+    andi            t0,     a4,     7
+    srli.d          a4,     a4,     3
+    vxor.v          vr0,    vr0,    vr0
+    beqz            a4,     2f
+1:
+    vld             vr1,    a1,     3       /* vector path reads byte 3 of each 4-byte pixel */
+    vld             vr2,    a1,     19
+    addi.d          a4,     a4,     -1
+    addi.d          a1,     a1,     32
+    vpickev.b       vr3,    vr2,    vr1
+    vpackev.b       vr3,    vr0,    vr3
+    vslli.h         vr1,    vr3,    6
+    vsrli.h         vr2,    vr3,    2
+    vor.v           vr3,    vr2,    vr1
+    vst             vr3,    a0,     0
+    addi.d          a0,     a0,     16
+    bnez            a4,     1b
+2:
+    beqz            t0,     4f
+3:
+    ld.b            t1,     a1,     3
+    addi.d          t0,     t0,     -1
+    addi.d          a1,     a1,     4
+    andi            t1,     t1,     0xff
+    slli.w          t2,     t1,     6
+    srli.w          t3,     t1,     2
+    or              t1,     t2,     t3
+    st.h            t1,     a0,     0
+    addi.d          a0,     a0,     2
+    bnez            t0,     3b
+4:
+endfunc
+
+function rgbaToA_lasx
+    andi            t0,     a4,     15
+    srli.d          a4,     a4,     4
+    xvxor.v         xr0,    xr0,    xr0
+    beqz            a4,     2f
+1:
+    xvld            xr1,    a1,     3       /* vector path reads byte 3 of each 4-byte pixel */
+    xvld            xr2,    a1,     35
+    addi.d          a4,     a4,     -1
+    addi.d          a1,     a1,     64
+    xvpickev.b      xr3,    xr2,    xr1
+    xvpermi.d       xr3,    xr3,    0xd8
+    xvpackev.b      xr3,    xr0,    xr3
+    xvslli.h        xr1,    xr3,    6
+    xvsrli.h        xr2,    xr3,    2
+    xvor.v          xr3,    xr2,    xr1
+    xvst            xr3,    a0,     0
+    addi.d          a0,     a0,     32
+    bnez            a4,     1b
+2:
+    beqz            t0,     4f
+3:
+    ld.b            t1,     a1,     3
+    addi.d          t0,     t0,     -1
+    addi.d          a1,     a1,     4
+    andi            t1,     t1,     0xff
+    slli.w          t2,     t1,     6
+    srli.w          t3,     t1,     2
+    or              t1,     t2,     t3
+    st.h            t1,     a0,     0
+    addi.d          a0,     a0,     2
+    bnez            t0,     3b
+4:
+endfunc
diff --git a/libswscale/loongarch/input_lasx.c b/libswscale/loongarch/input_lasx.c
index 4830072eaf..0f1d954880 100644
--- a/libswscale/loongarch/input_lasx.c
+++ b/libswscale/loongarch/input_lasx.c
@@ -200,3 +200,46 @@ void planar_rgb_to_y_lasx(uint8_t *_dst, const uint8_t *src[4], int width,
         dst[i] = (tem_ry * r + tem_gy * g + tem_by * b + set) >> shift;
     }
 }
+
+av_cold void ff_sws_init_input_lasx(SwsContext *c)
+{
+    enum AVPixelFormat srcFormat = c->srcFormat;
+
+    switch (srcFormat) { /* formats not listed keep whatever reader is already installed */
+    case AV_PIX_FMT_YUYV422:
+        c->chrToYV12 = yuy2ToUV_lasx;
+        break;
+    case AV_PIX_FMT_YVYU422:
+        c->chrToYV12 = yvy2ToUV_lasx;
+        break;
+    case AV_PIX_FMT_UYVY422:
+        c->chrToYV12 = uyvyToUV_lasx;
+        break;
+    case AV_PIX_FMT_NV12:
+    case AV_PIX_FMT_NV16:
+    case AV_PIX_FMT_NV24:
+        c->chrToYV12 = nv12ToUV_lasx;
+        break;
+    case AV_PIX_FMT_NV21:
+    case AV_PIX_FMT_NV42:
+        c->chrToYV12 = nv21ToUV_lasx;
+        break;
+    case AV_PIX_FMT_GBRAP:
+    case AV_PIX_FMT_GBRP:
+        c->readChrPlanar = planar_rgb_to_uv_lasx;
+        break;
+    }
+
+    if (c->needAlpha) { /* alpha readers are installed only when alpha is needed */
+        switch (srcFormat) {
+        case AV_PIX_FMT_BGRA:
+        case AV_PIX_FMT_RGBA:
+            c->alpToYV12 = rgbaToA_lasx;
+            break;
+        case AV_PIX_FMT_ABGR:
+        case AV_PIX_FMT_ARGB:
+            c->alpToYV12 = abgrToA_lasx;
+            break;
+        }
+    }
+}
diff --git a/libswscale/loongarch/input_lsx.c b/libswscale/loongarch/input_lsx.c
new file mode 100644
index 0000000000..1bb04457bb
--- /dev/null
+++ b/libswscale/loongarch/input_lsx.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2024 Loongson Technology Corporation Limited
+ * Contributed by Shiyou Yin<yinshiyou-hf@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "swscale_loongarch.h"
+
+av_cold void ff_sws_init_input_lsx(SwsContext *c)
+{
+    enum AVPixelFormat srcFormat = c->srcFormat;
+
+    switch (srcFormat) { /* formats not listed keep whatever reader is already installed */
+    case AV_PIX_FMT_YUYV422:
+        c->chrToYV12 = yuy2ToUV_lsx;
+        break;
+    case AV_PIX_FMT_YVYU422:
+        c->chrToYV12 = yvy2ToUV_lsx;
+        break;
+    case AV_PIX_FMT_UYVY422:
+        c->chrToYV12 = uyvyToUV_lsx;
+        break;
+    case AV_PIX_FMT_NV12:
+    case AV_PIX_FMT_NV16:
+    case AV_PIX_FMT_NV24:
+        c->chrToYV12 = nv12ToUV_lsx;
+        break;
+    case AV_PIX_FMT_NV21:
+    case AV_PIX_FMT_NV42:
+        c->chrToYV12 = nv21ToUV_lsx;
+        break;
+    case AV_PIX_FMT_GBRAP:
+    case AV_PIX_FMT_GBRP:
+        c->readChrPlanar = planar_rgb_to_uv_lsx;
+        break;
+    }
+
+    if (c->needAlpha) { /* alpha readers are installed only when alpha is needed */
+        switch (srcFormat) {
+        case AV_PIX_FMT_BGRA:
+        case AV_PIX_FMT_RGBA:
+            c->alpToYV12 = rgbaToA_lsx;
+            break;
+        case AV_PIX_FMT_ABGR:
+        case AV_PIX_FMT_ARGB:
+            c->alpToYV12 = abgrToA_lsx;
+            break;
+        }
+    }
+}
diff --git a/libswscale/loongarch/swscale_init_loongarch.c b/libswscale/loongarch/swscale_init_loongarch.c
index 04d2553fa4..3a5a7ee856 100644
--- a/libswscale/loongarch/swscale_init_loongarch.c
+++ b/libswscale/loongarch/swscale_init_loongarch.c
@@ -63,6 +63,7 @@ av_cold void ff_sws_init_swscale_loongarch(SwsContext *c)
         ff_sws_init_output_lsx(c, &c->yuv2plane1, &c->yuv2planeX,
                                &c->yuv2nv12cX, &c->yuv2packed1,
                                &c->yuv2packed2, &c->yuv2packedX, &c->yuv2anyX);
+        ff_sws_init_input_lsx(c);
         if (c->srcBpc == 8) {
             if (c->dstBpc <= 14) {
                 c->hyScale = c->hcScale = ff_hscale_8_to_15_lsx;
@@ -73,21 +74,13 @@ av_cold void ff_sws_init_swscale_loongarch(SwsContext *c)
             c->hyScale = c->hcScale = c->dstBpc > 14 ? ff_hscale_16_to_19_lsx
                                                      : ff_hscale_16_to_15_lsx;
         }
-        switch (c->srcFormat) {
-        case AV_PIX_FMT_GBRAP:
-        case AV_PIX_FMT_GBRP:
-            {
-                c->readChrPlanar = planar_rgb_to_uv_lsx;
-                c->readLumPlanar = planar_rgb_to_y_lsx;
-            }
-            break;
-        }
     }
 #if HAVE_LASX
     if (have_lasx(cpu_flags)) {
         ff_sws_init_output_lasx(c, &c->yuv2plane1, &c->yuv2planeX,
                                 &c->yuv2nv12cX, &c->yuv2packed1,
                                 &c->yuv2packed2, &c->yuv2packedX, &c->yuv2anyX);
+        ff_sws_init_input_lasx(c);
         if (c->srcBpc == 8) {
             if (c->dstBpc <= 14) {
                 c->hyScale = c->hcScale = ff_hscale_8_to_15_lasx;
@@ -98,15 +91,6 @@ av_cold void ff_sws_init_swscale_loongarch(SwsContext *c)
             c->hyScale = c->hcScale = c->dstBpc > 14 ? ff_hscale_16_to_19_lasx
                                                      : ff_hscale_16_to_15_lasx;
         }
-        switch (c->srcFormat) {
-        case AV_PIX_FMT_GBRAP:
-        case AV_PIX_FMT_GBRP:
-            {
-                c->readChrPlanar = planar_rgb_to_uv_lasx;
-                c->readLumPlanar = planar_rgb_to_y_lasx;
-            }
-            break;
-        }
     }
 #endif // #if HAVE_LASX
     ff_sws_init_range_convert_loongarch(c);
diff --git a/libswscale/loongarch/swscale_loongarch.h b/libswscale/loongarch/swscale_loongarch.h
index ea93881f8e..07c91bc25c 100644
--- a/libswscale/loongarch/swscale_loongarch.h
+++ b/libswscale/loongarch/swscale_loongarch.h
@@ -68,6 +68,29 @@ void yuv2planeX_8_lsx(const int16_t *filter, int filterSize,
 void yuv2plane1_8_lsx(const int16_t *src, uint8_t *dest, int dstW,
                       const uint8_t *dither, int offset);
 
+void yuy2ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+                  const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void yvy2ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+                  const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void uyvyToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+                  const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void nv12ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+                  const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void nv21ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+                  const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void abgrToA_lsx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+                 const uint8_t *unused2, int width, uint32_t *unused, void *opq);
+
+void rgbaToA_lsx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+                 const uint8_t *unused2, int width, uint32_t *unused, void *opq);
+
+av_cold void ff_sws_init_input_lsx(SwsContext *c);
+
 av_cold void ff_sws_init_output_lsx(SwsContext *c,
                                     yuv2planar1_fn *yuv2plane1,
                                     yuv2planarX_fn *yuv2planeX,
@@ -152,6 +175,29 @@ void yuv2planeX_8_lasx(const int16_t *filter, int filterSize,
 void yuv2plane1_8_lasx(const int16_t *src, uint8_t *dest, int dstW,
                        const uint8_t *dither, int offset);
 
+void yuy2ToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+                   const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void yvy2ToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+                   const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void uyvyToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+                   const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void nv12ToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+                   const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void nv21ToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+                   const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void abgrToA_lasx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+                  const uint8_t *unused2, int width, uint32_t *unused, void *opq);
+
+void rgbaToA_lasx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+                  const uint8_t *unused2, int width, uint32_t *unused, void *opq);
+
+av_cold void ff_sws_init_input_lasx(SwsContext *c);
+
 av_cold void ff_sws_init_output_lasx(SwsContext *c,
                                      yuv2planar1_fn *yuv2plane1,
                                      yuv2planarX_fn *yuv2planeX,

[FFmpeg-devel,3/3] swscale: [LA] Optimize swscale funcs in input.c

Checks

Commit Message

Patch