[FFmpeg-devel,v1,2/2] avcodec/loongarch:add LSX optimization for aac audio encode

Message ID	20240409123528.15022-2-pengxu@loongson.cn
State	New
Headers	show Delivered-To: ffmpegpatchwork2@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; From: pengxu <pengxu@loongson.cn> To: ffmpeg-devel@ffmpeg.org Date: Tue, 9 Apr 2024 20:35:28 +0800 Message-Id: <20240409123528.15022-2-pengxu@loongson.cn> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH v1 2/2] avcodec/loongarch:add LSX optimization for aac audio encode Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Series	None \| expand [FFmpeg-devel,v1,2/2] avcodec/loongarch:add LSX optimization for aac audio encode

diff --git a/libavcodec/aacencdsp.h b/libavcodec/aacencdsp.h index 67836d8cf7..5db27a95a9 100644 --- a/libavcodec/aacencdsp.h +++ b/libavcodec/aacencdsp.h @@ -34,6 +34,7 @@ typedef struct AACEncDSPContext { void ff_aacenc_dsp_init_riscv(AACEncDSPContext *s); void ff_aacenc_dsp_init_x86(AACEncDSPContext *s); +void ff_aacenc_dsp_init_loongarch(AACEncDSPContext *s); static inline void abs_pow34_v(float *out, const float *in, const int size) { @@ -66,6 +67,8 @@ static inline void ff_aacenc_dsp_init(AACEncDSPContext *s) ff_aacenc_dsp_init_riscv(s); #elif ARCH_X86 ff_aacenc_dsp_init_x86(s); +#elif ARCH_LOONGARCH64 + ff_aacenc_dsp_init_loongarch(s); #endif } diff --git a/libavcodec/loongarch/Makefile b/libavcodec/loongarch/Makefile index 07da2964e4..483917d336 100644 --- a/libavcodec/loongarch/Makefile +++ b/libavcodec/loongarch/Makefile @@ -9,6 +9,7 @@ OBJS-$(CONFIG_HPELDSP) += loongarch/hpeldsp_init_loongarch.o OBJS-$(CONFIG_IDCTDSP) += loongarch/idctdsp_init_loongarch.o OBJS-$(CONFIG_VIDEODSP) += loongarch/videodsp_init.o OBJS-$(CONFIG_HEVC_DECODER) += loongarch/hevcdsp_init_loongarch.o +OBJS-$(CONFIG_AAC_ENCODER) += loongarch/aacencdsp_init_loongarch.o LASX-OBJS-$(CONFIG_H264QPEL) += loongarch/h264qpel_lasx.o LASX-OBJS-$(CONFIG_H264DSP) += loongarch/h264dsp_lasx.o \ loongarch/h264_deblock_lasx.o @@ -38,3 +39,4 @@ LSX-OBJS-$(CONFIG_H264QPEL) += loongarch/h264qpel.o \ loongarch/h264qpel_lsx.o LSX-OBJS-$(CONFIG_H264CHROMA) += loongarch/h264chroma.o LSX-OBJS-$(CONFIG_H264PRED) += loongarch/h264intrapred.o +LSX-OBJS-$(CONFIG_AAC_ENCODER) += loongarch/aacencdsp.o diff --git a/libavcodec/loongarch/aacencdsp.S b/libavcodec/loongarch/aacencdsp.S new file mode 100644 index 0000000000..a7cfd3bb1c --- /dev/null +++ b/libavcodec/loongarch/aacencdsp.S @@ -0,0 +1,255 @@ +/* + * Loongarch LASX/LSX optimized AAC encoder DSP functions + * + * Copyright (c) 2024 Loongson Technology Corporation Limited + * Contributed by PengXu <pengxu@loongson.cn> + * + * This file is part of FFmpeg. 
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "loongson_asm.S"
+
+
+/* void ff_abs_pow34_lsx(float *out, const float *in, const int size); */
+// out[i] = sqrt(|in[i]| * sqrt(|in[i]|)); out:a0, in:a1, size:a2
+function ff_abs_pow34_lsx
+    move        t0,     zero            //loop counter (groups of 4)
+    move        t1,     zero            //byte offset into in/out
+
+    srai.d      t2,     a2,     2       //number of full groups of 4
+    beq         zero,   t2,     .FAPL02
+
+.FAPL01: /* 4 floats per iteration */
+    add.d       t3,     a1,     t1
+    fld.s       f0,     t3,     0x00
+    fld.s       f1,     t3,     0x04
+    fld.s       f2,     t3,     0x08
+    fld.s       f3,     t3,     0x0c
+
+    fabs.s      f0,     f0
+    fabs.s      f1,     f1
+    fabs.s      f2,     f2
+    fabs.s      f3,     f3
+
+    vextrins.w  vr0,    vr1,    0x10    //pack |in| into lanes 1..3 of vr0
+    vextrins.w  vr0,    vr2,    0x20
+    vextrins.w  vr0,    vr3,    0x30
+
+    vfsqrt.s    vr4,    vr0             //sqrt(a)
+    vfmul.s     vr5,    vr0,    vr4     //a * sqrt(a)
+    vfsqrt.s    vr6,    vr5             //sqrt(a * sqrt(a))
+
+    vstx        vr6,    a0,     t1
+
+    addi.d      t1,     t1,     16
+    addi.d      t0,     t0,     1
+    blt         t0,     t2,     .FAPL01
+
+.FAPL02: /* &2 */
+    andi        t0,     a2,     2
+    beq         zero,   t0,     .FAPL03
+
+    add.d       t3,     a1,     t1
+    add.d       t4,     a0,     t1
+
+    fld.s       f0,     t3,     0x00
+    fld.s       f1,     t3,     0x04
+
+    fabs.s      f0,     f0
+    fabs.s      f1,     f1
+
+    fsqrt.s     f2,     f0
+    fsqrt.s     f3,     f1
+
+    fmul.s      f4,     f0,     f2
+    fmul.s      f5,     f1,     f3
+
+    fsqrt.s     f6,     f4
+    fsqrt.s     f7,     f5
+
+    fst.s       f6,     t4,     0x00    //store the results (was fld.s: a load)
+    fst.s       f7,     t4,     0x04
+
+    addi.d      t1,     t1,     8
+
+.FAPL03: /* &1 */
+    andi        t0,     a2,     1
+    beq         zero,   t0,     .FAPL04
+
+    fldx.s      f0,     a1,     t1
+
+    fabs.s      f0,     f0
+    fsqrt.s     f2,     f0
+    fmul.s      f4,     f0,     f2
+    fsqrt.s     f6,     f4
+
+    fstx.s      f6,     a0,     t1      //store the result (was fldx.s: a load)
+
+    addi.d      t1,     t1,     4
+
+.FAPL04:
+endfunc
+
+
+
+/* void ff_aac_quantize_bands_lsx(int *out, const float *in, const float *scaled,
+                                   int size, int is_signed, int maxval, const float Q34,
+                                   const float rounding) */
+// out[i] = (int)min(scaled[i] * Q34 + rounding, (float)maxval), truncated toward
+// zero and negated when is_signed != 0 and in[i] < 0.
+// param:
+// out: a0, in: a1
+// scaled: a2
+// size: a3
+// is_signed: a4
+// maxval: a5
+// Q34: f0, rounding: f1
+function ff_aac_quantize_bands_lsx
+    move        t0,     zero            //loop counter (groups of 4)
+    move        t1,     zero            //byte offset into in/scaled/out
+
+    vpermi.w    vr0,    vr0,    0x00    //Q34 in every lane
+    vpermi.w    vr1,    vr1,    0x00    //rounding in every lane
+
+    srai.d      t2,     a3,     2       //number of full groups of 4
+    beq         zero,   t2,     .FAQBL02
+
+.FAQBL01: /* /4 */
+    vldx        vr2,    a2,     t1
+    vfmul.s     vr3,    vr2,    vr0     //qc
+    vfadd.s     vr4,    vr3,    vr1
+
+    movgr2fr.w  f5,     a5
+    ffint.s.w   f5,     f5
+    vpermi.w    vr5,    vr5,    0x00    //maxval
+    vfmin.s     vr6,    vr4,    vr5
+    vfrintrz.s  vr7,    vr6             //(float .0)tmp
+
+    beq         a4,     zero,   .S4ISEND
+
+    fsub.s      f8,     f0,     f0      //0.0f, only compared as a scalar
+    vldx        vr9,    a1,     t1      //in
+    vextrins.w  vr10,   vr9,    0x01    //in[1] -> f10
+    vextrins.w  vr11,   vr9,    0x02    //in[2] -> f11
+    vextrins.w  vr12,   vr9,    0x03    //in[3] -> f12
+.S4IS00:
+    fcmp.clt.s  $fcc0,  f9,     f8
+    bceqz       $fcc0,  .S4IS01
+    fneg.s      f13,    f7              //negate in a scratch register: a scalar
+    vextrins.w  vr7,    vr13,   0x00    //write to f7 may not keep lanes 1..3 of vr7
+.S4IS01:
+    fcmp.clt.s  $fcc1,  f10,    f8
+    bceqz       $fcc1,  .S4IS02
+    vextrins.w  vr13,   vr7,    0x01
+    fneg.s      f13,    f13
+    vextrins.w  vr7,    vr13,   0x10
+.S4IS02:
+    fcmp.clt.s  $fcc2,  f11,    f8
+    bceqz       $fcc2,  .S4IS03
+    vextrins.w  vr13,   vr7,    0x02
+    fneg.s      f13,    f13
+    vextrins.w  vr7,    vr13,   0x20
+.S4IS03:
+    fcmp.clt.s  $fcc3,  f12,    f8
+    bceqz       $fcc3,  .S4ISEND
+    vextrins.w  vr13,   vr7,    0x03
+    fneg.s      f13,    f13
+    vextrins.w  vr7,    vr13,   0x30
+.S4ISEND:
+    vftintrz.w.s vr14,  vr7
+    vstx        vr14,   a0,     t1
+    addi.d      t1,     t1,     16
+    addi.d      t0,     t0,     1
+    blt         t0,     t2,     .FAQBL01
+
+.FAQBL02: /* &2 */
+    andi        t2,     a3,     2
+    beq         $r0,    t2,     .FAQBL03
+
+    add.d       t2,     a2,     t1
+    fld.s       f2,     t2,     0x00
+    fld.s       f3,     t2,     0x04
+
+    fmul.s      f2,     f2,     f0
+    fmul.s      f3,     f3,     f0      //qc
+
+    fadd.s      f2,     f2,     f1
+    fadd.s      f3,     f3,     f1
+
+    movgr2fr.w  f5,     a5
+    ffint.s.w   f5,     f5              //maxval
+
+    fmin.s      f2,     f2,     f5
+    fmin.s      f3,     f3,     f5      //tmp
+
+    vextrins.w  vr2,    vr3,    0x10
+    vfrintrz.s  vr2,    vr2             //(float .0)tmp
+    vextrins.w  vr3,    vr2,    0x01
+
+    beq         a4,     zero,   .S2ISEND
+
+    fsub.s      f4,     f0,     f0      //0.0f
+    add.d       t3,     a1,     t1
+    fld.s       f6,     t3,     0x00
+    fld.s       f7,     t3,     0x04    //in
+
+.S2IS00:
+    fcmp.clt.s  $fcc0,  f6,     f4
+    bceqz       $fcc0,  .S2IS01
+    fneg.s      f2,     f2              //negate in place so f2 is valid on every path
+.S2IS01:
+    fcmp.clt.s  $fcc1,  f7,     f4
+    bceqz       $fcc1,  .S2ISEND
+    fneg.s      f3,     f3              //negate in place so f3 is valid on every path
+.S2ISEND:
+    ftintrz.w.s f8,     f2              //convert from f2/f3; f8/f9 used to be read unset
+    ftintrz.w.s f9,     f3
+    add.d       t2,     a0,     t1
+    fst.s       f8,     t2,     0x00
+    fst.s       f9,     t2,     0x04
+    addi.d      t1,     t1,     8
+
+.FAQBL03: /* &1 */
+    andi        t2,     a3,     1
+    beq         $r0,    t2,     .FAQBL04
+
+    fldx.s      f2,     a2,     t1
+    fmul.s      f2,     f2,     f0      //qc
+    fadd.s      f2,     f2,     f1
+
+    movgr2fr.w  f5,     a5
+    ffint.s.w   f5,     f5              //maxval
+
+    fmin.s      f2,     f2,     f5
+    vfrintrz.s  vr2,    vr2             //(float .0)tmp
+
+    beq         a4,     zero,   .S1ISEND
+
+    fsub.s      f4,     f0,     f0      //0.0f
+    fldx.s      f6,     a1,     t1      //in
+
+.S1IS00:
+    fcmp.clt.s  $fcc0,  f6,     f4
+    bceqz       $fcc0,  .S1ISEND
+    fneg.s      f2,     f2              //negate in place so f2 is valid on every path
+.S1ISEND:
+    ftintrz.w.s f8,     f2              //convert from f2; f8 used to be read unset
+    fstx.s      f8,     a0,     t1
+    addi.d      t1,     t1,     4
+
+.FAQBL04:
+endfunc
\ No newline at end of file
diff --git a/libavcodec/loongarch/aacencdsp.h b/libavcodec/loongarch/aacencdsp.h
new file mode 100644
index 0000000000..076cd4d247
--- /dev/null
+++ b/libavcodec/loongarch/aacencdsp.h
@@ -0,0 +1,35 @@
+/*
+ * AAC encoder assembly optimizations
+ * Copyright (c) 2024 Loongson Technology Corporation Limited
+ * Contributed by PengXu <pengxu@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_LOONGARCH_AACENC_H
+#define AVCODEC_LOONGARCH_AACENC_H
+
+#include "libavutil/float_dsp.h"
+#include "libavutil/loongarch/cpu.h"
+#include "libavcodec/aacenc.h"
+/* LSX routines that ff_aacenc_dsp_init_loongarch() assigns to abs_pow34 / quant_bands. */
+void ff_abs_pow34_lsx(float *out, const float *in, const int size);
+void ff_aac_quantize_bands_lsx(int *out, const float *in, const float *scaled,
+                               int size, int is_signed, int maxval, const float Q34,
+                               const float rounding);
+
+#endif /* AVCODEC_LOONGARCH_AACENC_H */
\ No newline at end of file
diff --git a/libavcodec/loongarch/aacencdsp_init_loongarch.c b/libavcodec/loongarch/aacencdsp_init_loongarch.c
new file mode 100644
index 0000000000..3b33d50567
--- /dev/null
+++ b/libavcodec/loongarch/aacencdsp_init_loongarch.c
@@ -0,0 +1,33 @@
+/*
+ * AAC encoder assembly optimizations
+ * Copyright (c) 2024 Loongson Technology Corporation Limited
+ * Contributed by PengXu <pengxu@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "aacencdsp.h"
+
+/* Point the DSP context at the LSX routines when the CPU flags report LSX. */
+av_cold void ff_aacenc_dsp_init_loongarch(AACEncDSPContext *s)
+{
+    const int flags = av_get_cpu_flags();
+    if (!have_lsx(flags))
+        return;
+    s->abs_pow34   = ff_abs_pow34_lsx;
+    s->quant_bands = ff_aac_quantize_bands_lsx;
+}
\ No newline at end of file

[FFmpeg-devel,v1,2/2] avcodec/loongarch:add LSX optimization for aac audio encode

Commit Message

Patch