[FFmpeg-devel,4/6] ac3enc_fixed: drop unnecessary fixed-point DSP code

Message ID	MQca41g--3-2@lynne.ee
State	Superseded
Headers	show Return-Path: <ffmpeg-devel-bounces@ffmpeg.org> Date: Sat, 9 Jan 2021 20:21:05 +0100 (CET) From: Lynne <dev@lynne.ee> To: Ffmpeg Devel <ffmpeg-devel@ffmpeg.org> Message-ID: <MQca41g--3-2@lynne.ee> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="----=_Part_643796_1992043701.1610220065474" Subject: [FFmpeg-devel] [PATCH 4/6] ac3enc_fixed: drop unnecessary fixed-point DSP code Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Series	[FFmpeg-devel,1/6] ac3enc_fixed: convert to 32-bit sample format \| expand [FFmpeg-devel,1/6] ac3enc_fixed: convert to 32-bit sample format [FFmpeg-devel,2/6] ac3enc: do not clip coefficients after transforms [FFmpeg-devel,3/6] ac3enc: halve the MDCT window size by using vector_fmul_reverse [FFmpeg-devel,4/6] ac3enc_fixed: drop unnecessary fixed-point DSP code [FFmpeg-devel,5/6] mdct_fixed: remove 16-bit MDCT code [FFmpeg-devel,6/6] fft_fixed: remove 16-bit FFT code

Context	Check	Description
andriy/x86_make	success	Make finished
andriy/x86_make_fate	fail	Make fate failed
andriy/PPC64_make	success	Make finished
andriy/PPC64_make_fate	warning	Make fate failed

diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c index 382f87c05f..85c721dd3b 100644 --- a/libavcodec/ac3dsp.c +++ b/libavcodec/ac3dsp.c @@ -46,49 +46,6 @@ static void ac3_exponent_min_c(uint8_t *exp, int num_reuse_blocks, int nb_coefs) } } -static int ac3_max_msb_abs_int16_c(const int16_t *src, int len) -{ - int i, v = 0; - for (i = 0; i < len; i++) - v |= abs(src[i]); - return v; -} - -static void ac3_lshift_int16_c(int16_t *src, unsigned int len, - unsigned int shift) -{ - uint32_t *src32 = (uint32_t *)src; - const uint32_t mask = ~(((1 << shift) - 1) << 16); - int i; - len >>= 1; - for (i = 0; i < len; i += 8) { - src32[i ] = (src32[i ] << shift) & mask; - src32[i+1] = (src32[i+1] << shift) & mask; - src32[i+2] = (src32[i+2] << shift) & mask; - src32[i+3] = (src32[i+3] << shift) & mask; - src32[i+4] = (src32[i+4] << shift) & mask; - src32[i+5] = (src32[i+5] << shift) & mask; - src32[i+6] = (src32[i+6] << shift) & mask; - src32[i+7] = (src32[i+7] << shift) & mask; - } -} - -static void ac3_rshift_int32_c(int32_t *src, unsigned int len, - unsigned int shift) -{ - do { - *src++ >>= shift; - *src++ >>= shift; - *src++ >>= shift; - *src++ >>= shift; - *src++ >>= shift; - *src++ >>= shift; - *src++ >>= shift; - *src++ >>= shift; - len -= 8; - } while (len > 0); -} - static void float_to_fixed24_c(int32_t *dst, const float *src, unsigned int len) { const float scale = 1 << 24; @@ -376,19 +333,6 @@ void ff_ac3dsp_downmix_fixed(AC3DSPContext *c, int32_t **samples, int16_t **matr ac3_downmix_c_fixed(samples, matrix, out_ch, in_ch, len); } -static void apply_window_int16_c(int16_t *output, const int16_t *input, - const int16_t *window, unsigned int len) -{ - int i; - int len2 = len >> 1; - - for (i = 0; i < len2; i++) { - int16_t w = window[i]; - output[i] = (MUL16(input[i], w) + (1 << 14)) >> 15; - output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15; - } -} - void ff_ac3dsp_downmix(AC3DSPContext *c, float **samples, float **matrix, int out_ch, int in_ch, int len) { @@ -424,9 +368,6 @@ void ff_ac3dsp_downmix(AC3DSPContext *c, float **samples, float **matrix, av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact) { c->ac3_exponent_min = ac3_exponent_min_c; - c->ac3_max_msb_abs_int16 = ac3_max_msb_abs_int16_c; - c->ac3_lshift_int16 = ac3_lshift_int16_c; - c->ac3_rshift_int32 = ac3_rshift_int32_c; c->float_to_fixed24 = float_to_fixed24_c; c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_c; c->update_bap_counts = ac3_update_bap_counts_c; @@ -438,7 +379,6 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact) c->out_channels = 0; c->downmix = NULL; c->downmix_fixed = NULL; - c->apply_window_int16 = apply_window_int16_c; if (ARCH_ARM) ff_ac3dsp_init_arm(c, bit_exact); diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h index 161de4cb86..a23b11526e 100644 --- a/libavcodec/ac3dsp.h +++ b/libavcodec/ac3dsp.h @@ -42,39 +42,6 @@ typedef struct AC3DSPContext { */ void (*ac3_exponent_min)(uint8_t *exp, int num_reuse_blocks, int nb_coefs); - /** - * Calculate the maximum MSB of the absolute value of each element in an - * array of int16_t. - * @param src input array - * constraints: align 16. values must be in range [-32767,32767] - * @param len number of values in the array - * constraints: multiple of 16 greater than 0 - * @return a value with the same MSB as max(abs(src[])) - */ - int (*ac3_max_msb_abs_int16)(const int16_t *src, int len); - - /** - * Left-shift each value in an array of int16_t by a specified amount. - * @param src input array - * constraints: align 16 - * @param len number of values in the array - * constraints: multiple of 32 greater than 0 - * @param shift left shift amount - * constraints: range [0,15] - */ - void (*ac3_lshift_int16)(int16_t *src, unsigned int len, unsigned int shift); - - /** - * Right-shift each value in an array of int32_t by a specified amount. - * @param src input array - * constraints: align 16 - * @param len number of values in the array - * constraints: multiple of 16 greater than 0 - * @param shift right shift amount - * constraints: range [0,31] - */ - void (*ac3_rshift_int32)(int32_t *src, unsigned int len, unsigned int shift); - /** * Convert an array of float in range [-1.0,1.0] to int32_t with range * [-(1<<24),(1<<24)] @@ -136,20 +103,6 @@ typedef struct AC3DSPContext { int in_channels; void (*downmix)(float **samples, float **matrix, int len); void (*downmix_fixed)(int32_t **samples, int16_t **matrix, int len); - - /** - * Apply symmetric window in 16-bit fixed-point. - * @param output destination array - * constraints: 16-byte aligned - * @param input source array - * constraints: 16-byte aligned - * @param window window array - * constraints: 16-byte aligned, at least len/2 elements - * @param len full window length - * constraints: multiple of ? greater than zero - */ - void (*apply_window_int16)(int16_t *output, const int16_t *input, - const int16_t *window, unsigned int len); } AC3DSPContext; void ff_ac3dsp_init (AC3DSPContext *c, int bit_exact); diff --git a/libavcodec/ac3tab.c b/libavcodec/ac3tab.c index d018110331..99307218cc 100644 --- a/libavcodec/ac3tab.c +++ b/libavcodec/ac3tab.c @@ -147,44 +147,6 @@ const uint8_t ff_eac3_default_cpl_band_struct[18] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1 }; -/* AC-3 MDCT window */ - -/* MDCT window */ -DECLARE_ALIGNED(16, const int16_t, ff_ac3_window)[AC3_WINDOW_SIZE/2] = { - 4, 7, 12, 16, 21, 28, 34, 42, - 51, 61, 72, 84, 97, 111, 127, 145, - 164, 184, 207, 231, 257, 285, 315, 347, - 382, 419, 458, 500, 544, 591, 641, 694, - 750, 810, 872, 937, 1007, 1079, 1155, 1235, - 1318, 1406, 1497, 1593, 1692, 1796, 1903, 2016, - 2132, 2253, 2379, 2509, 2644, 2783, 2927, 3076, - 3230, 3389, 3552, 3721, 3894, 4072, 4255, 4444, - 4637, 4835, 5038, 5246, 5459, 5677, 5899, 6127, - 6359, 6596, 6837, 7083, 7334, 7589, 7848, 8112, - 8380, 8652, 8927, 9207, 9491, 9778,10069,10363, -10660,10960,11264,11570,11879,12190,12504,12820, -13138,13458,13780,14103,14427,14753,15079,15407, -15735,16063,16392,16720,17049,17377,17705,18032, -18358,18683,19007,19330,19651,19970,20287,20602, -20914,21225,21532,21837,22139,22438,22733,23025, -23314,23599,23880,24157,24430,24699,24964,25225, -25481,25732,25979,26221,26459,26691,26919,27142, -27359,27572,27780,27983,28180,28373,28560,28742, -28919,29091,29258,29420,29577,29729,29876,30018, -30155,30288,30415,30538,30657,30771,30880,30985, -31086,31182,31274,31363,31447,31528,31605,31678, -31747,31814,31877,31936,31993,32046,32097,32145, -32190,32232,32272,32310,32345,32378,32409,32438, -32465,32490,32513,32535,32556,32574,32592,32608, -32623,32636,32649,32661,32671,32681,32690,32698, -32705,32712,32718,32724,32729,32733,32737,32741, -32744,32747,32750,32752,32754,32756,32757,32759, -32760,32761,32762,32763,32764,32764,32765,32765, -32766,32766,32766,32766,32767,32767,32767,32767, -32767,32767,32767,32767,32767,32767,32767,32767, -32767,32767,32767,32767,32767,32767,32767,32767, -}; - const uint8_t ff_ac3_log_add_tab[260]= { 0x40,0x3f,0x3e,0x3d,0x3c,0x3b,0x3a,0x39,0x38,0x37, 0x36,0x35,0x34,0x34,0x33,0x32,0x31,0x30,0x2f,0x2f, diff --git a/libavcodec/ac3tab.h b/libavcodec/ac3tab.h index 1d1264e3fc..a0036a301b 100644 --- a/libavcodec/ac3tab.h +++ b/libavcodec/ac3tab.h @@ -37,7 +37,6 @@ extern const int ff_ac3_sample_rate_tab[]; extern const uint16_t ff_ac3_bitrate_tab[19]; extern const uint8_t ff_ac3_rematrix_band_tab[5]; extern const uint8_t ff_eac3_default_cpl_band_struct[18]; -extern const int16_t ff_ac3_window[AC3_WINDOW_SIZE/2]; extern const uint8_t ff_ac3_log_add_tab[260]; extern const uint16_t ff_ac3_hearing_threshold_tab[AC3_CRITICAL_BANDS][3]; extern const uint8_t ff_ac3_bap_tab[64]; diff --git a/libavcodec/arm/ac3dsp_init_arm.c b/libavcodec/arm/ac3dsp_init_arm.c index a3c32ff407..9217a7d0c2 100644 --- a/libavcodec/arm/ac3dsp_init_arm.c +++ b/libavcodec/arm/ac3dsp_init_arm.c @@ -26,13 +26,8 @@ #include "config.h" void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs); -int ff_ac3_max_msb_abs_int16_neon(const int16_t *src, int len); -void ff_ac3_lshift_int16_neon(int16_t *src, unsigned len, unsigned shift); -void ff_ac3_rshift_int32_neon(int32_t *src, unsigned len, unsigned shift); void ff_float_to_fixed24_neon(int32_t *dst, const float *src, unsigned int len); void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs); -void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src, - const int16_t *window, unsigned n); void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4], const int32_t *coef0, const int32_t *coef1, @@ -61,12 +56,8 @@ av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact) if (have_neon(cpu_flags)) { c->ac3_exponent_min = ff_ac3_exponent_min_neon; - c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_neon; - c->ac3_lshift_int16 = ff_ac3_lshift_int16_neon; - c->ac3_rshift_int32 = ff_ac3_rshift_int32_neon; c->float_to_fixed24 = ff_float_to_fixed24_neon; c->extract_exponents = ff_ac3_extract_exponents_neon; - c->apply_window_int16 = ff_apply_window_int16_neon; c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon; c->sum_square_butterfly_float = ff_ac3_sum_square_butterfly_float_neon; } diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm index 675ade3101..4ddaa94320 100644 --- a/libavcodec/x86/ac3dsp.asm +++ b/libavcodec/x86/ac3dsp.asm @@ -35,10 +35,6 @@ pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7 cextern pd_1 pd_151: times 4 dd 151 -; used in ff_apply_window_int16() -pb_revwords: SHUFFLE_MASK_W 7, 6, 5, 4, 3, 2, 1, 0 -pd_16384: times 4 dd 16384 - SECTION .text ;----------------------------------------------------------------------------- @@ -81,133 +77,6 @@ AC3_EXPONENT_MIN %endif %undef LOOP_ALIGN -;----------------------------------------------------------------------------- -; int ff_ac3_max_msb_abs_int16(const int16_t *src, int len) -; -; This function uses 2 different methods to calculate a valid result. -; 1) logical 'or' of abs of each element -; This is used for ssse3 because of the pabsw instruction. -; It is also used for mmx because of the lack of min/max instructions. -; 2) calculate min/max for the array, then or(abs(min),abs(max)) -; This is used for mmxext and sse2 because they have pminsw/pmaxsw. -;----------------------------------------------------------------------------- - -; logical 'or' of 4 or 8 words in an mmx or xmm register into the low word -%macro OR_WORDS_HORIZ 2 ; src, tmp -%if cpuflag(sse2) - movhlps %2, %1 - por %1, %2 - pshuflw %2, %1, q0032 - por %1, %2 - pshuflw %2, %1, q0001 - por %1, %2 -%elif cpuflag(mmxext) - pshufw %2, %1, q0032 - por %1, %2 - pshufw %2, %1, q0001 - por %1, %2 -%else ; mmx - movq %2, %1 - psrlq %2, 32 - por %1, %2 - movq %2, %1 - psrlq %2, 16 - por %1, %2 -%endif -%endmacro - -%macro AC3_MAX_MSB_ABS_INT16 1 -cglobal ac3_max_msb_abs_int16, 2,2,5, src, len - pxor m2, m2 - pxor m3, m3 -.loop: -%ifidn %1, min_max - mova m0, [srcq] - mova m1, [srcq+mmsize] - pminsw m2, m0 - pminsw m2, m1 - pmaxsw m3, m0 - pmaxsw m3, m1 -%else ; or_abs -%if notcpuflag(ssse3) - mova m0, [srcq] - mova m1, [srcq+mmsize] - ABS2 m0, m1, m3, m4 -%else ; ssse3 - ; using memory args is faster for ssse3 - pabsw m0, [srcq] - pabsw m1, [srcq+mmsize] -%endif - por m2, m0 - por m2, m1 -%endif - add srcq, mmsize*2 - sub lend, mmsize - ja .loop -%ifidn %1, min_max - ABS2 m2, m3, m0, m1 - por m2, m3 -%endif - OR_WORDS_HORIZ m2, m0 - movd eax, m2 - and eax, 0xFFFF - RET -%endmacro - -INIT_MMX mmx -AC3_MAX_MSB_ABS_INT16 or_abs -INIT_MMX mmxext -AC3_MAX_MSB_ABS_INT16 min_max -INIT_XMM sse2 -AC3_MAX_MSB_ABS_INT16 min_max -INIT_XMM ssse3 -AC3_MAX_MSB_ABS_INT16 or_abs - -;----------------------------------------------------------------------------- -; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32() -;----------------------------------------------------------------------------- - -%macro AC3_SHIFT 3 ; l/r, 16/32, shift instruction, instruction set -cglobal ac3_%1shift_int%2, 3, 3, 5, src, len, shift - movd m0, shiftd -.loop: - mova m1, [srcq ] - mova m2, [srcq+mmsize ] - mova m3, [srcq+mmsize*2] - mova m4, [srcq+mmsize*3] - %3 m1, m0 - %3 m2, m0 - %3 m3, m0 - %3 m4, m0 - mova [srcq ], m1 - mova [srcq+mmsize ], m2 - mova [srcq+mmsize*2], m3 - mova [srcq+mmsize*3], m4 - add srcq, mmsize*4 - sub lend, mmsize*32/%2 - ja .loop -.end: - REP_RET -%endmacro - -;----------------------------------------------------------------------------- -; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift) -;----------------------------------------------------------------------------- - -INIT_MMX mmx -AC3_SHIFT l, 16, psllw -INIT_XMM sse2 -AC3_SHIFT l, 16, psllw - -;----------------------------------------------------------------------------- -; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift) -;----------------------------------------------------------------------------- - -INIT_MMX mmx -AC3_SHIFT r, 32, psrad -INIT_XMM sse2 -AC3_SHIFT r, 32, psrad - ;----------------------------------------------------------------------------- ; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len) ;----------------------------------------------------------------------------- @@ -423,130 +292,3 @@ AC3_EXTRACT_EXPONENTS INIT_XMM ssse3 AC3_EXTRACT_EXPONENTS %endif - -;----------------------------------------------------------------------------- -; void ff_apply_window_int16(int16_t *output, const int16_t *input, -; const int16_t *window, unsigned int len) -;----------------------------------------------------------------------------- - -%macro REVERSE_WORDS 1-2 -%if cpuflag(ssse3) && notcpuflag(atom) - pshufb %1, %2 -%elif cpuflag(sse2) - pshuflw %1, %1, 0x1B - pshufhw %1, %1, 0x1B - pshufd %1, %1, 0x4E -%elif cpuflag(mmxext) - pshufw %1, %1, 0x1B -%endif -%endmacro - -%macro MUL16FIXED 3 -%if cpuflag(ssse3) ; dst, src, unused -; dst = ((dst * src) + (1<<14)) >> 15 - pmulhrsw %1, %2 -%elif cpuflag(mmxext) ; dst, src, temp -; dst = (dst * src) >> 15 -; pmulhw cuts off the bottom bit, so we have to lshift by 1 and add it back -; in from the pmullw result. - mova %3, %1 - pmulhw %1, %2 - pmullw %3, %2 - psrlw %3, 15 - psllw %1, 1 - por %1, %3 -%endif -%endmacro - -%macro APPLY_WINDOW_INT16 1 ; %1 bitexact version -%if %1 -cglobal apply_window_int16, 4,5,6, output, input, window, offset, offset2 -%else -cglobal apply_window_int16_round, 4,5,6, output, input, window, offset, offset2 -%endif - lea offset2q, [offsetq-mmsize] -%if cpuflag(ssse3) && notcpuflag(atom) - mova m5, [pb_revwords] - ALIGN 16 -%elif %1 - mova m5, [pd_16384] -%endif -.loop: -%if cpuflag(ssse3) - ; This version does the 16x16->16 multiplication in-place without expanding - ; to 32-bit. The ssse3 version is bit-identical. - mova m0, [windowq+offset2q] - mova m1, [ inputq+offset2q] - pmulhrsw m1, m0 - REVERSE_WORDS m0, m5 - pmulhrsw m0, [ inputq+offsetq ] - mova [outputq+offset2q], m1 - mova [outputq+offsetq ], m0 -%elif %1 - ; This version expands 16-bit to 32-bit, multiplies by the window, - ; adds 16384 for rounding, right shifts 15, then repacks back to words to - ; save to the output. The window is reversed for the second half. - mova m3, [windowq+offset2q] - mova m4, [ inputq+offset2q] - pxor m0, m0 - punpcklwd m0, m3 - punpcklwd m1, m4 - pmaddwd m0, m1 - paddd m0, m5 - psrad m0, 15 - pxor m2, m2 - punpckhwd m2, m3 - punpckhwd m1, m4 - pmaddwd m2, m1 - paddd m2, m5 - psrad m2, 15 - packssdw m0, m2 - mova [outputq+offset2q], m0 - REVERSE_WORDS m3 - mova m4, [ inputq+offsetq] - pxor m0, m0 - punpcklwd m0, m3 - punpcklwd m1, m4 - pmaddwd m0, m1 - paddd m0, m5 - psrad m0, 15 - pxor m2, m2 - punpckhwd m2, m3 - punpckhwd m1, m4 - pmaddwd m2, m1 - paddd m2, m5 - psrad m2, 15 - packssdw m0, m2 - mova [outputq+offsetq], m0 -%else - ; This version does the 16x16->16 multiplication in-place without expanding - ; to 32-bit. The mmxext and sse2 versions do not use rounding, and - ; therefore are not bit-identical to the C version. - mova m0, [windowq+offset2q] - mova m1, [ inputq+offset2q] - mova m2, [ inputq+offsetq ] - MUL16FIXED m1, m0, m3 - REVERSE_WORDS m0 - MUL16FIXED m2, m0, m3 - mova [outputq+offset2q], m1 - mova [outputq+offsetq ], m2 -%endif - add offsetd, mmsize - sub offset2d, mmsize - jae .loop - REP_RET -%endmacro - -INIT_MMX mmxext -APPLY_WINDOW_INT16 0 -INIT_XMM sse2 -APPLY_WINDOW_INT16 0 - -INIT_MMX mmxext -APPLY_WINDOW_INT16 1 -INIT_XMM sse2 -APPLY_WINDOW_INT16 1 -INIT_XMM ssse3 -APPLY_WINDOW_INT16 1 -INIT_XMM ssse3, atom -APPLY_WINDOW_INT16 1 diff --git a/libavcodec/x86/ac3dsp_init.c b/libavcodec/x86/ac3dsp_init.c index 2e7e2fb6da..2ae762af46 100644 --- a/libavcodec/x86/ac3dsp_init.c +++ b/libavcodec/x86/ac3dsp_init.c @@ -30,17 +30,6 @@ void ff_ac3_exponent_min_mmx (uint8_t *exp, int num_reuse_blocks, int nb_coefs void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs); void ff_ac3_exponent_min_sse2 (uint8_t *exp, int num_reuse_blocks, int nb_coefs); -int ff_ac3_max_msb_abs_int16_mmx (const int16_t *src, int len); -int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len); -int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len); -int ff_ac3_max_msb_abs_int16_ssse3(const int16_t *src, int len); - -void ff_ac3_lshift_int16_mmx (int16_t *src, unsigned int len, unsigned int shift); -void ff_ac3_lshift_int16_sse2(int16_t *src, unsigned int len, unsigned int shift); - -void ff_ac3_rshift_int32_mmx (int32_t *src, unsigned int len, unsigned int shift); -void ff_ac3_rshift_int32_sse2(int32_t *src, unsigned int len, unsigned int shift); - void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned int len); void ff_float_to_fixed24_sse (int32_t *dst, const float *src, unsigned int len); void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len); @@ -50,28 +39,12 @@ int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]); void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs); void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs); -void ff_apply_window_int16_round_mmxext(int16_t *output, const int16_t *input, - const int16_t *window, unsigned int len); -void ff_apply_window_int16_round_sse2(int16_t *output, const int16_t *input, - const int16_t *window, unsigned int len); -void ff_apply_window_int16_mmxext(int16_t *output, const int16_t *input, - const int16_t *window, unsigned int len); -void ff_apply_window_int16_sse2(int16_t *output, const int16_t *input, - const int16_t *window, unsigned int len); -void ff_apply_window_int16_ssse3(int16_t *output, const int16_t *input, - const int16_t *window, unsigned int len); -void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input, - const int16_t *window, unsigned int len); - av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) { int cpu_flags = av_get_cpu_flags(); if (EXTERNAL_MMX(cpu_flags)) { c->ac3_exponent_min = ff_ac3_exponent_min_mmx; - c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx; - c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx; - c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx; } if (EXTERNAL_AMD3DNOW(cpu_flags)) { if (!bit_exact) { @@ -80,43 +53,20 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) } if (EXTERNAL_MMXEXT(cpu_flags)) { c->ac3_exponent_min = ff_ac3_exponent_min_mmxext; - c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext; - if (bit_exact) { - c->apply_window_int16 = ff_apply_window_int16_mmxext; - } else { - c->apply_window_int16 = ff_apply_window_int16_round_mmxext; - } } if (EXTERNAL_SSE(cpu_flags)) { c->float_to_fixed24 = ff_float_to_fixed24_sse; } if (EXTERNAL_SSE2(cpu_flags)) { c->ac3_exponent_min = ff_ac3_exponent_min_sse2; - c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2; c->float_to_fixed24 = ff_float_to_fixed24_sse2; c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2; c->extract_exponents = ff_ac3_extract_exponents_sse2; - if (bit_exact) { - c->apply_window_int16 = ff_apply_window_int16_sse2; - } - } - - if (EXTERNAL_SSE2_FAST(cpu_flags)) { - c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2; - c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2; - if (!bit_exact) { - c->apply_window_int16 = ff_apply_window_int16_round_sse2; - } } if (EXTERNAL_SSSE3(cpu_flags)) { - c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3; - if (cpu_flags & AV_CPU_FLAG_ATOM) { - c->apply_window_int16 = ff_apply_window_int16_ssse3_atom; - } else { + if (!(cpu_flags & AV_CPU_FLAG_ATOM)) c->extract_exponents = ff_ac3_extract_exponents_ssse3; - c->apply_window_int16 = ff_apply_window_int16_ssse3; - } } }

[FFmpeg-devel,4/6] ac3enc_fixed: drop unnecessary fixed-point DSP code

Checks

Commit Message

Patch