@@ -89,6 +89,21 @@ LPC_32 sse4, 32, psrlq
LPC_32 xop, 32, psrlq
%endif
+INIT_XMM sse2
+cglobal flac_wasted_32, 3,3,2, decoded, wasted, len ; C side: (int32_t *decoded, int wasted, int len)
+    movd           m1, wastedd          ; m1 = per-dword shift count, loop invariant
+    shl          lend, 2                ; len *= 4: dword count -> byte count
+    add      decodedq, lenq             ; decoded now points just past the last byte
+    neg          lenq                   ; len = negative byte offset, climbs towards 0
+ALIGN 16
+.next_vector:
+    mova           m0, [decodedq+lenq]  ; load mmsize bytes of samples
+    pslld          m0, m1               ; each 32-bit lane <<= wasted
+    mova [decodedq+lenq], m0            ; write the shifted lanes back in place
+    add          lenq, mmsize           ; NOTE(review): whole-vector steps, body runs at least once - confirm callers pad/len > 0
+    jl .next_vector                     ; keep going while the offset is still negative
+    RET
+
;----------------------------------------------------------------------------------
;void ff_flac_decorrelate_[lrm]s_16_sse2(uint8_t **out, int32_t **in, int channels,
; int len, int shift);
@@ -30,6 +30,8 @@ void ff_flac_lpc_32_sse4(int32_t *samples, const int coeffs[32], int order,
void ff_flac_lpc_32_xop(int32_t *samples, const int coeffs[32], int order,
int qlevel, int len);
+void ff_flac_wasted_32_sse2(int32_t *decoded, int wasted, int len);
+
#define DECORRELATE_FUNCS(fmt, opt) \
void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
int len, int shift); \
@@ -63,6 +65,7 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_SSE2(cpu_flags)) {
+ c->wasted32 = ff_flac_wasted_32_sse2;
if (fmt == AV_SAMPLE_FMT_S16) {
c->decorrelate[1] = ff_flac_decorrelate_ls_16_sse2;
c->decorrelate[2] = ff_flac_decorrelate_rs_16_sse2;
flac_wasted_32_c: 851.3 flac_wasted_32_sse2: 53.3 Signed-off-by: James Almer <jamrial@gmail.com> --- libavcodec/x86/flacdsp.asm | 15 +++++++++++++++ libavcodec/x86/flacdsp_init.c | 3 +++ 2 files changed, 18 insertions(+)