[FFmpeg-devel,05/10] h264_idct8_add

Submitted by James Darnley on March 17, 2017, 1:18 p.m.

Details

Message ID 20170317131845.7760-6-jdarnley@obe.tv
State New
Headers show

Commit Message

James Darnley March 17, 2017, 1:18 p.m.
1.01x faster than the sse2 version (sse2: 1069±1.9 vs. avx: 1060±0.7 decicycles)
---
 libavcodec/x86/h264_idct.asm  | 5 +++++
 libavcodec/x86/h264dsp_init.c | 2 ++
 2 files changed, 7 insertions(+)

Comments

Carl Eugen Hoyos March 20, 2017, 8:48 a.m.
2017-03-17 14:18 GMT+01:00 James Darnley <jdarnley@obe.tv>:
> 1.01x faster (1069±1.9 vs. 1060±0.7 decicycles) compared with sse2

So does this patch make sense?

Carl Eugen

Patch hide | download patch | download mbox

diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index 24fb4d2..ca8ffdb 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -1162,3 +1162,8 @@  cglobal h264_idct_add_8, 3, 3, 8, dst_, block_, stride_
     movsxdifnidn stride_q, stride_d
     IDCT4_ADD    dst_q, block_q, stride_q
 RET
+
+cglobal h264_idct8_add_8, 3, 4, 10, dst_, block_, stride_
+    movsxdifnidn stride_q, stride_d
+    IDCT8_ADD_SSE dst_q, block_q, stride_q, r3
+RET
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index 8ba085f..2172a71 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -40,6 +40,7 @@  IDCT_ADD_FUNC(8_dc, 8, mmxext)
 IDCT_ADD_FUNC(8_dc, 10, sse2)
 IDCT_ADD_FUNC(8, 8, mmx)
 IDCT_ADD_FUNC(8, 8, sse2)
+IDCT_ADD_FUNC(8, 8, avx)
 IDCT_ADD_FUNC(8, 10, sse2)
 IDCT_ADD_FUNC(, 10, avx)
 IDCT_ADD_FUNC(8_dc, 10, avx)
@@ -340,6 +341,7 @@  av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
             }
 
             c->h264_idct_add        = ff_h264_idct_add_8_avx;
+            c->h264_idct8_add       = ff_h264_idct8_add_8_avx;
         }
     } else if (bit_depth == 10) {
         if (EXTERNAL_MMXEXT(cpu_flags)) {