diff mbox

[FFmpeg-devel,1/6] avcodec/h264: add avx 8-bit chroma v deblock/loop filter

Message ID 20170220153324.1732-2-jdarnley@obe.tv
State Accepted
Commit 5c56758843eb5ea8fc39177585a57606a34125bc
Headers show

Commit Message

James Darnley Feb. 20, 2017, 3:33 p.m. UTC
~1.24x faster than the mmxext function (81 vs. 101 cycles)
---
 libavcodec/x86/h264_deblock.asm | 38 ++++++++++++++++++++++++++++++++++++++
 libavcodec/x86/h264dsp_init.c   |  2 ++
 2 files changed, 40 insertions(+)
diff mbox

Patch

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 93caa67..2e84ca3 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -1059,6 +1059,44 @@  ff_chroma_intra_body_mmxext:
     paddb  m2, m6
     ret
 
+%macro CHROMA_INTER_BODY_XMM 1 ; shared filter body; arg = how many punpcklbw doubling steps to apply to the tc0 bytes
+    LOAD_MASK alpha_d, beta_d ; macro defined outside this hunk; NOTE(review): presumably leaves the filter mask in m7 -- confirm
+    movd m6, [tc0_q] ; load 32 bits (four tc0 bytes) into the low dword of m6
+    %rep %1
+        punpcklbw m6, m6 ; interleave the low bytes of m6 with themselves, so every byte is duplicated
+    %endrep
+    pand m7, m6 ; AND m7 with the expanded tc0 bytes
+    DEBLOCK_P0_Q0 ; macro defined outside this hunk; NOTE(review): presumably updates p0/q0 held in m1/m2 -- confirm
+%endmacro
+
+%macro CHROMA_V_START_XMM 1 ; common setup; arg = register that receives pix - 2*stride
+    movsxdifnidn stride_q, stride_d ; x86inc helper: sign-extend the 32-bit stride to 64 bits when the two operand names differ
+    dec alpha_d ; alpha - 1; NOTE(review): presumably the form LOAD_MASK expects -- confirm against its definition
+    dec beta_d ; beta - 1, passed to LOAD_MASK together with alpha
+    mov %1, pix_q ; start from pix ...
+    sub %1, stride_q ; ... step back one stride ...
+    sub %1, stride_q ; ... and a second one: the argument register now holds pix - 2*stride
+%endmacro
+
+%macro DEBLOCK_CHROMA_XMM 1 ; arg = instruction-set name handed to INIT_XMM
+; Emits deblock_v_chroma_8 for that instruction set; the cglobal line requests 5 args, 6 GPRs and 8 vector registers.
+INIT_XMM %1
+
+cglobal deblock_v_chroma_8, 5, 6, 8, pix_, stride_, alpha_, beta_, tc0_
+    CHROMA_V_START_XMM r5 ; r5 = pix - 2*stride; also widens stride and decrements alpha and beta
+    movq m0, [r5] ; 8 bytes at pix - 2*stride
+    movq m1, [r5 + stride_q] ; 8 bytes at pix - stride
+    movq m2, [pix_q] ; 8 bytes at pix
+    movq m3, [pix_q + stride_q] ; 8 bytes at pix + stride
+    CHROMA_INTER_BODY_XMM 1 ; one doubling step: the four tc0 bytes become eight, one per loaded pixel byte
+    movq [r5 + stride_q], m1 ; store m1 back to pix - stride (m0 and m3 are read but never written back)
+    movq [pix_q], m2 ; store m2 back to pix
+RET
+
+%endmacro ; DEBLOCK_CHROMA_XMM
+
+DEBLOCK_CHROMA_XMM avx ; instantiate the AVX build; h264dsp_init.c in this patch references it as ff_deblock_v_chroma_8_avx
+
 ;-----------------------------------------------------------------------------
 ; void ff_h264_loop_filter_strength(int16_t bs[2][4][4], uint8_t nnz[40],
 ;                                   int8_t ref[2][40], int16_t mv[2][40][2],
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index 10f1940..6794aa5 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -317,6 +317,8 @@  av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
 #if ARCH_X86_64
             c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_avx;
 #endif
+
+            c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_8_avx;
         }
     } else if (bit_depth == 10) {
         if (EXTERNAL_MMXEXT(cpu_flags)) {