[FFmpeg-devel,4/6] diracdec: avx2 legall

Submitted by James Darnley on July 19, 2018, 2:52 p.m.

Details

Message ID 20180719145252.30613-5-jdarnley@obe.tv
State New
Headers show

Commit Message

James Darnley July 19, 2018, 2:52 p.m.
---
 libavcodec/x86/dirac_dwt_10bit.asm    |  4 +++-
 libavcodec/x86/dirac_dwt_init_10bit.c | 22 ++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/libavcodec/x86/dirac_dwt_10bit.asm b/libavcodec/x86/dirac_dwt_10bit.asm
index 681de5e1df..ae110d2945 100644
--- a/libavcodec/x86/dirac_dwt_10bit.asm
+++ b/libavcodec/x86/dirac_dwt_10bit.asm
@@ -24,7 +24,7 @@ 
 SECTION_RODATA
 
 cextern pd_1
-pd_2: times 4 dd 2
+pd_2: times 8 dd 2
 pd_8: times 4 dd 8
 
 SECTION .text
@@ -204,3 +204,5 @@  HAAR_VERTICAL
 INIT_YMM avx2
 HAAR_HORIZONTAL
 HAAR_VERTICAL
+LEGALL53_VERTICAL_HI
+LEGALL53_VERTICAL_LO
diff --git a/libavcodec/x86/dirac_dwt_init_10bit.c b/libavcodec/x86/dirac_dwt_init_10bit.c
index e7e7534050..51d6eeae93 100644
--- a/libavcodec/x86/dirac_dwt_init_10bit.c
+++ b/libavcodec/x86/dirac_dwt_init_10bit.c
@@ -27,6 +27,8 @@  void ff_dd97_vertical_hi_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int32_t *b3
 
 void ff_legall53_vertical_hi_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int width);
 void ff_legall53_vertical_lo_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int width);
+void ff_legall53_vertical_hi_avx2(int32_t *b0, int32_t *b1, int32_t *b2, int width);
+void ff_legall53_vertical_lo_avx2(int32_t *b0, int32_t *b1, int32_t *b2, int width);
 
 void ff_horizontal_compose_haar_10bit_sse2(int32_t *b0, int32_t *b1, int width_align);
 void ff_horizontal_compose_haar_10bit_avx(int32_t *b0, int32_t *b1, int width_align);
@@ -112,6 +114,22 @@  static void legall53_vertical_hi_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int
         b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]);
 }
 
+static void legall53_vertical_lo_avx2(int32_t *b0, int32_t *b1, int32_t *b2, int width)
+{
+    int i = width & ~7; /* width with its low three bits cleared, i.e. rounded down to a multiple of 8 */
+    ff_legall53_vertical_lo_avx2(b0, b1, b2, i); /* external routine (prototype above) is handed only the first i elements */
+    for(; i<width; i++) /* elements i..width-1 that the call above was not asked to cover are finished in C */
+        b1[i] = COMPOSE_53iL0(b0[i], b1[i], b2[i]); /* b1[i] is overwritten with the macro's result */
+}
+
+static void legall53_vertical_hi_avx2(int32_t *b0, int32_t *b1, int32_t *b2, int width)
+{
+    int i = width & ~7; /* width with its low three bits cleared, i.e. rounded down to a multiple of 8 */
+    ff_legall53_vertical_hi_avx2(b0, b1, b2, i); /* external routine (prototype above) is handed only the first i elements */
+    for(; i<width; i++) /* elements i..width-1 that the call above was not asked to cover are finished in C */
+        b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); /* b1[i] is overwritten with the macro's result */
+}
+
 static void dd97_vertical_hi_sse2(int32_t *b0, int32_t *b1, int32_t *b2,
                                   int32_t *b3, int32_t *b4, int width)
 {
@@ -161,6 +179,10 @@  av_cold void ff_spatial_idwt_init_10bit_x86(DWTContext *d, enum dwt_type type)
 
     if (EXTERNAL_AVX2(cpu_flags)) {
         switch (type) {
+            case DWT_DIRAC_LEGALL5_3:
+                d->vertical_compose_h0 = (void*)legall53_vertical_hi_avx2;
+                d->vertical_compose_l0 = (void*)legall53_vertical_lo_avx2;
+                break;
             case DWT_DIRAC_HAAR0:
                 d->vertical_compose = (void*)vertical_compose_haar_avx2;
                 break;