@@ -24,7 +24,7 @@
SECTION_RODATA
cextern pd_1
-pd_2: times 4 dd 2
+pd_2: times 8 dd 2 ; 8 dwords (32 bytes), each holding 2; NOTE(review): presumably widened from 4 so ymm-width code can load it in one go - confirm against the users of pd_2
pd_8: times 4 dd 8
SECTION .text
@@ -204,3 +204,5 @@ HAAR_VERTICAL
INIT_YMM avx2
HAAR_HORIZONTAL
HAAR_VERTICAL
+LEGALL53_VERTICAL_HI ; expanded after INIT_YMM avx2; NOTE(review): presumably yields the ff_legall53_vertical_hi_avx2 symbol used on the C side - confirm against the macro body
+LEGALL53_VERTICAL_LO ; expanded after INIT_YMM avx2; NOTE(review): presumably yields the ff_legall53_vertical_lo_avx2 symbol used on the C side - confirm against the macro body
@@ -27,6 +27,8 @@ void ff_dd97_vertical_hi_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int32_t *b3
void ff_legall53_vertical_hi_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int width);
void ff_legall53_vertical_lo_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int width);
+void ff_legall53_vertical_hi_avx2(int32_t *b0, int32_t *b1, int32_t *b2, int width);
+void ff_legall53_vertical_lo_avx2(int32_t *b0, int32_t *b1, int32_t *b2, int width);
void ff_horizontal_compose_haar_10bit_sse2(int32_t *b0, int32_t *b1, int width_align);
void ff_horizontal_compose_haar_10bit_avx(int32_t *b0, int32_t *b1, int width_align);
@@ -112,6 +114,22 @@ static void legall53_vertical_hi_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int
b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]);
}
+static void legall53_vertical_lo_avx2(int32_t *b0, int32_t *b1, int32_t *b2, int width) /* AVX2 front end: asm call for the multiple-of-8 part of width, C loop for the rest */
+{
+ int i = width & ~7; /* width rounded down to a multiple of 8 */
+ ff_legall53_vertical_lo_avx2(b0, b1, b2, i); /* NOTE(review): receives i == 0 when width < 8 - confirm the asm routine copes with a zero count */
+ for(; i<width; i++) /* leftover elements from index i up to width */
+ b1[i] = COMPOSE_53iL0(b0[i], b1[i], b2[i]); /* result is written back into b1 */
+}
+
+static void legall53_vertical_hi_avx2(int32_t *b0, int32_t *b1, int32_t *b2, int width) /* AVX2 front end: asm call for the multiple-of-8 part of width, C loop for the rest */
+{
+ int i = width & ~7; /* width rounded down to a multiple of 8 */
+ ff_legall53_vertical_hi_avx2(b0, b1, b2, i); /* NOTE(review): receives i == 0 when width < 8 - confirm the asm routine copes with a zero count */
+ for(; i<width; i++) /* leftover elements from index i up to width */
+ b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); /* result is written back into b1 */
+}
+
static void dd97_vertical_hi_sse2(int32_t *b0, int32_t *b1, int32_t *b2,
int32_t *b3, int32_t *b4, int width)
{
@@ -161,6 +179,10 @@ av_cold void ff_spatial_idwt_init_10bit_x86(DWTContext *d, enum dwt_type type)
if (EXTERNAL_AVX2(cpu_flags)) {
switch (type) {
+ case DWT_DIRAC_LEGALL5_3:
+ d->vertical_compose_h0 = (void*)legall53_vertical_hi_avx2;
+ d->vertical_compose_l0 = (void*)legall53_vertical_lo_avx2;
+ break;
case DWT_DIRAC_HAAR0:
d->vertical_compose = (void*)vertical_compose_haar_avx2;
break;