[FFmpeg-devel,2/6] avcodec/vp3dsp: add 10 coefficient version of the vp3 idct

Submitted by Peter Ross on Jan. 13, 2019, 8:01 p.m.

Details

Message ID adf3a65ace7bdd5e7ee3ce74c6be1df5afe3eb70.1547408762.git.pross@xvid.org
State New
Headers show

Commit Message

Peter Ross Jan. 13, 2019, 8:01 p.m.
---
 libavcodec/vp3dsp.c | 152 ++++++++++++++++++++++++++++++++++++++++++++
 libavcodec/vp3dsp.h |   3 +
 2 files changed, 155 insertions(+)

Patch hide | download patch | download mbox

diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c
index f049953356..8204188aa8 100644
--- a/libavcodec/vp3dsp.c
+++ b/libavcodec/vp3dsp.c
@@ -195,6 +195,158 @@  static av_always_inline void idct(uint8_t *dst, ptrdiff_t stride,
     }
 }
 
+static av_always_inline void idct10(uint8_t *dst, ptrdiff_t stride,
+                                    int16_t *input, int type)
+{
+    int16_t *ip = input;
+    /* Two 1-D passes: 4 iterations in place over input, then 8 writing dst. */
+    int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
+    int Ed, Gd, Add, Bdd, Fd, Hd;
+    /* type == 1 stores clipped pixels into dst; any other type adds to dst. */
+    int i;
+    /* M() and the xC?S? constants are defined outside this hunk. */
+    /* First 1-D pass ("rows" in the original comment): 4 of 8 lines only */
+    for (i = 0; i < 4; i++) { /* ip steps by 1, elements are 8 apart */
+        /* Check for non-zero values */
+        if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8]) {
+            A =  M(xC1S7, ip[1 * 8]);
+            B =  M(xC7S1, ip[1 * 8]);
+            C =  M(xC3S5, ip[3 * 8]);
+            D = -M(xC5S3, ip[3 * 8]);
+
+            Ad = M(xC4S4, (A - C));
+            Bd = M(xC4S4, (B - D));
+
+            Cd = A + C;
+            Dd = B + D;
+
+            E = M(xC4S4, ip[0 * 8]);
+            F = E; /* no separate input element is read for F */
+
+            G = M(xC2S6, ip[2 * 8]);
+            H = M(xC6S2, ip[2 * 8]);
+
+            Ed = E - G;
+            Gd = E + G;
+
+            Add = F + Ad;
+            Bdd = Bd - H;
+
+            Fd = F - Ad;
+            Hd = Bd + H;
+
+            /* Final sequence of operations over-write original inputs */
+            ip[0 * 8] = Gd + Cd;
+            ip[7 * 8] = Gd - Cd;
+
+            ip[1 * 8] = Add + Hd;
+            ip[2 * 8] = Add - Hd;
+
+            ip[3 * 8] = Ed + Dd;
+            ip[4 * 8] = Ed - Dd;
+
+            ip[5 * 8] = Fd + Bdd;
+            ip[6 * 8] = Fd - Bdd;
+
+        }
+        /* NOTE(review): a skipped line keeps ip[4*8..7*8] as-is; presumably zero on entry - confirm */
+        ip += 1;
+    }
+
+    ip = input;
+    /* Second 1-D pass: reads ip[0..3], writes dst[0 * stride..7 * stride] */
+    for (i = 0; i < 8; i++) { /* ip steps by 8, dst by 1 */
+        /* Check for non-zero values (bitwise or faster than ||) */
+        if (ip[0] | ip[1] | ip[2] | ip[3]) {
+            A =  M(xC1S7, ip[1]);
+            B =  M(xC7S1, ip[1]);
+            C =  M(xC3S5, ip[3]);
+            D = -M(xC5S3, ip[3]);
+
+            Ad = M(xC4S4, (A - C));
+            Bd = M(xC4S4, (B - D));
+
+            Cd = A + C;
+            Dd = B + D;
+
+            E = M(xC4S4, ip[0]);
+            if (type == 1)
+                E += 16 * 128; /* becomes +128 after the >> 4 below */
+            F = E; /* no separate input element is read for F */
+
+            G = M(xC2S6, ip[2]);
+            H = M(xC6S2, ip[2]);
+
+            Ed = E - G;
+            Gd = E + G;
+
+            Add = F + Ad;
+            Bdd = Bd - H;
+
+            Fd = F - Ad;
+            Hd = Bd + H;
+            /* +8 on the first term of each output pair rounds the >> 4 below */
+            Gd += 8;
+            Add += 8;
+            Ed += 8;
+            Fd += 8;
+
+            /* Final sequence of operations writes the 8 results to dst. */
+            if (type == 1) {
+                dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4);
+                dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4);
+
+                dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4);
+                dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4);
+
+                dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4);
+                dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4);
+
+                dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4);
+                dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4);
+            } else { /* add the result to the value already in dst */
+                dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4));
+                dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4));
+
+                dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4));
+                dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4));
+
+                dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4));
+                dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4));
+
+                dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4));
+                dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4));
+            }
+        } else { /* ip[0..3] are all zero */
+            if (type == 1) { /* store 128 everywhere; other types leave dst as is */
+                dst[0*stride] =
+                dst[1*stride] =
+                dst[2*stride] =
+                dst[3*stride] =
+                dst[4*stride] =
+                dst[5*stride] =
+                dst[6*stride] =
+                dst[7*stride] = 128;
+            }
+        }
+
+        ip += 8;
+        dst++;
+    }
+}
+
+void ff_vp3dsp_idct10_put_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
+{
+    idct10(dest, stride, block, 1); /* type 1: result is stored into dest */
+    memset(block, 0, sizeof(*block) * 64); /* zero all 64 coefficients */
+}
+
+void ff_vp3dsp_idct10_add_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
+{
+    idct10(dest, stride, block, 2); /* type != 1: result is added to dest */
+    memset(block, 0, sizeof(*block) * 64); /* zero all 64 coefficients */
+}
+
 static void vp3_idct_put_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
                            int16_t *block /* align 16 */)
 {
diff --git a/libavcodec/vp3dsp.h b/libavcodec/vp3dsp.h
index fe77d69ebf..ee5662ad6d 100644
--- a/libavcodec/vp3dsp.h
+++ b/libavcodec/vp3dsp.h
@@ -47,6 +47,9 @@  typedef struct VP3DSPContext {
     void (*h_loop_filter_12)(uint8_t *src, ptrdiff_t stride, int *bounding_values);
 } VP3DSPContext;
 
+void ff_vp3dsp_idct10_put_c(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+void ff_vp3dsp_idct10_add_c(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+
 void ff_vp4_v_loop_filter_12_c(uint8_t *first_pixel, ptrdiff_t stride, int *bounding_values);
 void ff_vp4_h_loop_filter_12_c(uint8_t *first_pixel, ptrdiff_t stride, int *bounding_values);