diff mbox

[FFmpeg-devel,1/9] vp9dsp: Deduplicate the subpel filters

Message ID 1479119547-7392-1-git-send-email-martin@martin.st
State Accepted
Commit 6409e9b6ccde39895a93f48212078e6c0872f8e7
Headers show

Commit Message

Martin Storsjö Nov. 14, 2016, 10:32 a.m. UTC
Make them aligned, to allow efficient access to them from simd.

This is an adapted cherry-pick from libav commit
a4cfcddcb0f76e837d5abc06840c2b26c0e8aefc.
---
 libavcodec/vp9dsp.c          | 56 +++++++++++++++++++++++++++++++++++++++
 libavcodec/vp9dsp.h          |  3 +++
 libavcodec/vp9dsp_template.c | 63 +++-----------------------------------------
 3 files changed, 63 insertions(+), 59 deletions(-)

Comments

Ronald S. Bultje Nov. 14, 2016, 12:58 p.m. UTC | #1
Hi,

On Mon, Nov 14, 2016 at 5:32 AM, Martin Storsjö <martin@martin.st> wrote:

> Make them aligned, to allow efficient access to them from simd.
>
> This is an adapted cherry-pick from libav commit
> a4cfcddcb0f76e837d5abc06840c2b26c0e8aefc.
> ---
>  libavcodec/vp9dsp.c          | 56 +++++++++++++++++++++++++++++++++++++++
>  libavcodec/vp9dsp.h          |  3 +++
>  libavcodec/vp9dsp_template.c | 63 +++-----------------------------------------
>  3 files changed, 63 insertions(+), 59 deletions(-)


OK.

Do I need to queue them up? I thought they would be merged automagically
from Libav...

Ronald
Martin Storsjö Nov. 14, 2016, 1:10 p.m. UTC | #2
On Mon, 14 Nov 2016, Ronald S. Bultje wrote:

> Hi,
>
> On Mon, Nov 14, 2016 at 5:32 AM, Martin Storsjö <martin@martin.st> wrote:
>
>> Make them aligned, to allow efficient access to them from simd.
>>
>> This is an adapted cherry-pick from libav commit
>> a4cfcddcb0f76e837d5abc06840c2b26c0e8aefc.
>> ---
>>  libavcodec/vp9dsp.c          | 56 +++++++++++++++++++++++++++++++++++++++
>>  libavcodec/vp9dsp.h          |  3 +++
>>  libavcodec/vp9dsp_template.c | 63 +++-----------------------------------------
>>  3 files changed, 63 insertions(+), 59 deletions(-)
>
>
> OK.
>
> Do I need to queue them up?

Yes, that'd be appreciated.

> I thought they would be merged automagically from Libav...

In principle, but the merging is quite far behind at the moment. I've 
included the commit hashes of all included commits to make it clear which 
commits can be no-oped in future merges at least.

Also for the record, it has been tested on linux, iOS and with the MSVC 
toolchain (in wine).

// Martin
Michael Niedermayer Nov. 14, 2016, 2:29 p.m. UTC | #3
On Mon, Nov 14, 2016 at 12:32:19PM +0200, Martin Storsjö wrote:
> Make them aligned, to allow efficient access to them from simd.
> 
> This is an adapted cherry-pick from libav commit
> a4cfcddcb0f76e837d5abc06840c2b26c0e8aefc.
> ---
>  libavcodec/vp9dsp.c          | 56 +++++++++++++++++++++++++++++++++++++++
>  libavcodec/vp9dsp.h          |  3 +++
>  libavcodec/vp9dsp_template.c | 63 +++-----------------------------------------
>  3 files changed, 63 insertions(+), 59 deletions(-)

patchset tested with fate on arm-qemu, all fate tests pass

[...]
Ronald S. Bultje Nov. 15, 2016, 8:14 p.m. UTC | #4
Hi,

On Mon, Nov 14, 2016 at 9:29 AM, Michael Niedermayer <michael@niedermayer.cc> wrote:

> On Mon, Nov 14, 2016 at 12:32:19PM +0200, Martin Storsjö wrote:
> > Make them aligned, to allow efficient access to them from simd.
> >
> > This is an adapted cherry-pick from libav commit
> > a4cfcddcb0f76e837d5abc06840c2b26c0e8aefc.
> > ---
> >  libavcodec/vp9dsp.c          | 56 +++++++++++++++++++++++++++++++++++++++
> >  libavcodec/vp9dsp.h          |  3 +++
> >  libavcodec/vp9dsp_template.c | 63 +++-----------------------------------------
> >  3 files changed, 63 insertions(+), 59 deletions(-)
>
> patchset tested with fate on arm-qemu, all fate tests pass


Thanks for testing, and thanks for the patchset.

Pushed.

Ronald
diff mbox

Patch

diff --git a/libavcodec/vp9dsp.c b/libavcodec/vp9dsp.c
index 54e77e2..6dd49c8 100644
--- a/libavcodec/vp9dsp.c
+++ b/libavcodec/vp9dsp.c
@@ -25,6 +25,62 @@ 
 #include "libavutil/common.h"
 #include "vp9dsp.h"
 
+const DECLARE_ALIGNED(16, int16_t, ff_vp9_subpel_filters)[3][16][8] = {
+    [FILTER_8TAP_REGULAR] = {
+        {  0,  0,   0, 128,   0,   0,  0,  0 },
+        {  0,  1,  -5, 126,   8,  -3,  1,  0 },
+        { -1,  3, -10, 122,  18,  -6,  2,  0 },
+        { -1,  4, -13, 118,  27,  -9,  3, -1 },
+        { -1,  4, -16, 112,  37, -11,  4, -1 },
+        { -1,  5, -18, 105,  48, -14,  4, -1 },
+        { -1,  5, -19,  97,  58, -16,  5, -1 },
+        { -1,  6, -19,  88,  68, -18,  5, -1 },
+        { -1,  6, -19,  78,  78, -19,  6, -1 },
+        { -1,  5, -18,  68,  88, -19,  6, -1 },
+        { -1,  5, -16,  58,  97, -19,  5, -1 },
+        { -1,  4, -14,  48, 105, -18,  5, -1 },
+        { -1,  4, -11,  37, 112, -16,  4, -1 },
+        { -1,  3,  -9,  27, 118, -13,  4, -1 },
+        {  0,  2,  -6,  18, 122, -10,  3, -1 },
+        {  0,  1,  -3,   8, 126,  -5,  1,  0 },
+    }, [FILTER_8TAP_SHARP] = {
+        {  0,  0,   0, 128,   0,   0,  0,  0 },
+        { -1,  3,  -7, 127,   8,  -3,  1,  0 },
+        { -2,  5, -13, 125,  17,  -6,  3, -1 },
+        { -3,  7, -17, 121,  27, -10,  5, -2 },
+        { -4,  9, -20, 115,  37, -13,  6, -2 },
+        { -4, 10, -23, 108,  48, -16,  8, -3 },
+        { -4, 10, -24, 100,  59, -19,  9, -3 },
+        { -4, 11, -24,  90,  70, -21, 10, -4 },
+        { -4, 11, -23,  80,  80, -23, 11, -4 },
+        { -4, 10, -21,  70,  90, -24, 11, -4 },
+        { -3,  9, -19,  59, 100, -24, 10, -4 },
+        { -3,  8, -16,  48, 108, -23, 10, -4 },
+        { -2,  6, -13,  37, 115, -20,  9, -4 },
+        { -2,  5, -10,  27, 121, -17,  7, -3 },
+        { -1,  3,  -6,  17, 125, -13,  5, -2 },
+        {  0,  1,  -3,   8, 127,  -7,  3, -1 },
+    }, [FILTER_8TAP_SMOOTH] = {
+        {  0,  0,   0, 128,   0,   0,  0,  0 },
+        { -3, -1,  32,  64,  38,   1, -3,  0 },
+        { -2, -2,  29,  63,  41,   2, -3,  0 },
+        { -2, -2,  26,  63,  43,   4, -4,  0 },
+        { -2, -3,  24,  62,  46,   5, -4,  0 },
+        { -2, -3,  21,  60,  49,   7, -4,  0 },
+        { -1, -4,  18,  59,  51,   9, -4,  0 },
+        { -1, -4,  16,  57,  53,  12, -4, -1 },
+        { -1, -4,  14,  55,  55,  14, -4, -1 },
+        { -1, -4,  12,  53,  57,  16, -4, -1 },
+        {  0, -4,   9,  51,  59,  18, -4, -1 },
+        {  0, -4,   7,  49,  60,  21, -3, -2 },
+        {  0, -4,   5,  46,  62,  24, -3, -2 },
+        {  0, -4,   4,  43,  63,  26, -2, -2 },
+        {  0, -3,   2,  41,  63,  29, -2, -2 },
+        {  0, -3,   1,  38,  64,  32, -1, -3 },
+    }
+};
+
+
 av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact)
 {
     if (bpp == 8) {
diff --git a/libavcodec/vp9dsp.h b/libavcodec/vp9dsp.h
index 733f5bf..cb43f5e 100644
--- a/libavcodec/vp9dsp.h
+++ b/libavcodec/vp9dsp.h
@@ -120,6 +120,9 @@  typedef struct VP9DSPContext {
     vp9_scaled_mc_func smc[5][4][2];
 } VP9DSPContext;
 
+
+extern const int16_t ff_vp9_subpel_filters[3][16][8];
+
 void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact);
 
 void ff_vp9dsp_init_8(VP9DSPContext *dsp);
diff --git a/libavcodec/vp9dsp_template.c b/libavcodec/vp9dsp_template.c
index 4d810fe..bb54561 100644
--- a/libavcodec/vp9dsp_template.c
+++ b/libavcodec/vp9dsp_template.c
@@ -1991,61 +1991,6 @@  copy_avg_fn(4)
 
 #endif /* BIT_DEPTH != 12 */
 
-static const int16_t vp9_subpel_filters[3][16][8] = {
-    [FILTER_8TAP_REGULAR] = {
-        {  0,  0,   0, 128,   0,   0,  0,  0 },
-        {  0,  1,  -5, 126,   8,  -3,  1,  0 },
-        { -1,  3, -10, 122,  18,  -6,  2,  0 },
-        { -1,  4, -13, 118,  27,  -9,  3, -1 },
-        { -1,  4, -16, 112,  37, -11,  4, -1 },
-        { -1,  5, -18, 105,  48, -14,  4, -1 },
-        { -1,  5, -19,  97,  58, -16,  5, -1 },
-        { -1,  6, -19,  88,  68, -18,  5, -1 },
-        { -1,  6, -19,  78,  78, -19,  6, -1 },
-        { -1,  5, -18,  68,  88, -19,  6, -1 },
-        { -1,  5, -16,  58,  97, -19,  5, -1 },
-        { -1,  4, -14,  48, 105, -18,  5, -1 },
-        { -1,  4, -11,  37, 112, -16,  4, -1 },
-        { -1,  3,  -9,  27, 118, -13,  4, -1 },
-        {  0,  2,  -6,  18, 122, -10,  3, -1 },
-        {  0,  1,  -3,   8, 126,  -5,  1,  0 },
-    }, [FILTER_8TAP_SHARP] = {
-        {  0,  0,   0, 128,   0,   0,  0,  0 },
-        { -1,  3,  -7, 127,   8,  -3,  1,  0 },
-        { -2,  5, -13, 125,  17,  -6,  3, -1 },
-        { -3,  7, -17, 121,  27, -10,  5, -2 },
-        { -4,  9, -20, 115,  37, -13,  6, -2 },
-        { -4, 10, -23, 108,  48, -16,  8, -3 },
-        { -4, 10, -24, 100,  59, -19,  9, -3 },
-        { -4, 11, -24,  90,  70, -21, 10, -4 },
-        { -4, 11, -23,  80,  80, -23, 11, -4 },
-        { -4, 10, -21,  70,  90, -24, 11, -4 },
-        { -3,  9, -19,  59, 100, -24, 10, -4 },
-        { -3,  8, -16,  48, 108, -23, 10, -4 },
-        { -2,  6, -13,  37, 115, -20,  9, -4 },
-        { -2,  5, -10,  27, 121, -17,  7, -3 },
-        { -1,  3,  -6,  17, 125, -13,  5, -2 },
-        {  0,  1,  -3,   8, 127,  -7,  3, -1 },
-    }, [FILTER_8TAP_SMOOTH] = {
-        {  0,  0,   0, 128,   0,   0,  0,  0 },
-        { -3, -1,  32,  64,  38,   1, -3,  0 },
-        { -2, -2,  29,  63,  41,   2, -3,  0 },
-        { -2, -2,  26,  63,  43,   4, -4,  0 },
-        { -2, -3,  24,  62,  46,   5, -4,  0 },
-        { -2, -3,  21,  60,  49,   7, -4,  0 },
-        { -1, -4,  18,  59,  51,   9, -4,  0 },
-        { -1, -4,  16,  57,  53,  12, -4, -1 },
-        { -1, -4,  14,  55,  55,  14, -4, -1 },
-        { -1, -4,  12,  53,  57,  16, -4, -1 },
-        {  0, -4,   9,  51,  59,  18, -4, -1 },
-        {  0, -4,   7,  49,  60,  21, -3, -2 },
-        {  0, -4,   5,  46,  62,  24, -3, -2 },
-        {  0, -4,   4,  43,  63,  26, -2, -2 },
-        {  0, -3,   2,  41,  63,  29, -2, -2 },
-        {  0, -3,   1,  38,  64,  32, -1, -3 },
-    }
-};
-
 #define FILTER_8TAP(src, x, F, stride) \
     av_clip_pixel((F[0] * src[x + -3 * stride] + \
                    F[1] * src[x + -2 * stride] + \
@@ -2155,7 +2100,7 @@  static void avg##_8tap_##type##_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride
                                               int h, int mx, int my) \
 { \
     avg##_8tap_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, \
-                            vp9_subpel_filters[type_idx][dir_m]); \
+                            ff_vp9_subpel_filters[type_idx][dir_m]); \
 }
 
 #define filter_fn_2d(sz, type, type_idx, avg) \
@@ -2164,8 +2109,8 @@  static void avg##_8tap_##type##_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                            int h, int mx, int my) \
 { \
     avg##_8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \
-                       vp9_subpel_filters[type_idx][mx], \
-                       vp9_subpel_filters[type_idx][my]); \
+                       ff_vp9_subpel_filters[type_idx][mx], \
+                       ff_vp9_subpel_filters[type_idx][my]); \
 }
 
 #if BIT_DEPTH != 12
@@ -2454,7 +2399,7 @@  static void avg##_scaled_##type##_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
                                            int h, int mx, int my, int dx, int dy) \
 { \
     avg##_scaled_8tap_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy, \
-                        vp9_subpel_filters[type_idx]); \
+                        ff_vp9_subpel_filters[type_idx]); \
 }
 
 #if BIT_DEPTH != 12