diff mbox

[FFmpeg-devel] vf_spp: only assign function pointers if permutation matches expectations.

Message ID 1498052818-62867-1-git-send-email-rsbultje@gmail.com
State Superseded
Headers show

Commit Message

Ronald S. Bultje June 21, 2017, 1:46 p.m. UTC
---
 libavfilter/x86/vf_spp.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

Comments

Michael Niedermayer June 22, 2017, 12:42 a.m. UTC | #1
On Wed, Jun 21, 2017 at 09:46:58AM -0400, Ronald S. Bultje wrote:
> ---
>  libavfilter/x86/vf_spp.c | 15 ++++++++++++++-
>  1 file changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/libavfilter/x86/vf_spp.c b/libavfilter/x86/vf_spp.c
> index 45a9eb0..8e3c820 100644
> --- a/libavfilter/x86/vf_spp.c
> +++ b/libavfilter/x86/vf_spp.c
> @@ -217,6 +217,17 @@ static void store_slice_mmx(uint8_t *dst, const int16_t *src,
>  
>  #endif /* HAVE_MMX_INLINE */
>  
> +static const uint8_t simple_mmx_permutation[64] = {
> +    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
> +    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
> +    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
> +    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
> +    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
> +    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
> +    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
> +    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
> +};
> +
>  av_cold void ff_spp_init_x86(SPPContext *s)
>  {
>  #if HAVE_MMX_INLINE
> @@ -226,7 +237,9 @@ av_cold void ff_spp_init_x86(SPPContext *s)
>          int64_t bps;
>          s->store_slice = store_slice_mmx;
>          av_opt_get_int(s->dct, "bits_per_sample", 0, &bps);
> -        if (bps <= 8) {
> +        if (bps <= 8 &&
> +            !memcmp(s->dct->idct_permutation, simple_mmx_permutation,
> +                    sizeof(simple_mmx_permutation))) {

this would disable the SIMD code

also the memcmp() is a very ugly way to check the permutation

[...]
Ronald S. Bultje June 22, 2017, 1:03 a.m. UTC | #2
Hi,

On Wed, Jun 21, 2017 at 8:42 PM, Michael Niedermayer <michael@niedermayer.cc
> wrote:

> On Wed, Jun 21, 2017 at 09:46:58AM -0400, Ronald S. Bultje wrote:
> > ---
> >  libavfilter/x86/vf_spp.c | 15 ++++++++++++++-
> >  1 file changed, 14 insertions(+), 1 deletion(-)
> >
> > diff --git a/libavfilter/x86/vf_spp.c b/libavfilter/x86/vf_spp.c
> > index 45a9eb0..8e3c820 100644
> > --- a/libavfilter/x86/vf_spp.c
> > +++ b/libavfilter/x86/vf_spp.c
> > @@ -217,6 +217,17 @@ static void store_slice_mmx(uint8_t *dst, const
> int16_t *src,
> >
> >  #endif /* HAVE_MMX_INLINE */
> >
> > +static const uint8_t simple_mmx_permutation[64] = {
> > +    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
> > +    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
> > +    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
> > +    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
> > +    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
> > +    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
> > +    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
> > +    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
> > +};
> > +
> >  av_cold void ff_spp_init_x86(SPPContext *s)
> >  {
> >  #if HAVE_MMX_INLINE
> > @@ -226,7 +237,9 @@ av_cold void ff_spp_init_x86(SPPContext *s)
> >          int64_t bps;
> >          s->store_slice = store_slice_mmx;
> >          av_opt_get_int(s->dct, "bits_per_sample", 0, &bps);
> > -        if (bps <= 8) {
> > +        if (bps <= 8 &&
> > +            !memcmp(s->dct->idct_permutation, simple_mmx_permutation,
> > +                    sizeof(simple_mmx_permutation))) {
>
> this would disable the SIMD code
>

Yes, that's intentional. We can add new SIMD for additional (e.g.
TRANSPOSE) permutations, but that should really be done separately. I'm
happy to look into that, but not as part of this patch series, let's get
this in first. MPEG-1/2/4 etc. decoding performance is more important than
this filter's performance.

also the memcmp() is a very ugly way to check the permutation


Only thing that's possible through the API at this point, so I'm just being
practical :-). I agree it'd be nice to extend the API to simplify these
type of checks, but I don't really like the idea of adding an enum to
AVDCT, too implementation-specific and static... Have better ideas?

Ronald
Michael Niedermayer June 22, 2017, 8:28 p.m. UTC | #3
On Wed, Jun 21, 2017 at 09:03:11PM -0400, Ronald S. Bultje wrote:
> Hi,
> 
> On Wed, Jun 21, 2017 at 8:42 PM, Michael Niedermayer <michael@niedermayer.cc
> > wrote:
> 
> > On Wed, Jun 21, 2017 at 09:46:58AM -0400, Ronald S. Bultje wrote:
> > > ---
> > >  libavfilter/x86/vf_spp.c | 15 ++++++++++++++-
> > >  1 file changed, 14 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/libavfilter/x86/vf_spp.c b/libavfilter/x86/vf_spp.c
> > > index 45a9eb0..8e3c820 100644
> > > --- a/libavfilter/x86/vf_spp.c
> > > +++ b/libavfilter/x86/vf_spp.c
> > > @@ -217,6 +217,17 @@ static void store_slice_mmx(uint8_t *dst, const
> > int16_t *src,
> > >
> > >  #endif /* HAVE_MMX_INLINE */
> > >
> > > +static const uint8_t simple_mmx_permutation[64] = {
> > > +    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
> > > +    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
> > > +    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
> > > +    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
> > > +    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
> > > +    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
> > > +    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
> > > +    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
> > > +};
> > > +
> > >  av_cold void ff_spp_init_x86(SPPContext *s)
> > >  {
> > >  #if HAVE_MMX_INLINE
> > > @@ -226,7 +237,9 @@ av_cold void ff_spp_init_x86(SPPContext *s)
> > >          int64_t bps;
> > >          s->store_slice = store_slice_mmx;
> > >          av_opt_get_int(s->dct, "bits_per_sample", 0, &bps);
> > > -        if (bps <= 8) {
> > > +        if (bps <= 8 &&
> > > +            !memcmp(s->dct->idct_permutation, simple_mmx_permutation,
> > > +                    sizeof(simple_mmx_permutation))) {
> >
> > this would disable the SIMD code
> >
> 
> Yes, that's intentional. We can add new SIMD for additional (e.g.
> TRANSPOSE) permutations, but that should really be done separately. I'm
> happy to look into that, but not as part of this patch series, let's get
> this in first. MPEG-1/2/4 etc. decoding performance is more important than
> this filter's performance.

if you intend to fix this later then sure, ok


>
> also the memcmp() is a very ugly way to check the permutation
> 
> 
> Only thing that's possible through the API at this point, so I'm just being
> practical :-). I agree it'd be nice to extend the API to simplify these
> type of checks, but I don't really like the idea of adding an enum to
> AVDCT, too implementation-specific and static... Have better ideas?

you can use an enum that uses some "revission number" or date-time
of the first introduction of a permutation into the codebase.
A string of the equation that creates the permutation is an option
too. or some checksum/hash of the table.



[...]
Ronald S. Bultje June 23, 2017, 3:03 p.m. UTC | #4
Hi,

On Thu, Jun 22, 2017 at 4:28 PM, Michael Niedermayer <michael@niedermayer.cc>
wrote:
> On Wed, Jun 21, 2017 at 09:03:11PM -0400, Ronald S. Bultje wrote:
> > On Wed, Jun 21, 2017 at 8:42 PM, Michael Niedermayer
<michael@niedermayer.cc
> > > also the memcmp() is a very ugly way to check the permutation
> >
> > Only thing that's possible through the API at this point, so I'm just
being
> > practical :-). I agree it'd be nice to extend the API to simplify these
> > type of checks, but I don't really like the idea of adding an enum to
> > AVDCT, too implementation-specific and static... Have better ideas?
>
> you can use an enum that uses some "revission number" or date-time
> of the first introduction of a permutation into the codebase.
> A string of the equation that creates the permutation is an option
> too. or some checksum/hash of the table.

I think we agreed on IRC that a checksum would be suitable here, so I've
used a CRC, I hope that's OK.

Ronald
diff mbox

Patch

diff --git a/libavfilter/x86/vf_spp.c b/libavfilter/x86/vf_spp.c
index 45a9eb0..8e3c820 100644
--- a/libavfilter/x86/vf_spp.c
+++ b/libavfilter/x86/vf_spp.c
@@ -217,6 +217,17 @@  static void store_slice_mmx(uint8_t *dst, const int16_t *src,
 
 #endif /* HAVE_MMX_INLINE */
 
+static const uint8_t simple_mmx_permutation[64] = {
+    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
+    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
+    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
+    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
+    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
+    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
+    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
+    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
+};
+
 av_cold void ff_spp_init_x86(SPPContext *s)
 {
 #if HAVE_MMX_INLINE
@@ -226,7 +237,9 @@  av_cold void ff_spp_init_x86(SPPContext *s)
         int64_t bps;
         s->store_slice = store_slice_mmx;
         av_opt_get_int(s->dct, "bits_per_sample", 0, &bps);
-        if (bps <= 8) {
+        if (bps <= 8 &&
+            !memcmp(s->dct->idct_permutation, simple_mmx_permutation,
+                    sizeof(simple_mmx_permutation))) {
             switch (s->mode) {
             case 0: s->requantize = hardthresh_mmx; break;
             case 1: s->requantize = softthresh_mmx; break;