Message ID | 1498052818-62867-1-git-send-email-rsbultje@gmail.com |
---|---|
State | Superseded |
Headers | show |
On Wed, Jun 21, 2017 at 09:46:58AM -0400, Ronald S. Bultje wrote: > --- > libavfilter/x86/vf_spp.c | 15 ++++++++++++++- > 1 file changed, 14 insertions(+), 1 deletion(-) > > diff --git a/libavfilter/x86/vf_spp.c b/libavfilter/x86/vf_spp.c > index 45a9eb0..8e3c820 100644 > --- a/libavfilter/x86/vf_spp.c > +++ b/libavfilter/x86/vf_spp.c > @@ -217,6 +217,17 @@ static void store_slice_mmx(uint8_t *dst, const int16_t *src, > > #endif /* HAVE_MMX_INLINE */ > > +static const uint8_t simple_mmx_permutation[64] = { > + 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, > + 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, > + 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, > + 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, > + 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, > + 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, > + 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, > + 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, > +}; > + > av_cold void ff_spp_init_x86(SPPContext *s) > { > #if HAVE_MMX_INLINE > @@ -226,7 +237,9 @@ av_cold void ff_spp_init_x86(SPPContext *s) > int64_t bps; > s->store_slice = store_slice_mmx; > av_opt_get_int(s->dct, "bits_per_sample", 0, &bps); > - if (bps <= 8) { > + if (bps <= 8 && > + !memcmp(s->dct->idct_permutation, simple_mmx_permutation, > + sizeof(simple_mmx_permutation))) { this would disable the SIMD code also the memcmp() is a very ugly way to check the permutation [...]
Hi, On Wed, Jun 21, 2017 at 8:42 PM, Michael Niedermayer <michael@niedermayer.cc > wrote: > On Wed, Jun 21, 2017 at 09:46:58AM -0400, Ronald S. Bultje wrote: > > --- > > libavfilter/x86/vf_spp.c | 15 ++++++++++++++- > > 1 file changed, 14 insertions(+), 1 deletion(-) > > > > diff --git a/libavfilter/x86/vf_spp.c b/libavfilter/x86/vf_spp.c > > index 45a9eb0..8e3c820 100644 > > --- a/libavfilter/x86/vf_spp.c > > +++ b/libavfilter/x86/vf_spp.c > > @@ -217,6 +217,17 @@ static void store_slice_mmx(uint8_t *dst, const > int16_t *src, > > > > #endif /* HAVE_MMX_INLINE */ > > > > +static const uint8_t simple_mmx_permutation[64] = { > > + 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, > > + 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, > > + 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, > > + 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, > > + 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, > > + 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, > > + 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, > > + 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, > > +}; > > + > > av_cold void ff_spp_init_x86(SPPContext *s) > > { > > #if HAVE_MMX_INLINE > > @@ -226,7 +237,9 @@ av_cold void ff_spp_init_x86(SPPContext *s) > > int64_t bps; > > s->store_slice = store_slice_mmx; > > av_opt_get_int(s->dct, "bits_per_sample", 0, &bps); > > - if (bps <= 8) { > > + if (bps <= 8 && > > + !memcmp(s->dct->idct_permutation, simple_mmx_permutation, > > + sizeof(simple_mmx_permutation))) { > > this would disable the SIMD code > Yes, that's intentional. We can add new SIMD for additional (e.g. TRANSPOSE) permutations, but that should really be done separately. I'm happy to look into that, but not as part of this patch series, let's get this in first. MPEG-1/2/4 etc. decoding performance is more important than this filter's performance. also the memcmp() is a very ugly way to check the permutation Only thing that's possible through the API at this point, so I'm just being practical :-). I agree it'd be nice to extend the API to simplify these type of checks, but I don't really like the idea of adding an enum to AVDCT, too implementation-specific and static... Have better ideas? Ronald
On Wed, Jun 21, 2017 at 09:03:11PM -0400, Ronald S. Bultje wrote: > Hi, > > On Wed, Jun 21, 2017 at 8:42 PM, Michael Niedermayer <michael@niedermayer.cc > > wrote: > > > On Wed, Jun 21, 2017 at 09:46:58AM -0400, Ronald S. Bultje wrote: > > > --- > > > libavfilter/x86/vf_spp.c | 15 ++++++++++++++- > > > 1 file changed, 14 insertions(+), 1 deletion(-) > > > > > > diff --git a/libavfilter/x86/vf_spp.c b/libavfilter/x86/vf_spp.c > > > index 45a9eb0..8e3c820 100644 > > > --- a/libavfilter/x86/vf_spp.c > > > +++ b/libavfilter/x86/vf_spp.c > > > @@ -217,6 +217,17 @@ static void store_slice_mmx(uint8_t *dst, const > > int16_t *src, > > > > > > #endif /* HAVE_MMX_INLINE */ > > > > > > +static const uint8_t simple_mmx_permutation[64] = { > > > + 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, > > > + 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, > > > + 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, > > > + 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, > > > + 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, > > > + 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, > > > + 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, > > > + 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, > > > +}; > > > + > > > av_cold void ff_spp_init_x86(SPPContext *s) > > > { > > > #if HAVE_MMX_INLINE > > > @@ -226,7 +237,9 @@ av_cold void ff_spp_init_x86(SPPContext *s) > > > int64_t bps; > > > s->store_slice = store_slice_mmx; > > > av_opt_get_int(s->dct, "bits_per_sample", 0, &bps); > > > - if (bps <= 8) { > > > + if (bps <= 8 && > > > + !memcmp(s->dct->idct_permutation, simple_mmx_permutation, > > > + sizeof(simple_mmx_permutation))) { > > > > this would disable the SIMD code > > > > Yes, that's intentional. We can add new SIMD for additional (e.g. > TRANSPOSE) permutations, but that should really be done separately. I'm > happy to look into that, but not as part of this patch series, let's get > this in first. MPEG-1/2/4 etc. decoding performance is more important than > this filter's performance. if you intend to fix this later then sure, ok > > also the memcmp() is a very ugly way to check the permutation > > > Only thing that's possible through the API at this point, so I'm just being > practical :-). I agree it'd be nice to extend the API to simplify these > type of checks, but I don't really like the idea of adding an enum to > AVDCT, too implementation-specific and static... Have better ideas? you can use an enum that uses some "revission number" or date-time of the first introduction of a permutation into the codebase. A string of the equation that creates the permutation is an option too. or some checksum/hash of the table. [...]
Hi, On Thu, Jun 22, 2017 at 4:28 PM, Michael Niedermayer <michael@niedermayer.cc> wrote: > On Wed, Jun 21, 2017 at 09:03:11PM -0400, Ronald S. Bultje wrote: > > On Wed, Jun 21, 2017 at 8:42 PM, Michael Niedermayer <michael@niedermayer.cc > > > also the memcmp() is a very ugly way to check the permutation > > > > Only thing that's possible through the API at this point, so I'm just being > > practical :-). I agree it'd be nice to extend the API to simplify these > > type of checks, but I don't really like the idea of adding an enum to > > AVDCT, too implementation-specific and static... Have better ideas? > > you can use an enum that uses some "revission number" or date-time > of the first introduction of a permutation into the codebase. > A string of the equation that creates the permutation is an option > too. or some checksum/hash of the table. I think we agreed on IRC that a checksum would be suitable here, so I've used a CRC, I hope that's OK. Ronald
diff --git a/libavfilter/x86/vf_spp.c b/libavfilter/x86/vf_spp.c index 45a9eb0..8e3c820 100644 --- a/libavfilter/x86/vf_spp.c +++ b/libavfilter/x86/vf_spp.c @@ -217,6 +217,17 @@ static void store_slice_mmx(uint8_t *dst, const int16_t *src, #endif /* HAVE_MMX_INLINE */ +static const uint8_t simple_mmx_permutation[64] = { + 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, + 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, + 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, + 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, + 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, + 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, + 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, + 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, +}; + av_cold void ff_spp_init_x86(SPPContext *s) { #if HAVE_MMX_INLINE @@ -226,7 +237,9 @@ av_cold void ff_spp_init_x86(SPPContext *s) int64_t bps; s->store_slice = store_slice_mmx; av_opt_get_int(s->dct, "bits_per_sample", 0, &bps); - if (bps <= 8) { + if (bps <= 8 && + !memcmp(s->dct->idct_permutation, simple_mmx_permutation, + sizeof(simple_mmx_permutation))) { switch (s->mode) { case 0: s->requantize = hardthresh_mmx; break; case 1: s->requantize = softthresh_mmx; break;