diff mbox

[FFmpeg-devel,2/2] swresample/resample: optimize exact_rational=on:linear_interp=on case

Message ID 1479977548-2621-2-git-send-email-mfcc64@gmail.com
State Accepted
Commit 06f94149c61fd6beff6fcd0fd7ccc34b77c948dc
Headers show

Commit Message

Muhammad Faiz Nov. 24, 2016, 8:52 a.m. UTC
Separate dsp.resample into dsp.resample_common and dsp.resample_linear,
and call the faster resample_common even when linear_interp=on
whenever c->frac and c->dst_incr_mod are both zero.

This speeds up resampling when exact_rational and linear_interp are both
enabled, because exact_rational forces c->frac and c->dst_incr_mod to
be zero when soft compensation does not happen.

benchmark on exact_rational=on:linear_interp=on
        old     new
real    8.432s  5.097s
user    7.679s  4.989s
sys     0.125s  0.107s

Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
---
 libswresample/arm/resample_init.c |  6 ++----
 libswresample/resample.c          |  7 ++++++-
 libswresample/resample.h          |  6 ++++--
 libswresample/resample_dsp.c      | 12 ++++++++----
 libswresample/x86/resample_init.c | 32 ++++++++++++++++----------------
 5 files changed, 36 insertions(+), 27 deletions(-)

Comments

Michael Niedermayer Nov. 24, 2016, 6:47 p.m. UTC | #1
On Thu, Nov 24, 2016 at 03:52:28PM +0700, Muhammad Faiz wrote:
> separate dsp.resample to dsp.resample_common and dsp.resample_linear
> and choose to call faster resample_common even when linear_interp=on
> when c->frac and c->dst_incr_mod are both zero
> 
> speed up resampling when exact_rational and linear_interp are both
> enabled because exact_rational force c->frac and c->dst_incr_mod to
> be zero when soft compensation does not happen
> 
> benchmark on exact_rational=on:linear_interp=on
>         old     new
> real    8.432s  5.097s
> user    7.679s  4.989s
> sys     0.125s  0.107s
> 
> Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
> ---
>  libswresample/arm/resample_init.c |  6 ++----
>  libswresample/resample.c          |  7 ++++++-
>  libswresample/resample.h          |  6 ++++--
>  libswresample/resample_dsp.c      | 12 ++++++++----
>  libswresample/x86/resample_init.c | 32 ++++++++++++++++----------------
>  5 files changed, 36 insertions(+), 27 deletions(-)

should be ok

thx

[...]
Muhammad Faiz Nov. 24, 2016, 8:26 p.m. UTC | #2
On 11/25/16, Michael Niedermayer <michael@niedermayer.cc> wrote:
> On Thu, Nov 24, 2016 at 03:52:28PM +0700, Muhammad Faiz wrote:
>> separate dsp.resample to dsp.resample_common and dsp.resample_linear
>> and choose to call faster resample_common even when linear_interp=on
>> when c->frac and c->dst_incr_mod are both zero
>>
>> speed up resampling when exact_rational and linear_interp are both
>> enabled because exact_rational force c->frac and c->dst_incr_mod to
>> be zero when soft compensation does not happen
>>
>> benchmark on exact_rational=on:linear_interp=on
>>         old     new
>> real    8.432s  5.097s
>> user    7.679s  4.989s
>> sys     0.125s  0.107s
>>
>> Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
>> ---
>>  libswresample/arm/resample_init.c |  6 ++----
>>  libswresample/resample.c          |  7 ++++++-
>>  libswresample/resample.h          |  6 ++++--
>>  libswresample/resample_dsp.c      | 12 ++++++++----
>>  libswresample/x86/resample_init.c | 32 ++++++++++++++++----------------
>>  5 files changed, 36 insertions(+), 27 deletions(-)
>
> should be ok
>
> thx

Applied

Thanks
diff mbox

Patch

diff --git a/libswresample/arm/resample_init.c b/libswresample/arm/resample_init.c
index 003fafd..e334a27 100644
--- a/libswresample/arm/resample_init.c
+++ b/libswresample/arm/resample_init.c
@@ -111,12 +111,10 @@  av_cold void swri_resample_dsp_arm_init(ResampleContext *c)
 
     switch(c->format) {
     case AV_SAMPLE_FMT_FLTP:
-        if (!c->linear)
-            c->dsp.resample = ff_resample_common_float_neon;
+        c->dsp.resample_common = ff_resample_common_float_neon;
         break;
     case AV_SAMPLE_FMT_S16P:
-        if (!c->linear)
-            c->dsp.resample = ff_resample_common_s16_neon;
+        c->dsp.resample_common = ff_resample_common_s16_neon;
         break;
     }
 }
diff --git a/libswresample/resample.c b/libswresample/resample.c
index 8635bf1..e65a57a 100644
--- a/libswresample/resample.c
+++ b/libswresample/resample.c
@@ -496,7 +496,12 @@  static int swri_resample(ResampleContext *c,
 
         dst_size = FFMIN(dst_size, delta_n);
         if (dst_size > 0) {
-            *consumed = c->dsp.resample(c, dst, src, dst_size, update_ctx);
+            /* resample_linear and resample_common should have same behavior
+             * when frac and dst_incr_mod are zero */
+            if (c->linear && (c->frac || c->dst_incr_mod))
+                *consumed = c->dsp.resample_linear(c, dst, src, dst_size, update_ctx);
+            else
+                *consumed = c->dsp.resample_common(c, dst, src, dst_size, update_ctx);
         } else {
             *consumed = 0;
         }
diff --git a/libswresample/resample.h b/libswresample/resample.h
index 7fe9b97..946f5cc 100644
--- a/libswresample/resample.h
+++ b/libswresample/resample.h
@@ -53,8 +53,10 @@  typedef struct ResampleContext {
     struct {
         void (*resample_one)(void *dst, const void *src,
                              int n, int64_t index, int64_t incr);
-        int (*resample)(struct ResampleContext *c, void *dst,
-                        const void *src, int n, int update_ctx);
+        int (*resample_common)(struct ResampleContext *c, void *dst,
+                               const void *src, int n, int update_ctx);
+        int (*resample_linear)(struct ResampleContext *c, void *dst,
+                               const void *src, int n, int update_ctx);
     } dsp;
 } ResampleContext;
 
diff --git a/libswresample/resample_dsp.c b/libswresample/resample_dsp.c
index 41369f3..6ffbb87 100644
--- a/libswresample/resample_dsp.c
+++ b/libswresample/resample_dsp.c
@@ -48,19 +48,23 @@  void swri_resample_dsp_init(ResampleContext *c)
     switch(c->format){
     case AV_SAMPLE_FMT_S16P:
         c->dsp.resample_one = resample_one_int16;
-        c->dsp.resample     = c->linear ? resample_linear_int16 : resample_common_int16;
+        c->dsp.resample_common = resample_common_int16;
+        c->dsp.resample_linear = resample_linear_int16;
         break;
     case AV_SAMPLE_FMT_S32P:
         c->dsp.resample_one = resample_one_int32;
-        c->dsp.resample     = c->linear ? resample_linear_int32 : resample_common_int32;
+        c->dsp.resample_common = resample_common_int32;
+        c->dsp.resample_linear = resample_linear_int32;
         break;
     case AV_SAMPLE_FMT_FLTP:
         c->dsp.resample_one = resample_one_float;
-        c->dsp.resample     = c->linear ? resample_linear_float : resample_common_float;
+        c->dsp.resample_common = resample_common_float;
+        c->dsp.resample_linear = resample_linear_float;
         break;
     case AV_SAMPLE_FMT_DBLP:
         c->dsp.resample_one = resample_one_double;
-        c->dsp.resample     = c->linear ? resample_linear_double : resample_common_double;
+        c->dsp.resample_common = resample_common_double;
+        c->dsp.resample_linear = resample_linear_double;
         break;
     }
 
diff --git a/libswresample/x86/resample_init.c b/libswresample/x86/resample_init.c
index 9d7d5cf..e515762 100644
--- a/libswresample/x86/resample_init.c
+++ b/libswresample/x86/resample_init.c
@@ -50,40 +50,40 @@  av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
     switch(c->format){
     case AV_SAMPLE_FMT_S16P:
         if (ARCH_X86_32 && EXTERNAL_MMXEXT(mm_flags)) {
-            c->dsp.resample = c->linear ? ff_resample_linear_int16_mmxext
-                                        : ff_resample_common_int16_mmxext;
+            c->dsp.resample_linear = ff_resample_linear_int16_mmxext;
+            c->dsp.resample_common = ff_resample_common_int16_mmxext;
         }
         if (EXTERNAL_SSE2(mm_flags)) {
-            c->dsp.resample = c->linear ? ff_resample_linear_int16_sse2
-                                        : ff_resample_common_int16_sse2;
+            c->dsp.resample_linear = ff_resample_linear_int16_sse2;
+            c->dsp.resample_common = ff_resample_common_int16_sse2;
         }
         if (EXTERNAL_XOP(mm_flags)) {
-            c->dsp.resample = c->linear ? ff_resample_linear_int16_xop
-                                        : ff_resample_common_int16_xop;
+            c->dsp.resample_linear = ff_resample_linear_int16_xop;
+            c->dsp.resample_common = ff_resample_common_int16_xop;
         }
         break;
     case AV_SAMPLE_FMT_FLTP:
         if (EXTERNAL_SSE(mm_flags)) {
-            c->dsp.resample = c->linear ? ff_resample_linear_float_sse
-                                        : ff_resample_common_float_sse;
+            c->dsp.resample_linear = ff_resample_linear_float_sse;
+            c->dsp.resample_common = ff_resample_common_float_sse;
         }
         if (EXTERNAL_AVX_FAST(mm_flags)) {
-            c->dsp.resample = c->linear ? ff_resample_linear_float_avx
-                                        : ff_resample_common_float_avx;
+            c->dsp.resample_linear = ff_resample_linear_float_avx;
+            c->dsp.resample_common = ff_resample_common_float_avx;
         }
         if (EXTERNAL_FMA3_FAST(mm_flags)) {
-            c->dsp.resample = c->linear ? ff_resample_linear_float_fma3
-                                        : ff_resample_common_float_fma3;
+            c->dsp.resample_linear = ff_resample_linear_float_fma3;
+            c->dsp.resample_common = ff_resample_common_float_fma3;
         }
         if (EXTERNAL_FMA4(mm_flags)) {
-            c->dsp.resample = c->linear ? ff_resample_linear_float_fma4
-                                        : ff_resample_common_float_fma4;
+            c->dsp.resample_linear = ff_resample_linear_float_fma4;
+            c->dsp.resample_common = ff_resample_common_float_fma4;
         }
         break;
     case AV_SAMPLE_FMT_DBLP:
         if (EXTERNAL_SSE2(mm_flags)) {
-            c->dsp.resample = c->linear ? ff_resample_linear_double_sse2
-                                        : ff_resample_common_double_sse2;
+            c->dsp.resample_linear = ff_resample_linear_double_sse2;
+            c->dsp.resample_common = ff_resample_common_double_sse2;
         }
         break;
     }