diff mbox

[FFmpeg-devel] af_volumedetect: Work with sample formats other than s16/s16p

Message ID 1471065661-26093-1-git-send-email-pburt0@gmail.com
State Rejected
Headers show

Commit Message

Burt P Aug. 13, 2016, 5:21 a.m. UTC
The histogram will still only be shown for s16 and s16p.

Signed-off-by: Burt P <pburt0@gmail.com>
---
 libavfilter/af_volumedetect.c | 133 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 121 insertions(+), 12 deletions(-)

Comments

Paul B Mahol Aug. 13, 2016, 7:12 a.m. UTC | #1
On Saturday, August 13, 2016, Burt P <pburt0@gmail.com> wrote:

> The histogram will still only be shown for s16 and s16p.
>
> Signed-off-by: Burt P <pburt0@gmail.com <javascript:;>>
> ---
>  libavfilter/af_volumedetect.c | 133 ++++++++++++++++++++++++++++++
> ++++++++----
>  1 file changed, 121 insertions(+), 12 deletions(-)
>
> diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c
> index 4815bcc..3ee664f 100644
> --- a/libavfilter/af_volumedetect.c
> +++ b/libavfilter/af_volumedetect.c
> @@ -24,6 +24,8 @@
>  #include "avfilter.h"
>  #include "internal.h"
>
> +#define USE_OLD_VERSION 0  /* 0 or 1 */
> +
>  typedef struct {
>      /**
>       * Number of samples at each PCM value.
> @@ -31,18 +33,38 @@ typedef struct {
>       * The extra element is there for symmetry.
>       */
>      uint64_t histogram[0x10001];
> +    int use_hist;
> +    int sformat;
> +    double peak;
> +    double power;
> +    uint64_t nb_samples;
>  } VolDetectContext;
>
>  static int query_formats(AVFilterContext *ctx)
>  {
> -    static const enum AVSampleFormat sample_fmts[] = {
> -        AV_SAMPLE_FMT_S16,
> -        AV_SAMPLE_FMT_S16P,
> -        AV_SAMPLE_FMT_NONE
> +    static const enum AVSampleFormat sample_fmts[][11] = {
> +        { /* [0]: USE_OLD_VERSION = 0 */
> +            AV_SAMPLE_FMT_S16,
> +            AV_SAMPLE_FMT_S16P,
> +            AV_SAMPLE_FMT_U8,
> +            AV_SAMPLE_FMT_U8P,
> +            AV_SAMPLE_FMT_S32,
> +            AV_SAMPLE_FMT_S32P,
> +            AV_SAMPLE_FMT_FLT,
> +            AV_SAMPLE_FMT_FLTP,
> +            AV_SAMPLE_FMT_DBL,
> +            AV_SAMPLE_FMT_DBLP,
> +            AV_SAMPLE_FMT_NONE
> +        },
> +        { /* [1]: USE_OLD_VERSION = 1 */
> +            AV_SAMPLE_FMT_S16,
> +            AV_SAMPLE_FMT_S16P,
> +            AV_SAMPLE_FMT_NONE
> +        },
>      };
>      AVFilterFormats *formats;
>
> -    if (!(formats = ff_make_format_list(sample_fmts)))
> +    if (!(formats = ff_make_format_list(sample_fmts[USE_OLD_VERSION])))
>          return AVERROR(ENOMEM);
>      return ff_set_common_formats(ctx, formats);
>  }
> @@ -56,22 +78,70 @@ static int filter_frame(AVFilterLink *inlink, AVFrame
> *samples)
>      int nb_channels = av_get_channel_layout_nb_channels(layout);
>      int nb_planes   = nb_channels;
>      int plane, i;
> -    int16_t *pcm;
> +    uint8_t *pcm;
> +    int sample_size;
> +    double sample;
> +
> +    vd->sformat = inlink->format;
>
>      if (!av_sample_fmt_is_planar(samples->format)) {
>          nb_samples *= nb_channels;
>          nb_planes = 1;
>      }
>      for (plane = 0; plane < nb_planes; plane++) {
> -        pcm = (int16_t *)samples->extended_data[plane];
> -        for (i = 0; i < nb_samples; i++)
> -            vd->histogram[pcm[i] + 0x8000]++;
> +        pcm = samples->extended_data[plane];
> +        sample_size = 0;
> +        for (i = 0; i < nb_samples; i++) {
> +            switch (inlink->format) {
> +                case AV_SAMPLE_FMT_S16:
> +                case AV_SAMPLE_FMT_S16P:
> +                    vd->histogram[*(int16_t*)pcm + 0x8000]++;
> +                    vd->use_hist = 1;
> +                    sample_size = sizeof(int16_t);
> +                    sample = (double)*(int16_t*)pcm;
> +                    sample /= 0x8000;
> +                case AV_SAMPLE_FMT_U8:
> +                case AV_SAMPLE_FMT_U8P:
> +                    if (!sample_size) {
> +                        sample_size = sizeof(uint8_t);
> +                        sample = (double)(*pcm);
> +                        sample -= 0x80;
> +                        sample /= 0x80;
> +                    }
> +                case AV_SAMPLE_FMT_S32:
> +                case AV_SAMPLE_FMT_S32P:
> +                    if (!sample_size) {
> +                        sample_size = sizeof(int32_t);
> +                        sample = (double)*(int32_t*)pcm;
> +                        sample /= 0x80000000U;
> +                    }
> +                case AV_SAMPLE_FMT_FLT:
> +                case AV_SAMPLE_FMT_FLTP:
> +                    if (!sample_size) {
> +                        sample_size = sizeof(float);
> +                        sample = (double)*(float*)pcm;
> +                    }
> +                case AV_SAMPLE_FMT_DBL:
> +                case AV_SAMPLE_FMT_DBLP:
> +                    if (!sample_size) {
> +                        sample_size = sizeof(double);
> +                        sample = *(double*)pcm;
> +                    }
> +                    sample = fabs(sample);
> +                    vd->peak = FFMAX(vd->peak, sample);
> +                    vd->power += (sample*sample);
> +                    vd->nb_samples++;
> +            }
> +            av_assert0(sample_size != 0);
> +            pcm += sample_size;
> +        }
>      }
>
>      return ff_filter_frame(inlink->dst->outputs[0], samples);
>  }
>
>  #define MAX_DB 91
> +#define VERY_SMALL 0.000000000001
>
>  static inline double logdb(uint64_t v)
>  {
> @@ -88,9 +158,13 @@ static void print_stats(AVFilterContext *ctx)
>      uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
>      uint64_t histdb[MAX_DB + 1] = { 0 };
>
> +    if (!vd->use_hist)
> +        return;
> +
>      for (i = 0; i < 0x10000; i++)
>          nb_samples += vd->histogram[i];
>      av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples);
> +
>      if (!nb_samples)
>          return;
>
> @@ -107,13 +181,13 @@ static void print_stats(AVFilterContext *ctx)
>          return;
>      power = (power + nb_samples_shift / 2) / nb_samples_shift;
>      av_assert0(power <= 0x8000 * 0x8000);
> -    av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));
> +    av_log(ctx, AV_LOG_INFO, "mean_volume: %.4f dB\n", -logdb(power));
>
>      max_volume = 0x8000;
>      while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
>                               !vd->histogram[0x8000 - max_volume])
>          max_volume--;
> -    av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume *
> max_volume));
> +    av_log(ctx, AV_LOG_INFO, "max_volume: %.4f dB\n", -logdb(max_volume *
> max_volume));
>
>      for (i = 0; i < 0x10000; i++)
>          histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] +=
> vd->histogram[i];
> @@ -122,11 +196,46 @@ static void print_stats(AVFilterContext *ctx)
>          av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i,
> histdb[i]);
>          sum += histdb[i];
>      }
> +
> +}
> +
> +static void print_stats2(AVFilterContext *ctx)
> +{
> +    VolDetectContext *vd = ctx->priv;
> +    double power_db, peak_db;
> +
> +    if (!vd->nb_samples)
> +        return;
> +
> +    vd->power = vd->power / vd->nb_samples;
> +    vd->peak *= vd->peak;
> +    power_db = 10*log10(vd->power);
> +    peak_db  = 10*log10(vd->peak);
> +    av_log(ctx, AV_LOG_INFO, "sample_format: %s\n",
> av_get_sample_fmt_name(vd->sformat) );
> +    av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", vd->nb_samples);
> +    av_log(ctx, AV_LOG_INFO, "mean_volume: %.4f dB\n", power_db);
> +    av_log(ctx, AV_LOG_INFO, "max_volume: %.4f dB\n", peak_db);
> +
> +    if (vd->use_hist) {
> +        int i;
> +        uint64_t sum = 0;
> +        uint64_t histdb[MAX_DB + 1] = { 0 };
> +        for (i = 0; i < 0x10000; i++)
> +            histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] +=
> vd->histogram[i];
> +        for (i = 0; i <= MAX_DB && !histdb[i]; i++);
> +        for (; i <= MAX_DB && sum < vd->nb_samples / 1000; i++) {
> +            av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i,
> histdb[i]);
> +            sum += histdb[i];
> +        }
> +    }
>  }
>
>  static av_cold void uninit(AVFilterContext *ctx)
>  {
> -    print_stats(ctx);
> +    if (USE_OLD_VERSION)
> +        print_stats(ctx);
> +    else
> +        print_stats2(ctx);
>  }
>
>  static const AVFilterPad volumedetect_inputs[] = {
>

Code duplication, see astats filter.
Nicolas George Aug. 13, 2016, 8:44 a.m. UTC | #2
Le septidi 27 thermidor, an CCXXIV, Burt P a écrit :
> The histogram will still only be shown for s16 and s16p.
> 
> Signed-off-by: Burt P <pburt0@gmail.com>
> ---
>  libavfilter/af_volumedetect.c | 133 ++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 121 insertions(+), 12 deletions(-)

Did you benchmark the original s16 case? The main point of this filter is
that it is fast. With these changes, I see switches inside inner loops, and
I am afraid it becomes less fast.

Regards,
Paul B Mahol Aug. 13, 2016, 9:23 a.m. UTC | #3
On 8/13/16, Nicolas George <george@nsup.org> wrote:
> Le septidi 27 thermidor, an CCXXIV, Burt P a écrit :
>> The histogram will still only be shown for s16 and s16p.
>>
>> Signed-off-by: Burt P <pburt0@gmail.com>
>> ---
>>  libavfilter/af_volumedetect.c | 133
>> ++++++++++++++++++++++++++++++++++++++----
>>  1 file changed, 121 insertions(+), 12 deletions(-)
>
> Did you benchmark the original s16 case? The main point of this filter is
> that it is fast. With these changes, I see switches inside inner loops, and
> I am afraid it becomes less fast.

Also, swresample already does what the converting code does, but faster.
Burt P Aug. 13, 2016, 6:55 p.m. UTC | #4
Thanks, I will use astats instead.

On Sat, Aug 13, 2016 at 4:23 AM, Paul B Mahol <onemda@gmail.com> wrote:
> On 8/13/16, Nicolas George <george@nsup.org> wrote:
>> Le septidi 27 thermidor, an CCXXIV, Burt P a écrit :
>>> The histogram will still only be shown for s16 and s16p.
>>>
>>> Signed-off-by: Burt P <pburt0@gmail.com>
>>> ---
>>>  libavfilter/af_volumedetect.c | 133
>>> ++++++++++++++++++++++++++++++++++++++----
>>>  1 file changed, 121 insertions(+), 12 deletions(-)
>>
>> Did you benchmark the original s16 case? The main point of this filter is
>> that it is fast. With these changes, I see switches inside inner loops, and
>> I am afraid it becomes less fast.
>
> Also, swresample already does what converting code do, but faster.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Paul B Mahol Aug. 13, 2016, 7:25 p.m. UTC | #5
On Saturday, August 13, 2016, Burt P. <pburt0@gmail.com> wrote:

> Thanks, I will use astats instead.
>
>
You could add dB scaling to astats metadata.
Burt P Aug. 13, 2016, 8:55 p.m. UTC | #6
On Sat, Aug 13, 2016 at 4:23 AM, Paul B Mahol <onemda@gmail.com> wrote:
>
> Also, swresample already does what converting code do, but faster.

The advantage of doing the conversion in the filter itself, instead of
using the auto-inserted conversion filter, is that the original data is
left completely untouched.
Using astats, the sample format is converted to DBL for the astats
filter and everything after.
I can't risk anything affecting the LSB of the input or the HDCD code
may be disrupted.
Paul B Mahol Aug. 13, 2016, 9:41 p.m. UTC | #7
On Saturday, August 13, 2016, Burt P. <pburt0@gmail.com> wrote:

> On Sat, Aug 13, 2016 at 4:23 AM, Paul B Mahol <onemda@gmail.com
> <javascript:;>> wrote:
> >
> > Also, swresample already does what converting code do, but faster.
>
> Doing the conversion in the filter itself instead of using the
> auto-inserted conversion filter is that the original data is left
> completely untouched.
> Using astats, the sample format is converted to DBL for the astats
> filter and everything after.
> I can't risk anything affecting the LSB of the input or the HDCD code
> may be disrupted.




Integer mode could be easily added. Doesn't dbl hold enough bits?
diff mbox

Patch

diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c
index 4815bcc..3ee664f 100644
--- a/libavfilter/af_volumedetect.c
+++ b/libavfilter/af_volumedetect.c
@@ -24,6 +24,8 @@ 
 #include "avfilter.h"
 #include "internal.h"
 
+#define USE_OLD_VERSION 0  /* 0 or 1 */
+
 typedef struct {
     /**
      * Number of samples at each PCM value.
@@ -31,18 +33,38 @@  typedef struct {
      * The extra element is there for symmetry.
      */
     uint64_t histogram[0x10001];
+    int use_hist;
+    int sformat;
+    double peak;
+    double power;
+    uint64_t nb_samples;
 } VolDetectContext;
 
 static int query_formats(AVFilterContext *ctx)
 {
-    static const enum AVSampleFormat sample_fmts[] = {
-        AV_SAMPLE_FMT_S16,
-        AV_SAMPLE_FMT_S16P,
-        AV_SAMPLE_FMT_NONE
+    static const enum AVSampleFormat sample_fmts[][11] = {
+        { /* [0]: USE_OLD_VERSION = 0 */
+            AV_SAMPLE_FMT_S16,
+            AV_SAMPLE_FMT_S16P,
+            AV_SAMPLE_FMT_U8,
+            AV_SAMPLE_FMT_U8P,
+            AV_SAMPLE_FMT_S32,
+            AV_SAMPLE_FMT_S32P,
+            AV_SAMPLE_FMT_FLT,
+            AV_SAMPLE_FMT_FLTP,
+            AV_SAMPLE_FMT_DBL,
+            AV_SAMPLE_FMT_DBLP,
+            AV_SAMPLE_FMT_NONE
+        },
+        { /* [1]: USE_OLD_VERSION = 1 */
+            AV_SAMPLE_FMT_S16,
+            AV_SAMPLE_FMT_S16P,
+            AV_SAMPLE_FMT_NONE
+        },
     };
     AVFilterFormats *formats;
 
-    if (!(formats = ff_make_format_list(sample_fmts)))
+    if (!(formats = ff_make_format_list(sample_fmts[USE_OLD_VERSION])))
         return AVERROR(ENOMEM);
     return ff_set_common_formats(ctx, formats);
 }
@@ -56,22 +78,70 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *samples)
     int nb_channels = av_get_channel_layout_nb_channels(layout);
     int nb_planes   = nb_channels;
     int plane, i;
-    int16_t *pcm;
+    uint8_t *pcm;
+    int sample_size;
+    double sample;
+
+    vd->sformat = inlink->format;
 
     if (!av_sample_fmt_is_planar(samples->format)) {
         nb_samples *= nb_channels;
         nb_planes = 1;
     }
     for (plane = 0; plane < nb_planes; plane++) {
-        pcm = (int16_t *)samples->extended_data[plane];
-        for (i = 0; i < nb_samples; i++)
-            vd->histogram[pcm[i] + 0x8000]++;
+        pcm = samples->extended_data[plane];
+        sample_size = 0;
+        for (i = 0; i < nb_samples; i++) {
+            switch (inlink->format) {
+                case AV_SAMPLE_FMT_S16:
+                case AV_SAMPLE_FMT_S16P:
+                    vd->histogram[*(int16_t*)pcm + 0x8000]++;
+                    vd->use_hist = 1;
+                    sample_size = sizeof(int16_t);
+                    sample = (double)*(int16_t*)pcm;
+                    sample /= 0x8000;
+                case AV_SAMPLE_FMT_U8:
+                case AV_SAMPLE_FMT_U8P:
+                    if (!sample_size) {
+                        sample_size = sizeof(uint8_t);
+                        sample = (double)(*pcm);
+                        sample -= 0x80;
+                        sample /= 0x80;
+                    }
+                case AV_SAMPLE_FMT_S32:
+                case AV_SAMPLE_FMT_S32P:
+                    if (!sample_size) {
+                        sample_size = sizeof(int32_t);
+                        sample = (double)*(int32_t*)pcm;
+                        sample /= 0x80000000U;
+                    }
+                case AV_SAMPLE_FMT_FLT:
+                case AV_SAMPLE_FMT_FLTP:
+                    if (!sample_size) {
+                        sample_size = sizeof(float);
+                        sample = (double)*(float*)pcm;
+                    }
+                case AV_SAMPLE_FMT_DBL:
+                case AV_SAMPLE_FMT_DBLP:
+                    if (!sample_size) {
+                        sample_size = sizeof(double);
+                        sample = *(double*)pcm;
+                    }
+                    sample = fabs(sample);
+                    vd->peak = FFMAX(vd->peak, sample);
+                    vd->power += (sample*sample);
+                    vd->nb_samples++;
+            }
+            av_assert0(sample_size != 0);
+            pcm += sample_size;
+        }
     }
 
     return ff_filter_frame(inlink->dst->outputs[0], samples);
 }
 
 #define MAX_DB 91
+#define VERY_SMALL 0.000000000001
 
 static inline double logdb(uint64_t v)
 {
@@ -88,9 +158,13 @@  static void print_stats(AVFilterContext *ctx)
     uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
     uint64_t histdb[MAX_DB + 1] = { 0 };
 
+    if (!vd->use_hist)
+        return;
+
     for (i = 0; i < 0x10000; i++)
         nb_samples += vd->histogram[i];
     av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples);
+
     if (!nb_samples)
         return;
 
@@ -107,13 +181,13 @@  static void print_stats(AVFilterContext *ctx)
         return;
     power = (power + nb_samples_shift / 2) / nb_samples_shift;
     av_assert0(power <= 0x8000 * 0x8000);
-    av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));
+    av_log(ctx, AV_LOG_INFO, "mean_volume: %.4f dB\n", -logdb(power));
 
     max_volume = 0x8000;
     while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
                              !vd->histogram[0x8000 - max_volume])
         max_volume--;
-    av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume));
+    av_log(ctx, AV_LOG_INFO, "max_volume: %.4f dB\n", -logdb(max_volume * max_volume));
 
     for (i = 0; i < 0x10000; i++)
         histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
@@ -122,11 +196,46 @@  static void print_stats(AVFilterContext *ctx)
         av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]);
         sum += histdb[i];
     }
+
+}
+
+static void print_stats2(AVFilterContext *ctx)
+{
+    VolDetectContext *vd = ctx->priv;
+    double power_db, peak_db;
+
+    if (!vd->nb_samples)
+        return;
+
+    vd->power = vd->power / vd->nb_samples;
+    vd->peak *= vd->peak;
+    power_db = 10*log10(vd->power);
+    peak_db  = 10*log10(vd->peak);
+    av_log(ctx, AV_LOG_INFO, "sample_format: %s\n", av_get_sample_fmt_name(vd->sformat) );
+    av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", vd->nb_samples);
+    av_log(ctx, AV_LOG_INFO, "mean_volume: %.4f dB\n", power_db);
+    av_log(ctx, AV_LOG_INFO, "max_volume: %.4f dB\n", peak_db);
+
+    if (vd->use_hist) {
+        int i;
+        uint64_t sum = 0;
+        uint64_t histdb[MAX_DB + 1] = { 0 };
+        for (i = 0; i < 0x10000; i++)
+            histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
+        for (i = 0; i <= MAX_DB && !histdb[i]; i++);
+        for (; i <= MAX_DB && sum < vd->nb_samples / 1000; i++) {
+            av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]);
+            sum += histdb[i];
+        }
+    }
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
-    print_stats(ctx);
+    if (USE_OLD_VERSION)
+        print_stats(ctx);
+    else
+        print_stats2(ctx);
 }
 
 static const AVFilterPad volumedetect_inputs[] = {