Message ID | 20240702013354.14942-3-yigithanyigitdevel@gmail.com |
---|---|
State | New |
Headers | show |
Series | avfilter/af_volumedetect.c: Add 32bit float audio | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_fate_loongarch64 | fail | Make fate failed |
yinshiyou/make_loongarch64 | warning | New warnings during build |
andriy/make_fate_x86 | success | Make fate finished |
andriy/make_x86 | warning | New warnings during build |
Le 2 juillet 2024 04:33:51 GMT+03:00, Yigithan Yigit <yigithanyigitdevel@gmail.com> a écrit : >--- > libavfilter/af_volumedetect.c | 139 ++++++++++++++++++++++++++-------- > 1 file changed, 107 insertions(+), 32 deletions(-) Did you try to compile this patch? > >diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c >index 327801a7f9..edd2d56f7a 100644 >--- a/libavfilter/af_volumedetect.c >+++ b/libavfilter/af_volumedetect.c >@@ -1,5 +1,6 @@ > /* > * Copyright (c) 2012 Nicolas George >+ * Copyright (c) 2024 Yigithan Yigit - 32 Bit Float Audio Support > * > * This file is part of FFmpeg. > * >@@ -20,48 +21,62 @@ > > #include "libavutil/channel_layout.h" > #include "libavutil/avassert.h" >+#include "libavutil/mem.h" > #include "audio.h" > #include "avfilter.h" > #include "internal.h" > >+#define MAX_DB_FLT 1024 > #define MAX_DB 91 >+#define HISTOGRAM_SIZE 0x10000 >+#define HISTOGRAM_SIZE_FLT (MAX_DB_FLT*2) >+ >+typedef struct VolDetectContext VolDetectContext; > > typedef struct VolDetectContext { >- /** >- * Number of samples at each PCM value. >- * histogram[0x8000 + i] is the number of samples at value i. >- * The extra element is there for symmetry. 
>- */ >- uint64_t histogram[0x10001]; >+ uint64_t* histogram; ///< for integer number of samples at each PCM value, for float number of samples at each dB >+ uint64_t nb_samples; ///< number of samples >+ double sum2; ///< sum of the squares of the samples >+ double max; ///< maximum sample value >+ int is_float; ///< true if the input is in floating point >+ void (*process_samples)(VolDetectContext *vd, AVFrame *samples); > } VolDetectContext; > >-static inline double logdb(uint64_t v) >+static inline double logdb(double v, enum AVSampleFormat sample_fmt) > { >- double d = v / (double)(0x8000 * 0x8000); >- if (!v) >- return MAX_DB; >- return -log10(d) * 10; >+ if (sample_fmt == AV_SAMPLE_FMT_FLT) { >+ if (!v) >+ return MAX_DB_FLT; >+ return -log10(v) * 10; >+ } else { >+ double d = v / (double)(0x8000 * 0x8000); >+ if (!v) >+ return MAX_DB; >+ return -log10(d) * 10; >+ } >+} >+ >+static void update_float_stats(VolDetectContext *vd, float *audio_data) >+{ >+ double sample; >+ int idx; >+ if(!isfinite(*audio_data) || isnan(*audio_data)) >+ return; >+ sample = fabsf(*audio_data); >+ if (sample > vd->max) >+ vd->max = sample; >+ vd->sum2 += sample * sample; >+ idx = (int)floorf(logdb(sample * sample, AV_SAMPLE_FMT_FLT)) + MAX_DB_FLT; >+ vd->histogram[idx]++; >+ vd->nb_samples++; > } > > static int filter_frame(AVFilterLink *inlink, AVFrame *samples) > { > AVFilterContext *ctx = inlink->dst; > VolDetectContext *vd = ctx->priv; >- int nb_samples = samples->nb_samples; >- int nb_channels = samples->ch_layout.nb_channels; >- int nb_planes = nb_channels; >- int plane, i; >- int16_t *pcm; >- >- if (!av_sample_fmt_is_planar(samples->format)) { >- nb_samples *= nb_channels; >- nb_planes = 1; >- } >- for (plane = 0; plane < nb_planes; plane++) { >- pcm = (int16_t *)samples->extended_data[plane]; >- for (i = 0; i < nb_samples; i++) >- vd->histogram[pcm[i] + 0x8000]++; >- } >+ >+ vd->process_samples(vd, samples); > > return ff_filter_frame(inlink->dst->outputs[0], samples); > } 
>@@ -73,6 +88,20 @@ static void print_stats(AVFilterContext *ctx) > uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0; > uint64_t histdb[MAX_DB + 1] = { 0 }; > >+ if (!vd->nb_samples) >+ return; >+ if (vd->is_float) { >+ av_log(ctx, AV_LOG_INFO, "n_samples: %" PRId64 "\n", vd->nb_samples); >+ av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(vd->sum2 / vd->nb_samples, AV_SAMPLE_FMT_FLT)); >+ av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -2.0*logdb(vd->max, AV_SAMPLE_FMT_FLT)); >+ for (i = 0; i < HISTOGRAM_SIZE_FLT && !vd->histogram[i]; i++); >+ for (; i >= 0 && sum < vd->nb_samples / 1000; i++) { >+ if (!vd->histogram[i]) >+ continue; >+ av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %" PRId64 "\n", MAX_DB_FLT - i, vd->histogram[i]); >+ sum += vd->histogram[i]; >+ } >+ } else { > for (i = 0; i < 0x10000; i++) > nb_samples += vd->histogram[i]; > av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples); >@@ -92,26 +121,61 @@ static void print_stats(AVFilterContext *ctx) > return; > power = (power + nb_samples_shift / 2) / nb_samples_shift; > av_assert0(power <= 0x8000 * 0x8000); >- av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power)); >+ av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb((double)power, AV_SAMPLE_FMT_S16)); > > max_volume = 0x8000; > while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] && > !vd->histogram[0x8000 - max_volume]) > max_volume--; >- av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume)); >+ av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb((double)(max_volume * max_volume), AV_SAMPLE_FMT_S16)); > > for (i = 0; i < 0x10000; i++) >- histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i]; >+ histdb[(int)logdb((double)(i - 0x8000) * (i - 0x8000), AV_SAMPLE_FMT_S16)] += vd->histogram[i]; > for (i = 0; i <= MAX_DB && !histdb[i]; i++); > for (; i <= MAX_DB && sum < nb_samples / 1000; i++) { >- av_log(ctx, AV_LOG_INFO, "histogram_%ddb: 
%"PRId64"\n", i, histdb[i]); >+ av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", -i, histdb[i]); > sum += histdb[i]; > } >+ } >+} >+ >+static int config_output(AVFilterLink *outlink) >+{ >+ AVFilterContext *ctx = outlink->src; >+ VolDetectContext *vd = ctx->priv; >+ size_t histogram_size; >+ >+ vd->is_float = outlink->format == AV_SAMPLE_FMT_FLT || >+ outlink->format == AV_SAMPLE_FMT_FLTP; >+ >+ if (!vd->is_float) { >+ /* >+ * Number of samples at each PCM value. >+ * Only used for integer formats. >+ * For 16 bit signed PCM there are 65536. >+ * histogram[0x8000 + i] is the number of samples at value i. >+ * The extra element is there for symmetry. >+ */ >+ histogram_size = HISTOGRAM_SIZE + 1; >+ } else { >+ /* >+ * The histogram is used to store the number of samples at each dB >+ * instead of the number of samples at each PCM value. >+ */ >+ histogram_size = HISTOGRAM_SIZE_FLT + 1; >+ } >+ vd->histogram = av_calloc(histogram_size, sizeof(uint64_t)); >+ if (!vd->histogram) >+ return AVERROR(ENOMEM); >+ return 0; > } > > static av_cold void uninit(AVFilterContext *ctx) > { >+ VolDetectContext *vd = ctx->priv; > print_stats(ctx); >+ if (vd->histogram) >+ av_freep(&vd->histogram); > } > > static const AVFilterPad volumedetect_inputs[] = { >@@ -122,6 +186,14 @@ static const AVFilterPad volumedetect_inputs[] = { > }, > }; > >+static const AVFilterPad volumedetect_outputs[] = { >+ { >+ .name = "default", >+ .type = AVMEDIA_TYPE_AUDIO, >+ .config_props = config_output, >+ }, >+}; >+ > const AVFilter ff_af_volumedetect = { > .name = "volumedetect", > .description = NULL_IF_CONFIG_SMALL("Detect audio volume."), >@@ -129,6 +201,9 @@ const AVFilter ff_af_volumedetect = { > .uninit = uninit, > .flags = AVFILTER_FLAG_METADATA_ONLY, > FILTER_INPUTS(volumedetect_inputs), >- FILTER_OUTPUTS(ff_audio_default_filterpad), >- FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P), >+ FILTER_OUTPUTS(volumedetect_outputs), >+ FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, >+ 
AV_SAMPLE_FMT_S16P, >+ AV_SAMPLE_FMT_FLT, >+ AV_SAMPLE_FMT_FLTP), > };
Le 2 juillet 2024 08:46:53 GMT+03:00, "Rémi Denis-Courmont" <remi@remlab.net> a écrit : > > >Le 2 juillet 2024 04:33:51 GMT+03:00, Yigithan Yigit <yigithanyigitdevel@gmail.com> a écrit : >>--- >> libavfilter/af_volumedetect.c | 139 ++++++++++++++++++++++++++-------- >> 1 file changed, 107 insertions(+), 32 deletions(-) > >Did you try to compile this patch? Never mind, I misread.
Le 2 juillet 2024 04:33:51 GMT+03:00, Yigithan Yigit <yigithanyigitdevel@gmail.com> a écrit : >--- > libavfilter/af_volumedetect.c | 139 ++++++++++++++++++++++++++-------- > 1 file changed, 107 insertions(+), 32 deletions(-) > >diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c >index 327801a7f9..edd2d56f7a 100644 >--- a/libavfilter/af_volumedetect.c >+++ b/libavfilter/af_volumedetect.c >@@ -1,5 +1,6 @@ > /* > * Copyright (c) 2012 Nicolas George >+ * Copyright (c) 2024 Yigithan Yigit - 32 Bit Float Audio Support > * > * This file is part of FFmpeg. > * >@@ -20,48 +21,62 @@ > > #include "libavutil/channel_layout.h" > #include "libavutil/avassert.h" >+#include "libavutil/mem.h" > #include "audio.h" > #include "avfilter.h" > #include "internal.h" > >+#define MAX_DB_FLT 1024 > #define MAX_DB 91 >+#define HISTOGRAM_SIZE 0x10000 >+#define HISTOGRAM_SIZE_FLT (MAX_DB_FLT*2) >+ >+typedef struct VolDetectContext VolDetectContext; > > typedef struct VolDetectContext { >- /** >- * Number of samples at each PCM value. >- * histogram[0x8000 + i] is the number of samples at value i. >- * The extra element is there for symmetry. >- */ >- uint64_t histogram[0x10001]; >+ uint64_t* histogram; ///< for integer number of samples at each PCM value, for float number of samples at each dB >+ uint64_t nb_samples; ///< number of samples >+ double sum2; ///< sum of the squares of the samples >+ double max; ///< maximum sample value >+ int is_float; ///< true if the input is in floating point >+ void (*process_samples)(VolDetectContext *vd, AVFrame *samples); > } VolDetectContext; > >-static inline double logdb(uint64_t v) >+static inline double logdb(double v, enum AVSampleFormat sample_fmt) > { >- double d = v / (double)(0x8000 * 0x8000); >- if (!v) >- return MAX_DB; >- return -log10(d) * 10; >+ if (sample_fmt == AV_SAMPLE_FMT_FLT) { There's no point in doing this. You've already up-converted to double precision and do all the calculations in double precision. 
Maybe that's fine or maybe not, but either way, this doesn't look sensible. >+ if (!v) >+ return MAX_DB_FLT; >+ return -log10(v) * 10; >+ } else { >+ double d = v / (double)(0x8000 * 0x8000); >+ if (!v) >+ return MAX_DB; >+ return -log10(d) * 10; >+ } >+} >+ >+static void update_float_stats(VolDetectContext *vd, float *audio_data) >+{ >+ double sample; >+ int idx; >+ if(!isfinite(*audio_data) || isnan(*audio_data)) >+ return; >+ sample = fabsf(*audio_data); >+ if (sample > vd->max) >+ vd->max = sample; >+ vd->sum2 += sample * sample; >+ idx = (int)floorf(logdb(sample * sample, AV_SAMPLE_FMT_FLT)) + MAX_DB_FLT; >+ vd->histogram[idx]++; >+ vd->nb_samples++; > } > > static int filter_frame(AVFilterLink *inlink, AVFrame *samples) > { > AVFilterContext *ctx = inlink->dst; > VolDetectContext *vd = ctx->priv; >- int nb_samples = samples->nb_samples; >- int nb_channels = samples->ch_layout.nb_channels; >- int nb_planes = nb_channels; >- int plane, i; >- int16_t *pcm; >- >- if (!av_sample_fmt_is_planar(samples->format)) { >- nb_samples *= nb_channels; >- nb_planes = 1; >- } >- for (plane = 0; plane < nb_planes; plane++) { >- pcm = (int16_t *)samples->extended_data[plane]; >- for (i = 0; i < nb_samples; i++) >- vd->histogram[pcm[i] + 0x8000]++; >- } >+ >+ vd->process_samples(vd, samples); > > return ff_filter_frame(inlink->dst->outputs[0], samples); > } >@@ -73,6 +88,20 @@ static void print_stats(AVFilterContext *ctx) > uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0; > uint64_t histdb[MAX_DB + 1] = { 0 }; > >+ if (!vd->nb_samples) >+ return; >+ if (vd->is_float) { >+ av_log(ctx, AV_LOG_INFO, "n_samples: %" PRId64 "\n", vd->nb_samples); >+ av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(vd->sum2 / vd->nb_samples, AV_SAMPLE_FMT_FLT)); >+ av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -2.0*logdb(vd->max, AV_SAMPLE_FMT_FLT)); >+ for (i = 0; i < HISTOGRAM_SIZE_FLT && !vd->histogram[i]; i++); >+ for (; i >= 0 && sum < vd->nb_samples / 1000; i++) { 
>+ if (!vd->histogram[i]) >+ continue; >+ av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %" PRId64 "\n", MAX_DB_FLT - i, vd->histogram[i]); >+ sum += vd->histogram[i]; >+ } >+ } else { > for (i = 0; i < 0x10000; i++) > nb_samples += vd->histogram[i]; > av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples); >@@ -92,26 +121,61 @@ static void print_stats(AVFilterContext *ctx) > return; > power = (power + nb_samples_shift / 2) / nb_samples_shift; > av_assert0(power <= 0x8000 * 0x8000); >- av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power)); >+ av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb((double)power, AV_SAMPLE_FMT_S16)); > > max_volume = 0x8000; > while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] && > !vd->histogram[0x8000 - max_volume]) > max_volume--; >- av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume)); >+ av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb((double)(max_volume * max_volume), AV_SAMPLE_FMT_S16)); > > for (i = 0; i < 0x10000; i++) >- histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i]; >+ histdb[(int)logdb((double)(i - 0x8000) * (i - 0x8000), AV_SAMPLE_FMT_S16)] += vd->histogram[i]; > for (i = 0; i <= MAX_DB && !histdb[i]; i++); > for (; i <= MAX_DB && sum < nb_samples / 1000; i++) { >- av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]); >+ av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", -i, histdb[i]); > sum += histdb[i]; > } >+ } >+} >+ >+static int config_output(AVFilterLink *outlink) >+{ >+ AVFilterContext *ctx = outlink->src; >+ VolDetectContext *vd = ctx->priv; >+ size_t histogram_size; >+ >+ vd->is_float = outlink->format == AV_SAMPLE_FMT_FLT || >+ outlink->format == AV_SAMPLE_FMT_FLTP; >+ >+ if (!vd->is_float) { >+ /* >+ * Number of samples at each PCM value. >+ * Only used for integer formats. >+ * For 16 bit signed PCM there are 65536. >+ * histogram[0x8000 + i] is the number of samples at value i. 
>+ * The extra element is there for symmetry. >+ */ >+ histogram_size = HISTOGRAM_SIZE + 1; >+ } else { >+ /* >+ * The histogram is used to store the number of samples at each dB >+ * instead of the number of samples at each PCM value. >+ */ >+ histogram_size = HISTOGRAM_SIZE_FLT + 1; >+ } >+ vd->histogram = av_calloc(histogram_size, sizeof(uint64_t)); >+ if (!vd->histogram) >+ return AVERROR(ENOMEM); >+ return 0; > } > > static av_cold void uninit(AVFilterContext *ctx) > { >+ VolDetectContext *vd = ctx->priv; > print_stats(ctx); >+ if (vd->histogram) >+ av_freep(&vd->histogram); > } > > static const AVFilterPad volumedetect_inputs[] = { >@@ -122,6 +186,14 @@ static const AVFilterPad volumedetect_inputs[] = { > }, > }; > >+static const AVFilterPad volumedetect_outputs[] = { >+ { >+ .name = "default", >+ .type = AVMEDIA_TYPE_AUDIO, >+ .config_props = config_output, >+ }, >+}; >+ > const AVFilter ff_af_volumedetect = { > .name = "volumedetect", > .description = NULL_IF_CONFIG_SMALL("Detect audio volume."), >@@ -129,6 +201,9 @@ const AVFilter ff_af_volumedetect = { > .uninit = uninit, > .flags = AVFILTER_FLAG_METADATA_ONLY, > FILTER_INPUTS(volumedetect_inputs), >- FILTER_OUTPUTS(ff_audio_default_filterpad), >- FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P), >+ FILTER_OUTPUTS(volumedetect_outputs), >+ FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, >+ AV_SAMPLE_FMT_S16P, >+ AV_SAMPLE_FMT_FLT, >+ AV_SAMPLE_FMT_FLTP), > };
> On Jul 2, 2024, at 8:51 AM, Rémi Denis-Courmont <remi@remlab.net> wrote: > > > > Le 2 juillet 2024 04:33:51 GMT+03:00, Yigithan Yigit <yigithanyigitdevel@gmail.com <mailto:yigithanyigitdevel@gmail.com>> a écrit : >> --- >> libavfilter/af_volumedetect.c | 139 ++++++++++++++++++++++++++-------- >> 1 file changed, 107 insertions(+), 32 deletions(-) >> >> diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c >> index 327801a7f9..edd2d56f7a 100644 >> --- a/libavfilter/af_volumedetect.c >> +++ b/libavfilter/af_volumedetect.c >> @@ -1,5 +1,6 @@ >> /* >> * Copyright (c) 2012 Nicolas George >> + * Copyright (c) 2024 Yigithan Yigit - 32 Bit Float Audio Support >> * >> * This file is part of FFmpeg. >> * >> @@ -20,48 +21,62 @@ >> >> #include "libavutil/channel_layout.h" >> #include "libavutil/avassert.h" >> +#include "libavutil/mem.h" >> #include "audio.h" >> #include "avfilter.h" >> #include "internal.h" >> >> +#define MAX_DB_FLT 1024 >> #define MAX_DB 91 >> +#define HISTOGRAM_SIZE 0x10000 >> +#define HISTOGRAM_SIZE_FLT (MAX_DB_FLT*2) >> + >> +typedef struct VolDetectContext VolDetectContext; >> >> typedef struct VolDetectContext { >> - /** >> - * Number of samples at each PCM value. >> - * histogram[0x8000 + i] is the number of samples at value i. >> - * The extra element is there for symmetry. 
>> - */ >> - uint64_t histogram[0x10001]; >> + uint64_t* histogram; ///< for integer number of samples at each PCM value, for float number of samples at each dB >> + uint64_t nb_samples; ///< number of samples >> + double sum2; ///< sum of the squares of the samples >> + double max; ///< maximum sample value >> + int is_float; ///< true if the input is in floating point >> + void (*process_samples)(VolDetectContext *vd, AVFrame *samples); >> } VolDetectContext; >> >> -static inline double logdb(uint64_t v) >> +static inline double logdb(double v, enum AVSampleFormat sample_fmt) >> { >> - double d = v / (double)(0x8000 * 0x8000); >> - if (!v) >> - return MAX_DB; >> - return -log10(d) * 10; >> + if (sample_fmt == AV_SAMPLE_FMT_FLT) { > > There's no point in doing this. You've already up-converted to double precision and do all the calculations in double precision. Maybe that's fine or maybe not, but either way, this doesn't look sensible. > >> + if (!v) >> + return MAX_DB_FLT; >> + return -log10(v) * 10; >> + } else { >> + double d = v / (double)(0x8000 * 0x8000); >> + if (!v) >> + return MAX_DB; >> + return -log10(d) * 10; >> + } >> +} >> + If I understand your concerns correctly, we should have a function like this: > static inline double logdb(double v, enum AVSampleFormat sample_fmt) > { > if (!v) > return sample_fmt == AV_SAMPLE_FMT_FLT ? MAX_DB_FLT : MAX_DB; > > if (sample_fmt == AV_SAMPLE_FMT_S16) > v = ldexp(v, -30); > > return -log10(v) * 10; > } What do you think about that? Thanks for the feedback, Yigithan
diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c index 327801a7f9..edd2d56f7a 100644 --- a/libavfilter/af_volumedetect.c +++ b/libavfilter/af_volumedetect.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2012 Nicolas George + * Copyright (c) 2024 Yigithan Yigit - 32 Bit Float Audio Support * * This file is part of FFmpeg. * @@ -20,48 +21,62 @@ #include "libavutil/channel_layout.h" #include "libavutil/avassert.h" +#include "libavutil/mem.h" #include "audio.h" #include "avfilter.h" #include "internal.h" +#define MAX_DB_FLT 1024 #define MAX_DB 91 +#define HISTOGRAM_SIZE 0x10000 +#define HISTOGRAM_SIZE_FLT (MAX_DB_FLT*2) + +typedef struct VolDetectContext VolDetectContext; typedef struct VolDetectContext { - /** - * Number of samples at each PCM value. - * histogram[0x8000 + i] is the number of samples at value i. - * The extra element is there for symmetry. - */ - uint64_t histogram[0x10001]; + uint64_t* histogram; ///< for integer number of samples at each PCM value, for float number of samples at each dB + uint64_t nb_samples; ///< number of samples + double sum2; ///< sum of the squares of the samples + double max; ///< maximum sample value + int is_float; ///< true if the input is in floating point + void (*process_samples)(VolDetectContext *vd, AVFrame *samples); } VolDetectContext; -static inline double logdb(uint64_t v) +static inline double logdb(double v, enum AVSampleFormat sample_fmt) { - double d = v / (double)(0x8000 * 0x8000); - if (!v) - return MAX_DB; - return -log10(d) * 10; + if (sample_fmt == AV_SAMPLE_FMT_FLT) { + if (!v) + return MAX_DB_FLT; + return -log10(v) * 10; + } else { + double d = v / (double)(0x8000 * 0x8000); + if (!v) + return MAX_DB; + return -log10(d) * 10; + } +} + +static void update_float_stats(VolDetectContext *vd, float *audio_data) +{ + double sample; + int idx; + if(!isfinite(*audio_data) || isnan(*audio_data)) + return; + sample = fabsf(*audio_data); + if (sample > vd->max) + vd->max = sample; + vd->sum2 += 
sample * sample; + idx = (int)floorf(logdb(sample * sample, AV_SAMPLE_FMT_FLT)) + MAX_DB_FLT; + vd->histogram[idx]++; + vd->nb_samples++; } static int filter_frame(AVFilterLink *inlink, AVFrame *samples) { AVFilterContext *ctx = inlink->dst; VolDetectContext *vd = ctx->priv; - int nb_samples = samples->nb_samples; - int nb_channels = samples->ch_layout.nb_channels; - int nb_planes = nb_channels; - int plane, i; - int16_t *pcm; - - if (!av_sample_fmt_is_planar(samples->format)) { - nb_samples *= nb_channels; - nb_planes = 1; - } - for (plane = 0; plane < nb_planes; plane++) { - pcm = (int16_t *)samples->extended_data[plane]; - for (i = 0; i < nb_samples; i++) - vd->histogram[pcm[i] + 0x8000]++; - } + + vd->process_samples(vd, samples); return ff_filter_frame(inlink->dst->outputs[0], samples); } @@ -73,6 +88,20 @@ static void print_stats(AVFilterContext *ctx) uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0; uint64_t histdb[MAX_DB + 1] = { 0 }; + if (!vd->nb_samples) + return; + if (vd->is_float) { + av_log(ctx, AV_LOG_INFO, "n_samples: %" PRId64 "\n", vd->nb_samples); + av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(vd->sum2 / vd->nb_samples, AV_SAMPLE_FMT_FLT)); + av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -2.0*logdb(vd->max, AV_SAMPLE_FMT_FLT)); + for (i = 0; i < HISTOGRAM_SIZE_FLT && !vd->histogram[i]; i++); + for (; i >= 0 && sum < vd->nb_samples / 1000; i++) { + if (!vd->histogram[i]) + continue; + av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %" PRId64 "\n", MAX_DB_FLT - i, vd->histogram[i]); + sum += vd->histogram[i]; + } + } else { for (i = 0; i < 0x10000; i++) nb_samples += vd->histogram[i]; av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples); @@ -92,26 +121,61 @@ static void print_stats(AVFilterContext *ctx) return; power = (power + nb_samples_shift / 2) / nb_samples_shift; av_assert0(power <= 0x8000 * 0x8000); - av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power)); + av_log(ctx, AV_LOG_INFO, 
"mean_volume: %.1f dB\n", -logdb((double)power, AV_SAMPLE_FMT_S16)); max_volume = 0x8000; while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] && !vd->histogram[0x8000 - max_volume]) max_volume--; - av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume)); + av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb((double)(max_volume * max_volume), AV_SAMPLE_FMT_S16)); for (i = 0; i < 0x10000; i++) - histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i]; + histdb[(int)logdb((double)(i - 0x8000) * (i - 0x8000), AV_SAMPLE_FMT_S16)] += vd->histogram[i]; for (i = 0; i <= MAX_DB && !histdb[i]; i++); for (; i <= MAX_DB && sum < nb_samples / 1000; i++) { - av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]); + av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", -i, histdb[i]); sum += histdb[i]; } + } +} + +static int config_output(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + VolDetectContext *vd = ctx->priv; + size_t histogram_size; + + vd->is_float = outlink->format == AV_SAMPLE_FMT_FLT || + outlink->format == AV_SAMPLE_FMT_FLTP; + + if (!vd->is_float) { + /* + * Number of samples at each PCM value. + * Only used for integer formats. + * For 16 bit signed PCM there are 65536. + * histogram[0x8000 + i] is the number of samples at value i. + * The extra element is there for symmetry. + */ + histogram_size = HISTOGRAM_SIZE + 1; + } else { + /* + * The histogram is used to store the number of samples at each dB + * instead of the number of samples at each PCM value. 
+ */ + histogram_size = HISTOGRAM_SIZE_FLT + 1; + } + vd->histogram = av_calloc(histogram_size, sizeof(uint64_t)); + if (!vd->histogram) + return AVERROR(ENOMEM); + return 0; } static av_cold void uninit(AVFilterContext *ctx) { + VolDetectContext *vd = ctx->priv; print_stats(ctx); + if (vd->histogram) + av_freep(&vd->histogram); } static const AVFilterPad volumedetect_inputs[] = { @@ -122,6 +186,14 @@ static const AVFilterPad volumedetect_inputs[] = { }, }; +static const AVFilterPad volumedetect_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_AUDIO, + .config_props = config_output, + }, +}; + const AVFilter ff_af_volumedetect = { .name = "volumedetect", .description = NULL_IF_CONFIG_SMALL("Detect audio volume."), @@ -129,6 +201,9 @@ const AVFilter ff_af_volumedetect = { .uninit = uninit, .flags = AVFILTER_FLAG_METADATA_ONLY, FILTER_INPUTS(volumedetect_inputs), - FILTER_OUTPUTS(ff_audio_default_filterpad), - FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P), + FILTER_OUTPUTS(volumedetect_outputs), + FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, + AV_SAMPLE_FMT_S16P, + AV_SAMPLE_FMT_FLT, + AV_SAMPLE_FMT_FLTP), };