From 0388038ff2c7816194d01cf4c8966aab4a504b92 Mon Sep 17 00:00:00 2001
From: Paul B Mahol <onemda@gmail.com>
Date: Sat, 2 Dec 2023 00:51:54 +0100
Subject: [PATCH] avfilter: add aspace filter
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
doc/filters.texi | 62 +++++
libavfilter/Makefile | 1 +
libavfilter/af_aspace.c | 427 ++++++++++++++++++++++++++++++++++
libavfilter/allfilters.c | 1 +
libavfilter/aspace_template.c | 57 +++++
5 files changed, 548 insertions(+)
create mode 100644 libavfilter/af_aspace.c
create mode 100644 libavfilter/aspace_template.c
@@ -3274,6 +3274,68 @@ Set oversampling factor.
This filter supports the all above options as @ref{commands}.
+@section aspace
+Apply Arbitrary Distance-based Amplitude Panning to input audio stream.
+
+Accepted input audio channel layout is mono only.
+
+It accepts the following options:
+@table @option
+@item layout
+The channel layout of the output stream. The default is "stereo".
+
+@item a
+Set the source audio azimuth position in degrees.
+Allowed values are from @var{-180.0} to @var{180.0}.
+
+@item e
+Set the source audio elevation position in degrees.
+Allowed values are from @var{-90.0} to @var{90.0}.
+
+@item r
+Set the source audio distance from central point.
+
+@item b
+Set the spatial blur factor. Allowed values are from @var{0} to @var{100.0}.
+
+@item o
+Set the rolloff in decibels. This sets the inverse distance law for sound propagating
+in a free field. Allowed range is from @var{0} to @var{90}.
+
+@item R
+Set the distance of each speaker in regular (circular) layout from central point.
+Allowed values are from @var{0.1} to @var{100.0}.
+
+@item precision
+Set which precision to use when processing samples.
+
+@table @option
+@item auto
+Auto pick internal sample format depending on other filters.
+
+@item float
+Always use single-precision floating point sample format.
+
+@item double
+Always use double-precision floating point sample format.
+@end table
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Pan single-channel input audio stream into 5.1 layout with custom azimuth position of -15 deg
+and spatial blur of 0.1 with custom rolloff of 6 dB:
+@example
+aspace=layout=5.1:b=0.1:o=6:a=-15
+@end example
+@end itemize
+
+@subsection Commands
+
+This filter supports some of the above options as @ref{commands}.
+
@section aspectralstats
Display frequency domain statistical information about the audio channels.
@@ -108,6 +108,7 @@ OBJS-$(CONFIG_ASHOWINFO_FILTER) += af_ashowinfo.o
OBJS-$(CONFIG_ASIDEDATA_FILTER) += f_sidedata.o
OBJS-$(CONFIG_ASISDR_FILTER) += af_asdr.o
OBJS-$(CONFIG_ASOFTCLIP_FILTER) += af_asoftclip.o
+OBJS-$(CONFIG_ASPACE_FILTER) += af_aspace.o
OBJS-$(CONFIG_ASPECTRALSTATS_FILTER) += af_aspectralstats.o
OBJS-$(CONFIG_ASPLIT_FILTER) += split.o
OBJS-$(CONFIG_ASR_FILTER) += af_asr.o
new file mode 100644
@@ -0,0 +1,427 @@
+/*
+ * Copyright (c) 2023 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
+
+#include "libavutil/avstring.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/float_dsp.h"
+#include "libavutil/opt.h"
+#include "libavutil/avassert.h"
+#include "audio.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+
+/* Values of the "precision" option; they select the sample format(s) the filter negotiates. */
+enum PrecisionType {
+ /* Offer both planar float and planar double; the negotiated output format decides. */
+ P_AUTO = -1,
+ /* Planar float (AV_SAMPLE_FMT_FLTP) only. */
+ P_SINGLE,
+ /* Planar double (AV_SAMPLE_FMT_DBLP) only. */
+ P_DOUBLE,
+ /* Count of explicit precision types; NB_PTYPES-1 is the option's upper bound. */
+ NB_PTYPES,
+};
+
+/* State kept for one output channel (one speaker). */
+typedef struct Speaker {
+ /* Cartesian x/y/z position, derived in config_output() from the speaker's azimuth/elevation and the "R" radius. */
+ double position[3];
+ /* Distance from this speaker to the source position, including the blur term; updated while filtering. */
+ double distance;
+ /* Most recently computed gain for this speaker; updated while filtering. */
+ double gain;
+} Speaker;
+
+/* Private context of the aspace filter. */
+typedef struct AudioSpaceContext {
+ const AVClass *class;
+
+ /* Source position as set by the options: [0] azimuth "a" and [1] elevation "e" in degrees, [2] distance "r". */
+ double polar[3];
+ /* One of enum PrecisionType ("precision" option). */
+ int precision;
+ /* Becomes 1 once gains have been computed in filter_frame(). */
+ int set;
+ /* Output channel layout ("layout" option). */
+ AVChannelLayout outlayout;
+ /* Scratch frame holding one gain value per output channel and sample. */
+ AVFrame *w;
+
+ /* a: rolloff exponent, rolloff / (20 * log10(2)); k: gain normalization factor;
+  * dmax: largest distance between any speaker and the reference point. */
+ double a, k, dmax;
+ /* "b" option: blur term added (squared) to every squared source-speaker distance. */
+ double blur;
+ /* "R" option: radius used when placing the speakers. */
+ double radius;
+ /* "o" option: rolloff in decibels. */
+ double rolloff;
+ /* Cartesian source position derived from polar[] for the current frame. */
+ double source[3];
+ /* Cartesian source position of the previous frame; start point of the per-sample interpolation. */
+ double prev_source[3];
+ /* Mean of all speaker positions (computed in config_output()). */
+ double reference[3];
+ /* Array with one entry per output channel; allocated in config_output(), freed in uninit(). */
+ Speaker *speakers;
+
+ /* Sample-format specific routine multiplying the mono input by the gains in w. */
+ void (*process)(AVFilterContext *ctx, AVFrame *in, AVFrame *out, AVFrame *w);
+ /* DSP helpers allocated in init(); provides the vector multiply used by process(). */
+ AVFloatDSPContext *fdsp;
+} AudioSpaceContext;
+
+/* OFFSET: byte offset of a field inside AudioSpaceContext, for the option table. */
+#define OFFSET(x) offsetof(AudioSpaceContext,x)
+/* AF: flags for audio filtering options fixed at init time. */
+#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+/* AFT: same as AF plus AV_OPT_FLAG_RUNTIME_PARAM, i.e. options also marked as changeable at runtime. */
+#define AFT AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
+
+/*
+ * User-visible options. Entries flagged AFT ("a", "e", "r", "b", "o") carry the
+ * runtime flag; "layout", "R" and "precision" are flagged AF only.
+ * Angles ("a", "e") are in degrees; they are converted to radians where used.
+ * The "auto"/"float"/"double" entries are named constants for "precision" (unit "pre").
+ */
+static const AVOption aspace_options[] = {
+ { "layout", "set the layout of output audio", OFFSET(outlayout), AV_OPT_TYPE_CHLAYOUT, {.str="stereo"}, 0, 0, AF},
+ { "a", "set the azimuth of source audio", OFFSET(polar[0]), AV_OPT_TYPE_DOUBLE, {.dbl=0.}, -180, 180.0, AFT },
+ { "e", "set the elevation of source audio", OFFSET(polar[1]), AV_OPT_TYPE_DOUBLE, {.dbl=0.}, -90.0, 90.0, AFT },
+ { "r", "set the distance of source audio", OFFSET(polar[2]), AV_OPT_TYPE_DOUBLE, {.dbl=2.}, 0.0, DBL_MAX, AFT },
+ { "b", "set the spatial blur factor", OFFSET(blur), AV_OPT_TYPE_DOUBLE, {.dbl=0.2}, 0.0, 100.0, AFT },
+ { "o", "set the rolloff", OFFSET(rolloff), AV_OPT_TYPE_DOUBLE, {.dbl=18}, 0, 90, AFT },
+ /* Note: the lower bound here is 0.1. */
+ { "R", "set the distance of each speaker in regular layout", OFFSET(radius), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0.1, 100, AF },
+ { "precision", "processing precision", OFFSET(precision), AV_OPT_TYPE_INT, {.i64=P_AUTO}, P_AUTO, NB_PTYPES-1, AF, "pre"},
+ { "auto", "auto", 0, AV_OPT_TYPE_CONST, {.i64=P_AUTO}, 0, 0, AF, "pre"},
+ { "float", "single floating-point precision", 0, AV_OPT_TYPE_CONST, {.i64=P_SINGLE}, 0, 0, AF, "pre"},
+ { "double", "double floating-point precision" , 0, AV_OPT_TYPE_CONST, {.i64=P_DOUBLE}, 0, 0, AF, "pre"},
+ {NULL}
+};
+
+/**
+ * Negotiate formats for the filter.
+ *
+ * Sample formats follow the "precision" option (planar float, planar double,
+ * or both for P_AUTO), the output link is restricted to the user-selected
+ * layout, the input link is restricted to mono, and every sample rate is
+ * accepted.
+ *
+ * @return 0 on success, a non-zero error code from the format helpers otherwise
+ */
+static int query_formats(AVFilterContext *ctx)
+{
+    AudioSpaceContext *s = ctx->priv;
+    AVChannelLayout mono = AV_CHANNEL_LAYOUT_MONO;
+    AVFilterChannelLayouts *out_layouts = NULL;
+    AVFilterChannelLayouts *in_layouts = NULL;
+    AVFilterFormats *sample_fmts = NULL;
+    int err = 0;
+
+    switch (s->precision) {
+    case P_AUTO:
+        /* Offer float first, then also double. */
+        err = ff_add_format(&sample_fmts, AV_SAMPLE_FMT_FLTP);
+        if (err)
+            return err;
+        /* fallthrough */
+    case P_DOUBLE:
+        err = ff_add_format(&sample_fmts, AV_SAMPLE_FMT_DBLP);
+        break;
+    case P_SINGLE:
+        err = ff_add_format(&sample_fmts, AV_SAMPLE_FMT_FLTP);
+        break;
+    default:
+        break;
+    }
+    if (err)
+        return err;
+
+    err = ff_set_common_formats(ctx, sample_fmts);
+    if (err)
+        return err;
+
+    /* Output side: exactly the layout requested through the "layout" option. */
+    err = ff_add_channel_layout(&out_layouts, &s->outlayout);
+    if (err)
+        return err;
+    err = ff_channel_layouts_ref(out_layouts, &ctx->outputs[0]->incfg.channel_layouts);
+    if (err)
+        return err;
+
+    /* Input side: mono only. */
+    err = ff_add_channel_layout(&in_layouts, &mono);
+    if (err)
+        return err;
+    err = ff_channel_layouts_ref(in_layouts, &ctx->inputs[0]->outcfg.channel_layouts);
+    if (err)
+        return err;
+
+    return ff_set_common_all_samplerates(ctx);
+}
+
+/* Instantiate the processing template twice: DEPTH 32 yields process_float(),
+ * DEPTH 64 yields process_double(). */
+#define DEPTH 32
+#include "aspace_template.c"
+
+#undef DEPTH
+#define DEPTH 64
+#include "aspace_template.c"
+
+/** Return the square of @p x. */
+static double sqr(double x)
+{
+    const double squared = x * x;
+
+    return squared;
+}
+
+/**
+ * Look up the nominal direction of the speaker that plays channel @p chan.
+ *
+ * @param chan channel identifier (enum AVChannel value)
+ * @param azim receives the azimuth in degrees
+ * @param elev receives the elevation in degrees
+ * @return 0 on success, AVERROR(EINVAL) if the channel has no known position
+ */
+static int speaker_direction(int chan, double *azim, double *elev)
+{
+    *azim = 0;
+    *elev = 0;
+
+    switch (chan) {
+    case AV_CHAN_FRONT_LEFT:            *azim =  30; break;
+    case AV_CHAN_FRONT_RIGHT:           *azim = 330; break;
+    case AV_CHAN_FRONT_CENTER:          *azim =   0; break;
+    case AV_CHAN_LOW_FREQUENCY:
+    case AV_CHAN_LOW_FREQUENCY_2:       *azim =   0; break;
+    case AV_CHAN_BACK_LEFT:             *azim = 150; break;
+    case AV_CHAN_BACK_RIGHT:            *azim = 210; break;
+    case AV_CHAN_BACK_CENTER:           *azim = 180; break;
+    case AV_CHAN_SIDE_LEFT:             *azim =  90; break;
+    case AV_CHAN_SIDE_RIGHT:            *azim = 270; break;
+    case AV_CHAN_FRONT_LEFT_OF_CENTER:  *azim =  15; break;
+    case AV_CHAN_FRONT_RIGHT_OF_CENTER: *azim = 345; break;
+    case AV_CHAN_TOP_CENTER:            *azim =   0; *elev = 90; break;
+    case AV_CHAN_TOP_FRONT_LEFT:        *azim =  30; *elev = 45; break;
+    case AV_CHAN_TOP_FRONT_CENTER:      *azim =   0; *elev = 45; break;
+    case AV_CHAN_TOP_FRONT_RIGHT:       *azim = 330; *elev = 45; break;
+    case AV_CHAN_TOP_BACK_LEFT:         *azim = 150; *elev = 45; break;
+    case AV_CHAN_TOP_BACK_RIGHT:        *azim = 210; *elev = 45; break;
+    case AV_CHAN_TOP_BACK_CENTER:       *azim = 180; *elev = 45; break;
+    case AV_CHAN_WIDE_LEFT:             *azim =  90; break;
+    case AV_CHAN_WIDE_RIGHT:            *azim = 270; break;
+    case AV_CHAN_SURROUND_DIRECT_LEFT:  *azim =  90; break;
+    case AV_CHAN_SURROUND_DIRECT_RIGHT: *azim = 270; break;
+    case AV_CHAN_STEREO_LEFT:           *azim =  90; break;
+    case AV_CHAN_STEREO_RIGHT:          *azim = 270; break;
+    default:
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+/**
+ * Configure the output link.
+ *
+ * Selects the processing routine matching the negotiated sample format,
+ * allocates the speaker array, converts the initial source position from
+ * polar to Cartesian coordinates, places every speaker of the output layout
+ * on a sphere of radius s->radius, and derives the reference point (mean of
+ * the speaker positions) and the largest speaker-to-reference distance.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) on allocation failure,
+ *         AVERROR(EINVAL) if the layout contains a channel without a known
+ *         speaker position (the offending channel is logged)
+ */
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AudioSpaceContext *s = ctx->priv;
+    const int nb_channels = outlink->ch_layout.nb_channels;
+
+    switch (s->precision) {
+    case P_AUTO:
+        s->process = outlink->format == AV_SAMPLE_FMT_FLTP ? process_float : process_double;
+        break;
+    case P_SINGLE:
+        s->process = process_float;
+        break;
+    case P_DOUBLE:
+        s->process = process_double;
+        break;
+    default: av_assert0(0);
+    }
+
+    if (!s->speakers) {
+        s->speakers = av_calloc(nb_channels, sizeof(*s->speakers));
+        if (!s->speakers)
+            return AVERROR(ENOMEM);
+    }
+
+    /* Polar (degrees) -> Cartesian for the initial source position. */
+    s->source[0] = s->polar[2] * cos(s->polar[0] * M_PI / 180.0) * cos(s->polar[1] * M_PI / 180.0);
+    s->source[1] = s->polar[2] * sin(s->polar[0] * M_PI / 180.0) * cos(s->polar[1] * M_PI / 180.0);
+    s->source[2] = s->polar[2] * sin(s->polar[1] * M_PI / 180.0);
+
+    /* No movement before the first frame. */
+    memcpy(s->prev_source, s->source, sizeof(s->prev_source));
+
+    s->reference[0] = 0.0;
+    s->reference[1] = 0.0;
+    s->reference[2] = 0.0;
+
+    for (int ch = 0; ch < nb_channels; ch++) {
+        Speaker *speaker = &s->speakers[ch];
+        int chan = av_channel_layout_channel_from_index(&outlink->ch_layout, ch);
+        double azim, elev;
+        int ret = speaker_direction(chan, &azim, &elev);
+
+        if (ret < 0) {
+            char name[64] = "?";
+
+            /* Tell the user which channel cannot be placed instead of failing silently. */
+            av_channel_name(name, sizeof(name), chan);
+            av_log(ctx, AV_LOG_ERROR,
+                   "Unsupported channel '%s' at index %d of the output layout.\n",
+                   name, ch);
+            return ret;
+        }
+
+        speaker->position[0] = s->radius * cos(azim * M_PI / 180.0) * cos(elev * M_PI / 180.0);
+        speaker->position[1] = s->radius * sin(azim * M_PI / 180.0) * cos(elev * M_PI / 180.0);
+        speaker->position[2] = s->radius * sin(elev * M_PI / 180.0);
+
+        s->reference[0] += speaker->position[0];
+        s->reference[1] += speaker->position[1];
+        s->reference[2] += speaker->position[2];
+    }
+
+    /* Reference point: arithmetic mean of all speaker positions. */
+    s->reference[0] /= nb_channels;
+    s->reference[1] /= nb_channels;
+    s->reference[2] /= nb_channels;
+
+    s->dmax = 0.0;
+    for (int ch = 0; ch < nb_channels; ch++) {
+        const Speaker *speaker = &s->speakers[ch];
+        double distance;
+
+        distance = sqrt(sqr(speaker->position[0] - s->reference[0]) +
+                        sqr(speaker->position[1] - s->reference[1]) +
+                        sqr(speaker->position[2] - s->reference[2]));
+
+        s->dmax = fmax(s->dmax, distance);
+    }
+
+    return 0;
+}
+
+/**
+ * Linearly interpolate between two 3-component positions.
+ *
+ * @param out receives a + (b - a) * f, component by component
+ * @param a   start position (returned for f == 0)
+ * @param b   end position
+ * @param f   interpolation factor
+ */
+static inline void iposition(double *out, const double *a,
+                             const double *b, double f)
+{
+    for (int axis = 0; axis < 3; axis++) {
+        const double delta = b[axis] - a[axis];
+
+        out[axis] = a[axis] + delta * f;
+    }
+}
+
+/**
+ * Recompute the gain of every speaker for a source located at @p source.
+ *
+ * Uses the current blur (s->blur), rolloff exponent (s->a), reference point
+ * and largest speaker distance (s->dmax). The per-speaker distance and gain
+ * are stored in s->speakers[], the normalization factor in s->k.
+ */
+static void update_gains(AudioSpaceContext *s, int nb_channels,
+                         const double *source)
+{
+    /* Lower bound for the distance fed to the power law. With blur set to 0
+     * and the source exactly on a speaker the distance is 0, which would
+     * produce 0/0 = NaN gains. 1e-9 keeps pow(distance, 2 * a) non-zero for
+     * the whole rolloff range (a is at most about 15). */
+    const double min_distance = 1e-9;
+    const double a = s->a;
+    double sum = 0.0;
+    double drs, p;
+
+    for (int ch = 0; ch < nb_channels; ch++) {
+        Speaker *speaker = &s->speakers[ch];
+        const double distance = sqrt(sqr(speaker->position[0] - source[0]) +
+                                     sqr(speaker->position[1] - source[1]) +
+                                     sqr(speaker->position[2] - source[2]) +
+                                     sqr(s->blur));
+
+        speaker->distance = fmax(distance, min_distance);
+        sum += 1.0 / pow(speaker->distance, 2.0 * a);
+    }
+
+    /* Distance between the source and the reference point of the layout. */
+    drs = sqrt(sqr(s->reference[0] - source[0]) +
+               sqr(s->reference[1] - source[1]) +
+               sqr(s->reference[2] - source[2]));
+
+    /* Attenuate only when the source is farther away than the farthest speaker. */
+    p = fmin(s->dmax / drs, 1.0);
+    s->k = pow(p, 2.0 * a) / sqrt(sum);
+
+    for (int ch = 0; ch < nb_channels; ch++) {
+        Speaker *speaker = &s->speakers[ch];
+        /* Always evaluate in double: powf() underflows to 0 for small
+         * distances combined with a large rolloff and yields infinite gains. */
+        const double gain = s->k / pow(speaker->distance, a);
+
+        /* Overflow for extremely distant sources ends up as NaN; play silence instead. */
+        speaker->gain = isfinite(gain) ? gain : 0.0;
+    }
+}
+
+/**
+ * Write the current speaker gains into samples [start, end) of every
+ * channel of the gain frame s->w.
+ *
+ * @param is_float non-zero if s->w holds planar float samples, zero for planar double
+ */
+static void store_gains(AudioSpaceContext *s, int nb_channels, int is_float,
+                        int start, int end)
+{
+    for (int ch = 0; ch < nb_channels; ch++) {
+        const double gain = s->speakers[ch].gain;
+
+        if (is_float) {
+            float *w = (float *)s->w->extended_data[ch];
+
+            for (int n = start; n < end; n++)
+                w[n] = gain;
+        } else {
+            double *w = (double *)s->w->extended_data[ch];
+
+            for (int n = start; n < end; n++)
+                w[n] = gain;
+        }
+    }
+}
+
+/**
+ * Pan one mono input frame to the output layout.
+ *
+ * A gain is produced for every output channel and sample. While the source
+ * position is unchanged since the previous frame, one set of gains is
+ * computed from the current option values and used for the whole frame, so
+ * runtime changes of blur and rolloff take effect. When the source moved,
+ * the position is interpolated linearly per sample from the previous to the
+ * new position. The input is then multiplied by the gains through
+ * s->process().
+ *
+ * Takes ownership of @p in.
+ *
+ * @return result of ff_filter_frame() on success, a negative error code otherwise
+ */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AudioSpaceContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    const int nb_channels = outlink->ch_layout.nb_channels;
+    const int is_float = outlink->format == AV_SAMPLE_FMT_FLTP;
+    AVFrame *out;
+    int ret;
+
+    av_assert0(is_float || outlink->format == AV_SAMPLE_FMT_DBLP);
+
+    /* (Re)allocate the gain frame when it is missing or too short. */
+    if (!s->w || s->w->nb_samples < in->nb_samples) {
+        av_frame_free(&s->w);
+        s->w = ff_get_audio_buffer(outlink, in->nb_samples);
+        if (!s->w) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+    }
+
+    out = ff_get_audio_buffer(outlink, in->nb_samples);
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+    ret = av_frame_copy_props(out, in);
+    if (ret < 0) {
+        av_frame_free(&out);
+        av_frame_free(&in);
+        return ret;
+    }
+
+    /* Polar (degrees) -> Cartesian; the options may have changed at runtime. */
+    s->source[0] = s->polar[2] * cos(s->polar[0] * M_PI / 180.0) * cos(s->polar[1] * M_PI / 180.0);
+    s->source[1] = s->polar[2] * sin(s->polar[0] * M_PI / 180.0) * cos(s->polar[1] * M_PI / 180.0);
+    s->source[2] = s->polar[2] * sin(s->polar[1] * M_PI / 180.0);
+
+    /* Rolloff in dB per doubling of distance -> exponent of the distance law. */
+    s->a = s->rolloff / (20.0 * log10(2.0));
+
+    if (!memcmp(s->prev_source, s->source, sizeof(s->source))) {
+        /* Source at rest: one gain per channel for the whole frame. */
+        update_gains(s, nb_channels, s->source);
+        store_gains(s, nb_channels, is_float, 0, out->nb_samples);
+    } else {
+        /* Source moved: glide from the previous to the new position. */
+        const double scale = 1.0 / out->nb_samples;
+
+        for (int n = 0; n < out->nb_samples; n++) {
+            double position[3];
+
+            iposition(position, s->prev_source, s->source, n * scale);
+            update_gains(s, nb_channels, position);
+            store_gains(s, nb_channels, is_float, n, n + 1);
+        }
+    }
+    s->set = 1;
+
+    memcpy(s->prev_source, s->source, sizeof(s->prev_source));
+
+    s->process(ctx, in, out, s->w);
+
+    av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+/**
+ * Allocate the float DSP context used by the processing routines.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if the allocation fails
+ */
+static av_cold int init(AVFilterContext *ctx)
+{
+    AudioSpaceContext *priv = ctx->priv;
+
+    priv->fdsp = avpriv_float_dsp_alloc(0);
+
+    return priv->fdsp ? 0 : AVERROR(ENOMEM);
+}
+
+/** Release everything owned by the context: gain frame, speaker array and DSP context. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AudioSpaceContext *priv = ctx->priv;
+
+    av_frame_free(&priv->w);
+    av_freep(&priv->speakers);
+    av_freep(&priv->fdsp);
+}
+
+/* Single audio input pad; every incoming frame is handled by filter_frame(). */
+static const AVFilterPad inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_AUDIO,
+ .filter_frame = filter_frame,
+ },
+};
+
+/* Single audio output pad; config_output() sets up the speakers for the output layout. */
+static const AVFilterPad outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_AUDIO,
+ .config_props = config_output,
+ },
+};
+
+/* Defines aspace_class from aspace_options. */
+AVFILTER_DEFINE_CLASS(aspace);
+
+/* Filter definition registered as "aspace". */
+const AVFilter ff_af_aspace = {
+ .name = "aspace",
+ .description = NULL_IF_CONFIG_SMALL("Arbitrary Distance Amplitude Panning"),
+ .priv_size = sizeof(AudioSpaceContext),
+ .priv_class = &aspace_class,
+ .init = init,
+ .uninit = uninit,
+ FILTER_QUERY_FUNC(query_formats),
+ FILTER_INPUTS(inputs),
+ FILTER_OUTPUTS(outputs),
+ /* Generic command handler; presumably what lets the options flagged AFT be changed at runtime -- confirm. */
+ .process_command = ff_filter_process_command,
+};
@@ -94,6 +94,7 @@ extern const AVFilter ff_af_ashowinfo;
extern const AVFilter ff_af_asidedata;
extern const AVFilter ff_af_asisdr;
extern const AVFilter ff_af_asoftclip;
+extern const AVFilter ff_af_aspace;
extern const AVFilter ff_af_aspectralstats;
extern const AVFilter ff_af_asplit;
extern const AVFilter ff_af_asr;
new file mode 100644
@@ -0,0 +1,57 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avfilter.h"
+#include "internal.h"
+#include "audio.h"
+
+/*
+ * Per-instantiation type selection: the including file defines DEPTH (32 or 64)
+ * before each inclusion. ftype is the sample type, SAMPLE_FORMAT the suffix
+ * appended to function names.
+ */
+#undef ftype
+#undef SAMPLE_FORMAT
+#if DEPTH == 32
+#define SAMPLE_FORMAT float
+#define ftype float
+#else
+#define SAMPLE_FORMAT double
+#define ftype double
+#endif
+
+/* fn(name) expands to name_float or name_double; the two-level indirection
+ * makes SAMPLE_FORMAT expand before token pasting. */
+#define fn3(a,b) a##_##b
+#define fn2(a,b) fn3(a,b)
+#define fn(a) fn2(a, SAMPLE_FORMAT)
+
+/**
+ * Multiply the mono input by the per-channel gain curves.
+ *
+ * For every output channel, channel 0 of @p in is multiplied sample by sample
+ * with the matching channel of the gain frame @p w and stored in @p out.
+ * The sample count is rounded up to a multiple of 16 before it is handed to
+ * the vector multiply.
+ */
+static void fn(process)(AVFilterContext *ctx,
+                        AVFrame *in, AVFrame *out, AVFrame *w)
+{
+    AudioSpaceContext *s = ctx->priv;
+    const ftype *src = (const ftype *)in->extended_data[0];
+    const int len = FFALIGN(in->nb_samples, 16);
+    const int channels = out->ch_layout.nb_channels;
+
+    for (int ch = 0; ch < channels; ch++) {
+        ftype *dst = (ftype *)out->extended_data[ch];
+        const ftype *gains = (const ftype *)w->extended_data[ch];
+
+#if DEPTH == 32
+        s->fdsp->vector_fmul(dst, src, gains, len);
+#else
+        s->fdsp->vector_dmul(dst, src, gains, len);
+#endif
+    }
+}
--
2.42.1