
[FFmpeg-devel,TESTERS,WANTED] avfilter: add apitch filter

Message ID 20190511182529.30045-1-onemda@gmail.com
State Superseded

Commit Message

Paul B Mahol May 11, 2019, 6:25 p.m. UTC
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---

This filter can dynamically change both the tempo and the pitch of audio.
The supported scale range is also wider than atempo's: from 0.01 to 100.
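
As a rough illustration only (not part of the patch), the dynamic control maps
onto the usual libavfilter command mechanism; assuming a configured graph that
contains an apitch instance named "ap", an API user could do something like:

    #include <stdio.h>
    #include <libavfilter/avfilter.h>

    /* Illustrative helper: forward a new pitch factor to the filter's
     * process_command() handler, which accepts "pitch" and "tempo". */
    static int set_pitch(AVFilterGraph *graph, double pitch)
    {
        char arg[32], resp[128];
        snprintf(arg, sizeof(arg), "%f", pitch);
        return avfilter_graph_send_command(graph, "ap", "pitch", arg,
                                           resp, sizeof(resp), 0);
    }

The same can be done from the command line with the sendcmd/asendcmd filters.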

---
 libavfilter/Makefile     |   1 +
 libavfilter/af_apitch.c  | 764 +++++++++++++++++++++++++++++++++++++++
 libavfilter/allfilters.c |   1 +
 3 files changed, 766 insertions(+)
 create mode 100644 libavfilter/af_apitch.c

Comments

Nicolas George May 11, 2019, 6:27 p.m. UTC | #1
Paul B Mahol (12019-05-11):
> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
> 
> This filter can dynamically change both the tempo and the pitch of audio.
> The supported scale range is also wider than atempo's: from 0.01 to 100.
> 
> ---
>  libavfilter/Makefile     |   1 +
>  libavfilter/af_apitch.c  | 764 +++++++++++++++++++++++++++++++++++++++
>  libavfilter/allfilters.c |   1 +
>  3 files changed, 766 insertions(+)
>  create mode 100644 libavfilter/af_apitch.c

For the usability and maintenance reasons already explained, this must be part
of atempo.

Also, missing doc.
Paul B Mahol May 11, 2019, 6:49 p.m. UTC | #2
Hi,

On 5/11/19, Nicolas George <george@nsup.org> wrote:
> Paul B Mahol (12019-05-11):
>> Signed-off-by: Paul B Mahol <onemda@gmail.com>
>> ---
>>
>> This filter can dynamically change both the tempo and the pitch of audio.
>> The supported scale range is also wider than atempo's: from 0.01 to 100.
>>
>> ---
>>  libavfilter/Makefile     |   1 +
>>  libavfilter/af_apitch.c  | 764 +++++++++++++++++++++++++++++++++++++++
>>  libavfilter/allfilters.c |   1 +
>>  3 files changed, 766 insertions(+)
>>  create mode 100644 libavfilter/af_apitch.c
>
> For the usability and maintenance reasons already explained, this must be
> part of atempo.

Thanks for kind and very helpful comments.

It is currently a separate filter, for ease of testing.
It cannot be "part" of the current atempo implementation for numerous reasons,
the most significant being the bad output quality of the current atempo
implementation with tempo scale factors below 1. The current atempo
implementation also has limits on how far the scale can be reduced, unlike
the rubberband filter in libavfilter. The rubberband filter also offers
similar and larger functionality.

That is why I am asking for quality testers.
The best test would be to compare this filter with rubberband and with the
current atempo filter.

This filter is intended as a full replacement for the current atempo.
(atempo would be just this filter with the same first two options, but with
their positions swapped.)

>
> Also, missing doc.

The options are self-explanatory, at least the first two.
Documentation will be added later.

Regards, and have a nice day.
Nicolas George May 12, 2019, 6:13 p.m. UTC | #3
Paul B Mahol (12019-05-11):
> It is currently a separate filter, for ease of testing.

Then no problem. But I insist: once it is in shape, for the convenience
of the users and ease of maintenance, it should be a single filter.

Of course, if you disagree, we can discuss it here.

> It cannot be "part" of the current atempo implementation for numerous reasons,
> the most significant being the bad output quality of the current atempo
> implementation with tempo scale factors below 1.

This is one of the reasons it should be a single filter: the users
should not need to know the arbitrary limits of the different filter
implementations. The filters are there to perform a task, and they should
select the best implementation for it automatically.
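
To be clear about what I mean, a purely illustrative sketch (the threshold and
engine names are invented): atempo's init would pick the engine from the
requested factor instead of exposing its limits to the user.

    enum TempoEngine { ENGINE_WSOLA, ENGINE_PHASE_VOCODER };

    /* Purely illustrative: choose an engine from the requested factor. */
    static enum TempoEngine pick_engine(double tempo)
    {
        if (tempo >= 0.5 && tempo <= 2.0)
            return ENGINE_WSOLA;         /* current atempo code path       */
        return ENGINE_PHASE_VOCODER;     /* new code path, for wider range */
    }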

> The current atempo implementation also has limits on how far the scale can
> be reduced,

IIRC, the limits were rather arbitrary. But even if they are not, my
point stands: the filter should select the best implementation for the
requested parameters, automatically and without bothering the users.

> unlike the rubberband filter in libavfilter. The rubberband filter also
> offers similar and larger functionality.

Oh, great, another filter with the same task. If I had noticed at the
time, I would have insisted that it be part of atempo too.

> That is why I am asking for quality testers.
> The best test would be to compare this filter with rubberband and with the
> current atempo filter.
> 
> This filter is intended as a full replacement for the current atempo.
> (atempo would be just this filter with the same first two options, but with
> their positions swapped.)

Swapping the positions will break existing scripts. It should be avoided.

> The options are self-explanatory, at least the first two.
> Documentation will be added later.

Ok.
Paul B Mahol May 12, 2019, 6:26 p.m. UTC | #4
Hi,

On 5/12/19, Nicolas George <george@nsup.org> wrote:
> Paul B Mahol (12019-05-11):
>> It is currently a separate filter, for ease of testing.
>
> Then no problem. But I insist: once it is in shape, for the convenience
> of the users and ease of maintenance, it should be a single filter.
>
> Of course, if you disagree, we can discuss it here.
>
>> It cannot be "part" of the current atempo implementation for numerous
>> reasons, the most significant being the bad output quality of the current
>> atempo implementation with tempo scale factors below 1.
>
> This is one of the reasons it should be a single filter: the users
> should not need to know the arbitrary limits of the different filter
> implementations. The filters are there to perform a task, and they should
> select the best implementation for it automatically.

That is a hard problem: how would one know which implementation is best for
the user's scenario?

>
>> The current atempo implementation also has limits on how far the scale
>> can be reduced,
>
> IIRC, the limits were rather arbitrary. But even if they are not, my
> point stands: the filter should select the best implementation for the
> requested parameters, automatically and without bothering the users.

I disagree; the current filter has artifacts with small scale factors.

>
>> unlike the rubberband filter in libavfilter. The rubberband filter also
>> offers similar and larger functionality.
>
> Oh, great, another filter with the same task. If I had noticed at the
> time, I would have insisted that it be part of atempo too.

It looks like I failed to explain it clearly enough: rubberband is a filter
that uses an external library. Do you still insist that it be part of
atempo?

>
>> That is why I am asking for quality testers.
>> The best test would be to compare this filter with rubberband and with the
>> current atempo filter.
>>
>> This filter is intended as a full replacement for the current atempo.
>> (atempo would be just this filter with the same first two options, but with
>> their positions swapped.)
>
> Swapping the positions will break existing scripts. It should be avoided.

It looks like you misunderstood my explanation.
Let me try to explain it again, in more detail:
I mean there would be two filters doing the same thing:
apitch, with pitch as the 1st option and tempo as the 2nd;
atempo, with tempo as the 1st option and pitch as the 2nd.
These filters would share the same C file.
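
To make the idea concrete, an illustrative sketch only (assuming both tables
live in the same C file and reuse its OFFSET/AF macros, pads and callbacks;
this is not the actual patch):

    static const AVOption apitch_options[] = {
        { "pitch", "set pitch scale factor", OFFSET(pitch), AV_OPT_TYPE_FLOAT, {.dbl=1}, .01, 100, AF },
        { "tempo", "set tempo scale factor", OFFSET(tempo), AV_OPT_TYPE_FLOAT, {.dbl=1}, .01, 100, AF },
        { NULL },
    };

    static const AVOption atempo_options[] = {
        { "tempo", "set tempo scale factor", OFFSET(tempo), AV_OPT_TYPE_FLOAT, {.dbl=1}, .01, 100, AF },
        { "pitch", "set pitch scale factor", OFFSET(pitch), AV_OPT_TYPE_FLOAT, {.dbl=1}, .01, 100, AF },
        { NULL },
    };

    AVFILTER_DEFINE_CLASS(apitch);
    AVFILTER_DEFINE_CLASS(atempo);

    /* Both filters share the pads and callbacks defined in this file;
     * only the name and the option order differ. */
    AVFilter ff_af_apitch = { .name = "apitch", .priv_class = &apitch_class, /* ... */ };
    AVFilter ff_af_atempo = { .name = "atempo", .priv_class = &atempo_class, /* ... */ };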

Regards, have a nice day
Nicolas George May 12, 2019, 6:30 p.m. UTC | #5
Paul B Mahol (12019-05-12):
> That is a hard problem: how would one know which implementation is best for
> the user's scenario?

Better the developers, who know the limits of their filters, than the users,
who do not.

> I disagree; the current filter has artifacts with small scale factors.

And that is what you are about to fix, is it not?

> It looks like I failed to explain it clearly enough: rubberband is a filter
> that uses an external library. Do you still insist that it be part of
> atempo?

I know that. And yes, of course.

> It looks like you misunderstood my explanation.
> Let me try to explain it again, in more detail:
> I mean there would be two filters doing the same thing:
> apitch, with pitch as the 1st option and tempo as the 2nd;
> atempo, with tempo as the 1st option and pitch as the 2nd.
> These filters would share the same C file.

Indeed, I missed the fact that you did not understand:

No, there will not be an apitch filter, whether it lives in af_apitch.c
or af_atempo.c. There will be a single filter, named atempo, that will
bring the best quality to all users, existing and new ones.
Paul B Mahol May 12, 2019, 6:44 p.m. UTC | #6
On 5/12/19, Nicolas George <george@nsup.org> wrote:
> Paul B Mahol (12019-05-12):
>> That is a hard problem: how would one know which implementation is best
>> for the user's scenario?
>
> Better the developers, who know the limits of their filters, than the users,
> who do not.
>
>> I disagree; the current filter has artifacts with small scale factors.
>
> And that is what you are about to fix, is it not?
>
>> It looks like I failed to explain it clearly enough: rubberband is a
>> filter that uses an external library. Do you still insist that it be part
>> of atempo?
>
> I know that. And yes, of course.
>
>> It looks like you misunderstood my explanation.
>> Let me try to explain it again, in more detail:
>> I mean there would be two filters doing the same thing:
>> apitch, with pitch as the 1st option and tempo as the 2nd;
>> atempo, with tempo as the 1st option and pitch as the 2nd.
>> These filters would share the same C file.
>
> Indeed, I missed the fact that you did not understand:
>
> No, there will not be an apitch filter, whether it lives in af_apitch.c
> or af_atempo.c. There will be a single filter, named atempo, that will
> bring the best quality to all users, existing and new ones.

Calling the filter atempo when it also modifies pitch is bad for users, and
not having an apitch filter at the same time means users would think that
they cannot alter the pitch.
Sorry if you cannot understand my fears.
Nicolas George May 12, 2019, 6:51 p.m. UTC | #7
Paul B Mahol (12019-05-12):
> Calling the filter atempo when it also modifies pitch is bad for users, and
> not having an apitch filter at the same time means users would think that
> they cannot alter the pitch.
> Sorry if you cannot understand my fears.

As I have pointed out to you (but you made a show of misunderstanding on
purpose), changing the pitch and changing the speed of an audio signal
are fundamentally the same thing: they are connected by the sample rate.

In my experience, most people are aware of that, including completely
non-technical people: they know that if you speed a sound up or down,
its pitch changes.

The documentation, of course, can be enhanced to make sure it contains
all the keywords a user is likely to search for. And if the options can be
tweaked to allow setting exactly the wanted output in the most
convenient way (like we have setdar, even though only the SAR exists
in the library), that is even better.
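
As a purely numeric illustration of that connection (not code from the patch):

    #include <stdio.h>

    int main(void)
    {
        const double rate   = 48000.0; /* original sample rate */
        const double factor = 1.25;    /* desired pitch factor */

        /* Reinterpreting the same samples at rate*factor raises the pitch
         * by `factor`, but also shortens the clip by the same factor. */
        printf("play at %.0f Hz -> pitch x%.2f, duration x%.2f\n",
               rate * factor, factor, 1.0 / factor);
        return 0;
    }

A pure pitch shift is that rate change followed by a time-stretch of the same
factor, and a pure tempo change is the time-stretch alone; that is roughly how
the patch is structured, with the phase vocoder doing the time-stretch and
swresample doing the rate change by the pitch factor.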
Tim Jones May 12, 2019, 6:59 p.m. UTC | #8
On May 12, 2019, at 11:51 AM, Nicolas George <george@nsup.org> wrote:
> 
> Paul B Mahol (12019-05-12):
>> Calling the filter atempo when it also modifies pitch is bad for users, and
>> not having an apitch filter at the same time means users would think that
>> they cannot alter the pitch.
>> Sorry if you cannot understand my fears.
> 
> As I have pointed out to you (but you made a show of misunderstanding on
> purpose), changing the pitch and changing the speed of an audio signal
> are fundamentally the same thing: they are connected by the sample rate.
> 
> In my experience, most people are aware of that, including completely
> non-technical people: they know that if you speed a sound up or down,
> its pitch changes.

Coming from the audio world, tempo and pitch are different things.  Using a smart algorithm, I can speed up the tempo of a clip without changing the pitch.  Or, inversely, I can modify the pitch without changing the tempo.  We look at "tempo" as beats per period (usually minutes).  We look at pitch as the "tone and timbre" of a given sound (transposing a note from D to F#, for example).
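
Just to put a number on the transposition example (illustrative only, nothing
to do with the patch internals):

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        /* D -> F# is four semitones up; in equal temperament the
         * corresponding frequency (pitch) ratio is 2^(4/12) ~= 1.26. */
        double ratio = pow(2.0, 4.0 / 12.0);
        printf("pitch factor for D -> F#: %.4f\n", ratio);
        return 0;
    }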

> The documentation, of course, can be enhanced to make sure it contains
> all the keywords a user is likely to search for. And if the options can be
> tweaked to allow setting exactly the wanted output in the most
> convenient way (like we have setdar, even though only the SAR exists
> in the library), that is even better.

This would be helpful, since I had previously looked into adjusting the playback rate without the associated "Alvin and the Chipmunks" effect and didn't uncover anything in a deep DDG and Google search.

--
Tim
Nicolas George May 12, 2019, 7:01 p.m. UTC | #9
Tim Jones (12019-05-12):
> Coming from the audio world, tempo and pitch are different things.

Tempo and pitch are not the same thing. But changing them is the same
thing.

Regards,
Paul B Mahol May 12, 2019, 7:04 p.m. UTC | #10
On 5/12/19, Nicolas George <george@nsup.org> wrote:
> Paul B Mahol (12019-05-12):
>> Calling the filter atempo when it also modifies pitch is bad for users, and
>> not having an apitch filter at the same time means users would think that
>> they cannot alter the pitch.
>> Sorry if you cannot understand my fears.
>
> As I have pointed out to you (but you made a show of misunderstanding on
> purpose), changing the pitch and changing the speed of an audio signal
> are fundamentally the same thing: they are connected by the sample rate.
>
> In my experience, most people are aware of that, including completely
> non-technical people: they know that if you speed a sound up or down,
> its pitch changes.
>
> The documentation, of course, can be enhanced to make sure it contains
> all the keywords a user is likely to search for. And if the options can be
> tweaked to allow setting exactly the wanted output in the most
> convenient way (like we have setdar, even though only the SAR exists
> in the library), that is even better.

We have both setsar and setdar; why should we not have both atempo and apitch?
Nicolas George May 12, 2019, 7:09 p.m. UTC | #11
Paul B Mahol (12019-05-12):
> We have both setsar and setdar,

And that is a mistake. We should have a single "set" filter that can
change all the transparent numeric properties of frames (sar, time base,
timestamp, colorspace, etc.). One of the benefits: it can notify users
who set inconsistent values instead of just overriding them.

Merging all this is one of my easy TODO list items.
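
As a sketch of the kind of consistency check I have in mind (illustrative only,
not an existing API):

    #include <libavutil/error.h>
    #include <libavutil/rational.h>

    /* The three aspect-ratio values are linked by DAR = SAR * width/height;
     * a merged "set" filter could reject inconsistent requests instead of
     * silently overriding one of them. */
    static int check_aspect(AVRational dar, AVRational sar, int w, int h)
    {
        AVRational expected = av_mul_q(sar, av_make_q(w, h));
        return av_cmp_q(dar, expected) ? AVERROR(EINVAL) : 0;
    }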

> why should we not have both atempo and apitch?

Because a single filter that can do both is less confusing: users do not
need to hesitate about which one to use.
Paul B Mahol May 12, 2019, 7:23 p.m. UTC | #12
On 5/12/19, Nicolas George <george@nsup.org> wrote:
> Paul B Mahol (12019-05-12):
>> We have both setsar and setdar,
>
> And that is a mistake. We should have a single "set" filter that can
> change all the transparent numeric properties of frames (sar, time base,
> timestamp, colorspace, etc.). One of the benefits: it can notify users
> who set inconsistent values instead of just overriding them.
>
> Merging all this is one of my easy TODO list items.

You cannot merge filters by removing other filters, as that will break
user scripts.
Also, there is already the setparams filter.
Nicolas George May 12, 2019, 7:24 p.m. UTC | #13
Paul B Mahol (12019-05-12):
> You cannot merge filters by removing other filters, as that will break
> user scripts.

I know how to avoid that, thank you very much.
Paul B Mahol May 12, 2019, 7:25 p.m. UTC | #14
On 5/12/19, Nicolas George <george@nsup.org> wrote:
> Paul B Mahol (12019-05-12):
>> You cannot merge filters by removing other filters, as that will break
>> user scripts.
>
> I know how to avoid that, thank you very much.
>

How would you avoid that?

Patch

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index b41304d480..3662d50ae0 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -68,6 +68,7 @@  OBJS-$(CONFIG_ANULL_FILTER)                  += af_anull.o
 OBJS-$(CONFIG_APAD_FILTER)                   += af_apad.o
 OBJS-$(CONFIG_APERMS_FILTER)                 += f_perms.o
 OBJS-$(CONFIG_APHASER_FILTER)                += af_aphaser.o generate_wave_table.o
+OBJS-$(CONFIG_APITCH_FILTER)                 += af_apitch.o
 OBJS-$(CONFIG_APULSATOR_FILTER)              += af_apulsator.o
 OBJS-$(CONFIG_AREALTIME_FILTER)              += f_realtime.o
 OBJS-$(CONFIG_ARESAMPLE_FILTER)              += af_aresample.o
diff --git a/libavfilter/af_apitch.c b/libavfilter/af_apitch.c
new file mode 100644
index 0000000000..406951576b
--- /dev/null
+++ b/libavfilter/af_apitch.c
@@ -0,0 +1,764 @@ 
+/*
+ * Copyright (c) 2019 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2.1 of the License,
+ * or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libswresample/swresample.h"
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/audio_fifo.h"
+#include "libavfilter/internal.h"
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+#include "libavcodec/avfft.h"
+#include "filters.h"
+#include "audio.h"
+
+typedef struct APitchContext {
+    const AVClass *class;
+
+    float pitch;
+    float tempo;
+    int window_size;
+    int ratio;
+
+    int power_change;
+    int fft_bits;
+    int nb_channels;
+
+    FFTContext *fft, *ifft;
+    AVAudioFifo *ififo;
+    int64_t pts;
+    int eof;
+    float *window_func_lut;
+
+    AVFrame *buffer;
+    AVFrame *magnitude;
+    AVFrame *phase;
+    AVFrame *acc;
+    AVFrame *new_phase;
+    AVFrame *last_phase;
+    AVFrame *osamples;
+    AVFrame *peaks;
+    AVFrame *map;
+
+    int   input_overlap;
+    int   output_overlap;
+    int   samples_to_drain;
+    float last_power;
+
+    int    flushed;
+    int    pitch_changed;
+    struct SwrContext *swr;
+} APitchContext;
+
+#define OFFSET(x) offsetof(APitchContext, x)
+#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption apitch_options[] = {
+    { "pitch",    "set pitch scale factor", OFFSET(pitch),       AV_OPT_TYPE_FLOAT, {.dbl=1},    .01,   100, AF },
+    { "tempo",    "set tempo scale factor", OFFSET(tempo),       AV_OPT_TYPE_FLOAT, {.dbl=1},    .01,   100, AF },
+    { "oratio",   "set overlap ratio",      OFFSET(ratio),       AV_OPT_TYPE_INT,   {.i64=4},     1,     64, AF },
+    { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(apitch);
+
+static int set_input_overlap(APitchContext *s, float rate)
+{
+    s->input_overlap = s->output_overlap * rate;
+    if (s->input_overlap <= 0)
+        return AVERROR(EINVAL);
+    return 0;
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AVFilterLink *inlink = ctx->inputs[0];
+    APitchContext *s = ctx->priv;
+    int ret;
+
+    s->swr = swr_alloc();
+    if (!s->swr)
+        return AVERROR(ENOMEM);
+
+    s->swr = swr_alloc_set_opts(s->swr,
+                                inlink->channel_layout, inlink->format, inlink->sample_rate,
+                                inlink->channel_layout, inlink->format, inlink->sample_rate * s->pitch,
+                                0, ctx);
+    if (!s->swr)
+        return AVERROR(ENOMEM);
+
+    ret = swr_init(s->swr);
+    if (ret < 0)
+        return ret;
+
+    s->window_size = inlink->sample_rate / 10;
+    s->power_change = 1;
+    s->pts  = AV_NOPTS_VALUE;
+    s->fft_bits = av_log2(s->window_size);
+    s->fft  = av_fft_init(s->fft_bits, 0);
+    s->ifft = av_fft_init(s->fft_bits, 1);
+    if (!s->fft || !s->ifft)
+        return AVERROR(ENOMEM);
+
+    s->window_size = 1 << s->fft_bits;
+
+    s->ratio = FFMIN(1 << av_log2(s->ratio), s->window_size >> 1);
+    s->nb_channels = outlink->channels;
+
+    s->ififo = av_audio_fifo_alloc(outlink->format, outlink->channels, s->window_size);
+    if (!s->ififo)
+        return AVERROR(ENOMEM);
+
+    s->window_func_lut = av_realloc_f(s->window_func_lut, s->window_size,
+                                      sizeof(*s->window_func_lut));
+    if (!s->window_func_lut)
+        return AVERROR(ENOMEM);
+
+    for (int i = 0; i < s->window_size; i++) {
+        float t = (float)i / (float)(s->window_size - 1);
+        float h = 0.5 * (1.0 - cosf(2.0 * M_PI * t));
+        s->window_func_lut[i] = h;
+    }
+
+    s->output_overlap = s->window_size / s->ratio;
+    if (s->output_overlap <= 0)
+        return AVERROR(EINVAL);
+
+    return set_input_overlap(s, s->tempo / s->pitch);
+}
+
+static float get_power(const float *in, const int size)
+{
+    float sum = 0.f;
+
+    for (int n = 0; n < size; n++)
+        sum += in[n] * in[n];
+
+    return sqrtf(sum / size);
+}
+
+static void normalize(float *new, const float num, const int size)
+{
+    float den = get_power(new, size);
+    float f = den > 0.f ? num / den : 0.f;
+
+    for (int i = 0; i < size; i++)
+        new[i] *= f;
+}
+
+static void complex2polar(const FFTComplex *in,
+                          float *magn, float *phase, const int size)
+{
+    for (int i = 0; i < size; i++) {
+        const float re = in[i + 1].re;
+        const float im = in[i + 1].im;
+
+        magn[i]  = hypotf(re, im);
+        phase[i] = atan2f(im, re);
+    }
+}
+
+static void polar2complex(FFTComplex *out,
+                          const float *magn, const float *phase,
+                          const int size)
+{
+    for (int i = 0; i < size; i++) {
+        const float m = magn[i];
+        const float p = phase[i];
+
+        out[i + 1].re = cosf(p) * m;
+        out[i + 1].im = sinf(p) * m;
+    }
+}
+
+static void update_phase(const float *phase, float *last_phase,
+                         float *new_phase, const int size)
+{
+    for (int i = 0; i < size; i++) {
+        new_phase[i] = last_phase[i];
+        last_phase[i] = phase[i];
+    }
+}
+
+static void restore_symmetry(FFTComplex *win, const int size)
+{
+    for (int i = 1; i < size / 2; i++) {
+        int pos = size - i;
+
+        win[pos].re =  win[i].re;
+        win[pos].im = -win[i].im;
+    }
+}
+
+static float principal_angle(const float a)
+{
+    float b = fmodf(a + M_PI, 2 * M_PI);
+
+    if (b < 0.f)
+        b = 2 * M_PI + b;
+    b -= M_PI;
+
+    return b;
+}
+
+static void make_phase(const float *phase, const float *last_phase,
+                       float *new_phase,
+                       int io, int oo, int size)
+{
+    const float size2 = 2 * size;
+
+    for (int i = 0; i < size; i++) {
+        float h = 2 * M_PI * (i + 1.f) / size2;
+        float ii = phase[i] - last_phase[i] - io * h;
+        float j = h + principal_angle(ii) / io;
+
+        new_phase[i] += oo * j;
+    }
+}
+
+static int is_peak(const float *magnitude, const int pos, const int search)
+{
+    const float cmag = magnitude[pos];
+
+    for (int i = 0; i <= search; i++)
+        if (!(i > pos) && magnitude[pos - i] > cmag)
+            return 0;
+    for (int i = 0; i <= search; i++)
+        if (magnitude[pos + i] > cmag)
+            return 0;
+
+    return 1;
+}
+
+static int find_peaks(const float *magnitude, float *peaks, const int search, const int size)
+{
+    int count = 0;
+
+    for (int i = 0; i < size - search; i++) {
+        if (is_peak(magnitude, i, search))
+            peaks[count++] = i;
+    }
+
+    return count;
+}
+
+static void interp_phase(const float *map, const float *peaks, const int nb_peaks,
+                         const float *phase, const float *last_phase,
+                         float *new_phase, const float io, const float oo, const int size)
+{
+    const float size2 = 2 * size;
+
+    for (int i = 0; i < nb_peaks; i++) {
+        int peak = peaks[i];
+        int l = map[peak];
+        float m, n, o;
+
+        if (FFABS(peak - l) > FFMIN(1, av_log2(i)) - 3)
+            l = peak;
+        m = 2.f * M_PI * (peak + 1.f) / size2;
+        n = phase[peak] - last_phase[l] - io * m;
+        o = m + principal_angle(n) / io;
+
+        new_phase[peak] = new_phase[l] + oo * o;
+    }
+}
+
+static void get_new_phase(const float *map, const float *phase,
+                          float *new_phase, const float I, const int size)
+{
+    for (int i = 0; i < size; i++) {
+        int f = map[i];
+
+        new_phase[i] = new_phase[f] + I * (phase[i] - phase[f]);
+    }
+}
+
+static int lowest_valley(const float *magnitude,
+                         const int start, const int size)
+{
+    float lowest_magn = magnitude[start];
+    int lowest = start;
+
+    for (int i = start + 1; i < size; i++) {
+        if (magnitude[i] < lowest_magn) {
+            lowest = i;
+            lowest_magn = magnitude[i];
+        }
+    }
+
+    return lowest;
+}
+
+static void map_peaks(const float *peaks, const int nb_peaks,
+                      const float *magnitude, float *map, const int size)
+{
+    int e = 0, f;
+
+    for (f = 0; f < nb_peaks - 1; f++) {
+        int peak = peaks[f];
+        int next_peak = peaks[f+1];
+
+        for (int j = lowest_valley(magnitude, peak, next_peak); e <= j; e++)
+            map[e] = peak;
+    }
+
+    for (; e < size; e++)
+        map[e] = peaks[f];
+}
+
+static void stretch(APitchContext *s, int nb_samples,
+                    int window_size, float sample_rate,
+                    const float *indata, float *outdata, int ch)
+{
+    FFTComplex *window_buffer = (FFTComplex *)s->buffer->extended_data[ch];
+    float *phase = (float *)s->phase->extended_data[ch];
+    float *magnitude = (float *)s->magnitude->extended_data[ch];
+    float *acc = (float *)s->acc->extended_data[ch];
+    float *new_phase = (float *)s->new_phase->extended_data[ch];
+    float *last_phase = (float *)s->last_phase->extended_data[ch];
+    float *osamples = (float *)s->osamples->extended_data[ch];
+    float *peaks = (float *)s->peaks->extended_data[ch];
+    float *map = (float *)s->map->extended_data[ch];
+    const int output_overlap = s->output_overlap;
+    const int input_overlap = s->input_overlap;
+    const int half_window_size = window_size / 2;
+    const float ratio = output_overlap / (float)input_overlap;
+    const float wscale = 1.f / window_size;
+    float power;
+
+    if (s->pitch == s->tempo && s->tempo == 1.f) {
+        for (int k = 0; k < window_size; k++)
+            osamples[k] = indata[k];
+
+        goto copy;
+    }
+
+    for (int k = 0; k < window_size; k++) {
+        const float window = s->window_func_lut[k];
+
+        window_buffer[k].re = indata[k] * window;
+        window_buffer[k].im = 0.f;
+    }
+
+    power = get_power(indata, window_size);
+    if (power > 2.f * s->last_power)
+        s->power_change = 1;
+
+    av_fft_permute(s->fft, window_buffer);
+    av_fft_calc(s->fft, window_buffer);
+
+    complex2polar(window_buffer, magnitude, phase, half_window_size);
+
+    if (s->power_change)
+        update_phase(phase, last_phase, new_phase, half_window_size);
+    s->power_change = 0;
+
+    if (ratio < 2.f) {
+        int nb_peaks = find_peaks(magnitude, peaks, 1, half_window_size);
+        float I = 2.f / 3.f + fminf(1.5, ratio) / 3.f;
+
+        av_assert0(nb_peaks > 0);
+        map_peaks(peaks, nb_peaks, magnitude, map, half_window_size);
+        interp_phase(map, peaks, nb_peaks, phase, last_phase, new_phase,
+                     input_overlap, output_overlap, half_window_size);
+        get_new_phase(map, phase, new_phase, I, half_window_size);
+    } else {
+        make_phase(phase, last_phase, new_phase, input_overlap, output_overlap, half_window_size);
+    }
+
+    polar2complex(window_buffer, magnitude, new_phase, half_window_size);
+    restore_symmetry(window_buffer, window_size);
+
+    av_fft_permute(s->ifft, window_buffer);
+    av_fft_calc(s->ifft, window_buffer);
+
+    for (int k = 0; k < window_size; k++)
+        osamples[k] = window_buffer[k].re * wscale;
+
+    normalize(osamples, power, window_size);
+
+copy:
+    for (int k = 0; k < window_size; k++) {
+        const float window = s->window_func_lut[k];
+        osamples[k] *= window * .5f;
+    }
+
+    for (int k = 0; k < window_size; k++) {
+        acc[k] += osamples[k];
+    }
+
+    for (int k = 0; k < output_overlap; k++)
+        outdata[k] = acc[k];
+
+    memmove(acc, acc + output_overlap, (window_size * 2 - output_overlap) * sizeof(float));
+    FFSWAP(uint8_t *, s->last_phase->extended_data[ch], s->phase->extended_data[ch]);
+
+    s->last_power = power;
+}
+
+static int filter_frame(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    APitchContext *s = ctx->priv;
+    AVFrame *out = NULL, *in = NULL, *new_out = NULL;
+    int ret, drain, available = 0;
+
+    if (s->pitch_changed) {
+        out = ff_get_audio_buffer(outlink, s->output_overlap);
+        if (!out) {
+            return AVERROR(ENOMEM);
+        }
+
+        ret = swr_convert_frame(s->swr, out, NULL);
+        if (ret < 0) {
+            av_frame_free(&out);
+            return ret;
+        }
+
+        if (out->nb_samples > 0) {
+            out->pts = s->pts;
+            s->pts += out->nb_samples;
+
+            return ff_filter_frame(outlink, out);
+        } else {
+            s->pitch_changed = 0;
+            av_frame_free(&out);
+        }
+
+        s->swr = swr_alloc_set_opts(s->swr,
+                                    inlink->channel_layout, inlink->format, inlink->sample_rate,
+                                    inlink->channel_layout, inlink->format, inlink->sample_rate * s->pitch,
+                                    0, ctx);
+        if (!s->swr)
+            return AVERROR(ENOMEM);
+
+        ret = swr_init(s->swr);
+        if (ret < 0)
+            return ret;
+    }
+
+    in = ff_get_audio_buffer(outlink, s->window_size);
+    if (!in)
+        return AVERROR(ENOMEM);
+
+    ret = av_audio_fifo_peek(s->ififo, (void **)in->extended_data, s->window_size);
+    if (ret < 0) {
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+    available = av_audio_fifo_size(s->ififo) > 0;
+
+    if (available > 0) {
+        out = ff_get_audio_buffer(outlink, s->output_overlap);
+        if (!out) {
+            ret = AVERROR(ENOMEM);
+            goto end;
+        }
+
+        for (int ch = 0; ch < inlink->channels; ch++) {
+            stretch(s, in->nb_samples,
+                    s->window_size,
+                    inlink->sample_rate,
+                    (const float *)in->extended_data[ch],
+                    (float *)out->extended_data[ch], ch);
+        }
+    }
+
+    if (s->pitch != 1.f) {
+        new_out = ff_get_audio_buffer(outlink, s->output_overlap);
+        if (!new_out) {
+            av_frame_free(&out);
+            ret = AVERROR(ENOMEM);
+            goto end;
+        }
+
+        if (out)
+            out->sample_rate *= s->pitch;
+        ret = swr_convert_frame(s->swr, new_out, out);
+        av_frame_free(&out);
+        if (ret < 0)
+            goto end;
+        out = new_out;
+        new_out = NULL;
+    }
+
+    if (!out) {
+        ret = 1;
+        goto end;
+    }
+
+    out->pts = s->pts;
+    s->pts += s->output_overlap;
+
+    s->flushed = out->nb_samples == 0;
+    if (s->flushed) {
+        ret = 1;
+        av_frame_free(&out);
+        goto end;
+    }
+
+    ret = ff_filter_frame(outlink, out);
+    if (ret < 0)
+        goto end;
+
+    s->samples_to_drain = s->input_overlap;
+    drain = FFMIN(s->samples_to_drain, av_audio_fifo_size(s->ififo));
+    av_audio_fifo_drain(s->ififo, drain);
+    s->samples_to_drain -= drain;
+
+end:
+    av_frame_free(&in);
+    av_frame_free(&new_out);
+    return ret;
+}
+
+static int activate(AVFilterContext *ctx)
+{
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    APitchContext *s = ctx->priv;
+    AVFrame *in = NULL;
+    int ret = 0, status;
+    int64_t pts;
+
+    if (!s->magnitude) {
+        s->magnitude = ff_get_audio_buffer(outlink, s->window_size / 2);
+        if (!s->magnitude)
+            return AVERROR(ENOMEM);
+    }
+
+    if (!s->phase) {
+        s->phase = ff_get_audio_buffer(outlink, s->window_size / 2);
+        if (!s->phase)
+            return AVERROR(ENOMEM);
+    }
+
+    if (!s->acc) {
+        s->acc = ff_get_audio_buffer(outlink, s->window_size * 2);
+        if (!s->acc)
+            return AVERROR(ENOMEM);
+    }
+
+    if (!s->new_phase) {
+        s->new_phase = ff_get_audio_buffer(outlink, s->window_size / 2);
+        if (!s->new_phase)
+            return AVERROR(ENOMEM);
+    }
+
+    if (!s->last_phase) {
+        s->last_phase = ff_get_audio_buffer(outlink, s->window_size / 2);
+        if (!s->last_phase)
+            return AVERROR(ENOMEM);
+    }
+
+    if (!s->osamples) {
+        s->osamples = ff_get_audio_buffer(outlink, s->window_size);
+        if (!s->osamples)
+            return AVERROR(ENOMEM);
+    }
+
+    if (!s->peaks) {
+        s->peaks = ff_get_audio_buffer(outlink, s->window_size / 2);
+        if (!s->peaks)
+            return AVERROR(ENOMEM);
+    }
+
+    if (!s->map) {
+        s->map = ff_get_audio_buffer(outlink, s->window_size / 2);
+        if (!s->map)
+            return AVERROR(ENOMEM);
+    }
+
+    if (!s->buffer) {
+        s->buffer = ff_get_audio_buffer(outlink, s->window_size * 2);
+        if (!s->buffer)
+            return AVERROR(ENOMEM);
+    }
+
+    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
+
+    if (!s->eof && av_audio_fifo_size(s->ififo) < s->window_size) {
+        ret = ff_inlink_consume_frame(inlink, &in);
+        if (ret < 0)
+            return ret;
+
+        if (ret > 0) {
+            ret = av_audio_fifo_write(s->ififo, (void **)in->extended_data,
+                                      in->nb_samples);
+            if (ret >= 0 && s->pts == AV_NOPTS_VALUE)
+                s->pts = in->pts;
+
+            if (s->samples_to_drain > 0) {
+                int drain = FFMIN(s->samples_to_drain, av_audio_fifo_size(s->ififo));
+                av_audio_fifo_drain(s->ififo, drain);
+                s->samples_to_drain -= drain;
+            }
+
+            av_frame_free(&in);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
+    if ((av_audio_fifo_size(s->ififo) >= s->window_size) ||
+        ((av_audio_fifo_size(s->ififo) > 0 || !s->flushed) && s->eof)) {
+        ret = filter_frame(inlink);
+        if (av_audio_fifo_size(s->ififo) >= s->window_size)
+            ff_filter_set_ready(ctx, 100);
+        if (ret != 1)
+            return ret;
+    }
+
+    if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
+        if (status == AVERROR_EOF) {
+            s->eof = 1;
+            if (av_audio_fifo_size(s->ififo) >= 0 && !s->flushed) {
+                ff_filter_set_ready(ctx, 100);
+                return 0;
+            }
+        }
+    }
+
+    if (s->eof && (av_audio_fifo_size(s->ififo) <= 0 || s->flushed)) {
+        ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
+        return 0;
+    }
+
+    if (!s->eof)
+        FF_FILTER_FORWARD_WANTED(outlink, inlink);
+
+    return FFERROR_NOT_READY;
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats;
+    AVFilterChannelLayouts *layouts;
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_FLTP,
+        AV_SAMPLE_FMT_NONE
+    };
+    int ret;
+
+    layouts = ff_all_channel_counts();
+    if (!layouts)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_channel_layouts(ctx, layouts);
+    if (ret < 0)
+        return ret;
+
+    formats = ff_make_format_list(sample_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_formats(ctx, formats);
+    if (ret < 0)
+        return ret;
+
+    formats = ff_all_samplerates();
+    if (!formats)
+        return AVERROR(ENOMEM);
+    return ff_set_common_samplerates(ctx, formats);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    APitchContext *s = ctx->priv;
+
+    av_fft_end(s->fft);
+    s->fft = NULL;
+    av_fft_end(s->ifft);
+    s->ifft = NULL;
+
+    av_frame_free(&s->acc);
+    av_frame_free(&s->magnitude);
+    av_frame_free(&s->phase);
+    av_frame_free(&s->peaks);
+    av_frame_free(&s->new_phase);
+    av_frame_free(&s->last_phase);
+    av_frame_free(&s->osamples);
+    av_frame_free(&s->map);
+    av_frame_free(&s->buffer);
+
+    av_freep(&s->window_func_lut);
+
+    av_audio_fifo_free(s->ififo);
+    s->ififo = NULL;
+
+    swr_free(&s->swr);
+}
+
+static int process_command(AVFilterContext *ctx,
+                           const char *cmd,
+                           const char *arg,
+                           char *res,
+                           int res_len,
+                           int flags)
+{
+    APitchContext *s = ctx->priv;
+    float tempo;
+    float pitch;
+    int ret = 0;
+
+    if (!strcmp(cmd, "tempo") && av_sscanf(arg, "%f", &tempo) == 1) {
+        s->tempo = av_clipf(tempo, 0.01f, 100.f);
+        ret = set_input_overlap(s, s->tempo / s->pitch);
+    } else if (!strcmp(cmd, "pitch") && av_sscanf(arg, "%f", &pitch) == 1) {
+        pitch = av_clipf(pitch, 0.01f, 100.f);
+        s->pitch_changed = pitch != s->pitch;
+        s->pitch = pitch;
+        ret = set_input_overlap(s, s->tempo / s->pitch);
+    } else {
+        ret = AVERROR(ENOSYS);
+    }
+
+    return ret;
+}
+
+static const AVFilterPad inputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .config_props = config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_af_apitch = {
+    .name            = "apitch",
+    .description     = NULL_IF_CONFIG_SMALL("Adjust audio pitch and tempo."),
+    .priv_size       = sizeof(APitchContext),
+    .priv_class      = &apitch_class,
+    .inputs          = inputs,
+    .outputs         = outputs,
+    .activate        = activate,
+    .query_formats   = query_formats,
+    .process_command = process_command,
+    .uninit          = uninit,
+};
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 9bdfa7d1bc..e3ea0e8214 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -60,6 +60,7 @@  extern AVFilter ff_af_anull;
 extern AVFilter ff_af_apad;
 extern AVFilter ff_af_aperms;
 extern AVFilter ff_af_aphaser;
+extern AVFilter ff_af_apitch;
 extern AVFilter ff_af_apulsator;
 extern AVFilter ff_af_arealtime;
 extern AVFilter ff_af_aresample;