diff mbox series

[FFmpeg-devel] libavfilter/asrc_flite: fixes and improvements

Message ID CAPYw7P4xTFRBbGO3ubP6kfKCN7FLSXE+z=3-+KqfMJ6KQ3+Drw@mail.gmail.com
State New
Headers show
Series [FFmpeg-devel] libavfilter/asrc_flite: fixes and improvements | expand

Checks

Context Check Description
andriy/configure_x86 warning Failed to apply patch

Commit Message

Paul B Mahol Dec. 3, 2023, 10:23 p.m. UTC
Attached.

Comments

Stefano Sabatini Dec. 4, 2023, 11:27 p.m. UTC | #1
On date Sunday 2023-12-03 23:23:48 +0100, Paul B Mahol wrote:
> Attached.

> From fe1ece70c0ecbe6fb24e0823fe46db57242396e4 Mon Sep 17 00:00:00 2001
> From: Paul B Mahol <onemda@gmail.com>
> Date: Sun, 3 Dec 2023 21:38:08 +0100
> Subject: [PATCH 1/2] avfilter/asrc_flite: switch to activate
> 
> Allows to set EOF timestamp.
> 
> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
>  libavfilter/asrc_flite.c | 24 +++++++++++++-----------
>  1 file changed, 13 insertions(+), 11 deletions(-)

Cannot really comment, but should be good if tested.

> From e8aad4411ee0f8bc4bd50d5e3a10b7f712687f60 Mon Sep 17 00:00:00 2001
> From: Paul B Mahol <onemda@gmail.com>
> Date: Sun, 3 Dec 2023 22:50:11 +0100
> Subject: [PATCH 2/2] avfilter/asrc_flite: use streaming function
> 
> Fix continuous accumulation of audio samples for big txt inputs.
> 
> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
>  libavfilter/asrc_flite.c | 84 ++++++++++++++++++++++++++++++----------
>  1 file changed, 64 insertions(+), 20 deletions(-)
> 
> diff --git a/libavfilter/asrc_flite.c b/libavfilter/asrc_flite.c
> index 74c8414b5c..70a2fd3e40 100644
> --- a/libavfilter/asrc_flite.c
> +++ b/libavfilter/asrc_flite.c
> @@ -24,6 +24,8 @@
>   */
>  
>  #include <flite/flite.h>
> +#include "libavutil/audio_fifo.h"
> +#include "libavutil/avstring.h"
>  #include "libavutil/channel_layout.h"
>  #include "libavutil/file.h"
>  #include "libavutil/opt.h"
> @@ -39,11 +41,14 @@ typedef struct FliteContext {
>      char *voice_str;
>      char *textfile;
>      char *text;
> -    cst_wave *wave;
> -    int16_t *wave_samples;
> -    int      wave_nb_samples;
> +    char *text_p;
> +    char *text_saveptr;
> +    int nb_channels;
> +    int sample_rate;
> +    AVAudioFifo *fifo;
>      int list_voices;
>      cst_voice *voice;
> +    cst_audio_streaming_info *asi;
>      struct voice_entry *voice_entry;
>      int64_t pts;
>      int frame_nb_samples; ///< number of samples per frame
> @@ -140,10 +145,30 @@ static int select_voice(struct voice_entry **entry_ret, const char *voice_name,
>      return AVERROR(EINVAL);
>  }
>  
> +static int audio_stream_chunk_by_word(const cst_wave *w, int start, int size,

nit+: w -> wave to simplify reading

> +                                      int last, cst_audio_streaming_info *asi)
> +{
> +    FliteContext *flite = asi->userdata;
> +    void *const ptr[8] = { &w->samples[start] };
> +
> +    flite->nb_channels = w->num_channels;
> +    flite->sample_rate = w->sample_rate;

> +    if (!flite->fifo) {
> +        flite->fifo = av_audio_fifo_alloc(AV_SAMPLE_FMT_S16, flite->nb_channels, size);

any reason to initialize it here rather than in init?

> +        if (!flite->fifo)
> +            return CST_AUDIO_STREAM_STOP;

[...]

LGTM otherwise, thanks.
Paul B Mahol Dec. 4, 2023, 11:30 p.m. UTC | #2
On Tue, Dec 5, 2023 at 12:27 AM Stefano Sabatini <stefasab@gmail.com> wrote:

> On date Sunday 2023-12-03 23:23:48 +0100, Paul B Mahol wrote:
> > Attached.
>
> > From fe1ece70c0ecbe6fb24e0823fe46db57242396e4 Mon Sep 17 00:00:00 2001
> > From: Paul B Mahol <onemda@gmail.com>
> > Date: Sun, 3 Dec 2023 21:38:08 +0100
> > Subject: [PATCH 1/2] avfilter/asrc_flite: switch to activate
> >
> > Allows to set EOF timestamp.
> >
> > Signed-off-by: Paul B Mahol <onemda@gmail.com>
> > ---
> >  libavfilter/asrc_flite.c | 24 +++++++++++++-----------
> >  1 file changed, 13 insertions(+), 11 deletions(-)
>
> Cannot really comment, but should be good if tested.
>
> > From e8aad4411ee0f8bc4bd50d5e3a10b7f712687f60 Mon Sep 17 00:00:00 2001
> > From: Paul B Mahol <onemda@gmail.com>
> > Date: Sun, 3 Dec 2023 22:50:11 +0100
> > Subject: [PATCH 2/2] avfilter/asrc_flite: use streaming function
> >
> > Fix continuous accumulation of audio samples for big txt inputs.
> >
> > Signed-off-by: Paul B Mahol <onemda@gmail.com>
> > ---
> >  libavfilter/asrc_flite.c | 84 ++++++++++++++++++++++++++++++----------
> >  1 file changed, 64 insertions(+), 20 deletions(-)
> >
> > diff --git a/libavfilter/asrc_flite.c b/libavfilter/asrc_flite.c
> > index 74c8414b5c..70a2fd3e40 100644
> > --- a/libavfilter/asrc_flite.c
> > +++ b/libavfilter/asrc_flite.c
> > @@ -24,6 +24,8 @@
> >   */
> >
> >  #include <flite/flite.h>
> > +#include "libavutil/audio_fifo.h"
> > +#include "libavutil/avstring.h"
> >  #include "libavutil/channel_layout.h"
> >  #include "libavutil/file.h"
> >  #include "libavutil/opt.h"
> > @@ -39,11 +41,14 @@ typedef struct FliteContext {
> >      char *voice_str;
> >      char *textfile;
> >      char *text;
> > -    cst_wave *wave;
> > -    int16_t *wave_samples;
> > -    int      wave_nb_samples;
> > +    char *text_p;
> > +    char *text_saveptr;
> > +    int nb_channels;
> > +    int sample_rate;
> > +    AVAudioFifo *fifo;
> >      int list_voices;
> >      cst_voice *voice;
> > +    cst_audio_streaming_info *asi;
> >      struct voice_entry *voice_entry;
> >      int64_t pts;
> >      int frame_nb_samples; ///< number of samples per frame
> > @@ -140,10 +145,30 @@ static int select_voice(struct voice_entry
> **entry_ret, const char *voice_name,
> >      return AVERROR(EINVAL);
> >  }
> >
> > +static int audio_stream_chunk_by_word(const cst_wave *w, int start, int
> size,
>
> nit+: w -> wave to simplify reading
>
> > +                                      int last,
> cst_audio_streaming_info *asi)
> > +{
> > +    FliteContext *flite = asi->userdata;
> > +    void *const ptr[8] = { &w->samples[start] };
> > +
> > +    flite->nb_channels = w->num_channels;
> > +    flite->sample_rate = w->sample_rate;
>
> > +    if (!flite->fifo) {
> > +        flite->fifo = av_audio_fifo_alloc(AV_SAMPLE_FMT_S16,
> flite->nb_channels, size);
>
> any reason to initialize it here rather than in init?
>

Mostly because number of channels is unknown at init point of filter.


>
> > +        if (!flite->fifo)
> > +            return CST_AUDIO_STREAM_STOP;
>
> [...]
>
> LGTM otherwise, thanks.
>

Note that this patch expect flite >= 2.0 is used where thread safety was
fixed (gonna retest again to confirm this).
diff mbox series

Patch

From e8aad4411ee0f8bc4bd50d5e3a10b7f712687f60 Mon Sep 17 00:00:00 2001
From: Paul B Mahol <onemda@gmail.com>
Date: Sun, 3 Dec 2023 22:50:11 +0100
Subject: [PATCH 2/2] avfilter/asrc_flite: use streaming function

Fix continuous accumulation of audio samples for big txt inputs.

Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavfilter/asrc_flite.c | 84 ++++++++++++++++++++++++++++++----------
 1 file changed, 64 insertions(+), 20 deletions(-)

diff --git a/libavfilter/asrc_flite.c b/libavfilter/asrc_flite.c
index 74c8414b5c..70a2fd3e40 100644
--- a/libavfilter/asrc_flite.c
+++ b/libavfilter/asrc_flite.c
@@ -24,6 +24,8 @@ 
  */
 
 #include <flite/flite.h>
+#include "libavutil/audio_fifo.h"
+#include "libavutil/avstring.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/file.h"
 #include "libavutil/opt.h"
@@ -39,11 +41,14 @@  typedef struct FliteContext {
     char *voice_str;
     char *textfile;
     char *text;
-    cst_wave *wave;
-    int16_t *wave_samples;
-    int      wave_nb_samples;
+    char *text_p;
+    char *text_saveptr;
+    int nb_channels;
+    int sample_rate;
+    AVAudioFifo *fifo;
     int list_voices;
     cst_voice *voice;
+    cst_audio_streaming_info *asi;
     struct voice_entry *voice_entry;
     int64_t pts;
     int frame_nb_samples; ///< number of samples per frame
@@ -140,10 +145,30 @@  static int select_voice(struct voice_entry **entry_ret, const char *voice_name,
     return AVERROR(EINVAL);
 }
 
+static int audio_stream_chunk_by_word(const cst_wave *w, int start, int size,
+                                      int last, cst_audio_streaming_info *asi)
+{
+    FliteContext *flite = asi->userdata;
+    void *const ptr[8] = { &w->samples[start] };
+
+    flite->nb_channels = w->num_channels;
+    flite->sample_rate = w->sample_rate;
+    if (!flite->fifo) {
+        flite->fifo = av_audio_fifo_alloc(AV_SAMPLE_FMT_S16, flite->nb_channels, size);
+        if (!flite->fifo)
+            return CST_AUDIO_STREAM_STOP;
+    }
+
+    av_audio_fifo_write(flite->fifo, ptr, size);
+
+    return CST_AUDIO_STREAM_CONT;
+}
+
 static av_cold int init(AVFilterContext *ctx)
 {
     FliteContext *flite = ctx->priv;
     int ret = 0;
+    char *text;
 
     if (flite->list_voices) {
         list_voices(ctx, "\n");
@@ -197,10 +222,21 @@  static av_cold int init(AVFilterContext *ctx)
         return AVERROR(EINVAL);
     }
 
-    /* synth all the file data in block */
-    flite->wave = flite_text_to_wave(flite->text, flite->voice);
-    flite->wave_samples    = flite->wave->samples;
-    flite->wave_nb_samples = flite->wave->num_samples;
+    flite->asi = new_audio_streaming_info();
+    if (!flite->asi)
+        return AVERROR_BUG;
+
+    flite->asi->asc = audio_stream_chunk_by_word;
+    flite->asi->userdata = flite;
+    feat_set(flite->voice->features, "streaming_info", audio_streaming_info_val(flite->asi));
+
+    flite->text_p = flite->text;
+    if (!(text = av_strtok(flite->text_p, "\n", &flite->text_saveptr)))
+        return AVERROR(EINVAL);
+    flite->text_p = NULL;
+
+    flite_text_to_speech(text, flite->voice, "none");
+
     return 0;
 }
 
@@ -216,8 +252,7 @@  static av_cold void uninit(AVFilterContext *ctx)
         }
         pthread_mutex_unlock(&flite_mutex);
     }
-    delete_wave(flite->wave);
-    flite->wave = NULL;
+    av_audio_fifo_free(flite->fifo);
 }
 
 static int query_formats(AVFilterContext *ctx)
@@ -230,13 +265,13 @@  static int query_formats(AVFilterContext *ctx)
     AVFilterFormats *sample_rates = NULL;
     AVChannelLayout chlayout = { 0 };
 
-    av_channel_layout_default(&chlayout, flite->wave->num_channels);
+    av_channel_layout_default(&chlayout, flite->nb_channels);
 
     if ((ret = ff_add_channel_layout         (&chlayouts     , &chlayout               )) < 0 ||
         (ret = ff_set_common_channel_layouts (ctx            , chlayouts               )) < 0 ||
         (ret = ff_add_format                 (&sample_formats, AV_SAMPLE_FMT_S16       )) < 0 ||
         (ret = ff_set_common_formats         (ctx            , sample_formats          )) < 0 ||
-        (ret = ff_add_format                 (&sample_rates  , flite->wave->sample_rate)) < 0 ||
+        (ret = ff_add_format                 (&sample_rates  , flite->sample_rate      )) < 0 ||
         (ret = ff_set_common_samplerates     (ctx            , sample_rates            )) < 0)
         return ret;
 
@@ -248,12 +283,13 @@  static int config_props(AVFilterLink *outlink)
     AVFilterContext *ctx = outlink->src;
     FliteContext *flite = ctx->priv;
 
-    outlink->sample_rate = flite->wave->sample_rate;
-    outlink->time_base = (AVRational){1, flite->wave->sample_rate};
+    outlink->sample_rate = flite->sample_rate;
+    outlink->time_base = (AVRational){1, flite->sample_rate};
 
     av_log(ctx, AV_LOG_VERBOSE, "voice:%s fmt:%s sample_rate:%d\n",
            flite->voice_str,
            av_get_sample_fmt_name(outlink->format), outlink->sample_rate);
+
     return 0;
 }
 
@@ -261,14 +297,23 @@  static int activate(AVFilterContext *ctx)
 {
     AVFilterLink *outlink = ctx->outputs[0];
     FliteContext *flite = ctx->priv;
-    int nb_samples = FFMIN(flite->wave_nb_samples, flite->frame_nb_samples);
     AVFrame *samplesref;
+    int nb_samples;
 
     if (!ff_outlink_frame_wanted(outlink))
         return FFERROR_NOT_READY;
 
+    nb_samples = FFMIN(av_audio_fifo_size(flite->fifo), flite->frame_nb_samples);
     if (!nb_samples) {
-        ff_outlink_set_status(outlink, AVERROR_EOF, flite->pts);
+        char *text;
+
+        if (!(text = av_strtok(flite->text_p, "\n", &flite->text_saveptr))) {
+            ff_outlink_set_status(outlink, AVERROR_EOF, flite->pts);
+            return 0;
+        }
+
+        flite_text_to_speech(text, flite->voice, "none");
+        ff_filter_set_ready(ctx, 100);
         return 0;
     }
 
@@ -276,13 +321,12 @@  static int activate(AVFilterContext *ctx)
     if (!samplesref)
         return AVERROR(ENOMEM);
 
-    memcpy(samplesref->data[0], flite->wave_samples,
-           nb_samples * flite->wave->num_channels * 2);
+    av_audio_fifo_read(flite->fifo, (void **)samplesref->extended_data,
+                       nb_samples);
+
     samplesref->pts = flite->pts;
-    samplesref->sample_rate = flite->wave->sample_rate;
+    samplesref->sample_rate = flite->sample_rate;
     flite->pts += nb_samples;
-    flite->wave_samples += nb_samples * flite->wave->num_channels;
-    flite->wave_nb_samples -= nb_samples;
 
     return ff_filter_frame(outlink, samplesref);
 }
-- 
2.42.1