diff mbox series

[FFmpeg-devel,v5,10/12] avfilter/textmod: Add textmod filter

Message ID MN2PR04MB59812646AC0D461F950C3888BAD89@MN2PR04MB5981.namprd04.prod.outlook.com
State Superseded, archived
Headers show
Series [FFmpeg-devel,v5,01/12] avutil/frame: Subtitle Filtering - Add AVMediaType property to AVFrame | expand

Checks

Context Check Description
andriy/configurex86 warning Failed to apply patch

Commit Message

Soft Works Sept. 12, 2021, 3:22 a.m. UTC
Signed-off-by: softworkz <softworkz@hotmail.com>
---
 doc/filters.texi         |  64 +++++++
 libavfilter/Makefile     |   3 +
 libavfilter/allfilters.c |   1 +
 libavfilter/sf_textmod.c | 381 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 449 insertions(+)
 create mode 100644 libavfilter/sf_textmod.c

Comments

Andreas Rheinhardt Sept. 12, 2021, 9:55 p.m. UTC | #1
Soft Works:
> Signed-off-by: softworkz <softworkz@hotmail.com>
> ---
>  doc/filters.texi         |  64 +++++++
>  libavfilter/Makefile     |   3 +
>  libavfilter/allfilters.c |   1 +
>  libavfilter/sf_textmod.c | 381 +++++++++++++++++++++++++++++++++++++++
>  4 files changed, 449 insertions(+)
>  create mode 100644 libavfilter/sf_textmod.c
> 
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 1d76461ada..9fd2876d63 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -25024,6 +25024,70 @@ existing filters using @code{--disable-filters}.
>  
>  Below is a description of the currently available subtitle filters.
>  
> +@section textmod
> +
> +Modify subtitle text in a number of ways.
> +
> +It accepts the following parameters:
> +
> +@table @option
> +@item mode
> +The kind of text modification to apply
> +
> +Supported operation modes are:
> +
> +@table @var
> +@item 0, leet
> +Convert subtitle text to 'leet speak'. It's primarily useful for testing as the modification will be visible with almost all text lines.
> +@item 1, to_upper
> +Change all text to upper case. Might improve readability.
> +@item 2, to_lower
> +Change all text to lower case.
> +@item 3, replace_chars
> +Replace one or more characters. Requires the find and replace parameters to be specified. 
> +Both need to be equal in length.
> +The first char in find is replaced by the first char in replace, same for all subsequent chars.
> +@item 4, remove_chars
> +Remove certain characters. Requires the find parameter to be specified. 
> +All chars in the find parameter string will be removed from all subtitle text.
> +@item 5, replace_words
> +Replace one or more words. Requires the find and replace parameters to be specified. Multiple words must be separated by the delimiter char specified via the separator parameter (default: ',').
> +The number of words in the find and replace parameters needs to be equal.
> +The first word in find is replaced by the first word in replace, same for all subsequent words.
> +@item 6, remove_words
> +Remove certain words. Requires the find parameter to be specified. Multiple words must be separated by the delimiter char specified via the separator parameter (default: ',').
> +All words in the find parameter string will be removed from all subtitle text.
> +@end table
> +
> +@item find
> +Required for replace_chars, remove_chars, replace_words and remove_words.
> +
> +@item replace
> +Required for replace_chars and replace_words.
> +
> +@item separator
> +Delimiter character for words. Used with replace_words and remove_words. Must be a single character.
> +The default is ','.
> +
> +@end table
> +
> +@subsection Examples
> +
> +@itemize
> +@item
> +Change all characters to upper case while keeping all styles and animations:
> +@example
> +ffmpeg -i "https://streams.videolan.org/ffmpeg/mkv_subtitles.mkv" -filter_complex "[0:s]textmod=mode=to_upper" -map 0 -y out.mkv
> +@end example
> +@item
> +Mark the 100-pixel-wide region on the left edge of the frame as very
> +uninteresting (to be encoded at much lower quality than the rest of
> +the frame).
> +@example
> +addroi=0:0:100:ih:+1/5
> +@end example
> +@end itemize
> +
>  @section graphicsub2video
>  
>  Renders graphic subtitles as video frames. 
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 0e752c5bf9..5a5a4be47e 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -534,6 +534,9 @@ OBJS-$(CONFIG_YUVTESTSRC_FILTER)             += vsrc_testsrc.o
>  
>  OBJS-$(CONFIG_NULLSINK_FILTER)               += vsink_nullsink.o
>  
> +# subtitle filters
> +OBJS-$(CONFIG_TEXTMOD_FILTER)                += sf_textmod.o
> +
>  # multimedia filters
>  OBJS-$(CONFIG_ABITSCOPE_FILTER)              += avf_abitscope.o
>  OBJS-$(CONFIG_ADRAWGRAPH_FILTER)             += f_drawgraph.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index 77463aa4c8..6d7a535ee8 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -524,6 +524,7 @@ extern const AVFilter ff_avf_showvolume;
>  extern const AVFilter ff_avf_showwaves;
>  extern const AVFilter ff_avf_showwavespic;
>  extern const AVFilter ff_vaf_spectrumsynth;
> +extern const AVFilter ff_sf_textmod;
>  extern const AVFilter ff_svf_graphicsub2video;
>  extern const AVFilter ff_svf_textsub2video;
>  
> diff --git a/libavfilter/sf_textmod.c b/libavfilter/sf_textmod.c
> new file mode 100644
> index 0000000000..7c23ded9ef
> --- /dev/null
> +++ b/libavfilter/sf_textmod.c
> @@ -0,0 +1,381 @@
> +/*
> + * Copyright (c) 2021 softworkz
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * text subtitle filter which allows to modify subtitle text in several ways
> + */
> +
> +#include <libavcodec/ass.h>
> +
> +#include "libavutil/avassert.h"
> +#include "libavutil/avstring.h"
> +#include "libavutil/opt.h"
> +#include "avfilter.h"
> +#include "internal.h"
> +#include "libavcodec/avcodec.h"
> +#include "libavcodec/ass_split.h"
> +
> +static const char* leet_src = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
> +static const char* leet_dst = "abcd3f6#1jklmn0pq257uvwxyzAB(D3F6#1JKLMN0PQ257UVWXYZ";
> +
> +enum TextModOperation {
> +    OP_LEET,
> +    OP_TO_UPPER,
> +    OP_TO_LOWER,
> +    OP_REPLACE_CHARS,
> +    OP_REMOVE_CHARS,
> +    OP_REPLACE_WORDS,
> +    OP_REMOVE_WORDS,
> +    NB_OPS,
> +};
> +
> +typedef struct TextModContext {
> +    const AVClass *class;
> +    enum AVSubtitleType format;
> +    enum TextModOperation operation;
> +    char *find;
> +    char *replace;
> +    char *separator;
> +    char **find_list;
> +    int  nb_find_list;
> +    char **replace_list;
> +    int  nb_replace_list;
> +} TextModContext;
> +
> +static char **split_string(char *source, int *nb_elems, char delim)
> +{
> +    char **list = NULL;
> +    char *temp = NULL;
> +    char *ptr = av_strtok(source, &delim, &temp);
> +
> +    while (ptr) {
> +        av_dynarray_add(&list, nb_elems, ptr);
> +        if (!list)
> +            return NULL;
> +
> +        ptr = av_strtok(NULL, &delim, &temp);
> +    }
> +
> +    av_dynarray_add(&list, nb_elems, NULL);
> +
> +    return list;
> +}
> +
> +static int init(AVFilterContext *ctx)
> +{
> +    TextModContext *s = ctx->priv;
> +
> +    switch (s->operation) {
> +    case OP_REPLACE_CHARS:
> +    case OP_REMOVE_CHARS:
> +    case OP_REPLACE_WORDS:
> +    case OP_REMOVE_WORDS:
> +        if (!s->find || !strlen(s->find)) {
> +            av_log(ctx, AV_LOG_ERROR, "Selected mode requires the 'find' parameter to be specified");
> +            return AVERROR(EINVAL);
> +        }
> +        break;
> +    }
> +
> +    switch (s->operation) {
> +    case OP_REPLACE_CHARS:
> +    case OP_REPLACE_WORDS:
> +        if (!s->replace || !strlen(s->replace)) {
> +            av_log(ctx, AV_LOG_ERROR, "Selected mode requires the 'replace' parameter to be specified");
> +            return AVERROR(EINVAL);
> +        }
> +        break;
> +    }
> +
> +    if (s->operation == OP_REPLACE_CHARS && strlen(s->find) != strlen(s->replace)) {
> +        av_log(ctx, AV_LOG_ERROR, "Selected mode requires the 'find' and 'replace' parameters to have the same length");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    if (s->operation == OP_REPLACE_WORDS || s->operation == OP_REMOVE_WORDS) {
> +        if (!s->separator || strlen(s->separator) != 1) {
> +            av_log(ctx, AV_LOG_ERROR, "Selected mode requires a single separator char to be specified");
> +            return AVERROR(EINVAL);
> +        }
> +
> +        s->find_list = split_string(s->find, &s->nb_find_list, *s->separator);
> +        if (!s->find_list)
> +            return AVERROR(ENOMEM);
> +
> +        if (s->operation == OP_REPLACE_WORDS) {
> +
> +            s->replace_list = split_string(s->replace, &s->nb_replace_list, *s->separator);
> +            if (!s->replace_list)
> +                return AVERROR(ENOMEM);
> +
> +            if (s->nb_find_list != s->nb_replace_list) {
> +                av_log(ctx, AV_LOG_ERROR, "The number of words in 'find' and 'replace' needs to be equal");
> +                return AVERROR(EINVAL);
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static void uninit(AVFilterContext *ctx)
> +{
> +    TextModContext *s = ctx->priv;
> +    int i;
> +
> +    for (i = 0; i < s->nb_find_list; i++) {
> +        av_free(&s->find_list[i]);

This is completely wrong and will crash: You either want to do
av_freep(&s->find_list[i]) or av_free(s->find_list[i]) if these strings
were independently allocated; but looking at split_string() shows that
they are not, they are substrings of s->find. Similar for the loop below.

> +    }
> +    s->nb_find_list = 0;
> +    av_freep(&s->find_list);
> +
> +    for (i = 0; i < s->nb_replace_list; i++) {
> +        av_free(&s->replace_list[i]);
> +    }
> +    s->nb_replace_list = 0;
> +    av_freep(&s->replace_list);
> +}
> +
> +static int query_formats(AVFilterContext *ctx)
> +{
> +    AVFilterFormats *formats;
> +    AVFilterLink *inlink = ctx->inputs[0];
> +    AVFilterLink *outlink = ctx->outputs[0];
> +    static const enum AVSubtitleType subtitle_fmts[] = { AV_SUBTITLE_FMT_ASS, AV_SUBTITLE_FMT_NONE };
> +    int ret;
> +
> +    /* set input subtitle format */
> +    formats = ff_make_format_list(subtitle_fmts);
> +    if ((ret = ff_formats_ref(formats, &inlink->outcfg.formats)) < 0)
> +        return ret;
> +
> +    /* set output video format */
> +    if ((ret = ff_formats_ref(formats, &outlink->incfg.formats)) < 0)
> +        return ret;
> +
> +    return 0;
> +}
> +
> +static char *process_text(TextModContext *s, char *text)
> +{
> +    const char *char_src = s->find;
> +    const char *char_dst = s->replace;
> +    char *result = NULL;
> +    int escape_level = 0, k = 0;
> +
> +    switch (s->operation) {
> +    case OP_LEET:
> +    case OP_REPLACE_CHARS:
> +
> +        if (s->operation == OP_LEET) {
> +            char_src = leet_src;
> +            char_dst = leet_dst;
> +        }
> +
> +        result = av_strdup(text);
> +        if (!result)
> +            return NULL;
> +
> +        for (size_t n = 0; n < strlen(result); n++) {
> +            if (result[n] == '{')
> +                escape_level++;
> +
> +            if (!escape_level) {
> +                for (size_t t = 0; t < FF_ARRAY_ELEMS(char_src); t++) {
> +                    if (result[n] == char_src[t]) {
> +                        result[n] = char_dst[t];
> +                        break;
> +                    }
> +                }
> +            }
> +
> +            if (result[n] == '}')
> +                escape_level--;
> +        }
> +
> +        break;
> +    case OP_TO_UPPER:
> +    case OP_TO_LOWER:
> +
> +        result = av_strdup(text);
> +        if (!result)
> +            return NULL;
> +
> +        for (size_t n = 0; n < strlen(result); n++) {
> +            if (result[n] == '{')
> +                escape_level++;
> +            if (!escape_level)
> +                result[n] = s->operation == OP_TO_LOWER ? av_tolower(result[n]) : av_toupper(result[n]);
> +            if (result[n] == '}')
> +                escape_level--;
> +        }
> +
> +        break;
> +    case OP_REMOVE_CHARS:
> +
> +        result = av_strdup(text);
> +        if (!result)
> +            return NULL;
> +
> +        for (size_t n = 0; n < strlen(result); n++) {
> +            int skip_char = 0;
> +
> +            if (result[n] == '{')
> +                escape_level++;
> +
> +            if (!escape_level) {
> +                for (size_t t = 0; t < FF_ARRAY_ELEMS(char_src); t++) {
> +                    if (result[n] == char_src[t]) {
> +                        skip_char = 1;
> +                        break;
> +                    }
> +                }
> +            }
> +
> +            if (!skip_char)
> +                result[k++] = result[n];
> +
> +            if (result[n] == '}')
> +                escape_level--;
> +        }
> +
> +        result[k] = 0;
> +
> +        break;
> +    case OP_REPLACE_WORDS:
> +    case OP_REMOVE_WORDS:
> +
> +        result = av_strdup(text);
> +        if (!result)
> +            return NULL;
> +
> +        for (int n = 0; n < s->nb_find_list; n++) {
> +            char *tmp           = result;
> +            const char *replace = (s->operation == OP_REPLACE_WORDS) ? s->replace_list[n] : "";
> +
> +            result = av_strireplace(result, s->find_list[n], replace);
> +            if (!result)
> +                return NULL;
> +
> +            av_free(tmp);
> +        }
> +
> +        break;
> +    }
> +
> +    return result;
> +}
> +
> +static char *process_dialog(TextModContext *s, char *ass_line)
> +{
> +    ASSDialog *dialog = ff_ass_split_dialog(NULL, ass_line);
> +    char *result, *text;
> +
> +    if (!dialog)
> +        return NULL;
> +
> +    text = process_text(s, dialog->text);
> +    if (!text)
> +        return NULL;
> +
> +    result = ff_ass_get_dialog(dialog->readorder, dialog->layer, dialog->style, dialog->name, text);
> +
> +    av_free(text);
> +    ff_ass_free_dialog(&dialog);
> +    return result;
> +}
> +
> +static int filter_frame(AVFilterLink *inlink, AVFrame *src_frame)
> +{
> +    TextModContext *s = inlink->dst->priv;
> +    AVFilterLink *outlink = inlink->dst->outputs[0];
> +    int ret;
> +    AVFrame *out;
> +
> +    outlink->format = inlink->format;
> +
> +    out = av_frame_clone(src_frame);

Why clone? You can just reuse src_frame as is.

> +    if (!out)
> +        return AVERROR(ENOMEM);
> +
> +    for (unsigned i = 0; i < out->num_subtitle_rects; i++) {
> +
> +        AVSubtitleRect *rect = out->subtitle_rects[i];
> +
> +        if (rect->ass) {

Is there actually a reason that num_subtitle_rects can't be taken at
face value? Your query_formats callback after all signals that only ass
subtitles are accepted.

> +            char *tmp = rect->ass;
> +            rect->ass = process_dialog(s, rect->ass);

You may not be the sole owner of this AVSubtitleRect; after all,
they are shared. Ergo you must not modify it. Is it possible that you
believed that av_frame_clone() would make the frame writable? It does
not. For non-subtitle frames, av_frame_make_writable() makes them
writable; but it does not for subtitles, because you made
av_frame_get_buffer2() a no-op for subtitles and so
av_frame_make_writable() will temporarily increment the refcount and
then decrement it again.

> +            av_free(tmp);
> +            if (!rect->ass)
> +                return AVERROR(ENOMEM);
> +        }
> +    }
> +
> +    av_frame_free(&src_frame);
> +    return ff_filter_frame(outlink, out);
> +}
> +
> +#define OFFSET(x) offsetof(TextModContext, x)
> +#define FLAGS (AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
> +
> +static const AVOption textmod_options[] = {
> +    { "mode",             "set operation mode",              OFFSET(operation),  AV_OPT_TYPE_INT,    {.i64=OP_LEET},          OP_LEET, NB_OPS-1, FLAGS, "mode" },
> +    {   "leet",           "convert text to 'leet speak'",    0,                  AV_OPT_TYPE_CONST,  {.i64=OP_LEET},          0,       0,        FLAGS, "mode" },
> +    {   "to_upper",       "change to upper case",            0,                  AV_OPT_TYPE_CONST,  {.i64=OP_TO_UPPER},      0,       0,        FLAGS, "mode" },
> +    {   "to_lower",       "change to lower case",            0,                  AV_OPT_TYPE_CONST,  {.i64=OP_TO_LOWER},      0,       0,        FLAGS, "mode" },
> +    {   "replace_chars",  "replace characters",              0,                  AV_OPT_TYPE_CONST,  {.i64=OP_REPLACE_CHARS}, 0,       0,        FLAGS, "mode" },
> +    {   "remove_chars",   "remove characters",               0,                  AV_OPT_TYPE_CONST,  {.i64=OP_REMOVE_CHARS},  0,       0,        FLAGS, "mode" },
> +    {   "replace_words",  "replace words",                   0,                  AV_OPT_TYPE_CONST,  {.i64=OP_REPLACE_WORDS}, 0,       0,        FLAGS, "mode" },
> +    {   "remove_words",   "remove words",                    0,                  AV_OPT_TYPE_CONST,  {.i64=OP_REMOVE_WORDS},  0,       0,        FLAGS, "mode" },
> +    { "find",             "chars/words to find or remove",   OFFSET(find),       AV_OPT_TYPE_STRING, {.str = NULL},           0,       0,        FLAGS, NULL   },
> +    { "replace",          "chars/words to replace",          OFFSET(replace),    AV_OPT_TYPE_STRING, {.str = NULL},           0,       0,        FLAGS, NULL   },
> +    { "separator",        "word separator (default: ',')",   OFFSET(separator),  AV_OPT_TYPE_STRING, {.str = ","},            0,       0,        FLAGS, NULL   },
> +    { NULL },
> +};
> +
> +AVFILTER_DEFINE_CLASS(textmod);
> +
> +static const AVFilterPad inputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_SUBTITLE,
> +        .filter_frame = filter_frame,
> +    },
> +};
> +
> +static const AVFilterPad outputs[] = {
> +    {
> +        .name          = "default",
> +        .type          = AVMEDIA_TYPE_SUBTITLE,
> +    },
> +};
> +
> +const AVFilter ff_sf_textmod = {
> +    .name          = "textmod",
> +    .description   = NULL_IF_CONFIG_SMALL("Modify subtitle text in several ways"),
> +    .init          = init,
> +    .uninit        = uninit,
> +    .query_formats = query_formats,
> +    .priv_size     = sizeof(TextModContext),
> +    .priv_class    = &textmod_class,
> +    FILTER_INPUTS(inputs),
> +    FILTER_OUTPUTS(outputs),
> +};
>
Soft Works Sept. 12, 2021, 10:34 p.m. UTC | #2
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
> Andreas Rheinhardt
> Sent: Sunday, 12 September 2021 23:56
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH v5 10/12] avfilter/textmod: Add
> textmod filter
> 
> Soft Works:
> > Signed-off-by: softworkz <softworkz@hotmail.com>
> > ---
> >  doc/filters.texi         |  64 +++++++
> >  libavfilter/Makefile     |   3 +
> >  libavfilter/allfilters.c |   1 +
> >  libavfilter/sf_textmod.c | 381
> +++++++++++++++++++++++++++++++++++++++
> >  4 files changed, 449 insertions(+)
> >  create mode 100644 libavfilter/sf_textmod.c
> >
> > diff --git a/doc/filters.texi b/doc/filters.texi
> > index 1d76461ada..9fd2876d63 100644
> > --- a/doc/filters.texi
> > +++ b/doc/filters.texi
> > @@ -25024,6 +25024,70 @@ existing filters using @code{--disable-
> filters}.

[...]

> > +static void uninit(AVFilterContext *ctx)
> > +{
> > +    TextModContext *s = ctx->priv;
> > +    int i;
> > +
> > +    for (i = 0; i < s->nb_find_list; i++) {
> > +        av_free(&s->find_list[i]);
> 
> This is completely wrong and will crash: You either want to do
> av_freep(&s->find_list[i]) or av_free(s->find_list[i]) if these
> strings
> were independently allocated; but looking at split_string() shows
> that
> they are not, they are substrings of s->find. Similar for the loop
> below.

You are right of course, thanks.

> > +    }
> > +    s->nb_find_list = 0;
> > +    av_freep(&s->find_list);
> > +
> > +    for (i = 0; i < s->nb_replace_list; i++) {
> > +        av_free(&s->replace_list[i]);
> > +    }
> > +    s->nb_replace_list = 0;
> > +    av_freep(&s->replace_list);
> > +}
> > +
> > +static int query_formats(AVFilterContext *ctx)
> > +{
> > +    AVFilterFormats *formats;
> > +    AVFilterLink *inlink = ctx->inputs[0];
> > +    AVFilterLink *outlink = ctx->outputs[0];
> > +    static const enum AVSubtitleType subtitle_fmts[] = {
> AV_SUBTITLE_FMT_ASS, AV_SUBTITLE_FMT_NONE };
> > +    int ret;
> > +
> > +    /* set input subtitle format */
> > +    formats = ff_make_format_list(subtitle_fmts);
> > +    if ((ret = ff_formats_ref(formats, &inlink->outcfg.formats)) <
> 0)
> > +        return ret;
> > +
> > +    /* set output video format */
> > +    if ((ret = ff_formats_ref(formats, &outlink->incfg.formats)) <
> 0)
> > +        return ret;
> > +
> > +    return 0;
> > +}
> > +
> > +static char *process_text(TextModContext *s, char *text)
> > +{
> > +    const char *char_src = s->find;
> > +    const char *char_dst = s->replace;
> > +    char *result = NULL;
> > +    int escape_level = 0, k = 0;
> > +
> > +    switch (s->operation) {
> > +    case OP_LEET:
> > +    case OP_REPLACE_CHARS:
> > +
> > +        if (s->operation == OP_LEET) {
> > +            char_src = leet_src;
> > +            char_dst = leet_dst;
> > +        }
> > +
> > +        result = av_strdup(text);
> > +        if (!result)
> > +            return NULL;
> > +
> > +        for (size_t n = 0; n < strlen(result); n++) {
> > +            if (result[n] == '{')
> > +                escape_level++;
> > +
> > +            if (!escape_level) {
> > +                for (size_t t = 0; t < FF_ARRAY_ELEMS(char_src);
> t++) {
> > +                    if (result[n] == char_src[t]) {
> > +                        result[n] = char_dst[t];
> > +                        break;
> > +                    }
> > +                }
> > +            }
> > +
> > +            if (result[n] == '}')
> > +                escape_level--;
> > +        }
> > +
> > +        break;
> > +    case OP_TO_UPPER:
> > +    case OP_TO_LOWER:
> > +
> > +        result = av_strdup(text);
> > +        if (!result)
> > +            return NULL;
> > +
> > +        for (size_t n = 0; n < strlen(result); n++) {
> > +            if (result[n] == '{')
> > +                escape_level++;
> > +            if (!escape_level)
> > +                result[n] = s->operation == OP_TO_LOWER ?
> av_tolower(result[n]) : av_toupper(result[n]);
> > +            if (result[n] == '}')
> > +                escape_level--;
> > +        }
> > +
> > +        break;
> > +    case OP_REMOVE_CHARS:
> > +
> > +        result = av_strdup(text);
> > +        if (!result)
> > +            return NULL;
> > +
> > +        for (size_t n = 0; n < strlen(result); n++) {
> > +            int skip_char = 0;
> > +
> > +            if (result[n] == '{')
> > +                escape_level++;
> > +
> > +            if (!escape_level) {
> > +                for (size_t t = 0; t < FF_ARRAY_ELEMS(char_src);
> t++) {
> > +                    if (result[n] == char_src[t]) {
> > +                        skip_char = 1;
> > +                        break;
> > +                    }
> > +                }
> > +            }
> > +
> > +            if (!skip_char)
> > +                result[k++] = result[n];
> > +
> > +            if (result[n] == '}')
> > +                escape_level--;
> > +        }
> > +
> > +        result[k] = 0;
> > +
> > +        break;
> > +    case OP_REPLACE_WORDS:
> > +    case OP_REMOVE_WORDS:
> > +
> > +        result = av_strdup(text);
> > +        if (!result)
> > +            return NULL;
> > +
> > +        for (int n = 0; n < s->nb_find_list; n++) {
> > +            char *tmp           = result;
> > +            const char *replace = (s->operation ==
> OP_REPLACE_WORDS) ? s->replace_list[n] : "";
> > +
> > +            result = av_strireplace(result, s->find_list[n],
> replace);
> > +            if (!result)
> > +                return NULL;
> > +
> > +            av_free(tmp);
> > +        }
> > +
> > +        break;
> > +    }
> > +
> > +    return result;
> > +}
> > +
> > +static char *process_dialog(TextModContext *s, char *ass_line)
> > +{
> > +    ASSDialog *dialog = ff_ass_split_dialog(NULL, ass_line);
> > +    char *result, *text;
> > +
> > +    if (!dialog)
> > +        return NULL;
> > +
> > +    text = process_text(s, dialog->text);
> > +    if (!text)
> > +        return NULL;
> > +
> > +    result = ff_ass_get_dialog(dialog->readorder, dialog->layer,
> dialog->style, dialog->name, text);
> > +
> > +    av_free(text);
> > +    ff_ass_free_dialog(&dialog);
> > +    return result;
> > +}
> > +
> > +static int filter_frame(AVFilterLink *inlink, AVFrame *src_frame)
> > +{
> > +    TextModContext *s = inlink->dst->priv;
> > +    AVFilterLink *outlink = inlink->dst->outputs[0];
> > +    int ret;
> > +    AVFrame *out;
> > +
> > +    outlink->format = inlink->format;
> > +
> > +    out = av_frame_clone(src_frame);
> 
> Why clone? You can just reuse src_frame as is.

[..]

> 
> You may not be the sole owner of this AVSubtitleRect; after all,
> they are shared. Ergo you must not modify it. Is it possible that you
> believed that av_frame_clone() would make the frame writable? It does
> not. For non-subtitle frames, av_frame_make_writable() makes them
> writable; but it does not for subtitles, because you made
> av_frame_get_buffer2() a no-op for subtitles and so
> av_frame_make_writable() will temporarily increment the refcount and
> then decrement it again.

One unsolved problem I have about dealing with AVSubtitleRect
as being part of AVFrame is that it's not possible to make a copy
in a reliable way, because the allocated sizes of the data[4] pointers
are not reliably known.

Usually, data[0] is the image and data[1] is the palette, but will 
it always be like this? Then better not have a data array but 
named pointer variables instead.


I was not sure what will be the general route: Merging AVSubtitle
into AVFrame or attaching AVSubtitle as a property to AVFrame.

BTW - what is your opinion about that question?


Also, thank you very much for your detailed reviews of my patches!

Kind regards,
softworkz
Lynne Sept. 13, 2021, 12:10 a.m. UTC | #3
13 Sept 2021, 00:34 by softworkz@hotmail.com:

>
>
>> -----Original Message-----
>> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
>> Andreas Rheinhardt
>> Sent: Sunday, 12 September 2021 23:56
>> To: ffmpeg-devel@ffmpeg.org
>> Subject: Re: [FFmpeg-devel] [PATCH v5 10/12] avfilter/textmod: Add
>> textmod filter
>>
>> Soft Works:
>> > Signed-off-by: softworkz <softworkz@hotmail.com>
>> > ---
>> >  doc/filters.texi         |  64 +++++++
>> >  libavfilter/Makefile     |   3 +
>> >  libavfilter/allfilters.c |   1 +
>> >  libavfilter/sf_textmod.c | 381
>> +++++++++++++++++++++++++++++++++++++++
>> >  4 files changed, 449 insertions(+)
>> >  create mode 100644 libavfilter/sf_textmod.c
>> >
>> > diff --git a/doc/filters.texi b/doc/filters.texi
>> > index 1d76461ada..9fd2876d63 100644
>> > --- a/doc/filters.texi
>> > +++ b/doc/filters.texi
>> > @@ -25024,6 +25024,70 @@ existing filters using @code{--disable-
>> filters}.
>>
>
> [...]
>
>> > +static void uninit(AVFilterContext *ctx)
>> > +{
>> > +    TextModContext *s = ctx->priv;
>> > +    int i;
>> > +
>> > +    for (i = 0; i < s->nb_find_list; i++) {
>> > +        av_free(&s->find_list[i]);
>>
>> This is completely wrong and will crash: You either want to do
>> av_freep(&s->find_list[i]) or av_free(s->find_list[i]) if these
>> strings
>> were independently allocated; but looking at split_string() shows
>> that
>> they are not, they are substrings of s->find. Similar for the loop
>> below.
>>
>
> You are right of course, thanks.
>
>> > +    }
>> > +    s->nb_find_list = 0;
>> > +    av_freep(&s->find_list);
>> > +
>> > +    for (i = 0; i < s->nb_replace_list; i++) {
>> > +        av_free(&s->replace_list[i]);
>> > +    }
>> > +    s->nb_replace_list = 0;
>> > +    av_freep(&s->replace_list);
>> > +}
>> > +
>> > +static int query_formats(AVFilterContext *ctx)
>> > +{
>> > +    AVFilterFormats *formats;
>> > +    AVFilterLink *inlink = ctx->inputs[0];
>> > +    AVFilterLink *outlink = ctx->outputs[0];
>> > +    static const enum AVSubtitleType subtitle_fmts[] = {
>> AV_SUBTITLE_FMT_ASS, AV_SUBTITLE_FMT_NONE };
>> > +    int ret;
>> > +
>> > +    /* set input subtitle format */
>> > +    formats = ff_make_format_list(subtitle_fmts);
>> > +    if ((ret = ff_formats_ref(formats, &inlink->outcfg.formats)) <
>> 0)
>> > +        return ret;
>> > +
>> > +    /* set output video format */
>> > +    if ((ret = ff_formats_ref(formats, &outlink->incfg.formats)) <
>> 0)
>> > +        return ret;
>> > +
>> > +    return 0;
>> > +}
>> > +
>> > +static char *process_text(TextModContext *s, char *text)
>> > +{
>> > +    const char *char_src = s->find;
>> > +    const char *char_dst = s->replace;
>> > +    char *result = NULL;
>> > +    int escape_level = 0, k = 0;
>> > +
>> > +    switch (s->operation) {
>> > +    case OP_LEET:
>> > +    case OP_REPLACE_CHARS:
>> > +
>> > +        if (s->operation == OP_LEET) {
>> > +            char_src = leet_src;
>> > +            char_dst = leet_dst;
>> > +        }
>> > +
>> > +        result = av_strdup(text);
>> > +        if (!result)
>> > +            return NULL;
>> > +
>> > +        for (size_t n = 0; n < strlen(result); n++) {
>> > +            if (result[n] == '{')
>> > +                escape_level++;
>> > +
>> > +            if (!escape_level) {
>> > +                for (size_t t = 0; t < FF_ARRAY_ELEMS(char_src);
>> t++) {
>> > +                    if (result[n] == char_src[t]) {
>> > +                        result[n] = char_dst[t];
>> > +                        break;
>> > +                    }
>> > +                }
>> > +            }
>> > +
>> > +            if (result[n] == '}')
>> > +                escape_level--;
>> > +        }
>> > +
>> > +        break;
>> > +    case OP_TO_UPPER:
>> > +    case OP_TO_LOWER:
>> > +
>> > +        result = av_strdup(text);
>> > +        if (!result)
>> > +            return NULL;
>> > +
>> > +        for (size_t n = 0; n < strlen(result); n++) {
>> > +            if (result[n] == '{')
>> > +                escape_level++;
>> > +            if (!escape_level)
>> > +                result[n] = s->operation == OP_TO_LOWER ?
>> av_tolower(result[n]) : av_toupper(result[n]);
>> > +            if (result[n] == '}')
>> > +                escape_level--;
>> > +        }
>> > +
>> > +        break;
>> > +    case OP_REMOVE_CHARS:
>> > +
>> > +        result = av_strdup(text);
>> > +        if (!result)
>> > +            return NULL;
>> > +
>> > +        for (size_t n = 0; n < strlen(result); n++) {
>> > +            int skip_char = 0;
>> > +
>> > +            if (result[n] == '{')
>> > +                escape_level++;
>> > +
>> > +            if (!escape_level) {
>> > +                for (size_t t = 0; t < FF_ARRAY_ELEMS(char_src);
>> t++) {
>> > +                    if (result[n] == char_src[t]) {
>> > +                        skip_char = 1;
>> > +                        break;
>> > +                    }
>> > +                }
>> > +            }
>> > +
>> > +            if (!skip_char)
>> > +                result[k++] = result[n];
>> > +
>> > +            if (result[n] == '}')
>> > +                escape_level--;
>> > +        }
>> > +
>> > +        result[k] = 0;
>> > +
>> > +        break;
>> > +    case OP_REPLACE_WORDS:
>> > +    case OP_REMOVE_WORDS:
>> > +
>> > +        result = av_strdup(text);
>> > +        if (!result)
>> > +            return NULL;
>> > +
>> > +        for (int n = 0; n < s->nb_find_list; n++) {
>> > +            char *tmp           = result;
>> > +            const char *replace = (s->operation ==
>> OP_REPLACE_WORDS) ? s->replace_list[n] : "";
>> > +
>> > +            result = av_strireplace(result, s->find_list[n],
>> replace);
>> > +            if (!result)
>> > +                return NULL;
>> > +
>> > +            av_free(tmp);
>> > +        }
>> > +
>> > +        break;
>> > +    }
>> > +
>> > +    return result;
>> > +}
>> > +
>> > +static char *process_dialog(TextModContext *s, char *ass_line)
>> > +{
>> > +    ASSDialog *dialog = ff_ass_split_dialog(NULL, ass_line);
>> > +    char *result, *text;
>> > +
>> > +    if (!dialog)
>> > +        return NULL;
>> > +
>> > +    text = process_text(s, dialog->text);
>> > +    if (!text)
>> > +        return NULL;
>> > +
>> > +    result = ff_ass_get_dialog(dialog->readorder, dialog->layer,
>> dialog->style, dialog->name, text);
>> > +
>> > +    av_free(text);
>> > +    ff_ass_free_dialog(&dialog);
>> > +    return result;
>> > +}
>> > +
>> > +static int filter_frame(AVFilterLink *inlink, AVFrame *src_frame)
>> > +{
>> > +    TextModContext *s = inlink->dst->priv;
>> > +    AVFilterLink *outlink = inlink->dst->outputs[0];
>> > +    int ret;
>> > +    AVFrame *out;
>> > +
>> > +    outlink->format = inlink->format;
>> > +
>> > +    out = av_frame_clone(src_frame);
>>
>> Why clone? You can just reuse src_frame as is.
>>
>
> [..]
>
>>
>> You may not be the sole owner of this AVSubtitleRect; after all,
>> they are shared. Ergo you must not modify it. Is it possible that you
>> believed that av_frame_clone() would make the frame writable? It does
>> not. For non-subtitle frames, av_frame_make_writable() makes them
>> writable; but it does not for subtitles, because you made
>> av_frame_get_buffer2() a no-op for subtitles and so
>> av_frame_make_writable() will temporarily increment the refcount and
>> then decrement it again.
>>
>
> One unsolved problem I have about dealing with AVSubtitleRect
> as being part of AVFrame is that it's not possible to make a copy, 
> in a reliable way because the allocated sizes of the data[4] pointers
> are not reliably known.
>

We have cropping fields and cropping side data (IIRC) now,
can they be used for this?


> Usually, data[0] is the image and data[1] is the palette, but will 
> it always be like this? Then better not have a data array but 
> named pointer variables instead.
>
>
> I was not sure what will be the general route: Merging AVSubtitle
> into AVFrame or attaching AVSubtitle as a property to AVFrame.
>

I think that's not really the best way to go. Subtitles ought to
be contained in the data[] fields and described by the other
fields like with audio and video. While we do sometimes
have data[] contain pointers to structs (hardware frames),
for this case it's a crude solution.
Soft Works Sept. 13, 2021, 12:22 a.m. UTC | #4
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
> Lynne
> Sent: Monday, 13 September 2021 02:11
> To: FFmpeg development discussions and patches <ffmpeg-
> devel@ffmpeg.org>
> Subject: Re: [FFmpeg-devel] [PATCH v5 10/12] avfilter/textmod: Add
> textmod filter
> 
> 13 Sept 2021, 00:34 by softworkz@hotmail.com:
> 
> >
> >
> >> -----Original Message-----
> >> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
> >> Andreas Rheinhardt
> >> Sent: Sunday, 12 September 2021 23:56
> >> To: ffmpeg-devel@ffmpeg.org
> >> Subject: Re: [FFmpeg-devel] [PATCH v5 10/12] avfilter/textmod: Add
> >> textmod filter

[..]

> >
> > One unsolved problem I have about dealing with AVSubtitleRect
> > as being part of AVFrame is that it's not possible to make a copy,
> > in a reliable way because the allocated sizes of the data[4]
> pointers
> > are not reliably known.
> >
> 
> We have cropping fields and cropping side data (IIRC) now,
> can they be used for this?
> 
> 
> > Usually, data[0] is the image and data[1] is the palette, but will
> > it always be like this? Then better not have a data array but
> > named pointer variables instead.
> >
> >
> > I was not sure what will be the general route: Merging AVSubtitle
> > into AVFrame or attaching AVSubtitle as a property to AVFrame.
> >
> 
> I think that's not really the best way to go. Subtitles ought to
> be contained in the data[] fields and described by the other
> fields like with audio and video.

You mean subtitle rects? Each AVSubtitle can include multiple
AVSubtitleRect(s)

Each bitmap subtitle rect has its own x,y,w and h as well as flags.
There are no existing fields to describe this. It would need 
an array of structs for these fields anyway (=> AVSubtitleRect), 
so why put the data in the data pointers then?
And in case of text subtitles, would the data[] fields contain
strings then?


softworkz
Soft Works Sept. 15, 2021, 4:48 a.m. UTC | #5
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
> Andreas Rheinhardt
> Sent: Sunday, 12 September 2021 23:56
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH v5 10/12] avfilter/textmod: Add
> textmod filter
> 
> Soft Works:
> > Signed-off-by: softworkz <softworkz@hotmail.com>
> > ---
> >  doc/filters.texi         |  64 +++++++
> >  libavfilter/Makefile     |   3 +
> >  libavfilter/allfilters.c |   1 +
> >  libavfilter/sf_textmod.c | 381
> +++++++++++++++++++++++++++++++++++++++
> >  4 files changed, 449 insertions(+)
> >  create mode 100644 libavfilter/sf_textmod.c
> >

[..]

> > +
> > +static int filter_frame(AVFilterLink *inlink, AVFrame *src_frame)
> > +{
> > +    TextModContext *s = inlink->dst->priv;
> > +    AVFilterLink *outlink = inlink->dst->outputs[0];
> > +    int ret;
> > +    AVFrame *out;
> > +
> > +    outlink->format = inlink->format;
> > +
> > +    out = av_frame_clone(src_frame);
> 
> Why clone? You can just reuse src_frame as is.

Not then but now (see below)

> 
> > +    if (!out)
> > +        return AVERROR(ENOMEM);
> > +
> > +    for (unsigned i = 0; i < out->num_subtitle_rects; i++) {
> > +
> > +        AVSubtitleRect *rect = out->subtitle_rects[i];
> > +
> > +        if (rect->ass) {
> 
> Is there actually a reason that num_subtitle_rects can't be taken at
> face value? Your query_formats callback after all signals that only ass
> subtitles are accepted.

What do you mean by "at face value" - 'fixed'?

> 
> > +            char *tmp = rect->ass;
> > +            rect->ass = process_dialog(s, rect->ass);
> 
> You may not be the sole owner of this AVSubtitleRect; after all,
> they are shared. Ergo you must not modify it. Is it possible that you
> believed that av_frame_clone() would make the frame writable? It does
> not. For non-subtitle frames, av_frame_make_writable() makes them
> writable; but it does not for subtitles, because you made
> av_frame_get_buffer2() a no-op for subtitles and so
> av_frame_make_writable() will temporarily increment the refcount and
> then decrement it again.

In the next update, subtitle frames will have the same behavior as
video and audio frames, with make_writable working properly.

Thanks,
softworkz
diff mbox series

Patch

diff --git a/doc/filters.texi b/doc/filters.texi
index 1d76461ada..9fd2876d63 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -25024,6 +25024,70 @@  existing filters using @code{--disable-filters}.
 
 Below is a description of the currently available subtitle filters.
 
+@section textmod
+
+Modify subtitle text in a number of ways.
+
+It accepts the following parameters:
+
+@table @option
+@item mode
+The kind of text modification to apply
+
+Supported operation modes are:
+
+@table @var
+@item 0, leet
+Convert subtitle text to 'leet speak'. It's primarily useful for testing as the modification will be visible with almost all text lines.
+@item 1, to_upper
+Change all text to upper case. Might improve readability.
+@item 2, to_lower
+Change all text to lower case.
+@item 3, replace_chars
+Replace one or more characters. Requires the find and replace parameters to be specified. 
+Both need to be equal in length.
+The first char in find is replaced by the first char in replace, same for all subsequent chars.
+@item 4, remove_chars
+Remove certain characters. Requires the find parameter to be specified. 
+All chars in the find parameter string will be removed from all subtitle text.
+@item 5, replace_words
+Replace one or more words. Requires the find and replace parameters to be specified. Multiple words must be separated by the delimiter char specified via the separator parameter (default: ',').
+The number of words in the find and replace parameters needs to be equal.
+The first word in find is replaced by the first word in replace, same for all subsequent words.
+@item 6, remove_words
+Remove certain words. Requires the find parameter to be specified. Multiple words must be separated by the delimiter char specified via the separator parameter (default: ',').
+All words in the find parameter string will be removed from all subtitle text.
+@end table
+
+@item find
+Required for replace_chars, remove_chars, replace_words and remove_words.
+
+@item replace
+Required for replace_chars and replace_words.
+
+@item separator
+Delimiter character for words. Used with replace_words and remove_words. Must be a single character.
+The default is ','.
+
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Change all characters to upper case while keeping all styles and animations:
+@example
+ffmpeg -i "https://streams.videolan.org/ffmpeg/mkv_subtitles.mkv" -filter_complex "[0:s]textmod=mode=to_upper" -map 0 -y out.mkv
+@end example
+@item
+Mark the 100-pixel-wide region on the left edge of the frame as very
+uninteresting (to be encoded at much lower quality than the rest of
+the frame).
+@example
+addroi=0:0:100:ih:+1/5
+@end example
+@end itemize
+
 @section graphicsub2video
 
 Renders graphic subtitles as video frames. 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 0e752c5bf9..5a5a4be47e 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -534,6 +534,9 @@  OBJS-$(CONFIG_YUVTESTSRC_FILTER)             += vsrc_testsrc.o
 
 OBJS-$(CONFIG_NULLSINK_FILTER)               += vsink_nullsink.o
 
+# subtitle filters
+OBJS-$(CONFIG_TEXTMOD_FILTER)                += sf_textmod.o
+
 # multimedia filters
 OBJS-$(CONFIG_ABITSCOPE_FILTER)              += avf_abitscope.o
 OBJS-$(CONFIG_ADRAWGRAPH_FILTER)             += f_drawgraph.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 77463aa4c8..6d7a535ee8 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -524,6 +524,7 @@  extern const AVFilter ff_avf_showvolume;
 extern const AVFilter ff_avf_showwaves;
 extern const AVFilter ff_avf_showwavespic;
 extern const AVFilter ff_vaf_spectrumsynth;
+extern const AVFilter ff_sf_textmod;
 extern const AVFilter ff_svf_graphicsub2video;
 extern const AVFilter ff_svf_textsub2video;
 
diff --git a/libavfilter/sf_textmod.c b/libavfilter/sf_textmod.c
new file mode 100644
index 0000000000..7c23ded9ef
--- /dev/null
+++ b/libavfilter/sf_textmod.c
@@ -0,0 +1,381 @@ 
+/*
+ * Copyright (c) 2021 softworkz
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * text subtitle filter which allows modifying subtitle text in several ways
+ */
+
+#include <libavcodec/ass.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "internal.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/ass_split.h"
+
+static const char* leet_src = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+static const char* leet_dst = "abcd3f6#1jklmn0pq257uvwxyzAB(D3F6#1JKLMN0PQ257UVWXYZ";
+
+enum TextModOperation {
+    OP_LEET,
+    OP_TO_UPPER,
+    OP_TO_LOWER,
+    OP_REPLACE_CHARS,
+    OP_REMOVE_CHARS,
+    OP_REPLACE_WORDS,
+    OP_REMOVE_WORDS,
+    NB_OPS,
+};
+
+typedef struct TextModContext {
+    const AVClass *class;
+    enum AVSubtitleType format;
+    enum TextModOperation operation;
+    char *find;
+    char *replace;
+    char *separator;
+    char **find_list;
+    int  nb_find_list;
+    char **replace_list;
+    int  nb_replace_list;
+} TextModContext;
+
+/* Split source in place at delim; tokens point into source, not copies. */
+static char **split_string(char *source, int *nb_elems, char delim)
+{
+    /* av_strtok() takes a NUL-terminated delimiter set, not a lone char */
+    const char delims[2] = { delim, 0 };
+    char **list = NULL;
+    char *temp = NULL;
+    char *ptr = av_strtok(source, delims, &temp);
+
+    while (ptr) {
+        av_dynarray_add(&list, nb_elems, ptr);
+        if (!list)
+            return NULL;
+        ptr = av_strtok(NULL, delims, &temp);
+    }
+
+    return list;
+}
+
+/* Validate the options for the selected mode and build the word lists. */
+static int init(AVFilterContext *ctx)
+{
+    TextModContext *s = ctx->priv;
+
+    switch (s->operation) {
+    case OP_REPLACE_CHARS:
+    case OP_REMOVE_CHARS:
+    case OP_REPLACE_WORDS:
+    case OP_REMOVE_WORDS:
+        if (!s->find || !strlen(s->find)) {
+            av_log(ctx, AV_LOG_ERROR, "Selected mode requires the 'find' parameter to be specified\n");
+            return AVERROR(EINVAL);
+        }
+        break;
+    }
+
+    switch (s->operation) {
+    case OP_REPLACE_CHARS:
+    case OP_REPLACE_WORDS:
+        if (!s->replace || !strlen(s->replace)) {
+            av_log(ctx, AV_LOG_ERROR, "Selected mode requires the 'replace' parameter to be specified\n");
+            return AVERROR(EINVAL);
+        }
+        break;
+    }
+
+    if (s->operation == OP_REPLACE_CHARS && strlen(s->find) != strlen(s->replace)) {
+        av_log(ctx, AV_LOG_ERROR, "Selected mode requires the 'find' and 'replace' parameters to have the same length\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (s->operation == OP_REPLACE_WORDS || s->operation == OP_REMOVE_WORDS) {
+        if (!s->separator || strlen(s->separator) != 1) {
+            av_log(ctx, AV_LOG_ERROR, "Selected mode requires a single separator char to be specified\n");
+            return AVERROR(EINVAL);
+        }
+
+        s->find_list = split_string(s->find, &s->nb_find_list, *s->separator);
+        if (!s->find_list)
+            return AVERROR(ENOMEM);
+
+        if (s->operation == OP_REPLACE_WORDS) {
+            s->replace_list = split_string(s->replace, &s->nb_replace_list, *s->separator);
+            if (!s->replace_list)
+                return AVERROR(ENOMEM);
+
+            if (s->nb_find_list != s->nb_replace_list) {
+                av_log(ctx, AV_LOG_ERROR, "The number of words in 'find' and 'replace' needs to be equal\n");
+                return AVERROR(EINVAL);
+            }
+        }
+    }
+
+    return 0;
+}
+
+/* Release the word lists built by init(). */
+static void uninit(AVFilterContext *ctx)
+{
+    TextModContext *s = ctx->priv;
+
+    /*
+     * Only the pointer arrays are owned here: split_string() stores tokens
+     * that point into the 'find' and 'replace' option strings, so the
+     * entries must not be freed one by one (and &list[i] was never an
+     * allocation of its own).
+     */
+    s->nb_find_list = 0;
+    av_freep(&s->find_list);
+
+    s->nb_replace_list = 0;
+    av_freep(&s->replace_list);
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    static const enum AVSubtitleType subtitle_fmts[] = { AV_SUBTITLE_FMT_ASS, AV_SUBTITLE_FMT_NONE };
+    int ret;
+
+    /* set input subtitle format */
+    formats = ff_make_format_list(subtitle_fmts);
+    if ((ret = ff_formats_ref(formats, &inlink->outcfg.formats)) < 0)
+        return ret;
+
+    /* set output subtitle format */
+    if ((ret = ff_formats_ref(formats, &outlink->incfg.formats)) < 0)
+        return ret;
+
+    return 0;
+}
+
+/*
+ * Apply the selected operation to a copy of text; the per-character modes
+ * leave anything between '{' and '}' untouched. Returns a string the caller
+ * must av_free(), or NULL if an allocation fails.
+ */
+static char *process_text(TextModContext *s, char *text)
+{
+    const char *char_src = s->find;
+    const char *char_dst = s->replace;
+    char *result = NULL;
+    int escape_level = 0, k = 0;
+
+    switch (s->operation) {
+    case OP_LEET:
+    case OP_REPLACE_CHARS:
+        /* leet mode maps through the built-in tables, not find/replace */
+        if (s->operation == OP_LEET) {
+            char_src = leet_src;
+            char_dst = leet_dst;
+        }
+
+        result = av_strdup(text);
+        if (!result)
+            return NULL;
+
+        for (size_t n = 0; result[n]; n++) {
+            if (result[n] == '{')
+                escape_level++;
+
+            if (!escape_level) {
+                /* char_src is a pointer, so scan up to its NUL terminator */
+                for (size_t t = 0; char_src[t]; t++) {
+                    if (result[n] == char_src[t]) {
+                        result[n] = char_dst[t];
+                        break;
+                    }
+                }
+            }
+
+            if (result[n] == '}' && escape_level > 0)
+                escape_level--;
+        }
+        break;
+    case OP_TO_UPPER:
+    case OP_TO_LOWER:
+        result = av_strdup(text);
+        if (!result)
+            return NULL;
+
+        for (size_t n = 0; result[n]; n++) {
+            if (result[n] == '{')
+                escape_level++;
+            if (!escape_level)
+                result[n] = s->operation == OP_TO_LOWER ? av_tolower(result[n]) : av_toupper(result[n]);
+            if (result[n] == '}' && escape_level > 0)
+                escape_level--;
+        }
+        break;
+    case OP_REMOVE_CHARS:
+
+        result = av_strdup(text);
+        if (!result)
+            return NULL;
+        /* compact in place: k trails n and only kept chars are copied down */
+        for (size_t n = 0; result[n]; n++) {
+            int skip_char = 0;
+
+            if (result[n] == '{')
+                escape_level++;
+
+            if (!escape_level) {
+                for (size_t t = 0; char_src[t]; t++) {
+                    if (result[n] == char_src[t]) {
+                        skip_char = 1;
+                        break;
+                    }
+                }
+            }
+
+            if (!skip_char)
+                result[k++] = result[n];
+
+            if (result[n] == '}' && escape_level > 0)
+                escape_level--;
+        }
+        result[k] = 0;
+        break;
+    case OP_REPLACE_WORDS:
+    case OP_REMOVE_WORDS:
+
+        result = av_strdup(text);
+        if (!result)
+            return NULL;
+        /* stop at a NULL entry in case the list carries a terminator */
+        for (int n = 0; n < s->nb_find_list && s->find_list[n]; n++) {
+            char *tmp           = result;
+            const char *replace = (s->operation == OP_REPLACE_WORDS) ? s->replace_list[n] : "";
+
+            result = av_strireplace(tmp, s->find_list[n], replace);
+            /* the previous copy is done with, whether or not this failed */
+            av_free(tmp);
+            if (!result)
+                return NULL;
+        }
+        break;
+    }
+
+    return result;
+}
+
+/* Rebuild an ASS dialog line with its text field run through process_text(). */
+static char *process_dialog(TextModContext *s, char *ass_line)
+{
+    ASSDialog *dialog = ff_ass_split_dialog(NULL, ass_line);
+    char *result = NULL, *text;
+
+    if (!dialog)
+        return NULL;
+
+    text = process_text(s, dialog->text);
+    /* on failure result stays NULL, but dialog must still be released */
+    if (text)
+        result = ff_ass_get_dialog(dialog->readorder, dialog->layer, dialog->style, dialog->name, text);
+
+    av_free(text);
+    ff_ass_free_dialog(&dialog);
+    return result;
+}
+
+/* Run each ASS rect of the frame through process_dialog() and pass it on. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *src_frame)
+{
+    TextModContext *s = inlink->dst->priv;
+    AVFilterLink *outlink = inlink->dst->outputs[0];
+    AVFrame *out;
+
+    outlink->format = inlink->format;
+    out = av_frame_clone(src_frame);
+    if (!out)
+        goto fail;
+    for (unsigned i = 0; i < out->num_subtitle_rects; i++) {
+        AVSubtitleRect *rect = out->subtitle_rects[i];
+        if (rect->ass) {
+            char *tmp = rect->ass;
+            rect->ass = process_dialog(s, tmp);
+            av_free(tmp);
+            if (!rect->ass)
+                goto fail;
+        }
+    }
+    av_frame_free(&src_frame);
+    return ff_filter_frame(outlink, out);
+fail:
+    /* the input frame is released on success, so do the same on failure */
+    av_frame_free(&out);
+    av_frame_free(&src_frame);
+    return AVERROR(ENOMEM);
+}
+
+#define OFFSET(x) offsetof(TextModContext, x)
+#define FLAGS (AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
+
+static const AVOption textmod_options[] = {
+    { "mode",             "set operation mode",              OFFSET(operation),  AV_OPT_TYPE_INT,    {.i64=OP_LEET},          OP_LEET, NB_OPS-1, FLAGS, "mode" },
+    {   "leet",           "convert text to 'leet speak'",    0,                  AV_OPT_TYPE_CONST,  {.i64=OP_LEET},          0,       0,        FLAGS, "mode" },
+    {   "to_upper",       "change to upper case",            0,                  AV_OPT_TYPE_CONST,  {.i64=OP_TO_UPPER},      0,       0,        FLAGS, "mode" },
+    {   "to_lower",       "change to lower case",            0,                  AV_OPT_TYPE_CONST,  {.i64=OP_TO_LOWER},      0,       0,        FLAGS, "mode" },
+    {   "replace_chars",  "replace characters",              0,                  AV_OPT_TYPE_CONST,  {.i64=OP_REPLACE_CHARS}, 0,       0,        FLAGS, "mode" },
+    {   "remove_chars",   "remove characters",               0,                  AV_OPT_TYPE_CONST,  {.i64=OP_REMOVE_CHARS},  0,       0,        FLAGS, "mode" },
+    {   "replace_words",  "replace words",                   0,                  AV_OPT_TYPE_CONST,  {.i64=OP_REPLACE_WORDS}, 0,       0,        FLAGS, "mode" },
+    {   "remove_words",   "remove words",                    0,                  AV_OPT_TYPE_CONST,  {.i64=OP_REMOVE_WORDS},  0,       0,        FLAGS, "mode" },
+    { "find",             "chars/words to find or remove",   OFFSET(find),       AV_OPT_TYPE_STRING, {.str = NULL},           0,       0,        FLAGS, NULL   },
+    { "replace",          "chars/words to replace",          OFFSET(replace),    AV_OPT_TYPE_STRING, {.str = NULL},           0,       0,        FLAGS, NULL   },
+    { "separator",        "word separator (default: ',')",   OFFSET(separator),  AV_OPT_TYPE_STRING, {.str = ","},            0,       0,        FLAGS, NULL   },
+    { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(textmod);
+
+static const AVFilterPad inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_SUBTITLE,
+        .filter_frame = filter_frame,
+    },
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_SUBTITLE,
+    },
+};
+
+const AVFilter ff_sf_textmod = {
+    .name          = "textmod",
+    .description   = NULL_IF_CONFIG_SMALL("Modify subtitle text in several ways"),
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .priv_size     = sizeof(TextModContext),
+    .priv_class    = &textmod_class,
+    FILTER_INPUTS(inputs),
+    FILTER_OUTPUTS(outputs),
+};