diff mbox series

[FFmpeg-devel,v15,09/16] avfilter/overlaytextsubs: Add overlaytextsubs and textsubs2video filters

Message ID DM8P223MB0365C875619A24B6AEC43006BA629@DM8P223MB0365.NAMP223.PROD.OUTLOOK.COM
State Superseded, archived
Headers show
Series [FFmpeg-devel,v15,01/16] global: Prepare AVFrame for subtitle handling | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished

Commit Message

Soft Works Nov. 25, 2021, 12:48 a.m. UTC
- overlaytextsubs (VS -> V)
  Overlay text subtitles onto a video stream.

- textsub2video (S -> V)
  Converts text subtitles to video frames

Signed-off-by: softworkz <softworkz@hotmail.com>
---
 configure                        |   2 +
 doc/filters.texi                 | 113 ++++++
 libavfilter/Makefile             |   2 +
 libavfilter/allfilters.c         |   4 +-
 libavfilter/avfilter.c           |  18 +-
 libavfilter/vf_overlaytextsubs.c | 624 +++++++++++++++++++++++++++++++
 6 files changed, 757 insertions(+), 6 deletions(-)
 create mode 100644 libavfilter/vf_overlaytextsubs.c
diff mbox series

Patch

diff --git a/configure b/configure
index e4d1443237..db1db0a0a6 100755
--- a/configure
+++ b/configure
@@ -3642,6 +3642,7 @@  overlay_opencl_filter_deps="opencl"
 overlay_qsv_filter_deps="libmfx"
 overlay_qsv_filter_select="qsvvpp"
 overlay_vulkan_filter_deps="vulkan spirv_compiler"
+overlaytextsubs_filter_deps="avcodec libass"
 owdenoise_filter_deps="gpl"
 pad_opencl_filter_deps="opencl"
 pan_filter_deps="swresample"
@@ -3686,6 +3687,7 @@  superequalizer_filter_deps="avcodec"
 superequalizer_filter_select="rdft"
 surround_filter_deps="avcodec"
 surround_filter_select="rdft"
+textsub2video_filter_deps="avcodec libass"
 tinterlace_filter_deps="gpl"
 tinterlace_merge_test_deps="tinterlace_filter"
 tinterlace_pad_test_deps="tinterlace_filter"
diff --git a/doc/filters.texi b/doc/filters.texi
index e08936e6ba..6407e6f80a 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -25659,6 +25659,119 @@  Overlay PGS subtitles
 ffmpeg -i "https://streams.videolan.org/samples/sub/PGS/Girl_With_The_Dragon_Tattoo_2%3A23%3A56.mkv" -filter_complex "[0:0][0:1]overlaygraphicsubs" output.mp4
 @end example
 @end itemize
+
+@section overlaytextsubs
+
+Overlay text subtitles onto a video stream.
+
+This filter supersedes the classic @ref{subtitles} filter. Unlike that filter, it no longer needs to open and access the source stream separately, which often causes problems or does not work at all for non-local or slow sources.
+
+Inputs:
+@itemize
+@item 0: Video [YUV420P, YUV422P, YUV444P, ARGB, RGBA, ABGR, BGRA, RGB24, BGR24]
+@item 1: Subtitles [TEXT]
+@end itemize
+
+Outputs:
+@itemize
+@item 0: Video (same as input)
+@end itemize
+
+It accepts the following parameters:
+
+@table @option
+
+@item alpha
+Process alpha channel, by default alpha channel is untouched.
+
+@item fonts_dir
+Set a directory path containing fonts that can be used by the filter.
+These fonts will be used in addition to whatever the font provider uses.
+
+@item default_font_path
+Path to a font file to be used as the default font.
+
+@item font_size
+Set the default font size.
+
+@item fontconfig_file
+Path to ASS fontconfig configuration file.
+
+@item force_style
+Override default style or script info parameters of the subtitles. It accepts a
+string containing ASS style format @code{KEY=VALUE} couples separated by ",".
+
+@item margin
+Set the rendering margin in pixels.
+
+@item render_latest_only
+For rendering, always use only the latest event that covers the given point in time.
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Overlay ASS subtitles with animations:
+@example
+ffmpeg -i "http://streams.videolan.org/samples/sub/SSA/subtitle_testing_complex.mkv" -filter_complex "[0:v]overlaytextsubs" -map 0 -y out.mkv
+@end example
+@end itemize
+
+@section textsub2video
+
+Converts text subtitles to video frames.
+
+For overlaying text subtitles onto video frames, it is recommended to use the overlaytextsubs filter.
+The textsub2video filter is useful for creating transparent text frames when the overlay is done via hardware acceleration.
+
+Inputs:
+@itemize
+@item 0: Subtitles [TEXT]
+@end itemize
+
+Outputs:
+@itemize
+@item 0: Video [RGB32]
+@end itemize
+
+It accepts the following parameters:
+
+@table @option
+
+@item rate, r
+Set the framerate for updating overlay frames.
+Normally, overlay frames will only be updated each time when the subtitles to display are changing.
+In cases where subtitles include advanced features (like animation), this parameter determines the frequency by which the overlay frames should be updated.
+
+@item size, s
+Set the output frame size.
+Allows overriding the size of the output video frames.
+
+@item fonts_dir
+Set a directory path containing fonts that can be used by the filter.
+These fonts will be used in addition to whatever the font provider uses.
+
+@item default_font_path
+Path to a font file to be used as the default font.
+
+@item font_size
+Set the default font size.
+
+@item fontconfig_file
+Path to ASS fontconfig configuration file.
+
+@item force_style
+Override default style or script info parameters of the subtitles. It accepts a
+string containing ASS style format @code{KEY=VALUE} couples separated by ",".
+
+@item margin
+Set the rendering margin in pixels.
+
+@item render_latest_only
+For rendering, always use only the latest event that covers the given point in time.
+@end table
+
 @c man end SUBTITLE FILTERS
 
 @chapter Multimedia Filters
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 6c790391b8..719d993512 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -375,6 +375,7 @@  OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER)         += vf_overlay_opencl.o opencl.o \
 OBJS-$(CONFIG_OVERLAY_QSV_FILTER)            += vf_overlay_qsv.o framesync.o
 OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER)         += vf_overlay_vulkan.o vulkan.o vulkan_filter.o
 OBJS-$(CONFIG_OVERLAYGRAPHICSUBS_FILTER)     += vf_overlaygraphicsubs.o framesync.o
+OBJS-$(CONFIG_OVERLAYTEXTSUBS_FILTER)        += vf_overlaytextsubs.o
 OBJS-$(CONFIG_OWDENOISE_FILTER)              += vf_owdenoise.o
 OBJS-$(CONFIG_PAD_FILTER)                    += vf_pad.o
 OBJS-$(CONFIG_PAD_OPENCL_FILTER)             += vf_pad_opencl.o opencl.o opencl/pad.o
@@ -464,6 +465,7 @@  OBJS-$(CONFIG_SWAPRECT_FILTER)               += vf_swaprect.o
 OBJS-$(CONFIG_SWAPUV_FILTER)                 += vf_swapuv.o
 OBJS-$(CONFIG_TBLEND_FILTER)                 += vf_blend.o framesync.o
 OBJS-$(CONFIG_TELECINE_FILTER)               += vf_telecine.o
+OBJS-$(CONFIG_TEXTSUB2VIDEO_FILTER)          += vf_overlaytextsubs.o
 OBJS-$(CONFIG_THISTOGRAM_FILTER)             += vf_histogram.o
 OBJS-$(CONFIG_THRESHOLD_FILTER)              += vf_threshold.o framesync.o
 OBJS-$(CONFIG_THUMBNAIL_FILTER)              += vf_thumbnail.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 825b636e60..61dc969285 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -354,9 +354,10 @@  extern const AVFilter ff_vf_oscilloscope;
 extern const AVFilter ff_vf_overlay;
 extern const AVFilter ff_vf_overlay_opencl;
 extern const AVFilter ff_vf_overlay_qsv;
-extern const AVFilter ff_vf_overlaygraphicsubs;
 extern const AVFilter ff_vf_overlay_vulkan;
 extern const AVFilter ff_vf_overlay_cuda;
+extern const AVFilter ff_vf_overlaygraphicsubs;
+extern const AVFilter ff_vf_overlaytextsubs;
 extern const AVFilter ff_vf_owdenoise;
 extern const AVFilter ff_vf_pad;
 extern const AVFilter ff_vf_pad_opencl;
@@ -541,6 +542,7 @@  extern const AVFilter ff_avf_showwaves;
 extern const AVFilter ff_avf_showwavespic;
 extern const AVFilter ff_vaf_spectrumsynth;
 extern const AVFilter ff_svf_graphicsub2video;
+extern const AVFilter ff_svf_textsub2video;
 
 /* multimedia sources */
 extern const AVFilter ff_avsrc_amovie;
diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c
index 75d5e86539..aa9aa71f53 100644
--- a/libavfilter/avfilter.c
+++ b/libavfilter/avfilter.c
@@ -463,7 +463,7 @@  static int64_t guess_status_pts(AVFilterContext *ctx, int status, AVRational lin
     return AV_NOPTS_VALUE;
 }
 
-static int ff_request_frame_to_filter(AVFilterLink *link)
+static int ff_request_frame_to_filter(AVFilterLink *link, int input_index)
 {
     int ret = -1;
 
@@ -472,8 +472,8 @@  static int ff_request_frame_to_filter(AVFilterLink *link)
     link->frame_blocked_in = 1;
     if (link->srcpad->request_frame)
         ret = link->srcpad->request_frame(link);
-    else if (link->src->inputs[0])
-        ret = ff_request_frame(link->src->inputs[0]);
+    else if (link->src->inputs[input_index])
+        ret = ff_request_frame(link->src->inputs[input_index]);
     if (ret < 0) {
         if (ret != AVERROR(EAGAIN) && ret != link->status_in)
             ff_avfilter_link_set_in_status(link, ret, guess_status_pts(link->src, ret, link->time_base));
@@ -1172,6 +1172,14 @@  static int forward_status_change(AVFilterContext *filter, AVFilterLink *in)
 {
     unsigned out = 0, progress = 0;
     int ret;
+    int input_index = 0;
+
+    for (int i = 0; i < in->dst->nb_inputs; i++) {
+        if (&in->dst->input_pads[i] == in->dstpad) {
+            input_index = i;
+            break;
+        }
+    }
 
     av_assert0(!in->status_out);
     if (!filter->nb_outputs) {
@@ -1181,7 +1189,7 @@  static int forward_status_change(AVFilterContext *filter, AVFilterLink *in)
     while (!in->status_out) {
         if (!filter->outputs[out]->status_in) {
             progress++;
-            ret = ff_request_frame_to_filter(filter->outputs[out]);
+            ret = ff_request_frame_to_filter(filter->outputs[out], input_index);
             if (ret < 0)
                 return ret;
         }
@@ -1218,7 +1226,7 @@  static int ff_filter_activate_default(AVFilterContext *filter)
     for (i = 0; i < filter->nb_outputs; i++) {
         if (filter->outputs[i]->frame_wanted_out &&
             !filter->outputs[i]->frame_blocked_in) {
-            return ff_request_frame_to_filter(filter->outputs[i]);
+            return ff_request_frame_to_filter(filter->outputs[i], 0);
         }
     }
     return FFERROR_NOT_READY;
diff --git a/libavfilter/vf_overlaytextsubs.c b/libavfilter/vf_overlaytextsubs.c
new file mode 100644
index 0000000000..25714aa54e
--- /dev/null
+++ b/libavfilter/vf_overlaytextsubs.c
@@ -0,0 +1,624 @@ 
+/*
+ * Copyright (c) 2021 softworkz
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * overlay text subtitles on top of a video frame
+ */
+
+#include <ass/ass.h>
+#include <libavutil/ass_internal.h>
+#include <libavutil/thread.h>
+
+#include "drawutils.h"
+#include "filters.h"
+
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+/**
+ * Private context shared by the overlaytextsubs and textsub2video filters.
+ */
+typedef struct TextSubsContext {
+    const AVClass *class;
+    /* Taken around ass_process_chunk() and, in filter_video_frame(),
+     * around ass_render_frame(). */
+    AVMutex mutex;
+
+    /* libass objects, created in init() and released in uninit(). */
+    ASS_Library   *library;
+    ASS_Renderer  *renderer;
+    ASS_Track     *track;
+
+    /* User options (see the AVOption tables). */
+    char *default_font_path;
+    char *fonts_dir;
+    char *fc_file;              /* "fontconfig_file" option */
+    double font_size;
+    char *force_style;          /* tokenized in place by init() */
+    char *language;
+    int margin;
+    int render_latest_only;
+
+    int alpha;                  /* overlaytextsubs only: process the alpha channel */
+    FFDrawContext draw;
+
+    int got_header;             /* set once process_header() has completed */
+    int out_w, out_h;           /* textsub2video: output size ("size" option or input size) */
+    AVRational frame_rate;      /* textsub2video: "rate" option */
+    AVFrame *last_frame;        /* textsub2video: clone of the most recently rendered frame */
+    int need_frame;             /* textsub2video: request_frame() is waiting for subtitle input */
+    int eof;                    /* textsub2video: ff_request_frame() on the input returned EOF */
+} TextSubsContext;
+
+/*
+ * libass supports a log level ranging from 0 to 7; this table maps each of
+ * them to a libavutil log level. ass_log() indexes it with the clipped
+ * libass level.
+ */
+static const int ass_libavfilter_log_level_map[] = {
+    AV_LOG_QUIET,               /* 0 */
+    AV_LOG_PANIC,               /* 1 */
+    AV_LOG_FATAL,               /* 2 */
+    AV_LOG_ERROR,               /* 3 */
+    AV_LOG_WARNING,             /* 4 */
+    AV_LOG_INFO,                /* 5 */
+    AV_LOG_VERBOSE,             /* 6 */
+    AV_LOG_DEBUG,               /* 7 */
+};
+
+/**
+ * Message callback registered with libass: forwards the message to
+ * av_vlog() at the mapped libavutil level and appends a newline.
+ *
+ * @param ass_level libass log level; clipped to the range of the map
+ * @param fmt       printf-style format string
+ * @param args      arguments for fmt
+ * @param ctx       logging context passed through to av_vlog()/av_log()
+ */
+static void ass_log(int ass_level, const char *fmt, va_list args, void *ctx)
+{
+    const int last_idx = FF_ARRAY_ELEMS(ass_libavfilter_log_level_map) - 1;
+    const int av_level = ass_libavfilter_log_level_map[av_clip(ass_level, 0, last_idx)];
+
+    av_vlog(ctx, av_level, fmt, args);
+    av_log(ctx, av_level, "\n");
+}
+
+/**
+ * Release everything owned by the filter context: the libass track,
+ * renderer and library, the mutex and the cached last frame.
+ */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    TextSubsContext *priv = ctx->priv;
+
+    /* Tear the libass objects down in reverse order of their creation. */
+    if (priv->track) {
+        ass_free_track(priv->track);
+        priv->track = NULL;
+    }
+    if (priv->renderer) {
+        ass_renderer_done(priv->renderer);
+        priv->renderer = NULL;
+    }
+    if (priv->library) {
+        ass_library_done(priv->library);
+        priv->library = NULL;
+    }
+
+    ff_mutex_destroy(&priv->mutex);
+
+    av_frame_free(&priv->last_frame);
+}
+
+/**
+ * Format negotiation for overlaytextsubs: the video input and the output
+ * accept every pixel format supported by drawutils, the subtitle input
+ * accepts ASS only.
+ *
+ * @return 0 on success, a negative error code from ff_formats_ref() otherwise
+ */
+static int overlay_textsubs_query_formats(AVFilterContext *ctx)
+{
+    static const enum AVSubtitleType subtitle_fmts[] = { AV_SUBTITLE_FMT_ASS, AV_SUBTITLE_FMT_NONE };
+    int err;
+
+    /* input 0: video */
+    err = ff_formats_ref(ff_draw_supported_pixel_formats(0),
+                         &ctx->inputs[0]->outcfg.formats);
+    if (err < 0)
+        return err;
+
+    /* input 1: subtitles */
+    err = ff_formats_ref(ff_make_format_list(subtitle_fmts),
+                         &ctx->inputs[1]->outcfg.formats);
+    if (err < 0)
+        return err;
+
+    /* output 0: video */
+    err = ff_formats_ref(ff_draw_supported_pixel_formats(0),
+                         &ctx->outputs[0]->incfg.formats);
+    if (err < 0)
+        return err;
+
+    return 0;
+}
+
+/**
+ * Configure the overlaytextsubs output: dimensions and time base are
+ * copied from the first (video) input.
+ */
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterLink *main_in = outlink->src->inputs[0];
+
+    outlink->w         = main_in->w;
+    outlink->h         = main_in->h;
+    outlink->time_base = main_in->time_base;
+
+    return 0;
+}
+
+/**
+ * Configure the main (video) input of overlaytextsubs: initialize the draw
+ * context for the negotiated pixel format and pass the frame size and pixel
+ * aspect ratio to the libass renderer.
+ *
+ * @return 0 on success, the negative error code of ff_draw_init() on failure
+ */
+static int config_input_main(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx  = inlink->dst;
+    TextSubsContext *s = inlink->dst->priv;
+    int ret;
+
+    /* The alpha plane is only processed when the "alpha" option is set. */
+    ret = ff_draw_init(&s->draw, inlink->format, s->alpha ? FF_DRAW_PROCESS_ALPHA : 0);
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Could not initialize ff_draw.\n");
+        return ret;
+    }
+
+    ass_set_frame_size  (s->renderer, inlink->w, inlink->h);
+    ass_set_pixel_aspect(s->renderer, av_q2d(inlink->sample_aspect_ratio));
+
+    /* One trailing newline only; the extra ones produced blank log lines. */
+    av_log(ctx, AV_LOG_VERBOSE, "Subtitle screen: %dx%d\n", inlink->w, inlink->h);
+
+    return 0;
+}
+
+/*
+ * libass stores an RGBA color in the format RRGGBBTT, where TT is the
+ * transparency level. AR/AG/AB shift out the red, green and blue bytes;
+ * AA yields 0xFF minus the low (TT) byte.
+ */
+#define AR(c)  ( (c)>>24)
+#define AG(c)  (((c)>>16)&0xFF)
+#define AB(c)  (((c)>>8) &0xFF)
+#define AA(c)  ((0xFF-(c)) &0xFF)
+
+/**
+ * Blend a linked list of libass images onto a video frame.
+ *
+ * @param s      filter context providing the draw context
+ * @param picref destination frame, modified in place
+ * @param image  head of the ASS_Image list; may be NULL (nothing is drawn)
+ */
+static void overlay_ass_image(TextSubsContext *s, AVFrame *picref,
+                              const ASS_Image *image)
+{
+    const ASS_Image *img = image;
+
+    while (img) {
+        FFDrawColor draw_color;
+        uint8_t rgba[] = {
+            AR(img->color), AG(img->color), AB(img->color), AA(img->color)
+        };
+
+        /* Each image is a single-color mask; convert the color once and blend. */
+        ff_draw_color(&s->draw, &draw_color, rgba);
+        ff_blend_mask(&s->draw, &draw_color,
+                      picref->data, picref->linesize,
+                      picref->width, picref->height,
+                      img->bitmap, img->stride, img->w, img->h,
+                      3, 0, img->dst_x, img->dst_y);
+
+        img = img->next;
+    }
+}
+
+/**
+ * Feed the subtitle header into the libass track and apply the filter's
+ * style options.
+ *
+ * The header is taken from the frame when it carries one, otherwise a
+ * default header is generated. If the resulting track has no styles, a
+ * "Default" style is created. The font size and margins from the filter
+ * options are then applied to style 0 or to the newly created style, which
+ * becomes the track's default style.
+ *
+ * On success s->got_header is set. If the track is missing, the default
+ * header cannot be allocated or no style can be allocated, the function
+ * returns without setting it.
+ *
+ * @param link  the filter context (despite the parameter name)
+ * @param frame subtitle frame that may carry a subtitle header; may be NULL
+ */
+static void process_header(AVFilterContext *link, AVFrame *frame)
+{
+    TextSubsContext *s = link->priv;
+    ASS_Track *track = s->track;
+    ASS_Style *style;
+    int sid = 0;
+
+    if (!track)
+        return;
+
+    if (frame && frame->subtitle_header) {
+        char *subtitle_header = (char *)frame->subtitle_header->data;
+        ass_process_codec_private(s->track, subtitle_header, strlen(subtitle_header));
+    }
+    else {
+        char* subtitle_header = avpriv_ass_get_subtitle_header_default(0);
+        if (!subtitle_header)
+            return;
+
+        ass_process_codec_private(s->track, subtitle_header, strlen(subtitle_header));
+        av_free(subtitle_header);
+    }
+
+    if (s->language)
+        s->track->Language = av_strdup(s->language);
+
+    if (!s->track->event_format) {
+        s->track->event_format = av_strdup("ReadOrder, Layer, Style, Name, MarginL, MarginR, MarginV, Effect, Text");
+    }
+
+    if (s->track->n_styles == 0) {
+        sid = ass_alloc_style(track);
+        /* ass_alloc_style() reports failure with a negative id; using it as
+         * an index would write before the start of the styles array. */
+        if (sid < 0) {
+            av_log(link, AV_LOG_ERROR, "Could not allocate a default subtitle style.\n");
+            return;
+        }
+        style = &s->track->styles[sid];
+        style->Name             = av_strdup("Default");
+        style->PrimaryColour    = 0xffffff00;
+        style->SecondaryColour  = 0x00ffff00;
+        style->OutlineColour    = 0x00000000;
+        style->BackColour       = 0x00000080;
+        style->Bold             = 200;
+        style->ScaleX           = 1.0;
+        style->ScaleY           = 1.0;
+        style->Spacing          = 0;
+        style->BorderStyle      = 1;
+        style->Outline          = 2;
+        style->Shadow           = 3;
+        style->Alignment        = 2;
+    }
+    else
+        style = &s->track->styles[sid];
+
+    /* Filter options override the size and margins of the selected style. */
+    style->FontSize         = s->font_size;
+    style->MarginL = style->MarginR = style->MarginV = s->margin;
+
+    track->default_style = sid;
+
+    s->got_header = 1;
+}
+
+/**
+ * Process a frame on the main video input: render the subtitles that are
+ * active at the frame's timestamp, blend them onto the frame and pass the
+ * frame on to the output.
+ *
+ * @return the result of ff_filter_frame()
+ */
+static int filter_video_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *avctx = inlink->dst;
+    TextSubsContext *priv = avctx->priv;
+    /* Frame timestamp converted to milliseconds for libass. */
+    const int64_t time_ms = (int64_t)((double)frame->pts * av_q2d(inlink->time_base) * 1000);
+    ASS_Image *rendered;
+    int changed = 0;
+
+    ff_mutex_lock(&priv->mutex);
+    rendered = ass_render_frame(priv->renderer, priv->track, time_ms, &changed);
+    ff_mutex_unlock(&priv->mutex);
+
+    if (changed)
+        av_log(avctx, AV_LOG_DEBUG, "Change happened at time ms:%"PRId64"\n", time_ms);
+
+    overlay_ass_image(priv, frame, rendered);
+
+    return ff_filter_frame(avctx->outputs[0], frame);
+}
+
+/**
+ * Process a frame on the subtitle input of overlaytextsubs: feed each ASS
+ * line of the frame into the libass track, then free the frame.
+ *
+ * With render_latest_only set, the duration of the previous event is cut
+ * so that it does not extend past the start of the newly added event.
+ *
+ * @return always 0
+ */
+static int filter_subtitle_frame(AVFilterLink *inlink, AVFrame *sub)
+{
+    AVFilterContext *avctx = inlink->dst;
+    TextSubsContext *priv = avctx->priv;
+    const int64_t start_ms = av_rescale_q(sub->subtitle_pts, AV_TIME_BASE_Q, av_make_q(1, 1000));
+    const int64_t duration = sub->subtitle_end_time;
+    unsigned idx = 0;
+
+    // Postpone header processing until we receive a frame with content
+    if (sub->num_subtitle_areas > 0 && !priv->got_header)
+        process_header(avctx, sub);
+
+    while (idx < sub->num_subtitle_areas) {
+        char *line = sub->subtitle_areas[idx]->ass;
+
+        /* Stop at the first area without ASS text. */
+        if (!line)
+            break;
+
+        ff_mutex_lock(&priv->mutex);
+        ass_process_chunk(priv->track, line, strlen(line), start_ms, duration);
+
+        if (priv->render_latest_only && priv->track->n_events > 1) {
+            ASS_Event *newest   = &priv->track->events[priv->track->n_events - 1];
+            ASS_Event *previous = &priv->track->events[priv->track->n_events - 2];
+            const int64_t gap = newest->Start - previous->Start;
+
+            if (previous->Duration > gap)
+                previous->Duration = gap;
+        }
+
+        ff_mutex_unlock(&priv->mutex);
+        idx++;
+    }
+
+    av_frame_free(&sub);
+    return 0;
+}
+
+/**
+ * Common init for overlaytextsubs and textsub2video: set up the mutex,
+ * create the libass library, renderer and track, configure fonts and apply
+ * the "force_style" overrides.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static av_cold int init(AVFilterContext *ctx)
+{
+    TextSubsContext *s = ctx->priv;
+    int ret;
+
+    /* Initialize the mutex before anything that can fail: uninit() destroys
+     * it unconditionally, so every later error return must leave it in an
+     * initialized state. */
+    ret = ff_mutex_init(&s->mutex, NULL);
+    if (ret) {
+        av_log(ctx, AV_LOG_ERROR, "Could not initialize mutex.\n");
+        return AVERROR(ret);
+    }
+
+    s->library = ass_library_init();
+
+    if (!s->library) {
+        av_log(ctx, AV_LOG_ERROR, "Could not initialize libass.\n");
+        return AVERROR(EINVAL);
+    }
+
+    ass_set_message_cb(s->library, ass_log, ctx);
+
+    /* Initialize fonts */
+    if (s->fonts_dir)
+        ass_set_fonts_dir(s->library, s->fonts_dir);
+
+    ass_set_extract_fonts(s->library, 1);
+
+    s->renderer = ass_renderer_init(s->library);
+    if (!s->renderer) {
+        av_log(ctx, AV_LOG_ERROR, "Could not initialize libass renderer.\n");
+        return AVERROR(EINVAL);
+    }
+
+    s->track = ass_new_track(s->library);
+    if (!s->track) {
+        av_log(ctx, AV_LOG_ERROR, "ass_new_track() failed!\n");
+        return AVERROR(EINVAL);
+    }
+
+    ass_set_fonts(s->renderer, s->default_font_path, NULL, 1, s->fc_file, 1);
+
+    if (s->force_style) {
+        /* Split the option string in place at ',' and collect the pieces in
+         * a NULL-terminated pointer list. */
+        char **list = NULL;
+        char *temp = NULL;
+        char *ptr = av_strtok(s->force_style, ",", &temp);
+        int i = 0;
+        while (ptr) {
+            av_dynarray_add(&list, &i, ptr);
+            /* list is NULL here when the array could not be grown */
+            if (!list) {
+                return AVERROR(ENOMEM);
+            }
+            ptr = av_strtok(NULL, ",", &temp);
+        }
+        av_dynarray_add(&list, &i, NULL);
+        if (!list) {
+            return AVERROR(ENOMEM);
+        }
+        ass_set_style_overrides(s->library, list);
+        /* NOTE(review): the list is freed right after the call, so libass
+         * is presumed to keep its own copy of the entries - confirm. */
+        av_free(list);
+    }
+
+    return 0;
+}
+
+/**
+ * Format negotiation for textsub2video: the input accepts ASS subtitles,
+ * the output offers the drawutils-supported pixel formats requested with
+ * AV_PIX_FMT_FLAG_ALPHA.
+ *
+ * @return 0 on success, a negative error code from ff_formats_ref() otherwise
+ */
+static int textsub2video_query_formats(AVFilterContext *ctx)
+{
+    static const enum AVSubtitleType subtitle_fmts[] = { AV_SUBTITLE_FMT_ASS, AV_SUBTITLE_FMT_NONE };
+    int err;
+
+    /* input 0: subtitles */
+    err = ff_formats_ref(ff_make_format_list(subtitle_fmts),
+                         &ctx->inputs[0]->outcfg.formats);
+    if (err < 0)
+        return err;
+
+    /* output 0: video */
+    err = ff_formats_ref(ff_draw_supported_pixel_formats(AV_PIX_FMT_FLAG_ALPHA),
+                         &ctx->outputs[0]->incfg.formats);
+    if (err < 0)
+        return err;
+
+    return 0;
+}
+
+/**
+ * Configure the textsub2video input: when no valid output size has been
+ * set, fall back to the dimensions of the input link.
+ */
+static int textsub2video_config_input(AVFilterLink *inlink)
+{
+    TextSubsContext *priv = inlink->dst->priv;
+    const int have_size = priv->out_w > 0 && priv->out_h > 0;
+
+    if (!have_size) {
+        priv->out_w = inlink->w;
+        priv->out_h = inlink->h;
+    }
+
+    return 0;
+}
+
+/**
+ * Configure the textsub2video output: initialize the draw context with
+ * alpha processing, validate the output size and publish size, sample
+ * aspect ratio and frame rate on the link.
+ *
+ * @return 0 on success, the error of ff_draw_init(), or AVERROR(EINVAL)
+ *         when no output size is available
+ */
+static int textsub2video_config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *avctx = outlink->src;
+    TextSubsContext *priv = avctx->priv;
+    const int err = ff_draw_init(&priv->draw, outlink->format, FF_DRAW_PROCESS_ALPHA);
+
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Could not initialize ff_draw.\n");
+        return err;
+    }
+
+    if (priv->out_w <= 0 || priv->out_h <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "No output image size set.\n");
+        return AVERROR(EINVAL);
+    }
+
+    ass_set_frame_size(priv->renderer, priv->out_w, priv->out_h);
+
+    outlink->frame_rate          = priv->frame_rate;
+    outlink->sample_aspect_ratio = (AVRational){1,1};
+    outlink->w                   = priv->out_w;
+    outlink->h                   = priv->out_h;
+
+    return 0;
+}
+
+/**
+ * Produce one output video frame for textsub2video.
+ *
+ * If the subtitle input has no timestamp yet, or the output timestamp is
+ * ahead of the input timestamp, a frame is requested from the input,
+ * need_frame is set and 0 is returned; textsub2video_filter_frame() calls
+ * this function again once input has arrived. Otherwise the subtitles are
+ * rendered for the next output timestamp. If libass reports no change and a
+ * previous frame is cached, a clone of that frame is sent; otherwise a new
+ * zeroed frame is drawn, cached and sent.
+ *
+ * @return AVERROR_EOF once the input has ended, AVERROR(ENOMEM) on
+ *         allocation failure, 0 after requesting input, otherwise the
+ *         result of ff_filter_frame()
+ */
+static int textsub2video_request_frame(AVFilterLink *outlink)
+{
+    TextSubsContext *s = outlink->src->priv;
+    AVFilterLink *inlink = outlink->src->inputs[0];
+    int64_t last_pts = outlink->current_pts;
+    int64_t next_pts, time_ms;
+    int i, detect_change = 0, status;
+    AVFrame *out;
+    ASS_Image *image;
+
+    status = ff_outlink_get_status(inlink);
+    if (status == AVERROR_EOF)
+        return AVERROR_EOF;
+
+    if (s->eof)
+        return AVERROR_EOF;
+
+    /* NOTE(review): the two current_pts values are compared directly;
+     * confirm that both links use the same time base here. */
+    if (inlink->current_pts == AV_NOPTS_VALUE || outlink->current_pts > inlink->current_pts) {
+        int ret = ff_request_frame(inlink);
+        if (ret == AVERROR_EOF) {
+            s->eof = 1;
+        }
+
+        /* Any non-zero result is only logged; it is not returned to the caller. */
+        if (ret != 0)
+            av_log(outlink->src, AV_LOG_DEBUG, "ff_request_frame returned: %d\n", ret);
+
+        s->need_frame = 1;
+        return 0;
+    }
+
+    if (last_pts == AV_NOPTS_VALUE)
+        last_pts = 0; // inlink->current_pts * av_q2d(inlink->time_base) / av_q2d(outlink->time_base);
+
+    /* Advance by one frame interval, expressed in the output time base. */
+    next_pts = last_pts + (int64_t)(1.0 / av_q2d(outlink->frame_rate) / av_q2d(outlink->time_base));
+
+    time_ms = (int64_t)((double)next_pts * av_q2d(outlink->time_base) * 1000);
+
+    /* NOTE(review): unlike filter_video_frame(), rendering is done here
+     * without holding s->mutex - confirm this is intended. */
+    image = ass_render_frame(s->renderer, s->track, time_ms, &detect_change);
+
+    if (detect_change)
+        av_log(outlink->src, AV_LOG_VERBOSE, "Change happened at time ms:%"PRId64" pts:%"PRId64"\n", time_ms, next_pts);
+    else if (s->last_frame) {
+        /* Nothing changed: resend the cached frame with the new timestamp. */
+        out = av_frame_clone(s->last_frame);
+        if (!out)
+            return AVERROR(ENOMEM);
+
+        out->pts = out->pkt_dts = out->best_effort_timestamp = next_pts;
+        return ff_filter_frame(outlink, out);
+    }
+
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out)
+        return AVERROR(ENOMEM);
+
+    /* Zero all frame buffers except index 1.
+     * NOTE(review): the reason for skipping buffer 1 is not evident here. */
+    for (i = 0; i < AV_NUM_DATA_POINTERS; i++) {
+        if (out->buf[i] && i != 1)
+            memset(out->buf[i]->data, 0, out->buf[i]->size);
+    }
+
+    out->pts = out->pkt_dts = out->best_effort_timestamp = next_pts;
+
+    if (image)
+        overlay_ass_image(s, out, image);
+
+    av_frame_free(&s->last_frame);
+
+    /* Cache a reference for the "no change" path; the clone result is not
+     * checked, so a failed clone simply leaves no cached frame. */
+    s->last_frame = av_frame_clone(out);
+
+    return ff_filter_frame(outlink, out);
+}
+
+/**
+ * Process a subtitle frame for textsub2video: feed each ASS line into the
+ * libass track, free the frame and, if an output frame was requested while
+ * no input was available, produce that frame now.
+ *
+ * @return 0, or the result of textsub2video_request_frame() when a frame
+ *         was pending
+ */
+static int textsub2video_filter_frame(AVFilterLink *inlink, AVFrame *sub)
+{
+    AVFilterContext *avctx = inlink->dst;
+    TextSubsContext *priv = avctx->priv;
+    const int64_t start_ms = av_rescale_q(sub->subtitle_pts, AV_TIME_BASE_Q, av_make_q(1, 1000));
+    const int64_t duration = sub->subtitle_end_time;
+    unsigned idx = 0;
+
+    av_log(avctx, AV_LOG_VERBOSE, "textsub2video_filter_frame num_subtitle_rects: %d, start_time_ms: %"PRId64"\n", sub->num_subtitle_areas, start_ms);
+
+    /* The header is processed on the first frame that has content. */
+    if (sub->num_subtitle_areas > 0 && !priv->got_header)
+        process_header(avctx, sub);
+
+    while (idx < sub->num_subtitle_areas) {
+        char *line = sub->subtitle_areas[idx]->ass;
+
+        /* Stop at the first area without ASS text. */
+        if (!line)
+            break;
+
+        ff_mutex_lock(&priv->mutex);
+        ass_process_chunk(priv->track, line, strlen(line), start_ms, duration);
+        ff_mutex_unlock(&priv->mutex);
+        idx++;
+    }
+
+    av_frame_free(&sub);
+
+    if (!priv->need_frame)
+        return 0;
+
+    priv->need_frame = 0;
+    return textsub2video_request_frame(avctx->outputs[0]);
+}
+
+/* Helpers for the option tables: field offset inside TextSubsContext and
+ * the flags shared by all options. */
+#define OFFSET(x) offsetof(TextSubsContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
+
+/*
+ * Options of the overlaytextsubs filter.
+ * "fonts_dir" and "fontsdir" are two names for the same field.
+ */
+static const AVOption overlaytextsubs_options[] = {
+    {"alpha",              "enable processing of alpha channel", OFFSET(alpha),              AV_OPT_TYPE_BOOL,   {.i64 = 0   }, 0,         1,        .flags =  FLAGS},
+    {"font_size",          "default font size",                  OFFSET(font_size),          AV_OPT_TYPE_DOUBLE, {.dbl = 18.0}, 0.0,       100.0,    .flags =  FLAGS},
+    {"force_style",        "force subtitle style",               OFFSET(force_style),        AV_OPT_TYPE_STRING, {.str = NULL}, 0,         0,        .flags =  FLAGS},
+    {"margin",             "default margin",                     OFFSET(margin),             AV_OPT_TYPE_INT,    {.i64 = 20  }, 0,         INT_MAX,  .flags =  FLAGS},
+    {"default_font_path",  "path to default font",               OFFSET(default_font_path),  AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN,  CHAR_MAX, .flags =  FLAGS},
+    {"fonts_dir",          "directory to scan for fonts",        OFFSET(fonts_dir),          AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN,  CHAR_MAX, .flags =  FLAGS},
+    {"fontsdir",           "directory to scan for fonts",        OFFSET(fonts_dir),          AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN,  CHAR_MAX, .flags =  FLAGS},
+    {"fontconfig_file",    "fontconfig file to load",            OFFSET(fc_file),            AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN,  CHAR_MAX, .flags =  FLAGS},
+    {"language",           "default language",                   OFFSET(language),           AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN,  CHAR_MAX, .flags =  FLAGS},
+    {"render_latest_only", "newest sub event for each time",     OFFSET(render_latest_only), AV_OPT_TYPE_BOOL,   {.i64 = 0   }, 0,         1,        .flags =  FLAGS},
+    { .name = NULL }
+};
+
+/*
+ * Options of the textsub2video filter.
+ * "rate"/"r", "size"/"s" and "fonts_dir"/"fontsdir" are pairs of names for
+ * the same field. The size options point at out_w; presumably the
+ * image-size option type also fills the adjacent out_h - TODO confirm.
+ */
+static const AVOption textsub2video_options[] = {
+    {"rate",               "set frame rate",                   OFFSET(frame_rate),         AV_OPT_TYPE_VIDEO_RATE, {.str="8"},   0,         INT_MAX,   .flags =  FLAGS},
+    {"r",                  "set frame rate",                   OFFSET(frame_rate),         AV_OPT_TYPE_VIDEO_RATE, {.str="8"},   0,         INT_MAX,   .flags =  FLAGS},
+    {"size",               "set video size",                   OFFSET(out_w),              AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0,         0,        .flags =  FLAGS},
+    {"s",                  "set video size",                   OFFSET(out_w),              AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0,         0,        .flags =  FLAGS},
+    {"font_size",          "default font size",                OFFSET(font_size),          AV_OPT_TYPE_DOUBLE,     {.dbl = 18.0}, 0.0,       100.0,    .flags =  FLAGS},
+    {"force_style",        "force subtitle style",             OFFSET(force_style),        AV_OPT_TYPE_STRING,     {.str = NULL}, 0,         0,        .flags =  FLAGS},
+    {"margin",             "default margin",                   OFFSET(margin),             AV_OPT_TYPE_INT,        {.i64 = 20  }, 0,         INT_MAX,  .flags =  FLAGS},
+    {"default_font_path",  "path to default font",             OFFSET(default_font_path),  AV_OPT_TYPE_STRING,     {.str = NULL}, CHAR_MIN,  CHAR_MAX, .flags =  FLAGS},
+    {"fonts_dir",          "directory to scan for fonts",      OFFSET(fonts_dir),          AV_OPT_TYPE_STRING,     {.str = NULL}, CHAR_MIN,  CHAR_MAX, .flags =  FLAGS},
+    {"fontsdir",           "directory to scan for fonts",      OFFSET(fonts_dir),          AV_OPT_TYPE_STRING,     {.str = NULL}, CHAR_MIN,  CHAR_MAX, .flags =  FLAGS},
+    {"fontconfig_file",    "fontconfig file to load",          OFFSET(fc_file),            AV_OPT_TYPE_STRING,     {.str = NULL}, CHAR_MIN,  CHAR_MAX, .flags =  FLAGS},
+    {"language",           "default language",                 OFFSET(language),           AV_OPT_TYPE_STRING,     {.str = NULL}, CHAR_MIN,  CHAR_MAX, .flags =  FLAGS},
+    {"render_latest_only", "newest sub event for each time",   OFFSET(render_latest_only), AV_OPT_TYPE_BOOL,       {.i64 = 0   }, 0,         1,        .flags =  FLAGS},
+    { .name = NULL }
+};
+
+/* overlaytextsubs: video + text subtitles in, video out.
+ * Only compiled when the filter is enabled in the build configuration. */
+#if CONFIG_OVERLAYTEXTSUBS_FILTER
+
+AVFILTER_DEFINE_CLASS(overlaytextsubs);
+
+/* Input 0 is the video to draw on (requested writable), input 1 delivers
+ * the subtitle frames. */
+static const AVFilterPad overlaytextsubs_inputs[] = {
+    {
+        .name         = "main",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input_main,
+        .flags        = AVFILTERPAD_FLAG_NEEDS_WRITABLE,
+        .filter_frame = filter_video_frame,
+    },
+    {
+        .name         = "overlay",
+        .type         = AVMEDIA_TYPE_SUBTITLE,
+        .filter_frame = filter_subtitle_frame,
+    },
+};
+
+/* Single video output; its properties are copied from input 0 in
+ * config_output(). */
+static const AVFilterPad overlaytextsubs_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,
+    },
+};
+
+const AVFilter ff_vf_overlaytextsubs = {
+    .name          = "overlaytextsubs",
+    .description   = NULL_IF_CONFIG_SMALL("Overlay textual subtitles on top of the input."),
+    .init          = init,
+    .uninit        = uninit,
+    .priv_size     = sizeof(TextSubsContext),
+    .priv_class    = &overlaytextsubs_class,
+    FILTER_INPUTS(overlaytextsubs_inputs),
+    FILTER_OUTPUTS(overlaytextsubs_outputs),
+    FILTER_QUERY_FUNC(overlay_textsubs_query_formats),
+};
+#endif
+
+/* textsub2video: text subtitles in, video frames out.
+ * Only compiled when the filter is enabled in the build configuration. */
+#if CONFIG_TEXTSUB2VIDEO_FILTER
+
+AVFILTER_DEFINE_CLASS(textsub2video);
+
+/* Single subtitle input. */
+static const AVFilterPad textsub2video_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_SUBTITLE,
+        .config_props = textsub2video_config_input,
+        .filter_frame = textsub2video_filter_frame,
+    },
+};
+
+/* Single video output; frames are generated on demand by
+ * textsub2video_request_frame(). */
+static const AVFilterPad textsub2video_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = textsub2video_config_output,
+        .request_frame = textsub2video_request_frame,
+    },
+};
+
+const AVFilter ff_svf_textsub2video = {
+    .name          = "textsub2video",
+    .description   = NULL_IF_CONFIG_SMALL("Convert textual subtitles to video frames"),
+    .init          = init,
+    .uninit        = uninit,
+    .priv_size     = sizeof(TextSubsContext),
+    .priv_class    = &textsub2video_class,
+    FILTER_INPUTS(textsub2video_inputs),
+    FILTER_OUTPUTS(textsub2video_outputs),
+    FILTER_QUERY_FUNC(textsub2video_query_formats),
+};
+#endif