diff mbox series

[FFmpeg-devel,v16,01/16] global: Prepare AVFrame for subtitle handling

Message ID DM8P223MB0365D2E18075BEB87B865FB0BA629@DM8P223MB0365.NAMP223.PROD.OUTLOOK.COM
State Superseded, archived
Headers show
Series [FFmpeg-devel,v16,01/16] global: Prepare AVFrame for subtitle handling | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/makex86 warning New warnings during build
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished
andriy/makeppc warning New warnings during build

Commit Message

Soft Works Nov. 25, 2021, 5:53 p.m. UTC
Root commit for adding subtitle filtering capabilities.
In detail:

- Add type (AVMediaType) field to AVFrame
  Replaces previous way of distinction which was based on checking
  width and height to determine whether a frame is audio or video
- Add subtitle fields to AVFrame
- Add new struct AVSubtitleArea, similar to AVSubtitleRect, but different
  allocation logic. Cannot and must not be used interchangeably, hence
  the new struct
- Move enum AVSubtitleType, AVSubtitle and AVSubtitleRect to avutil
- Add public-named members to enum AVSubtitleType (AV_SUBTITLE_FMT_)
- Add avcodec_decode_subtitle3 which takes subtitle frames,
  serving as compatibility shim to legacy subtitle decoding
- Add additional methods for conversion between old and new API

Signed-off-by: softworkz <softworkz@hotmail.com>
---
 libavcodec/avcodec.c             |  19 ---
 libavcodec/avcodec.h             |  75 ++--------
 libavcodec/decode.c              |  53 +++++--
 libavcodec/libzvbi-teletextdec.c |   1 +
 libavcodec/pgssubdec.c           |   1 +
 libavcodec/utils.c               |  11 ++
 libavfilter/vf_subtitles.c       |  50 +++++--
 libavformat/utils.c              |   1 +
 libavutil/Makefile               |   2 +
 libavutil/frame.c                | 194 +++++++++++++++++++++---
 libavutil/frame.h                |  93 +++++++++++-
 libavutil/subfmt.c               | 243 +++++++++++++++++++++++++++++++
 libavutil/subfmt.h               | 185 +++++++++++++++++++++++
 13 files changed, 803 insertions(+), 125 deletions(-)
 create mode 100644 libavutil/subfmt.c
 create mode 100644 libavutil/subfmt.h

Comments

Andreas Rheinhardt Nov. 26, 2021, 10:34 a.m. UTC | #1
Soft Works:
> Root commit for adding subtitle filtering capabilities.
> In detail:
> 
> - Add type (AVMediaType) field to AVFrame
>   Replaces previous way of distinction which was based on checking
>   width and height to determine whether a frame is audio or video
> - Add subtitle fields to AVFrame
> - Add new struct AVSubtitleArea, similar to AVSubtitleRect, but different
>   allocation logic. Cannot and must not be used interchangeably, hence
>   the new struct
> - Move enum AVSubtitleType, AVSubtitle and AVSubtitleRect to avutil
> - Add public-named members to enum AVSubtitleType (AV_SUBTITLE_FMT_)
> - Add avcodec_decode_subtitle3 which takes subtitle frames,
>   serving as compatibility shim to legacy subtitle decoding
> - Add additional methods for conversion between old and new API
> 
> Signed-off-by: softworkz <softworkz@hotmail.com>
> ---
>  libavcodec/avcodec.c             |  19 ---
>  libavcodec/avcodec.h             |  75 ++--------
>  libavcodec/decode.c              |  53 +++++--
>  libavcodec/libzvbi-teletextdec.c |   1 +
>  libavcodec/pgssubdec.c           |   1 +
>  libavcodec/utils.c               |  11 ++
>  libavfilter/vf_subtitles.c       |  50 +++++--
>  libavformat/utils.c              |   1 +
>  libavutil/Makefile               |   2 +
>  libavutil/frame.c                | 194 +++++++++++++++++++++---
>  libavutil/frame.h                |  93 +++++++++++-
>  libavutil/subfmt.c               | 243 +++++++++++++++++++++++++++++++
>  libavutil/subfmt.h               | 185 +++++++++++++++++++++++

As has already been said by others, this should be split: One patch for
the actual new libavutil additions, one patch for the lavc decoding API,
one patch for the encoding API, one patch for every user that is made to
use the new APIs.

>  13 files changed, 803 insertions(+), 125 deletions(-)
>  create mode 100644 libavutil/subfmt.c
>  create mode 100644 libavutil/subfmt.h
> 
> diff --git a/libavcodec/avcodec.c b/libavcodec/avcodec.c
> index c00a9b2af8..13e3711b9c 100644
> --- a/libavcodec/avcodec.c
> +++ b/libavcodec/avcodec.c
> @@ -422,25 +422,6 @@ void avcodec_flush_buffers(AVCodecContext *avctx)
>          av_bsf_flush(avci->bsf);
>  }
>  
> -void avsubtitle_free(AVSubtitle *sub)
> -{
> -    int i;
> -
> -    for (i = 0; i < sub->num_rects; i++) {
> -        av_freep(&sub->rects[i]->data[0]);
> -        av_freep(&sub->rects[i]->data[1]);
> -        av_freep(&sub->rects[i]->data[2]);
> -        av_freep(&sub->rects[i]->data[3]);
> -        av_freep(&sub->rects[i]->text);
> -        av_freep(&sub->rects[i]->ass);
> -        av_freep(&sub->rects[i]);
> -    }
> -
> -    av_freep(&sub->rects);
> -
> -    memset(sub, 0, sizeof(*sub));
> -}
> -
>  av_cold int avcodec_close(AVCodecContext *avctx)
>  {
>      int i;
> diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
> index 7ee8bc2b7c..0c5819b116 100644
> --- a/libavcodec/avcodec.h
> +++ b/libavcodec/avcodec.h
> @@ -35,6 +35,7 @@
>  #include "libavutil/frame.h"
>  #include "libavutil/log.h"
>  #include "libavutil/pixfmt.h"
> +#include "libavutil/subfmt.h"
>  #include "libavutil/rational.h"
>  
>  #include "codec.h"
> @@ -1674,7 +1675,7 @@ typedef struct AVCodecContext {
>  
>      /**
>       * Header containing style information for text subtitles.
> -     * For SUBTITLE_ASS subtitle type, it should contain the whole ASS
> +     * For AV_SUBTITLE_FMT_ASS subtitle type, it should contain the whole ASS
>       * [Script Info] and [V4+ Styles] section, plus the [Events] line and
>       * the Format line following. It shouldn't include any Dialogue line.
>       * - encoding: Set/allocated/freed by user (before avcodec_open2())
> @@ -2238,63 +2239,8 @@ typedef struct AVHWAccel {
>   * @}
>   */
>  
> -enum AVSubtitleType {
> -    SUBTITLE_NONE,
> -
> -    SUBTITLE_BITMAP,                ///< A bitmap, pict will be set
> -
> -    /**
> -     * Plain text, the text field must be set by the decoder and is
> -     * authoritative. ass and pict fields may contain approximations.
> -     */
> -    SUBTITLE_TEXT,
> -
> -    /**
> -     * Formatted text, the ass field must be set by the decoder and is
> -     * authoritative. pict and text fields may contain approximations.
> -     */
> -    SUBTITLE_ASS,
> -};
> -
>  #define AV_SUBTITLE_FLAG_FORCED 0x00000001
>  
> -typedef struct AVSubtitleRect {
> -    int x;         ///< top left corner  of pict, undefined when pict is not set
> -    int y;         ///< top left corner  of pict, undefined when pict is not set
> -    int w;         ///< width            of pict, undefined when pict is not set
> -    int h;         ///< height           of pict, undefined when pict is not set
> -    int nb_colors; ///< number of colors in pict, undefined when pict is not set
> -
> -    /**
> -     * data+linesize for the bitmap of this subtitle.
> -     * Can be set for text/ass as well once they are rendered.
> -     */
> -    uint8_t *data[4];
> -    int linesize[4];
> -
> -    enum AVSubtitleType type;
> -
> -    char *text;                     ///< 0 terminated plain UTF-8 text
> -
> -    /**
> -     * 0 terminated ASS/SSA compatible event line.
> -     * The presentation of this is unaffected by the other values in this
> -     * struct.
> -     */
> -    char *ass;
> -
> -    int flags;
> -} AVSubtitleRect;
> -
> -typedef struct AVSubtitle {
> -    uint16_t format; /* 0 = graphics */
> -    uint32_t start_display_time; /* relative to packet pts, in ms */
> -    uint32_t end_display_time; /* relative to packet pts, in ms */
> -    unsigned num_rects;
> -    AVSubtitleRect **rects;
> -    int64_t pts;    ///< Same as packet pts, in AV_TIME_BASE
> -} AVSubtitle;
> -
>  /**
>   * Return the LIBAVCODEC_VERSION_INT constant.
>   */
> @@ -2430,13 +2376,6 @@ int avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **op
>   */
>  int avcodec_close(AVCodecContext *avctx);
>  
> -/**
> - * Free all allocated data in the given subtitle struct.
> - *
> - * @param sub AVSubtitle to free.
> - */
> -void avsubtitle_free(AVSubtitle *sub);
> -
>  /**
>   * @}
>   */
> @@ -2527,6 +2466,8 @@ enum AVChromaLocation avcodec_chroma_pos_to_enum(int xpos, int ypos);
>   *                 must be freed with avsubtitle_free if *got_sub_ptr is set.
>   * @param[in,out] got_sub_ptr Zero if no subtitle could be decompressed, otherwise, it is nonzero.
>   * @param[in] avpkt The input AVPacket containing the input buffer.
> + *
> + * @deprecated Use the new decode API (avcodec_send_packet, avcodec_receive_frame) instead.

This is not how you deprecate something: You have to add a @deprecated
doxygen comment as well as the attribute_deprecated to make some noise.

Actually, you should deprecate the whole AVSubtitle API.

>   */
>  int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
>                              int *got_sub_ptr,
> @@ -3125,6 +3066,14 @@ void avcodec_flush_buffers(AVCodecContext *avctx);
>   */
>  int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes);
>  
> +/**
> + * Return subtitle format from a codec descriptor
> + *
> + * @param codec_descriptor codec descriptor
> + * @return                 the subtitle type (e.g. bitmap, text)
> + */
> +enum AVSubtitleType av_get_subtitle_format_from_codecdesc(const AVCodecDescriptor *codec_descriptor);

Do we need this function? It seems too trivial and as Anton has pointed
out is flawed. And anyway, this would belong into codec_desc.h.

> +
>  /* memory */
>  
>  /**
> diff --git a/libavcodec/decode.c b/libavcodec/decode.c
> index c44724d150..788b043266 100644
> --- a/libavcodec/decode.c
> +++ b/libavcodec/decode.c
> @@ -576,6 +576,34 @@ static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
>      return ret;
>  }
>  
> +static int decode_subtitle2_priv(AVCodecContext *avctx, AVSubtitle *sub,
> +                                 int *got_sub_ptr, const AVPacket *avpkt);
> +
> +static int decode_subtitle_shim(AVCodecContext *avctx, AVFrame *frame, const AVPacket *avpkt)
> +{
> +    int ret, got_sub_ptr = 0;
> +    AVSubtitle subtitle = { 0 };
> +
> +    av_frame_unref(frame);
> +
> +    ret = decode_subtitle2_priv(avctx, &subtitle, &got_sub_ptr, avpkt);
> +
> +    if (ret >= 0 && got_sub_ptr) {
> +        frame->type = AVMEDIA_TYPE_SUBTITLE;
> +        frame->format = subtitle.format;
> +        ret = av_frame_get_buffer2(frame, 0);
> +        if (ret < 0)
> +            return ret;
> +        av_frame_put_subtitle(frame, &subtitle);
> +
> +        frame->pkt_dts = avpkt->dts;
> +    }
> +
> +    avsubtitle_free(&subtitle);
> +
> +    return ret;
> +}
> +
>  int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt)
>  {
>      AVCodecInternal *avci = avctx->internal;
> @@ -590,6 +618,9 @@ int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacke
>      if (avpkt && !avpkt->size && avpkt->data)
>          return AVERROR(EINVAL);
>  
> +    if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE)
> +        return decode_subtitle_shim(avctx, avci->buffer_frame, avpkt);

1. This is missing a check whether buffer_frame is empty or not; in the
latter case, you need to return AVERROR(EAGAIN); instead
decode_subtitle_shim() destroys what is already there.
(Said check is currently offloaded to the BSF API in the audio/video
codepath. This is actually a violation of the BSF API.)
2. Flushing is not really implemented well:
a) It is legal to call avcodec_send_packet() with avpkt == NULL to
indicate EOF. If I am not mistaken, decode_subtitle2_priv() will just
crash in this case.
b) avcodec_receive_frame() only returns what is in buffer_frame; it
never calls decode_subtitle2_priv() or the underlying decode function at
all. This basically presumes that a subtitle decoder can only return one
subtitle after flushing. I don't know whether this is true for our
decoders with the delay cap.
c) avcodec_receive_frame() seems to never return AVERROR_EOF after
flushing, but always AVERROR(EAGAIN) (if the delay cap is set, the first
call to avcodec_receive_frame() after flushing can also result in a
frame being returned). Yet this makes no sense and is IMO an API
violation on lavc's part.

(While we have subtitle decoders with the delay cap, I don't know
whether this is actually tested by fate and whether all code actually
flushes subtitle decoders at all.)

> +
>      av_packet_unref(avci->buffer_pkt);
>      if (avpkt && (avpkt->data || avpkt->side_data_elems)) {
>          ret = av_packet_ref(avci->buffer_pkt, avpkt);
> @@ -651,7 +682,9 @@ int attribute_align_arg avcodec_receive_frame(AVCodecContext *avctx, AVFrame *fr
>  
>      if (avci->buffer_frame->buf[0]) {
>          av_frame_move_ref(frame, avci->buffer_frame);
> -    } else {
> +    } else if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE)
> +        return AVERROR(EAGAIN);
> +    else {
>          ret = decode_receive_frame_internal(avctx, frame);
>          if (ret < 0)
>              return ret;
> @@ -723,7 +756,7 @@ static void get_subtitle_defaults(AVSubtitle *sub)
>  
>  #define UTF8_MAX_BYTES 4 /* 5 and 6 bytes sequences should not be used */
>  static int recode_subtitle(AVCodecContext *avctx, AVPacket **outpkt,
> -                           AVPacket *inpkt, AVPacket *buf_pkt)
> +                           const AVPacket *inpkt, AVPacket *buf_pkt)

*outpkt = inpkt; is now no longer const-correct. And if you make outpkt
const AVPacket**, then avctx->codec->decode(avctx, sub, got_sub_ptr,
pkt) is no longer const-correct (although no subtitle decoder modifies
the packet it is given!). In other words: Been there, done that.

>  {
>  #if CONFIG_ICONV
>      iconv_t cd = (iconv_t)-1;
> @@ -802,9 +835,8 @@ static int utf8_check(const uint8_t *str)
>      return 1;
>  }
>  
> -int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
> -                             int *got_sub_ptr,
> -                             AVPacket *avpkt)
> +static int decode_subtitle2_priv(AVCodecContext *avctx, AVSubtitle *sub,
> +                             int *got_sub_ptr, const AVPacket *avpkt)
>  {
>      int ret = 0;
>  
> @@ -844,10 +876,7 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
>                                                   avctx->pkt_timebase, ms);
>          }
>  
> -        if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB)
> -            sub->format = 0;
> -        else if (avctx->codec_descriptor->props & AV_CODEC_PROP_TEXT_SUB)
> -            sub->format = 1;
> +        sub->format = av_get_subtitle_format_from_codecdesc(avctx->codec_descriptor);
>  
>          for (unsigned i = 0; i < sub->num_rects; i++) {
>              if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_IGNORE &&
> @@ -871,6 +900,12 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
>      return ret;
>  }
>  
> +int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
> +                             int *got_sub_ptr, AVPacket *avpkt)
> +{
> +    return decode_subtitle2_priv(avctx, sub, got_sub_ptr, avpkt);
> +}
> +
>  enum AVPixelFormat avcodec_default_get_format(struct AVCodecContext *avctx,
>                                                const enum AVPixelFormat *fmt)
>  {
> diff --git a/libavcodec/libzvbi-teletextdec.c b/libavcodec/libzvbi-teletextdec.c
> index 1073d6a0bd..535c82d7a5 100644
> --- a/libavcodec/libzvbi-teletextdec.c
> +++ b/libavcodec/libzvbi-teletextdec.c
> @@ -27,6 +27,7 @@
>  #include "libavutil/intreadwrite.h"
>  #include "libavutil/log.h"
>  #include "libavutil/common.h"
> +#include "libavutil/subfmt.h"

If you need to add a new header here for this (which I don't think),
then this means that you broke API.

>  
>  #include <libzvbi.h>
>  
> diff --git a/libavcodec/pgssubdec.c b/libavcodec/pgssubdec.c
> index 388639a110..5709058b1c 100644
> --- a/libavcodec/pgssubdec.c
> +++ b/libavcodec/pgssubdec.c
> @@ -32,6 +32,7 @@
>  #include "libavutil/colorspace.h"
>  #include "libavutil/imgutils.h"
>  #include "libavutil/opt.h"
> +#include "libavutil/subfmt.h"
>  
>  #define RGBA(r,g,b,a) (((unsigned)(a) << 24) | ((r) << 16) | ((g) << 8) | (b))
>  #define MAX_EPOCH_PALETTES 8   // Max 8 allowed per PGS epoch
> diff --git a/libavcodec/utils.c b/libavcodec/utils.c
> index a91a54b0dc..efe14e5a43 100644
> --- a/libavcodec/utils.c
> +++ b/libavcodec/utils.c
> @@ -824,6 +824,17 @@ int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes)
>      return FFMAX(0, duration);
>  }
>  
> +enum AVSubtitleType av_get_subtitle_format_from_codecdesc(const AVCodecDescriptor *codec_descriptor)
> +{
> +    if(codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB)
> +        return AV_SUBTITLE_FMT_BITMAP;
> +
> +    if(codec_descriptor->props & AV_CODEC_PROP_TEXT_SUB)
> +        return AV_SUBTITLE_FMT_ASS;
> +
> +    return AV_SUBTITLE_FMT_UNKNOWN;
> +}
> +
>  int av_get_audio_frame_duration2(AVCodecParameters *par, int frame_bytes)
>  {
>      int duration = get_audio_frame_duration(par->codec_id, par->sample_rate,
> diff --git a/libavfilter/vf_subtitles.c b/libavfilter/vf_subtitles.c
> index 377160c72b..6b54cf669b 100644
> --- a/libavfilter/vf_subtitles.c
> +++ b/libavfilter/vf_subtitles.c
> @@ -35,14 +35,12 @@
>  # include "libavformat/avformat.h"
>  #endif
>  #include "libavutil/avstring.h"
> -#include "libavutil/imgutils.h"
>  #include "libavutil/opt.h"
>  #include "libavutil/parseutils.h"
>  #include "drawutils.h"
>  #include "avfilter.h"
>  #include "internal.h"
>  #include "formats.h"
> -#include "video.h"
>  
>  typedef struct AssContext {
>      const AVClass *class;
> @@ -292,6 +290,29 @@ static int attachment_is_font(AVStream * st)
>      return 0;
>  }
>  
> +static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame, AVPacket *pkt)
> +{
> +    int ret;
> +
> +    *got_frame = 0;

You don't really need this: You could just return 0 if no frame was
returned, 1 if a frame was returned and < 0 on (actual, not EAGAIN) errors.

> +
> +    if (pkt) {
> +        ret = avcodec_send_packet(avctx, pkt);
> +        // In particular, we don't expect AVERROR(EAGAIN), because we read all
> +        // decoded frames with avcodec_receive_frame() until done.

Where do you do this? For this one would need to call
avcodec_receive_frame() in a loop until it returns EAGAIN, but you don't
do this.

> +        if (ret < 0 && ret != AVERROR_EOF)
> +            return ret;
> +    }
> +
> +    ret = avcodec_receive_frame(avctx, frame);
> +    if (ret < 0 && ret != AVERROR(EAGAIN))
> +        return ret;
> +    if (ret >= 0)
> +        *got_frame = 1;
> +
> +    return 0;
> +}
> +
>  AVFILTER_DEFINE_CLASS(subtitles);
>  
>  static av_cold int init_subtitles(AVFilterContext *ctx)
> @@ -306,6 +327,7 @@ static av_cold int init_subtitles(AVFilterContext *ctx)
>      AVStream *st;
>      AVPacket pkt;
>      AssContext *ass = ctx->priv;
> +    enum AVSubtitleType subtitle_format;
>  
>      /* Init libass */
>      ret = init(ctx);
> @@ -386,13 +408,17 @@ static av_cold int init_subtitles(AVFilterContext *ctx)
>          ret = AVERROR_DECODER_NOT_FOUND;
>          goto end;
>      }
> +
>      dec_desc = avcodec_descriptor_get(st->codecpar->codec_id);
> -    if (dec_desc && !(dec_desc->props & AV_CODEC_PROP_TEXT_SUB)) {
> +    subtitle_format = av_get_subtitle_format_from_codecdesc(dec_desc);
> +
> +    if (subtitle_format != AV_SUBTITLE_FMT_ASS) {
>          av_log(ctx, AV_LOG_ERROR,
> -               "Only text based subtitles are currently supported\n");
> -        ret = AVERROR_PATCHWELCOME;
> +               "Only text based subtitles are supported by this filter\n");
> +        ret = AVERROR_INVALIDDATA;
>          goto end;
>      }
> +
>      if (ass->charenc)
>          av_dict_set(&codec_opts, "sub_charenc", ass->charenc, 0);
>  
> @@ -448,18 +474,18 @@ static av_cold int init_subtitles(AVFilterContext *ctx)
>                                    dec_ctx->subtitle_header_size);
>      while (av_read_frame(fmt, &pkt) >= 0) {
>          int i, got_subtitle;
> -        AVSubtitle sub = {0};
> +        AVFrame *sub = av_frame_alloc();

Unchecked allocation. And anyway, this should not be allocated once per
loop iteration.

>  
>          if (pkt.stream_index == sid) {
> -            ret = avcodec_decode_subtitle2(dec_ctx, &sub, &got_subtitle, &pkt);
> +            ret = decode(dec_ctx, sub, &got_subtitle, &pkt);
>              if (ret < 0) {
>                  av_log(ctx, AV_LOG_WARNING, "Error decoding: %s (ignored)\n",
>                         av_err2str(ret));
>              } else if (got_subtitle) {
> -                const int64_t start_time = av_rescale_q(sub.pts, AV_TIME_BASE_Q, av_make_q(1, 1000));
> -                const int64_t duration   = sub.end_display_time;
> -                for (i = 0; i < sub.num_rects; i++) {
> -                    char *ass_line = sub.rects[i]->ass;
> +                const int64_t start_time = av_rescale_q(sub->subtitle_pts, AV_TIME_BASE_Q, av_make_q(1, 1000));
> +                const int64_t duration   = sub->subtitle_end_time;
> +                for (i = 0; i < sub->num_subtitle_areas; i++) {
> +                    char *ass_line = sub->subtitle_areas[i]->ass;
>                      if (!ass_line)
>                          break;
>                      ass_process_chunk(ass->track, ass_line, strlen(ass_line),
> @@ -468,7 +494,7 @@ static av_cold int init_subtitles(AVFilterContext *ctx)
>              }
>          }
>          av_packet_unref(&pkt);
> -        avsubtitle_free(&sub);
> +        av_frame_free(&sub);
>      }
>  
>  end:
> diff --git a/libavformat/utils.c b/libavformat/utils.c
> index 7840e8717c..d9b54c7725 100644
> --- a/libavformat/utils.c
> +++ b/libavformat/utils.c
> @@ -32,6 +32,7 @@
>  #include "libavutil/opt.h"
>  #include "libavutil/parseutils.h"
>  #include "libavutil/pixfmt.h"
> +#include "libavutil/subfmt.h"
>  #include "libavutil/thread.h"
>  #include "libavutil/time.h"
>  
> diff --git a/libavutil/Makefile b/libavutil/Makefile
> index 529046dbc8..7e79936876 100644
> --- a/libavutil/Makefile
> +++ b/libavutil/Makefile
> @@ -74,6 +74,7 @@ HEADERS = adler32.h                                                     \
>            sha512.h                                                      \
>            spherical.h                                                   \
>            stereo3d.h                                                    \
> +          subfmt.h                                                      \
>            threadmessage.h                                               \
>            time.h                                                        \
>            timecode.h                                                    \
> @@ -159,6 +160,7 @@ OBJS = adler32.o                                                        \
>         slicethread.o                                                    \
>         spherical.o                                                      \
>         stereo3d.o                                                       \
> +       subfmt.o                                                         \
>         threadmessage.o                                                  \
>         time.o                                                           \
>         timecode.o                                                       \
> diff --git a/libavutil/frame.c b/libavutil/frame.c
> index d4d3ad6988..dfe9c269a6 100644
> --- a/libavutil/frame.c
> +++ b/libavutil/frame.c
> @@ -26,6 +26,7 @@
>  #include "imgutils.h"
>  #include "mem.h"
>  #include "samplefmt.h"
> +#include "subfmt.h"
>  #include "hwcontext.h"
>  
>  #define CHECK_CHANNELS_CONSISTENCY(frame) \
> @@ -50,6 +51,9 @@ const char *av_get_colorspace_name(enum AVColorSpace val)
>      return name[val];
>  }
>  #endif
> +
> +static int frame_copy_subtitles(AVFrame *dst, const AVFrame *src, int copy_data);
> +
>  static void get_frame_defaults(AVFrame *frame)
>  {
>      if (frame->extended_data != frame->data)
> @@ -72,7 +76,12 @@ static void get_frame_defaults(AVFrame *frame)
>      frame->colorspace          = AVCOL_SPC_UNSPECIFIED;
>      frame->color_range         = AVCOL_RANGE_UNSPECIFIED;
>      frame->chroma_location     = AVCHROMA_LOC_UNSPECIFIED;
> -    frame->flags               = 0;
> +    frame->subtitle_start_time = 0;
> +    frame->subtitle_end_time   = 0;
> +    frame->num_subtitle_areas  = 0;
> +    frame->subtitle_areas      = NULL;
> +    frame->subtitle_pts        = 0;
> +    frame->subtitle_header     = NULL;
>  }
>  
>  static void free_side_data(AVFrameSideData **ptr_sd)
> @@ -243,23 +252,55 @@ static int get_audio_buffer(AVFrame *frame, int align)
>  
>  }
>  
> +static int get_subtitle_buffer(AVFrame *frame)
> +{
> +    // Buffers in AVFrame->buf[] are not used in case of subtitle frames.
> +    // To accomodate with existing code, checking ->buf[0] to determine
> +    // whether a frame is ref-counted or has data, we're adding a 1-byte
> +    // buffer here, which marks the subtitle frame to contain data.

This is a terrible hack that might be necessary for now. But I don't see
anything

> +    frame->buf[0] = av_buffer_alloc(1);
> +    if (!frame->buf[0]) {
> +        av_frame_unref(frame);
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    frame->extended_data = frame->data;
> +
> +    return 0;
> +}
> +
>  int av_frame_get_buffer(AVFrame *frame, int align)
> +{
> +    if (frame->width > 0 && frame->height > 0)
> +        frame->type = AVMEDIA_TYPE_VIDEO;
> +    else if (frame->nb_samples > 0 && (frame->channel_layout || frame->channels > 0))
> +        frame->type = AVMEDIA_TYPE_AUDIO;
> +
> +    return av_frame_get_buffer2(frame, align);
> +}
> +
> +int av_frame_get_buffer2(AVFrame *frame, int align)
>  {
>      if (frame->format < 0)
>          return AVERROR(EINVAL);
>  
> -    if (frame->width > 0 && frame->height > 0)
> +    switch(frame->type) {
> +    case AVMEDIA_TYPE_VIDEO:
>          return get_video_buffer(frame, align);
> -    else if (frame->nb_samples > 0 && (frame->channel_layout || frame->channels > 0))
> +    case AVMEDIA_TYPE_AUDIO:
>          return get_audio_buffer(frame, align);
> -
> -    return AVERROR(EINVAL);
> +    case AVMEDIA_TYPE_SUBTITLE:
> +        return get_subtitle_buffer(frame);
> +    default:
> +        return AVERROR(EINVAL);
> +    }
>  }
>  
>  static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy)
>  {
>      int ret, i;
>  
> +    dst->type                   = src->type;
>      dst->key_frame              = src->key_frame;
>      dst->pict_type              = src->pict_type;
>      dst->sample_aspect_ratio    = src->sample_aspect_ratio;
> @@ -290,6 +331,12 @@ static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy)
>      dst->colorspace             = src->colorspace;
>      dst->color_range            = src->color_range;
>      dst->chroma_location        = src->chroma_location;
> +    dst->subtitle_start_time    = src->subtitle_start_time;
> +    dst->subtitle_end_time      = src->subtitle_end_time;
> +    dst->subtitle_pts           = src->subtitle_pts;
> +
> +    if ((ret = av_buffer_replace(&dst->subtitle_header, src->subtitle_header)) < 0)
> +        return ret;
>  
>      av_dict_copy(&dst->metadata, src->metadata, 0);
>  
> @@ -331,6 +378,7 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src)
>      av_assert1(dst->width == 0 && dst->height == 0);
>      av_assert1(dst->channels == 0);
>  
> +    dst->type           = src->type;
>      dst->format         = src->format;
>      dst->width          = src->width;
>      dst->height         = src->height;
> @@ -344,7 +392,7 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src)
>  
>      /* duplicate the frame data if it's not refcounted */
>      if (!src->buf[0]) {
> -        ret = av_frame_get_buffer(dst, 0);
> +        ret = av_frame_get_buffer2(dst, 0);
>          if (ret < 0)
>              goto fail;
>  
> @@ -366,6 +414,10 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src)
>          }
>      }
>  
> +    /* copy subtitle areas */
> +    if (src->type == AVMEDIA_TYPE_SUBTITLE)
> +        frame_copy_subtitles(dst, src, 0);
> +
>      if (src->extended_buf) {
>          dst->extended_buf = av_calloc(src->nb_extended_buf,
>                                        sizeof(*dst->extended_buf));
> @@ -436,7 +488,7 @@ AVFrame *av_frame_clone(const AVFrame *src)
>  
>  void av_frame_unref(AVFrame *frame)
>  {
> -    int i;
> +    unsigned i, n;
>  
>      if (!frame)
>          return;
> @@ -455,6 +507,21 @@ void av_frame_unref(AVFrame *frame)
>      av_buffer_unref(&frame->opaque_ref);
>      av_buffer_unref(&frame->private_ref);
>  
> +    av_buffer_unref(&frame->subtitle_header);
> +
> +    for (i = 0; i < frame->num_subtitle_areas; i++) {
> +        AVSubtitleArea *area = frame->subtitle_areas[i];
> +
> +        for (n = 0; n < FF_ARRAY_ELEMS(area->buf); n++)
> +            av_buffer_unref(&area->buf[n]);
> +
> +        av_freep(&area->text);
> +        av_freep(&area->ass);
> +        av_free(area);
> +    }
> +
> +    av_freep(&frame->subtitle_areas);
> +
>      get_frame_defaults(frame);
>  }
>  
> @@ -472,7 +539,8 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src)
>  
>  int av_frame_is_writable(AVFrame *frame)
>  {
> -    int i, ret = 1;
> +    AVSubtitleArea *area;
> +    unsigned i, n, ret = 1;

Use smaller scope for the variables that you add here (i.e. don't add
them here).

>  
>      /* assume non-refcounted frames are not writable */
>      if (!frame->buf[0])
> @@ -484,6 +552,15 @@ int av_frame_is_writable(AVFrame *frame)
>      for (i = 0; i < frame->nb_extended_buf; i++)
>          ret &= !!av_buffer_is_writable(frame->extended_buf[i]);
>  
> +    for (i = 0; i < frame->num_subtitle_areas; i++) {
> +        area = frame->subtitle_areas[i];
> +        if (area) {

Who sets an incorrect num_subtitle_areas?

> +            for (n = 0; n < FF_ARRAY_ELEMS(area->buf); n++)
> +                if (area->buf[n])
> +                    ret &= !!av_buffer_is_writable(area->buf[n]);
> +            }
> +    }
> +
>      return ret;
>  }
>  
> @@ -499,6 +576,7 @@ int av_frame_make_writable(AVFrame *frame)
>          return 0;
>  
>      memset(&tmp, 0, sizeof(tmp));
> +    tmp.type           = frame->type;
>      tmp.format         = frame->format;
>      tmp.width          = frame->width;
>      tmp.height         = frame->height;
> @@ -509,7 +587,7 @@ int av_frame_make_writable(AVFrame *frame)
>      if (frame->hw_frames_ctx)
>          ret = av_hwframe_get_buffer(frame->hw_frames_ctx, &tmp, 0);
>      else
> -        ret = av_frame_get_buffer(&tmp, 0);
> +        ret = av_frame_get_buffer2(&tmp, 0);
>      if (ret < 0)
>          return ret;
>  
> @@ -544,14 +622,22 @@ AVBufferRef *av_frame_get_plane_buffer(AVFrame *frame, int plane)
>      uint8_t *data;
>      int planes, i;
>  
> -    if (frame->nb_samples) {
> -        int channels = frame->channels;
> -        if (!channels)
> -            return NULL;
> -        CHECK_CHANNELS_CONSISTENCY(frame);
> -        planes = av_sample_fmt_is_planar(frame->format) ? channels : 1;
> -    } else
> +    switch(frame->type) {
> +    case AVMEDIA_TYPE_VIDEO:
>          planes = 4;
> +        break;
> +    case AVMEDIA_TYPE_AUDIO:
> +        {
> +            int channels = frame->channels;
> +            if (!channels)
> +                return NULL;
> +            CHECK_CHANNELS_CONSISTENCY(frame);
> +            planes = av_sample_fmt_is_planar(frame->format) ? channels : 1;
> +            break;
> +        }
> +    default:
> +        return NULL;
> +    }
>  
>      if (plane < 0 || plane >= planes || !frame->extended_data[plane])
>          return NULL;
> @@ -675,17 +761,39 @@ static int frame_copy_audio(AVFrame *dst, const AVFrame *src)
>      return 0;
>  }
>  
> +static int frame_copy_subtitles(AVFrame *dst, const AVFrame *src, int copy_data)
> +{
> +    dst->format = src->format;
> +
> +    dst->num_subtitle_areas = src->num_subtitle_areas;
> +
> +    if (src->num_subtitle_areas) {
> +        dst->subtitle_areas = av_malloc_array(src->num_subtitle_areas, sizeof(AVSubtitleArea *));
> +
> +        for (unsigned i = 0; i < src->num_subtitle_areas; i++) {
> +            dst->subtitle_areas[i] = av_mallocz(sizeof(AVSubtitleArea));
> +            av_subtitle_area2area(dst->subtitle_areas[i], src->subtitle_areas[i], copy_data);
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  int av_frame_copy(AVFrame *dst, const AVFrame *src)
>  {
>      if (dst->format != src->format || dst->format < 0)
>          return AVERROR(EINVAL);
>  
> -    if (dst->width > 0 && dst->height > 0)
> +    switch(dst->type) {
> +    case AVMEDIA_TYPE_VIDEO:
>          return frame_copy_video(dst, src);
> -    else if (dst->nb_samples > 0 && dst->channels > 0)
> +    case AVMEDIA_TYPE_AUDIO:
>          return frame_copy_audio(dst, src);
> -
> -    return AVERROR(EINVAL);
> +    case AVMEDIA_TYPE_SUBTITLE:
> +        return frame_copy_subtitles(dst, src, 1);
> +    default:
> +        return AVERROR(EINVAL);
> +    }
>  }
>  
>  void av_frame_remove_side_data(AVFrame *frame, enum AVFrameSideDataType type)
> @@ -832,3 +940,49 @@ int av_frame_apply_cropping(AVFrame *frame, int flags)
>  
>      return 0;
>  }
> +
> +/**
> + * Copies subtitle data from AVSubtitle (deprecated) to AVFrame
> + *
> + * @note This is a compatibility method for conversion to the legacy API
> + */
> +void av_frame_put_subtitle(AVFrame *frame, const AVSubtitle *sub)
> +{
> +    frame->format              = sub->format;
> +    frame->subtitle_start_time = sub->start_display_time;
> +    frame->subtitle_end_time   = sub->end_display_time;
> +    frame->subtitle_pts        = sub->pts;
> +    frame->num_subtitle_areas  = sub->num_rects;
> +
> +    if (sub->num_rects) {
> +        frame->subtitle_areas = av_malloc_array(sub->num_rects, sizeof(AVSubtitleArea *));
> +
> +        for (unsigned i = 0; i < sub->num_rects; i++) {
> +            frame->subtitle_areas[i] = av_mallocz(sizeof(AVSubtitleArea));
> +            av_subtitle_rect2area(frame->subtitle_areas[i], sub->rects[i]);
> +        }
> +    }
> +}
> +
> +/**
> + * Copies subtitle data from AVFrame to AVSubtitle (deprecated)
> + *
> + * @note This is a compatibility method for conversion to the legacy API
> + */
> +void av_frame_get_subtitle(AVSubtitle *sub, AVFrame *frame)
> +{
> +    sub->start_display_time = frame->subtitle_start_time;
> +    sub->end_display_time   = frame->subtitle_end_time;
> +    sub->pts                = frame->subtitle_pts;
> +    sub->num_rects          = frame->num_subtitle_areas;
> +
> +    if (frame->num_subtitle_areas) {
> +        sub->rects = av_malloc_array(frame->num_subtitle_areas, sizeof(AVSubtitle *));

Unchecked. Furthermore, wrong sizeof operand. Notice that you have
already set num_rects before the allocation, so on error, the AVSubtitle
would be inconsistent if you simply returned AVERROR(ENOMEM).

> +
> +        for (unsigned i = 0; i < frame->num_subtitle_areas; i++) {
> +            sub->rects[i] = av_mallocz(sizeof(AVSubtitleRect));

Unchecked allocations.

> +            av_subtitle_area2rect(sub->rects[i], frame->subtitle_areas[i]);

Missing check.

> +        }
> +    }
> +}
> +
> diff --git a/libavutil/frame.h b/libavutil/frame.h
> index 753234792e..742f4ba07e 100644
> --- a/libavutil/frame.h
> +++ b/libavutil/frame.h
> @@ -34,6 +34,7 @@
>  #include "rational.h"
>  #include "samplefmt.h"
>  #include "pixfmt.h"
> +#include "subfmt.h"
>  #include "version.h"
>  
>  
> @@ -278,7 +279,7 @@ typedef struct AVRegionOfInterest {
>  } AVRegionOfInterest;
>  
>  /**
> - * This structure describes decoded (raw) audio or video data.
> + * This structure describes decoded (raw) audio, video or subtitle data.
>   *
>   * AVFrame must be allocated using av_frame_alloc(). Note that this only
>   * allocates the AVFrame itself, the buffers for the data must be managed
> @@ -309,6 +310,13 @@ typedef struct AVRegionOfInterest {
>   */
>  typedef struct AVFrame {
>  #define AV_NUM_DATA_POINTERS 8
> +    /**
> +     * Media type of the frame (audio, video, subtitles..)
> +     *
> +     * See AVMEDIA_TYPE_xxx
> +     */
> +    enum AVMediaType type;
> +
>      /**
>       * pointer to the picture/channel planes.
>       * This might be different from the first allocated byte. For video,
> @@ -390,7 +398,7 @@ typedef struct AVFrame {
>      /**
>       * format of the frame, -1 if unknown or unset
>       * Values correspond to enum AVPixelFormat for video frames,
> -     * enum AVSampleFormat for audio)
> +     * enum AVSampleFormat for audio, AVSubtitleType for subtitles)
>       */
>      int format;
>  
> @@ -481,6 +489,39 @@ typedef struct AVFrame {
>       */
>      uint64_t channel_layout;
>  
> +    /**
> +     * Display start time, relative to packet pts, in ms.
> +     */
> +    uint32_t subtitle_start_time;
> +
> +    /**
> +     * Display end time, relative to packet pts, in ms.
> +     */
> +    uint32_t subtitle_end_time;

Hardcoding a millisecond timestamp precision into the API doesn't seem
good. As does using 32bit.

> +
> +    /**
> +     * Number of items in the @ref subtitle_areas array.
> +     */
> +    unsigned num_subtitle_areas;
> +
> +    /**
> +     * Array of subtitle areas, may be empty.
> +     */
> +    AVSubtitleArea **subtitle_areas;
> +
> +    /**
> +     * Usually the same as packet pts, in AV_TIME_BASE.
> +     *
> +     * @deprecated This is kept for compatibility reasons and corresponds to
> +     * AVSubtitle->pts. Might be removed in the future.
> +     */
> +    int64_t subtitle_pts;

What exactly is the point of having two different timestamps?

> +
> +    /**
> +     * Header containing style information for text subtitles.
> +     */
> +    AVBufferRef *subtitle_header;
> +
>      /**
>       * AVBuffer references backing the data for this frame. If all elements of
>       * this array are NULL, then this frame is not reference counted. This array
> @@ -740,6 +781,8 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src);
>  /**
>   * Allocate new buffer(s) for audio or video data.
>   *
> + * Note: For subtitle data, use av_frame_get_buffer2
> + *
>   * The following fields must be set on frame before calling this function:
>   * - format (pixel format for video, sample format for audio)
>   * - width and height for video
> @@ -759,9 +802,39 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src);
>   *              recommended to pass 0 here unless you know what you are doing.
>   *
>   * @return 0 on success, a negative AVERROR on error.
> + *
> + * @deprecated Use @ref av_frame_get_buffer2 instead and set @ref AVFrame.type
> + * before calling.
>   */
> +attribute_deprecated
>  int av_frame_get_buffer(AVFrame *frame, int align);
>  
> +/**
> + * Allocate new buffer(s) for audio, video or subtitle data.
> + *
> + * The following fields must be set on frame before calling this function:
> + * - format (pixel format for video, sample format for audio)
> + * - width and height for video
> + * - nb_samples and channel_layout for audio
> + * - type (AVMediaType)
> + *
> + * This function will fill AVFrame.data and AVFrame.buf arrays and, if
> + * necessary, allocate and fill AVFrame.extended_data and AVFrame.extended_buf.
> + * For planar formats, one buffer will be allocated for each plane.
> + *
> + * @warning: if frame already has been allocated, calling this function will
> + *           leak memory. In addition, undefined behavior can occur in certain
> + *           cases.
> + *
> + * @param frame frame in which to store the new buffers.
> + * @param align Required buffer size alignment. If equal to 0, alignment will be
> + *              chosen automatically for the current CPU. It is highly
> + *              recommended to pass 0 here unless you know what you are doing.
> + *
> + * @return 0 on success, a negative AVERROR on error.
> + */
> +int av_frame_get_buffer2(AVFrame *frame, int align);
> +
>  /**
>   * Check if the frame data is writable.
>   *
> @@ -863,6 +936,22 @@ AVFrameSideData *av_frame_get_side_data(const AVFrame *frame,
>   */
>  void av_frame_remove_side_data(AVFrame *frame, enum AVFrameSideDataType type);
>  
> +/**
> + * Copies subtitle data from AVSubtitle to AVFrame.
> + *
> + * @deprecated This is a compatibility method for interoperability with
> + * the legacy subtitle API.
> + */
> +void av_frame_put_subtitle(AVFrame *frame, const AVSubtitle *sub);
> +
> +/**
> + * Copies subtitle data from AVFrame to AVSubtitle.
> + *
> + * @deprecated This is a compatibility method for interoperability with
> + * the legacy subtitle API.
> + */
> +void av_frame_get_subtitle(AVSubtitle *sub, AVFrame *frame);

1. Missing const.
2. Wrong return type.
3. I don't see a need for this compatibility at all. As explained below,
the only user that needs something like this is libavcodec and then it
should live in libavcodec.
The last two points apply to both av_frame_get_subtitle and
av_frame_put_subtitle. Notice that the only use of any of these
functions outside of libavcodec is remove later again (patch #8 adds an
av_frame_get_subtitle() in ffmpeg.c, #16 removes it again) which shows
that such a compatibility stuff is not needed outside of libavcodec.

> +
>  
>  /**
>   * Flags for frame cropping.
> diff --git a/libavutil/subfmt.c b/libavutil/subfmt.c
> new file mode 100644
> index 0000000000..f68907a644
> --- /dev/null
> +++ b/libavutil/subfmt.c
> @@ -0,0 +1,243 @@
> +/*
> + * Copyright (c) 2021 softworkz
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <string.h>
> +#include "common.h"
> +#include "subfmt.h"
> +
> +typedef struct SubtitleFmtInfo {
> +    enum AVSubtitleType fmt;
> +    const char* name;
> +} SubtitleFmtInfo;
> +
> +static SubtitleFmtInfo sub_fmt_info[AV_SUBTITLE_FMT_NB] = {
> +    {.fmt = AV_SUBTITLE_FMT_UNKNOWN, .name = "Unknown subtitle format", },
> +    {.fmt = AV_SUBTITLE_FMT_BITMAP,  .name = "Graphical subtitles",     },
> +    {.fmt = AV_SUBTITLE_FMT_TEXT,    .name = "Text subtitles (plain)",  },
> +    {.fmt = AV_SUBTITLE_FMT_ASS,     .name = "Text subtitles (ass)",    },
> +};

There is no need to add an enum AVSubtitleType to the struct. After all,
the valid range of enum AVSubtitleType will always be
0..(AV_SUBTITLE_FMT_NB - 1) as long as we don't shoot ourselves in the foot.
Notice that av_get_subtitle_fmt_name() requires sub_fmt_info[i].fmt == i
for i in 0..(AV_SUBTITLE_FMT_NB - 1), i.e. it both relies on there not
being gaps (that's ok) and on the particular values of AV_SUBTITLE_FMT_*
being set in the way they are. The latter is not ok, use
[AV_SUBTITLE_FMT_UNKNOWN] = "Unknown subtitle format" etc. instead.
Furthermore, these lengths of the names are so close to each other that
it is advantageous to use an array of char[24] instead of an array of
pointers. This also saves relocations.

> +
> +const char *av_get_subtitle_fmt_name(enum AVSubtitleType sub_fmt)
> +{
> +    if (sub_fmt < 0 || sub_fmt >= AV_SUBTITLE_FMT_NB)
> +        return NULL;
> +    return sub_fmt_info[sub_fmt].name;
> +}
> +
> +enum AVSubtitleType av_get_subtitle_fmt(const char *name)
> +{
> +    for (int i = 0; i < AV_SUBTITLE_FMT_NB; i++)
> +        if (!strcmp(sub_fmt_info[i].name, name))
> +            return i;
> +    return AV_SUBTITLE_FMT_NONE;
> +}
> +
> +int av_subtitle_area2rect(AVSubtitleRect *dst, const AVSubtitleArea *src)
> +{
> +    dst->x         =  src->x;        
> +    dst->y         =  src->y;        
> +    dst->w         =  src->w;        
> +    dst->h         =  src->h;        
> +    dst->nb_colors =  src->nb_colors;
> +    dst->type      =  src->type;                     
> +    dst->flags     =  src->flags;
> +
> +    switch (dst->type) {
> +    case AV_SUBTITLE_FMT_BITMAP:
> +
> +        if (src->h > 0 && src->w > 0 && src->buf[0]) {
> +            uint32_t *pal;
> +            AVBufferRef *buf = src->buf[0];
> +            dst->data[0] = av_mallocz(buf->size);
> +            memcpy(dst->data[0], buf->data, buf->size);

av_memdup(); also unchecked allocation and unnecessary zeroing.

> +            dst->linesize[0] = src->linesize[0];
> +
> +            dst->data[1] = av_mallocz(256 * 4);

Unchecked allocation.

> +            pal = (uint32_t *)dst->data[1];
> +
> +            for (unsigned i = 0; i < 256; i++) {
> +                pal[i] = src->pal[i];
> +            }

This loop could be avoided by using av_memdup().

> +        }
> +
> +        break;
> +    case AV_SUBTITLE_FMT_TEXT:
> +
> +        if (src->text)
> +            dst->text = av_strdup(src->text);
> +        else
> +            dst->text = av_strdup("");
> +
> +        if (!dst->text)
> +            return AVERROR(ENOMEM);
> +
> +        break;
> +    case AV_SUBTITLE_FMT_ASS:
> +
> +        if (src->ass)
> +            dst->ass = av_strdup(src->ass);
> +        else
> +            dst->ass = av_strdup("");
> +
> +        if (!dst->ass)
> +            return AVERROR(ENOMEM);
> +
> +        break;
> +    default:
> +
> +        av_log(NULL, AV_LOG_ERROR, "Subtitle rect has invalid format: %d", dst->type);
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    return 0;
> +}
> +
> +int av_subtitle_rect2area(AVSubtitleArea *dst, const AVSubtitleRect *src)
> +{
> +    dst->x         =  src->x;        
> +    dst->y         =  src->y;        
> +    dst->w         =  src->w;        
> +    dst->h         =  src->h;        
> +    dst->nb_colors =  src->nb_colors;
> +    dst->type      =  src->type;                     

There seems to be an awful lot of trailing whitespace in the above lines
(and probably many more lines).

> +    dst->flags     =  src->flags;
> +
> +    switch (dst->type) {
> +    case AV_SUBTITLE_FMT_BITMAP:
> +
> +        if (src->h > 0 && src->w > 0 && src->data[0]) {
> +            AVBufferRef *buf = av_buffer_allocz(src->h * src->linesize[0]);

Unchecked allocation; also zeroing is unnecessary here.

> +            memcpy(buf->data, src->data[0], buf->size);
> +
> +            dst->buf[0] = buf;
> +            dst->linesize[0] = src->linesize[0];
> +        }
> +
> +        if (src->data[1]) {
> +            uint32_t *pal = (uint32_t *)src->data[1];
> +
> +            for (unsigned i = 0; i < 256; i++) {
> +                dst->pal[i] = pal[i];
> +            }
> +        }
> +
> +        break;
> +    case AV_SUBTITLE_FMT_TEXT:
> +
> +        if (src->text) {
> +            dst->text = av_strdup(src->text);
> +            if (!dst->text)
> +                return AVERROR(ENOMEM);
> +        }
> +
> +        break;
> +    case AV_SUBTITLE_FMT_ASS:
> +
> +        if (src->ass) {
> +            dst->ass = av_strdup(src->ass);
> +            if (!dst->ass)
> +                return AVERROR(ENOMEM);
> +        }
> +
> +        break;
> +    default:
> +
> +        av_log(NULL, AV_LOG_ERROR, "Subtitle area has invalid format: %d", dst->type);
> +        return AVERROR(ENOMEM);

ENOMEM?

> +    }
> +
> +    return 0;
> +}
> +
> +int av_subtitle_area2area(AVSubtitleArea *dst, const AVSubtitleArea *src, int copy_data)
> +{
> +    dst->x         =  src->x;        
> +    dst->y         =  src->y;        
> +    dst->w         =  src->w;        
> +    dst->h         =  src->h;        
> +    dst->nb_colors =  src->nb_colors;
> +    dst->type      =  src->type;                     
> +    dst->flags     =  src->flags;
> +
> +    switch (dst->type) {
> +    case AV_SUBTITLE_FMT_BITMAP:
> +
> +        if (src->h > 0 && src->w > 0 && src->buf[0]) {
> +            dst->buf[0] = av_buffer_ref(src->buf[0]);
> +            if (!dst->buf[0])
> +                return AVERROR(ENOMEM);
> +
> +            if (copy_data) {
> +                const int ret = av_buffer_make_writable(&dst->buf[0]);
> +                if (ret < 0)
> +                    return ret;
> +            }
> +
> +            dst->linesize[0] = src->linesize[0];
> +        }

This would actually need to be a loop (for (int i = 0; i <
AV_NUM_BUFFER_POINTERS; i++)). Furthermore, is it intended that there is
no cleanup code in case only a subset of the allocations succeed?

> +
> +        for (unsigned i = 0; i < 256; i++)
> +            dst->pal[i] = src->pal[i];

memcpy?

> +
> +        break;
> +    case AV_SUBTITLE_FMT_TEXT:
> +
> +        if (src->text) {
> +            dst->text = av_strdup(src->text);
> +            if (!dst->text)
> +                return AVERROR(ENOMEM);
> +        }
> +
> +        break;
> +    case AV_SUBTITLE_FMT_ASS:
> +
> +        if (src->ass) {
> +            dst->ass = av_strdup(src->ass);
> +            if (!dst->ass)
> +                return AVERROR(ENOMEM);
> +        }
> +
> +        break;
> +    default:
> +
> +        av_log(NULL, AV_LOG_ERROR, "Subtitle area has invalid format: %d", dst->type);
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    return 0;
> +}
> +
> +void avsubtitle_free(AVSubtitle *sub)
> +{
> +    for (unsigned i = 0; i < sub->num_rects; i++) {
> +        av_freep(&sub->rects[i]->data[0]);
> +        av_freep(&sub->rects[i]->data[1]);
> +        av_freep(&sub->rects[i]->data[2]);
> +        av_freep(&sub->rects[i]->data[3]);
> +        av_freep(&sub->rects[i]->text);
> +        av_freep(&sub->rects[i]->ass);
> +        av_freep(&sub->rects[i]);
> +    }
> +
> +    av_freep(&sub->rects);
> +
> +    memset(sub, 0, sizeof(*sub));
> +}
> +
> diff --git a/libavutil/subfmt.h b/libavutil/subfmt.h
> new file mode 100644
> index 0000000000..372e3db413
> --- /dev/null
> +++ b/libavutil/subfmt.h
> @@ -0,0 +1,185 @@
> +/*
> + * Copyright (c) 2021 softworkz
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVUTIL_SUBFMT_H
> +#define AVUTIL_SUBFMT_H
> +
> +#include <inttypes.h>
> +

stdint is enough.

> +#include "buffer.h"
> +
> +enum AVSubtitleType {
> +
> +    /**
> +     * Subtitle format unknown.
> +     */
> +    AV_SUBTITLE_FMT_NONE = -1,
> +
> +    /**
> +     * Subtitle format unknown.
> +     */
> +    AV_SUBTITLE_FMT_UNKNOWN = 0,
> +
> +    /**
> +     * Bitmap area in AVSubtitleRect.data, pixfmt AV_PIX_FMT_PAL8.
> +     */
> +    AV_SUBTITLE_FMT_BITMAP = 1,
> +    SUBTITLE_BITMAP = 1,        ///< Deprecated, use AV_SUBTITLE_FMT_BITMAP instead.
> +
> +    /**
> +     * Plain text in AVSubtitleRect.text.
> +     */
> +    AV_SUBTITLE_FMT_TEXT = 2,
> +    SUBTITLE_TEXT = 2,          ///< Deprecated, use AV_SUBTITLE_FMT_TEXT instead.
> +
> +    /**
> +     * Text Formatted as per ASS specification, contained AVSubtitleRect.ass.
> +     */
> +    AV_SUBTITLE_FMT_ASS = 3,
> +    SUBTITLE_ASS = 3,           ///< Deprecated, use AV_SUBTITLE_FMT_ASS instead.
> +
> +    AV_SUBTITLE_FMT_NB,
> +};
> +
> +typedef struct AVSubtitleArea {
> +#define AV_NUM_BUFFER_POINTERS 1
> +
> +    enum AVSubtitleType type;
> +    int flags;
> +
> +    int x;         ///< top left corner  of area.
> +    int y;         ///< top left corner  of area.
> +    int w;         ///< width            of area.
> +    int h;         ///< height           of area.
> +    int nb_colors; ///< number of colors in bitmap palette (@ref pal).
> +
> +    /**
> +     * Buffers and line sizes for the bitmap of this subtitle.
> +     * 
> +     * @{
> +     */
> +    AVBufferRef *buf[AV_NUM_BUFFER_POINTERS];
> +    int linesize[AV_NUM_BUFFER_POINTERS];
> +    /**
> +     * @}
> +     */
> +
> +    uint32_t pal[256]; ///< RGBA palette for the bitmap.
> +
> +    char *text;        ///< 0-terminated plain UTF-8 text
> +    char *ass;         ///< 0-terminated ASS/SSA compatible event line.
> +
> +} AVSubtitleArea;

I presume that sizeof(AVSubtitleArea) is not supposed to be part of the
public API/ABI (after all, you used an array of pointers to
AVSubtitleArea in AVFrame). This needs to be documented.

> +
> +typedef struct AVSubtitleRect {
> +    unsigned nb_refs;
> +    int x;         ///< top left corner  of pict, undefined when pict is not set
> +    int y;         ///< top left corner  of pict, undefined when pict is not set
> +    int w;         ///< width            of pict, undefined when pict is not set
> +    int h;         ///< height           of pict, undefined when pict is not set
> +    int nb_colors; ///< number of colors in pict, undefined when pict is not set
> +
> +    /**
> +     * data+linesize for the bitmap of this subtitle.
> +     */
> +    uint8_t *data[4];
> +    int linesize[4];
> +
> +    enum AVSubtitleType type;
> +
> +    char *text;                     ///< 0 terminated plain UTF-8 text
> +
> +    /**
> +     * 0-terminated ASS/SSA compatible event line.
> +     */
> +    char *ass;
> +
> +    int flags;
> +} AVSubtitleRect;
> +
> +typedef struct AVSubtitle {
> +    uint16_t format; /* 0 = graphics */
> +    uint32_t start_display_time; /* relative to packet pts, in ms */
> +    uint32_t end_display_time; /* relative to packet pts, in ms */
> +    unsigned num_rects;
> +    AVSubtitleRect **rects;
> +    int64_t pts;    ///< Same as packet pts, in AV_TIME_BASE
> +} AVSubtitle;
> +
> +/**
> + * Return the name of sub_fmt, or NULL if sub_fmt is not
> + * recognized.
> + */
> +const char *av_get_subtitle_fmt_name(enum AVSubtitleType sub_fmt);
> +
> +/**
> + * Return a subtitle format corresponding to name, or AV_SUBTITLE_FMT_NONE
> + * on error.
> + *
> + * @param name Subtitle format name.
> + */
> +enum AVSubtitleType av_get_subtitle_fmt(const char *name);

Do we need this? Given that there are many possible ways to name the
subtitles, it doesn't seem useful to me. The only way to be sure to use
the "authoritative" name for a subtitle format is by
av_get_subtitle_fmt_name(). But then one doesn't need
av_get_subtitle_fmt() at all.

> +
> +/**
> + * Copy a subtitle area.
> + *
> + * @param dst        The target area.
> + * @param src        The source area.
> + * @param copy_data  Determines whether to copy references or actual
> + *                   data from @ref AVSubtitleArea.buf.
> + *
> + * @return 0 on success.

Better make it "< 0 on error" to reserve positive return values in case
we ever needed them.

> + */
> +int av_subtitle_area2area(AVSubtitleArea *dst, const AVSubtitleArea *src, int copy_data);

This documentation does not mention that dst should be "blank". But this
actually poses a problem: How does the user know that an AVSubtitleArea
is blank given that ? For AVFrames the answer is this: An AVFrame is
blank if it comes directly from av_frame_alloc() or av_frame_unref().
But there is no equivalent for these functions here.
Furthermore, is this API supposed to be a standalone API or is
AVSubtitleArea something that is always supposed to be part of an
AVFrame? The usage in this patchset suggests the latter:
av_subtitle_area2area() is only used in frame.c.

> +
> +/**
> + * Copy data from @ref AVSubtitleArea to @ref AVSubtitleRect.
> + *
> + * @param dst        The target rect (@ref AVSubtitleRect).
> + * @param src        The source area (@ref AVSubtitleArea).
> + *
> + * @return 0 on success.
> + *
> + * @deprecated This is a compatibility method for interoperability with
> + * the legacy subtitle API.
> + */
> +int av_subtitle_area2rect(AVSubtitleRect *dst, const AVSubtitleArea *src);
> +
> +/**
> + * Copy data from @ref AVSubtitleRect to @ref AVSubtitleArea.
> + *
> + * @param dst        The source area (@ref AVSubtitleArea).
> + * @param src        The target rect (@ref AVSubtitleRect).
> + *
> + * @return 0 on success.
> + *
> + * @deprecated This is a compatibility method for interoperability with
> + * the legacy subtitle API.
> + */
> +int av_subtitle_rect2area(AVSubtitleArea *dst, const AVSubtitleRect *src);

Do we need compatibility with the legacy API at all? I only see two
usecases for this:

1. One has a huge project using AVSubtitles that uses AVSubtitle so
pervasively that one can't transition to the new API in one step. But I
doubt that that's a thing.
2. One is forced to provide compatibility with the legacy API while also
providing an API based upon the new API. This is of course the situation
with libavcodec, which for this reason needs such compatibility
functions (in libavcodec!). But I don't see a second user with the same
needs, so I don't see why these functions (which would actually need to
be declared as deprecated from the very beginning if public) should be
public.

> +
> +/**
> + * Free all allocated data in the given subtitle struct.
> + *
> + * @param sub AVSubtitle to free.
> + */
> +void avsubtitle_free(AVSubtitle *sub);
> +
> +#endif /* AVUTIL_SUBFMT_H */
>
Anton Khirnov Nov. 26, 2021, 4:21 p.m. UTC | #2
You have completely disregarded my comments on the previous patch. Stop
doing that, it is rude.

Your patchset is unacceptable until you deal with them.
Soft Works Nov. 26, 2021, 6:25 p.m. UTC | #3
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Anton
> Khirnov
> Sent: Friday, November 26, 2021 5:21 PM
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for
> subtitle handling
> 
> You have completely disregarded my comments on the previous patch. Stop
> doing that, it is rude.

I value everyone's time taken to review the patches I'm submitting.
Rest assured that I'm going over every single comment and try to address 
each one appropriately.

I think you misunderstood because I had answered one of your comments
immediately. Right now I'm re-working the patchset to address all suggestions
and eventually I'll respond to all those that I haven't addressed, 
explaining why.

Thank you very much for reviewing my code!

softworkz
Soft Works Nov. 27, 2021, 2:57 a.m. UTC | #4
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Andreas
> Rheinhardt
> Sent: Friday, November 26, 2021 11:35 AM
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for
> subtitle handling

Hi Andreas,

thanks for the detailed review. There was hardly anything to object or disagree.
Everything I don't respond to is implemented as suggested.

> As has already been said by others, this should be split: One patch for
> the actual new libavutil additions, one patch for the lavc decoding API,
> one patch for the encoding API, one patch for every user that is made to
> use the new APIs.

By keeping the AVSubtitle definition in its original location, it was 
possible to split this as suggested on IRC.
Thanks.

> This is not how you deprecate something: You have to add a @deprecated
> doxygen comment as well as the attribute_deprecated to make some noise.

I didn’t want the noise before things get serious ;-)

> 
> Actually, you should deprecate the whole AVSubtitle API.

Done. Do we deprecate structs as well?

Also, there's a function avcodec_get_subtitle_rect_class().

I have no idea whether it ever had any practical use. A similar function
above (avcodec_get_frame_class) is deprecated.


> > +/**
> > + * Return subtitle format from a codec descriptor
> > + *
> > + * @param codec_descriptor codec descriptor
> > + * @return                 the subtitle type (e.g. bitmap, text)
> > + */
> > +enum AVSubtitleType av_get_subtitle_format_from_codecdesc(const
> AVCodecDescriptor *codec_descriptor);
> 
> Do we need this function? It seems too trivial and as Anton has pointed
> out is flawed. And anyway, this would belong into codec_desc.h.

I had replied to Anton why it's not flawed. Moved it to codec_desc and
named it as Anton had suggested.


> >  int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const
> AVPacket *avpkt)
> >  {
> >      AVCodecInternal *avci = avctx->internal;
> > @@ -590,6 +618,9 @@ int attribute_align_arg
> avcodec_send_packet(AVCodecContext *avctx, const AVPacke
> >      if (avpkt && !avpkt->size && avpkt->data)
> >          return AVERROR(EINVAL);
> >
> > +    if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE)
> > +        return decode_subtitle_shim(avctx, avci->buffer_frame, avpkt);
> 
> 1. This is missing a check whether buffer_frame is empty or not; in the
> latter case, you need to return AVERROR(EAGAIN); instead
> decode_subtitle_shim() destroys what is already there.
> (Said check is currently offloaded to the BSF API in the audio/video
> codepath. This is actually a violation of the BSF API.)

Added that check.

> 2. Flushing is not really implemented well:
> a) It is legal to call avcodec_send_packet() with avpkt == NULL to
> indicate EOF. If I am not mistaken, decode_subtitle2_priv() will just
> crash in this case.

Like all subtitle decoders would when supplying a NULL packet.
What should I return when a null packet is supplied?
EAGAIN or EOF?

> b) avcodec_receive_frame() only returns what is in buffer_frame; it
> never calls decode_subtitle2_priv() or the underlying decode function at
> all. This basically presumes that a subtitle decoder can only return one
> subtitle after flushing. I don't know whether this is true for our
> decoders with the delay cap.

The only one I could see with that flag is cc_dec, which is a special case
anyway as it doesn't work on regular streams.
(even this one would crash when providing a NULL packet)

> c) avcodec_receive_frame() seems to never return AVERROR_EOF after
> flushing, but always AVERROR(EAGAIN) (if the delay cap is set, the first
> call to avcodec_receive_frame() after flushing can also result in a
> frame being returned). Yet this makes no sense and is IMO an API
> violation on lavc's part.
> (While we have subtitle decoders with the delay cap, I don't know
> whether this is actually tested by fate and whether all code actually
> flushes subtitle decoders at all.)

I don't think so.


> > diff --git a/libavfilter/vf_subtitles.c b/libavfilter/vf_subtitles.c
> > index 377160c72b..6b54cf669b 100644
> > --- a/libavfilter/vf_subtitles.c
> > +++ b/libavfilter/vf_subtitles.c
> > @@ -35,14 +35,12 @@
> >  # include "libavformat/avformat.h"
> >  #endif
> >  #include "libavutil/avstring.h"
> > -#include "libavutil/imgutils.h"
> >  #include "libavutil/opt.h"
> >  #include "libavutil/parseutils.h"
> >  #include "drawutils.h"
> >  #include "avfilter.h"
> >  #include "internal.h"
> >  #include "formats.h"
> > -#include "video.h"
> >
> >  typedef struct AssContext {
> >      const AVClass *class;
> > @@ -292,6 +290,29 @@ static int attachment_is_font(AVStream * st)
> >      return 0;
> >  }
> >
> > +static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame,
> AVPacket *pkt)
> > +{
> > +    int ret;
> > +
> > +    *got_frame = 0;
> 
> You don't really need this: You could just return 0 if no frame was
> returned, 1 if a frame was returned and < 0 on (actual, not EAGAIN) errors.
> 
> > +
> > +    if (pkt) {
> > +        ret = avcodec_send_packet(avctx, pkt);
> > +        // In particular, we don't expect AVERROR(EAGAIN), because we read
> all
> > +        // decoded frames with avcodec_receive_frame() until done.
> 
> Where do you do this? For this one would need to call
> avcodec_receive_frame() in a loop until it returns EAGAIN, but you don't
> do this.

This decode() function is copied from ffmpeg.c


> > +static int get_subtitle_buffer(AVFrame *frame)
> > +{
> > +    // Buffers in AVFrame->buf[] are not used in case of subtitle frames.
> > +    // To accomodate with existing code, checking ->buf[0] to determine
> > +    // whether a frame is ref-counted or has data, we're adding a 1-byte
> > +    // buffer here, which marks the subtitle frame to contain data.
> 
> This is a terrible hack that might be necessary for now. But I don't see
> anything

I'm not sure about the second half sentence, but what I can say is that I 
had taken the time to try and replace the checks for ->buf[0] to a frame
field, but it turned out that this change would be so big that it cannot
be part of the subtitle patchset.
(also talked about this with Hendrik a while ago)



> > +    for (i = 0; i < frame->num_subtitle_areas; i++) {
> > +        area = frame->subtitle_areas[i];
> > +        if (area) {
> 
> Who sets an incorrect num_subtitle_areas?

Maybe code that hasn't gotten reviewed by you ;-)

> > +    /**
> > +     * Display start time, relative to packet pts, in ms.
> > +     */
> > +    uint32_t subtitle_start_time;
> > +
> > +    /**
> > +     * Display end time, relative to packet pts, in ms.
> > +     */
> > +    uint32_t subtitle_end_time;
> 
> Hardcoding a millisecond timestamp precision into the API doesn't seem
> good. As does using 32bit.

Our internal text subtitle format is defined to be ASS and those
variables are reflecting just that.


> > +/**
> > + * Copies subtitle data from AVFrame to AVSubtitle.
> > + *
> > + * @deprecated This is a compatibility method for interoperability with
> > + * the legacy subtitle API.
> > + */
> > +void av_frame_get_subtitle(AVSubtitle *sub, AVFrame *frame);
> 
> 1. Missing const.
> 2. Wrong return type.
> 3. I don't see a need for this compatibility at all. As explained below,
> the only user that needs something like this is libavcodec and then it
> should live in libavcodec.
> The last two points apply to both av_frame_get_subtitle and
> av_frame_put_subtitle. Notice that the only use of any of these
> functions outside of libavcodec is remove later again (patch #8 adds an
> av_frame_get_subtitle() in ffmpeg.c, #16 removes it again) which shows
> that such a compatibility stuff is not needed outside of libavcodec.

Moved to avcodec as ff_ functions.


> > +    uint32_t pal[256]; ///< RGBA palette for the bitmap.
> > +
> > +    char *text;        ///< 0-terminated plain UTF-8 text
> > +    char *ass;         ///< 0-terminated ASS/SSA compatible event line.
> > +
> > +} AVSubtitleArea;
> 
> I presume that sizeof(AVSubtitleArea) is not supposed to be part of the
> public API/ABI (after all, you used an array of pointers to
> AVSubtitleArea in AVFrame). This needs to be documented.

Makes sense, but I'm not sure how and where?



> > +/**
> > + * Return a subtitle format corresponding to name, or AV_SUBTITLE_FMT_NONE
> > + * on error.
> > + *
> > + * @param name Subtitle format name.
> > + */
> > +enum AVSubtitleType av_get_subtitle_fmt(const char *name);
> 
> Do we need this? Given that there are many possible ways to name the
> subtitles, it doesn't seem useful to me. The only way to be sure to use
> the "authoritative" name for a subtitle format is by
> av_get_subtitle_fmt_name(). But then one doesn't need
> av_get_subtitle_fmt() at all.

I've tried to implement everything analog to the functionality we have
for audio and video. This corresponds to av_get_pix_fmt().


> > + */
> > +int av_subtitle_area2area(AVSubtitleArea *dst, const AVSubtitleArea *src,
> int copy_data);
> 
> This documentation does not mention that dst should be "blank". But this
> actually poses a problem: How does the user know that an AVSubtitleArea
> is blank given that ? For AVFrames the answer is this: An AVFrame is
> blank if it comes directly from av_frame_alloc() or av_frame_unref().
> But there is no equivalent for these functions here.
> Furthermore, is this API supposed to be a standalone API or is
> AVSubtitleArea something that is always supposed to be part of an
> AVFrame? The usage in this patchset suggests the latter:
> av_subtitle_area2area() is only used in frame.c.

AVSubtitleArea is always supposed to be part of an AVFrame.
I had thought the copy function might be useful outside, but I didn't 
need it in any of the filter.
Made it private in AVFrame now.


> > +/**
> > + * Copy data from @ref AVSubtitleArea to @ref AVSubtitleRect.
> > + *
> > + * @param dst        The target rect (@ref AVSubtitleRect).
> > + * @param src        The source area (@ref AVSubtitleArea).
> > + *
> > + * @return 0 on success.
> > + *
> > + * @deprecated This is a compatibility method for interoperability with
> > + * the legacy subtitle API.
> > + */
> > +int av_subtitle_area2rect(AVSubtitleRect *dst, const AVSubtitleArea *src);
> > +
> > +/**
> > + * Copy data from @ref AVSubtitleRect to @ref AVSubtitleArea.
> > + *
> > + * @param dst        The source area (@ref AVSubtitleArea).
> > + * @param src        The target rect (@ref AVSubtitleRect).
> > + *
> > + * @return 0 on success.
> > + *
> > + * @deprecated This is a compatibility method for interoperability with
> > + * the legacy subtitle API.
> > + */
> > +int av_subtitle_rect2area(AVSubtitleArea *dst, const AVSubtitleRect *src);
> 
> Do we need compatibility with the legacy API at all? I only see two
> usecases for this:
> 
> 1. One has a huge project using AVSubtitles that uses AVSubtitle so
> pervasively that one can't transition to the new API in one step. But I
> doubt that that's a thing.
> 2. One is forced to provide compatibility with the legacy API while also
> providing an API based upon the new API. This is of course the situation
> with libavcodec, which for this reason needs such compatibility
> functions (in libavcodec!). But I don't see a second user with the same
> needs, so I don't see why these functions (which would actually need to
> be declared as deprecated from the very beginning if public) should be
> public.

Made those private in avcodec.

Thanks again,
softworkz
Andreas Rheinhardt Nov. 27, 2021, 8:19 a.m. UTC | #5
Soft Works:
> 
> 
>> -----Original Message-----
>> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Andreas
>> Rheinhardt
>> Sent: Friday, November 26, 2021 11:35 AM
>> To: ffmpeg-devel@ffmpeg.org
>> Subject: Re: [FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for
>> subtitle handling
> 
> Hi Andreas,
> 
> thanks for the detailed review. There was hardly anything to object or disagree.
> Everything I don't respond to is implemented as suggested.
> 
>> As has already been said by others, this should be split: One patch for
>> the actual new libavutil additions, one patch for the lavc decoding API,
>> one patch for the encoding API, one patch for every user that is made to
>> use the new APIs.
> 
> By keeping the AVSubtitle definition in its original location, it was 
> possible to split this as suggested on IRC.
> Thanks.
> 
>> This is not how you deprecate something: You have to add a @deprecated
>> doxygen comment as well as the attribute_deprecated to make some noise.
> 
> I didn’t want the noise before things get serious ;-)
> 
>>
>> Actually, you should deprecate the whole AVSubtitle API.
> 
> Done. Do we deprecate structs as well?
> 
> Also, there's a function avcodec_get_subtitle_rect_class().
> 
> I have no idea whether it ever had any practical use. A similar function
> above (avcodec_get_frame_class) is deprecated.
> 

It never had any practical use. AVSubtitleRect elements can just be set
directly. I always pondered deprecating it, but was somehow too lazy.

> 
>>> +/**
>>> + * Return subtitle format from a codec descriptor
>>> + *
>>> + * @param codec_descriptor codec descriptor
>>> + * @return                 the subtitle type (e.g. bitmap, text)
>>> + */
>>> +enum AVSubtitleType av_get_subtitle_format_from_codecdesc(const
>> AVCodecDescriptor *codec_descriptor);
>>
>> Do we need this function? It seems too trivial and as Anton has pointed
>> out is flawed. And anyway, this would belong into codec_desc.h.
> 
> I had replied to Anton why it's not flawed. Moved it to codec_desc and
> named it as Anton had suggested.
> 
> 
>>>  int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const
>> AVPacket *avpkt)
>>>  {
>>>      AVCodecInternal *avci = avctx->internal;
>>> @@ -590,6 +618,9 @@ int attribute_align_arg
>> avcodec_send_packet(AVCodecContext *avctx, const AVPacke
>>>      if (avpkt && !avpkt->size && avpkt->data)
>>>          return AVERROR(EINVAL);
>>>
>>> +    if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE)
>>> +        return decode_subtitle_shim(avctx, avci->buffer_frame, avpkt);
>>
>> 1. This is missing a check whether buffer_frame is empty or not; in the
>> latter case, you need to return AVERROR(EAGAIN); instead
>> decode_subtitle_shim() destroys what is already there.
>> (Said check is currently offloaded to the BSF API in the audio/video
>> codepath. This is actually a violation of the BSF API.)
> 
> Added that check.
> 
>> 2. Flushing is not really implemented well:
>> a) It is legal to call avcodec_send_packet() with avpkt == NULL to
>> indicate EOF. If I am not mistaken, decode_subtitle2_priv() will just
>> crash in this case.
> 
> Like all subtitle decoders would when supplying a NULL packet.
> What should I return when a null packet is supplied?
> EAGAIN or EOF?
> 

Treat is as an ordinary flush packet and abide by the docs of
avcodec_send_packet(): "Sending the first flush packet will return
success. Subsequent ones are unnecessary and will return AVERROR_EOF."
(avcodec_send_packet() by the way does not abide by its own
documentation: Packets with side data are never considered flush
packets. And in case a BSF outputs multiple packets for an input packet
or a decoder outputs multiple frames for an input packet, sending a
flush packet can actually return errors; furthermore, I also don't see
that subsequent flush packets always return EOF. Will look into this.)

>> b) avcodec_receive_frame() only returns what is in buffer_frame; it
>> never calls decode_subtitle2_priv() or the underlying decode function at
>> all. This basically presumes that a subtitle decoder can only return one
>> subtitle after flushing. I don't know whether this is true for our
>> decoders with the delay cap.
> 
> The only one I could see with that flag is cc_dec, which is a special case
> anyway as it doesn't work on regular streams.

You missed libzvbi.

> (even this one would crash when providing a NULL packet)

Of course they do. The subtitle API treats packets with size == 0 as
flush packets.

> 
>> c) avcodec_receive_frame() seems to never return AVERROR_EOF after
>> flushing, but always AVERROR(EAGAIN) (if the delay cap is set, the first
>> call to avcodec_receive_frame() after flushing can also result in a
>> frame being returned). Yet this makes no sense and is IMO an API
>> violation on lavc's part.
>> (While we have subtitle decoders with the delay cap, I don't know
>> whether this is actually tested by fate and whether all code actually
>> flushes subtitle decoders at all.)
> 
> I don't think so.
> 
> 
>>> diff --git a/libavfilter/vf_subtitles.c b/libavfilter/vf_subtitles.c
>>> index 377160c72b..6b54cf669b 100644
>>> --- a/libavfilter/vf_subtitles.c
>>> +++ b/libavfilter/vf_subtitles.c
>>> @@ -35,14 +35,12 @@
>>>  # include "libavformat/avformat.h"
>>>  #endif
>>>  #include "libavutil/avstring.h"
>>> -#include "libavutil/imgutils.h"
>>>  #include "libavutil/opt.h"
>>>  #include "libavutil/parseutils.h"
>>>  #include "drawutils.h"
>>>  #include "avfilter.h"
>>>  #include "internal.h"
>>>  #include "formats.h"
>>> -#include "video.h"
>>>
>>>  typedef struct AssContext {
>>>      const AVClass *class;
>>> @@ -292,6 +290,29 @@ static int attachment_is_font(AVStream * st)
>>>      return 0;
>>>  }
>>>
>>> +static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame,
>> AVPacket *pkt)
>>> +{
>>> +    int ret;
>>> +
>>> +    *got_frame = 0;
>>
>> You don't really need this: You could just return 0 if no frame was
>> returned, 1 if a frame was returned and < 0 on (actual, not EAGAIN) errors.
>>
>>> +
>>> +    if (pkt) {
>>> +        ret = avcodec_send_packet(avctx, pkt);
>>> +        // In particular, we don't expect AVERROR(EAGAIN), because we read
>> all
>>> +        // decoded frames with avcodec_receive_frame() until done.
>>
>> Where do you do this? For this one would need to call
>> avcodec_receive_frame() in a loop until it returns EAGAIN, but you don't
>> do this.
> 
> This decode() function is copied from ffmpeg.c
> 

From the documentation of decode() in ffmpeg.c: "if you got a frame, you
must call it again with pkt=NULL." Notice that transcode_subtitles() is
not called in a loop, which means that the subtitle decoding API is not
properly drained. The check would need to be modified to "if (repeating
&& pkt)". I wonder whether this would really change anything.

> 
>>> +static int get_subtitle_buffer(AVFrame *frame)
>>> +{
>>> +    // Buffers in AVFrame->buf[] are not used in case of subtitle frames.
>>> +    // To accomodate with existing code, checking ->buf[0] to determine
>>> +    // whether a frame is ref-counted or has data, we're adding a 1-byte
>>> +    // buffer here, which marks the subtitle frame to contain data.
>>
>> This is a terrible hack that might be necessary for now. But I don't see
>> anything
> 
> I'm not sure about the second half sentence, but what I can say is that I 
> had taken the time to try and replace the checks for ->buf[0] to a frame
> field, but it turned out that this change would be so big that it cannot
> be part of the subtitle patchset.
> (also talked about this with Hendrik a while ago)
> 
> 
> 
>>> +    for (i = 0; i < frame->num_subtitle_areas; i++) {
>>> +        area = frame->subtitle_areas[i];
>>> +        if (area) {
>>
>> Who sets an incorrect num_subtitle_areas?
> 
> Maybe code that hasn't gotten reviewed by you ;-)
> 
>>> +    /**
>>> +     * Display start time, relative to packet pts, in ms.
>>> +     */
>>> +    uint32_t subtitle_start_time;
>>> +
>>> +    /**
>>> +     * Display end time, relative to packet pts, in ms.
>>> +     */
>>> +    uint32_t subtitle_end_time;
>>
>> Hardcoding a millisecond timestamp precision into the API doesn't seem
>> good. As does using 32bit.
> 
> Our internal text subtitle format is defined to be ASS and those
> variables are reflecting just that.
> 

But the timing of ASS (both when in AVPackets as well when in
AVSubtitles) is external to the ASS data; so there is no need to impose
this ms constraint on the whole API (even on non-ASS subtitles).

> 
>>> +/**
>>> + * Copies subtitle data from AVFrame to AVSubtitle.
>>> + *
>>> + * @deprecated This is a compatibility method for interoperability with
>>> + * the legacy subtitle API.
>>> + */
>>> +void av_frame_get_subtitle(AVSubtitle *sub, AVFrame *frame);
>>
>> 1. Missing const.
>> 2. Wrong return type.
>> 3. I don't see a need for this compatibility at all. As explained below,
>> the only user that needs something like this is libavcodec and then it
>> should live in libavcodec.
>> The last two points apply to both av_frame_get_subtitle and
>> av_frame_put_subtitle. Notice that the only use of any of these
>> functions outside of libavcodec is remove later again (patch #8 adds an
>> av_frame_get_subtitle() in ffmpeg.c, #16 removes it again) which shows
>> that such a compatibility stuff is not needed outside of libavcodec.
> 
> Moved to avcodec as ff_ functions.
> 
> 
>>> +    uint32_t pal[256]; ///< RGBA palette for the bitmap.
>>> +
>>> +    char *text;        ///< 0-terminated plain UTF-8 text
>>> +    char *ass;         ///< 0-terminated ASS/SSA compatible event line.
>>> +
>>> +} AVSubtitleArea;
>>
>> I presume that sizeof(AVSubtitleArea) is not supposed to be part of the
>> public API/ABI (after all, you used an array of pointers to
>> AVSubtitleArea in AVFrame). This needs to be documented.
> 
> Makes sense, but I'm not sure how and where?
> 

Whether sizeof(struct foo) is public or not is typically documented
directly before the definition of said struct.
Notice that latter patches of yours put AVSubtitleArea on the stack in
lavc, so you seem to want to make its sizeof fixed. Or you just didn't
know what doing this implied.

> 
> 
>>> +/**
>>> + * Return a subtitle format corresponding to name, or AV_SUBTITLE_FMT_NONE
>>> + * on error.
>>> + *
>>> + * @param name Subtitle format name.
>>> + */
>>> +enum AVSubtitleType av_get_subtitle_fmt(const char *name);
>>
>> Do we need this? Given that there are many possible ways to name the
>> subtitles, it doesn't seem useful to me. The only way to be sure to use
>> the "authoritative" name for a subtitle format is by
>> av_get_subtitle_fmt_name(). But then one doesn't need
>> av_get_subtitle_fmt() at all.
> 
> I've tried to implement everything analog to the functionality we have
> for audio and video. This corresponds to av_get_pix_fmt().
> 
> 
>>> + */
>>> +int av_subtitle_area2area(AVSubtitleArea *dst, const AVSubtitleArea *src,
>> int copy_data);
>>
>> This documentation does not mention that dst should be "blank". But this
>> actually poses a problem: How does the user know that an AVSubtitleArea
>> is blank given that ? For AVFrames the answer is this: An AVFrame is
>> blank if it comes directly from av_frame_alloc() or av_frame_unref().
>> But there is no equivalent for these functions here.
>> Furthermore, is this API supposed to be a standalone API or is
>> AVSubtitleArea something that is always supposed to be part of an
>> AVFrame? The usage in this patchset suggests the latter:
>> av_subtitle_area2area() is only used in frame.c.
> 
> AVSubtitleArea is always supposed to be part of an AVFrame.

Then you should have a chat with the author of patch 15 that puts an
AVSubtitleArea on the stack in dvdsubenc.c.
More seriously, this is an important point.

> I had thought the copy function might be useful outside, but I didn't 
> need it in any of the filter.
> Made it private in AVFrame now.
> 
> 
>>> +/**
>>> + * Copy data from @ref AVSubtitleArea to @ref AVSubtitleRect.
>>> + *
>>> + * @param dst        The target rect (@ref AVSubtitleRect).
>>> + * @param src        The source area (@ref AVSubtitleArea).
>>> + *
>>> + * @return 0 on success.
>>> + *
>>> + * @deprecated This is a compatibility method for interoperability with
>>> + * the legacy subtitle API.
>>> + */
>>> +int av_subtitle_area2rect(AVSubtitleRect *dst, const AVSubtitleArea *src);
>>> +
>>> +/**
>>> + * Copy data from @ref AVSubtitleRect to @ref AVSubtitleArea.
>>> + *
>>> + * @param dst        The source area (@ref AVSubtitleArea).
>>> + * @param src        The target rect (@ref AVSubtitleRect).
>>> + *
>>> + * @return 0 on success.
>>> + *
>>> + * @deprecated This is a compatibility method for interoperability with
>>> + * the legacy subtitle API.
>>> + */
>>> +int av_subtitle_rect2area(AVSubtitleArea *dst, const AVSubtitleRect *src);
>>
>> Do we need compatibility with the legacy API at all? I only see two
>> usecases for this:
>>
>> 1. One has a huge project using AVSubtitles that uses AVSubtitle so
>> pervasively that one can't transition to the new API in one step. But I
>> doubt that that's a thing.
>> 2. One is forced to provide compatibility with the legacy API while also
>> providing an API based upon the new API. This is of course the situation
>> with libavcodec, which for this reason needs such compatibility
>> functions (in libavcodec!). But I don't see a second user with the same
>> needs, so I don't see why these functions (which would actually need to
>> be declared as deprecated from the very beginning if public) should be
>> public.
> 
> Made those private in avcodec.
>
Anton Khirnov Nov. 27, 2021, 8:51 a.m. UTC | #6
Quoting Soft Works (2021-11-25 18:53:24)
> @@ -759,9 +802,39 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src);
>   *              recommended to pass 0 here unless you know what you are doing.
>   *
>   * @return 0 on success, a negative AVERROR on error.
> + *
> + * @deprecated Use @ref av_frame_get_buffer2 instead and set @ref AVFrame.type
> + * before calling.
>   */
> +attribute_deprecated
>  int av_frame_get_buffer(AVFrame *frame, int align);
>  
> +/**
> + * Allocate new buffer(s) for audio, video or subtitle data.
> + *
> + * The following fields must be set on frame before calling this function:
> + * - format (pixel format for video, sample format for audio)
> + * - width and height for video
> + * - nb_samples and channel_layout for audio
> + * - type (AVMediaType)
> + *
> + * This function will fill AVFrame.data and AVFrame.buf arrays and, if
> + * necessary, allocate and fill AVFrame.extended_data and AVFrame.extended_buf.
> + * For planar formats, one buffer will be allocated for each plane.
> + *
> + * @warning: if frame already has been allocated, calling this function will
> + *           leak memory. In addition, undefined behavior can occur in certain
> + *           cases.
> + *
> + * @param frame frame in which to store the new buffers.
> + * @param align Required buffer size alignment. If equal to 0, alignment will be
> + *              chosen automatically for the current CPU. It is highly
> + *              recommended to pass 0 here unless you know what you are doing.
> + *
> + * @return 0 on success, a negative AVERROR on error.
> + */
> +int av_frame_get_buffer2(AVFrame *frame, int align);

Not sure whether this was asked already - why do we need this new
function? Seems to me you can accomplish the same thing by just adding
the type field to AVFrame. Then
- if type is AVMEDIA_TYPE_SUBTITLE -> allocate a subtitle
- if type is AVMEDIA_TYPE_{VIDEO,AUDIO} -> allocate video/audio
- otherwise detect video/audio as we do now
Anton Khirnov Nov. 27, 2021, 9 a.m. UTC | #7
Quoting Soft Works (2021-11-26 19:25:17)
> 
> 
> > -----Original Message-----
> > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Anton
> > Khirnov
> > Sent: Friday, November 26, 2021 5:21 PM
> > To: ffmpeg-devel@ffmpeg.org
> > Subject: Re: [FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for
> > subtitle handling
> > 
> > You have completely disregarded my comments on the previous patch. Stop
> > doing that, it is rude.
> 
> I value everyone's time taken to review the patches I'm submitting.
> Rest assured that I'm going over every single comment and try to address 
> each one appropriately.
> 
> I think you misunderstood because I had answered one of your comments
> immediately. Right now I'm re-working the patchset to address all suggestions
> and eventually I'll respond to all those that I haven't addressed, 
> explaining why.

Okay, but in the interest of preventing such misunderstandings in the
future you could mention these facts somewhere. E.g. right after the
commit message, and/or put [RFC]/[WIP] in the subject. Otherwise people
assume that every patch is intended to be pushed to master as-is.
Andreas Rheinhardt Nov. 27, 2021, 9:06 a.m. UTC | #8
Anton Khirnov:
> Quoting Soft Works (2021-11-25 18:53:24)
>> @@ -759,9 +802,39 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src);
>>   *              recommended to pass 0 here unless you know what you are doing.
>>   *
>>   * @return 0 on success, a negative AVERROR on error.
>> + *
>> + * @deprecated Use @ref av_frame_get_buffer2 instead and set @ref AVFrame.type
>> + * before calling.
>>   */
>> +attribute_deprecated
>>  int av_frame_get_buffer(AVFrame *frame, int align);
>>  
>> +/**
>> + * Allocate new buffer(s) for audio, video or subtitle data.
>> + *
>> + * The following fields must be set on frame before calling this function:
>> + * - format (pixel format for video, sample format for audio)
>> + * - width and height for video
>> + * - nb_samples and channel_layout for audio
>> + * - type (AVMediaType)
>> + *
>> + * This function will fill AVFrame.data and AVFrame.buf arrays and, if
>> + * necessary, allocate and fill AVFrame.extended_data and AVFrame.extended_buf.
>> + * For planar formats, one buffer will be allocated for each plane.
>> + *
>> + * @warning: if frame already has been allocated, calling this function will
>> + *           leak memory. In addition, undefined behavior can occur in certain
>> + *           cases.
>> + *
>> + * @param frame frame in which to store the new buffers.
>> + * @param align Required buffer size alignment. If equal to 0, alignment will be
>> + *              chosen automatically for the current CPU. It is highly
>> + *              recommended to pass 0 here unless you know what you are doing.
>> + *
>> + * @return 0 on success, a negative AVERROR on error.
>> + */
>> +int av_frame_get_buffer2(AVFrame *frame, int align);
> 
> Not sure whether this was asked already - why do we need this new
> function? Seems to me you can accomplish the same thing by just adding
> the type field to AVFrame. Then
> - if type is AVMEDIA_TYPE_SUBTITLE -> allocate a subtitle
> - if type is AVMEDIA_TYPE_{VIDEO,AUDIO} -> allocate video/audio
> - otherwise detect video/audio as we do now
> 

https://ffmpeg.org/pipermail/ffmpeg-devel/2021-September/285185.html

- Andreas
Soft Works Nov. 27, 2021, 9:09 a.m. UTC | #9
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Anton
> Khirnov
> Sent: Saturday, November 27, 2021 9:52 AM
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for
> subtitle handling
> 
> Quoting Soft Works (2021-11-25 18:53:24)
> > @@ -759,9 +802,39 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src);
> >   *              recommended to pass 0 here unless you know what you are
> doing.
> >   *
> >   * @return 0 on success, a negative AVERROR on error.
> > + *
> > + * @deprecated Use @ref av_frame_get_buffer2 instead and set @ref
> AVFrame.type
> > + * before calling.
> >   */
> > +attribute_deprecated
> >  int av_frame_get_buffer(AVFrame *frame, int align);
> >
> > +/**
> > + * Allocate new buffer(s) for audio, video or subtitle data.
> > + *
> > + * The following fields must be set on frame before calling this function:
> > + * - format (pixel format for video, sample format for audio)
> > + * - width and height for video
> > + * - nb_samples and channel_layout for audio
> > + * - type (AVMediaType)
> > + *
> > + * This function will fill AVFrame.data and AVFrame.buf arrays and, if
> > + * necessary, allocate and fill AVFrame.extended_data and
> AVFrame.extended_buf.
> > + * For planar formats, one buffer will be allocated for each plane.
> > + *
> > + * @warning: if frame already has been allocated, calling this function
> will
> > + *           leak memory. In addition, undefined behavior can occur in
> certain
> > + *           cases.
> > + *
> > + * @param frame frame in which to store the new buffers.
> > + * @param align Required buffer size alignment. If equal to 0, alignment
> will be
> > + *              chosen automatically for the current CPU. It is highly
> > + *              recommended to pass 0 here unless you know what you are
> doing.
> > + *
> > + * @return 0 on success, a negative AVERROR on error.
> > + */
> > +int av_frame_get_buffer2(AVFrame *frame, int align);
> 
> Not sure whether this was asked already - why do we need this new
> function? Seems to me you can accomplish the same thing by just adding
> the type field to AVFrame. Then
> - if type is AVMEDIA_TYPE_SUBTITLE -> allocate a subtitle
> - if type is AVMEDIA_TYPE_{VIDEO,AUDIO} -> allocate video/audio
> - otherwise detect video/audio as we do now`


I can't find it on mail archive, so here are some quotes from the e-mails about it:

(originally I had av_frame_get_buffer2 with a AVMediaType parameter)

softworkz:


The problem is that the current implementation is inferring the media 
type by checking width and height (=> video, otherwise audio).

Subtitle frames can have w+h or not, so it can't continue to work
this way.

The idea of av_frame_get_buffer2 is to have an explicit AVMediaType 
parameter to specify.

An alternative would be to require the (new) type property to be 
set explicitly in case of subtitles and continue the current way
of inference when it's not set.
That would work but it wouldn't be a clean API design either.

What would you suggest?


Andreas:

Add an av_frame_get_buffer2 that requires the media type of the frame to
be set (on the frame, not as an additional parameter).


Paul: 


why? AVFrame is supposed to have media type in itself.



Andreas:


Yes, and therefore the media type should be read from the AVFrame, not
from an additional parameter to av_frame_get_buffer2. As I said.


softworkz:

> Are you sure? The default after av_frame_alloc is 0 (AVMEDIA_TYPE_VIDEO),
> so it's not possible to check whether the value 0 was set intentionally
> or not..?


Andreas:

Of course av_frame_alloc (or rather get_frame_defaults) should
initialize the media type to AVMEDIA_TYPE_UNKNOWN. And adding
av_frame_get_buffer2 is done precisely because of this: Any user of it
is by definition aware of the existence of the new AVFrame mediatype
field and therefore has to make sure that it is really set in accordance
with his intentions. On the other hand, if one reused
av_frame_get_buffer and used the media type if it is set and the current
rules if it is AVMEDIA_TYPE_UNKNOWN, then there is a risk that a user
not knowing the new field gets surprised.
Anton Khirnov Nov. 27, 2021, 9:24 a.m. UTC | #10
Quoting Andreas Rheinhardt (2021-11-27 10:06:35)
> Anton Khirnov:
> > 
> > Not sure whether this was asked already - why do we need this new
> > function? Seems to me you can accomplish the same thing by just adding
> > the type field to AVFrame. Then
> > - if type is AVMEDIA_TYPE_SUBTITLE -> allocate a subtitle
> > - if type is AVMEDIA_TYPE_{VIDEO,AUDIO} -> allocate video/audio
> > - otherwise detect video/audio as we do now
> > 
> 
> https://ffmpeg.org/pipermail/ffmpeg-devel/2021-September/285185.html

So IIUC the only concern is that a user might "manually" unref the frame
without calling av_frame_unref(). I would say that this is already
illegal, because we can (and did) add new allocated objects to AVFrame
that would break things if you just kept them from one frame to the
other (especially of a different type), e.g. hw_frames_ctx.
Soft Works Nov. 27, 2021, 9:25 a.m. UTC | #11
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Anton
> Khirnov
> Sent: Saturday, November 27, 2021 10:01 AM
> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
> Subject: Re: [FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for
> subtitle handling
> 
> Quoting Soft Works (2021-11-26 19:25:17)
> >
> >
> > > -----Original Message-----
> > > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Anton
> > > Khirnov
> > > Sent: Friday, November 26, 2021 5:21 PM
> > > To: ffmpeg-devel@ffmpeg.org
> > > Subject: Re: [FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for
> > > subtitle handling
> > >
> > > You have completely disregarded my comments on the previous patch. Stop
> > > doing that, it is rude.
> >
> > I value everyone's time taken to review the patches I'm submitting.
> > Rest assured that I'm going over every single comment and try to address
> > each one appropriately.
> >
> > I think you misunderstood because I had answered one of your comments
> > immediately. Right now I'm re-working the patchset to address all
> suggestions
> > and eventually I'll respond to all those that I haven't addressed,
> > explaining why.
> 
> Okay, but in the interest of preventing such misunderstandings in the
> future you could mention these facts somewhere. E.g. right after the
> commit message, and/or put [RFC]/[WIP] in the subject. Otherwise people
> assume that every patch is intended to be pushed to master as-is.

That wasn't the point. All of the recent releases were intended to be 
pushed as-is.

I had quickly responded to one part of your comment, even before I got
through all in detail.

That's all ;-)

I apologize when it has created a bad impression for you. It wasn't intended.

sw
Soft Works Nov. 27, 2021, 9:37 a.m. UTC | #12
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Anton
> Khirnov
> Sent: Saturday, November 27, 2021 10:25 AM
> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
> Subject: Re: [FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for
> subtitle handling
> 
> Quoting Andreas Rheinhardt (2021-11-27 10:06:35)
> > Anton Khirnov:
> > >
> > > Not sure whether this was asked already - why do we need this new
> > > function? Seems to me you can accomplish the same thing by just adding
> > > the type field to AVFrame. Then
> > > - if type is AVMEDIA_TYPE_SUBTITLE -> allocate a subtitle
> > > - if type is AVMEDIA_TYPE_{VIDEO,AUDIO} -> allocate video/audio
> > > - otherwise detect video/audio as we do now
> > >
> >
> > https://ffmpeg.org/pipermail/ffmpeg-devel/2021-September/285185.html
> 
> So IIUC the only concern is that a user might "manually" unref the frame
> without calling av_frame_unref(). I would say that this is already
> illegal, because we can (and did) add new allocated objects to AVFrame
> that would break things if you just kept them from one frame to the
> other (especially of a different type), e.g. hw_frames_ctx.
> 

The earlier conversation (from v3 of the patchset) explains it better.
This is what I quoted. Here's the link to it on patchwork:

https://patchwork.ffmpeg.org/project/ffmpeg/patch/MN2PR04MB5981A454384CFC369522E875BAD79@MN2PR04MB5981.namprd04.prod.outlook.com/

sw
Andreas Rheinhardt Nov. 27, 2021, 9:42 a.m. UTC | #13
Anton Khirnov:
> Quoting Andreas Rheinhardt (2021-11-27 10:06:35)
>> Anton Khirnov:
>>>
>>> Not sure whether this was asked already - why do we need this new
>>> function? Seems to me you can accomplish the same thing by just adding
>>> the type field to AVFrame. Then
>>> - if type is AVMEDIA_TYPE_SUBTITLE -> allocate a subtitle
>>> - if type is AVMEDIA_TYPE_{VIDEO,AUDIO} -> allocate video/audio
>>> - otherwise detect video/audio as we do now
>>>
>>
>> https://ffmpeg.org/pipermail/ffmpeg-devel/2021-September/285185.html
> 
> So IIUC the only concern is that a user might "manually" unref the frame
> without calling av_frame_unref(). I would say that this is already
> illegal, because we can (and did) add new allocated objects to AVFrame
> that would break things if you just kept them from one frame to the
> other (especially of a different type), e.g. hw_frames_ctx.
> 

And how is a user then supposed to know which modifications of an
AVFrame are legal or not? I think the onus is on us not to break user
apps and not on the user to behave in such a way that it is compatible
with all possible future additions to AVFrames (which would be
impossible anyway).

- Andreas
Anton Khirnov Nov. 27, 2021, 10:33 a.m. UTC | #14
Quoting Andreas Rheinhardt (2021-11-27 10:42:31)
> Anton Khirnov:
> > Quoting Andreas Rheinhardt (2021-11-27 10:06:35)
> >> Anton Khirnov:
> >>>
> >>> Not sure whether this was asked already - why do we need this new
> >>> function? Seems to me you can accomplish the same thing by just adding
> >>> the type field to AVFrame. Then
> >>> - if type is AVMEDIA_TYPE_SUBTITLE -> allocate a subtitle
> >>> - if type is AVMEDIA_TYPE_{VIDEO,AUDIO} -> allocate video/audio
> >>> - otherwise detect video/audio as we do now
> >>>
> >>
> >> https://ffmpeg.org/pipermail/ffmpeg-devel/2021-September/285185.html
> > 
> > So IIUC the only concern is that a user might "manually" unref the frame
> > without calling av_frame_unref(). I would say that this is already
> > illegal, because we can (and did) add new allocated objects to AVFrame
> > that would break things if you just kept them from one frame to the
> > other (especially of a different type), e.g. hw_frames_ctx.
> > 
> 
> And how is a user then supposed to know which modifications of an
> AVFrame are legal or not? I think the onus is on us not to break user
> apps and not on the user to behave in such a way that it is compatible
> with all possible future additions to AVFrames (which would be
> impossible anyway).

We explicitly allow adding new fields to AVFrame, so valid user code has
to consider the possibility. So any "substracting" operations on frames
it does not fully control (e.g. those it received from decoders or
filters) are potentially broken.

I believe we have to assume this, otherwise we cannot extend AVFrame or
other structs like we've been doing so far.
Soft Works Nov. 27, 2021, 10:41 a.m. UTC | #15
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Anton
> Khirnov
> Sent: Saturday, November 27, 2021 11:33 AM
> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
> Subject: Re: [FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for
> subtitle handling
> 
> Quoting Andreas Rheinhardt (2021-11-27 10:42:31)
> > Anton Khirnov:
> > > Quoting Andreas Rheinhardt (2021-11-27 10:06:35)
> > >> Anton Khirnov:
> > >>>
> > >>> Not sure whether this was asked already - why do we need this new
> > >>> function? Seems to me you can accomplish the same thing by just adding
> > >>> the type field to AVFrame. Then
> > >>> - if type is AVMEDIA_TYPE_SUBTITLE -> allocate a subtitle
> > >>> - if type is AVMEDIA_TYPE_{VIDEO,AUDIO} -> allocate video/audio
> > >>> - otherwise detect video/audio as we do now
> > >>>
> > >>
> > >> https://ffmpeg.org/pipermail/ffmpeg-devel/2021-September/285185.html
> > >
> > > So IIUC the only concern is that a user might "manually" unref the frame
> > > without calling av_frame_unref(). I would say that this is already
> > > illegal, because we can (and did) add new allocated objects to AVFrame
> > > that would break things if you just kept them from one frame to the
> > > other (especially of a different type), e.g. hw_frames_ctx.
> > >
> >
> > And how is a user then supposed to know which modifications of an
> > AVFrame are legal or not? I think the onus is on us not to break user
> > apps and not on the user to behave in such a way that it is compatible
> > with all possible future additions to AVFrames (which would be
> > impossible anyway).
> 
> We explicitly allow adding new fields to AVFrame, so valid user code has
> to consider the possibility. So any "substracting" operations on frames
> it does not fully control (e.g. those it received from decoders or
> filters) are potentially broken.
> 
> I believe we have to assume this, otherwise we cannot extend AVFrame or
> other structs like we've been doing so far.

Back to the subtitles patch: 

I have added the AVMediaType field to the start of the struct. Once, because 
it seems too elementary to come at position sixty-something and also because
I was assuming it would cause a break anway.

Leaving the AVSubtitle and AVSubtitleRect structs inside libavcodec has definitely
reduced the list of breakage candidates.

Next candidate to consider regarding breakage would probably be the promotion 
of those ass functions from avcodec to avutil..

sw
diff mbox series

Patch

diff --git a/libavcodec/avcodec.c b/libavcodec/avcodec.c
index c00a9b2af8..13e3711b9c 100644
--- a/libavcodec/avcodec.c
+++ b/libavcodec/avcodec.c
@@ -422,25 +422,6 @@  void avcodec_flush_buffers(AVCodecContext *avctx)
         av_bsf_flush(avci->bsf);
 }
 
-void avsubtitle_free(AVSubtitle *sub)
-{
-    int i;
-
-    for (i = 0; i < sub->num_rects; i++) {
-        av_freep(&sub->rects[i]->data[0]);
-        av_freep(&sub->rects[i]->data[1]);
-        av_freep(&sub->rects[i]->data[2]);
-        av_freep(&sub->rects[i]->data[3]);
-        av_freep(&sub->rects[i]->text);
-        av_freep(&sub->rects[i]->ass);
-        av_freep(&sub->rects[i]);
-    }
-
-    av_freep(&sub->rects);
-
-    memset(sub, 0, sizeof(*sub));
-}
-
 av_cold int avcodec_close(AVCodecContext *avctx)
 {
     int i;
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 7ee8bc2b7c..0c5819b116 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -35,6 +35,7 @@ 
 #include "libavutil/frame.h"
 #include "libavutil/log.h"
 #include "libavutil/pixfmt.h"
+#include "libavutil/subfmt.h"
 #include "libavutil/rational.h"
 
 #include "codec.h"
@@ -1674,7 +1675,7 @@  typedef struct AVCodecContext {
 
     /**
      * Header containing style information for text subtitles.
-     * For SUBTITLE_ASS subtitle type, it should contain the whole ASS
+     * For AV_SUBTITLE_FMT_ASS subtitle type, it should contain the whole ASS
      * [Script Info] and [V4+ Styles] section, plus the [Events] line and
      * the Format line following. It shouldn't include any Dialogue line.
      * - encoding: Set/allocated/freed by user (before avcodec_open2())
@@ -2238,63 +2239,8 @@  typedef struct AVHWAccel {
  * @}
  */
 
-enum AVSubtitleType {
-    SUBTITLE_NONE,
-
-    SUBTITLE_BITMAP,                ///< A bitmap, pict will be set
-
-    /**
-     * Plain text, the text field must be set by the decoder and is
-     * authoritative. ass and pict fields may contain approximations.
-     */
-    SUBTITLE_TEXT,
-
-    /**
-     * Formatted text, the ass field must be set by the decoder and is
-     * authoritative. pict and text fields may contain approximations.
-     */
-    SUBTITLE_ASS,
-};
-
 #define AV_SUBTITLE_FLAG_FORCED 0x00000001
 
-typedef struct AVSubtitleRect {
-    int x;         ///< top left corner  of pict, undefined when pict is not set
-    int y;         ///< top left corner  of pict, undefined when pict is not set
-    int w;         ///< width            of pict, undefined when pict is not set
-    int h;         ///< height           of pict, undefined when pict is not set
-    int nb_colors; ///< number of colors in pict, undefined when pict is not set
-
-    /**
-     * data+linesize for the bitmap of this subtitle.
-     * Can be set for text/ass as well once they are rendered.
-     */
-    uint8_t *data[4];
-    int linesize[4];
-
-    enum AVSubtitleType type;
-
-    char *text;                     ///< 0 terminated plain UTF-8 text
-
-    /**
-     * 0 terminated ASS/SSA compatible event line.
-     * The presentation of this is unaffected by the other values in this
-     * struct.
-     */
-    char *ass;
-
-    int flags;
-} AVSubtitleRect;
-
-typedef struct AVSubtitle {
-    uint16_t format; /* 0 = graphics */
-    uint32_t start_display_time; /* relative to packet pts, in ms */
-    uint32_t end_display_time; /* relative to packet pts, in ms */
-    unsigned num_rects;
-    AVSubtitleRect **rects;
-    int64_t pts;    ///< Same as packet pts, in AV_TIME_BASE
-} AVSubtitle;
-
 /**
  * Return the LIBAVCODEC_VERSION_INT constant.
  */
@@ -2430,13 +2376,6 @@  int avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **op
  */
 int avcodec_close(AVCodecContext *avctx);
 
-/**
- * Free all allocated data in the given subtitle struct.
- *
- * @param sub AVSubtitle to free.
- */
-void avsubtitle_free(AVSubtitle *sub);
-
 /**
  * @}
  */
@@ -2527,6 +2466,8 @@  enum AVChromaLocation avcodec_chroma_pos_to_enum(int xpos, int ypos);
  *                 must be freed with avsubtitle_free if *got_sub_ptr is set.
  * @param[in,out] got_sub_ptr Zero if no subtitle could be decompressed, otherwise, it is nonzero.
  * @param[in] avpkt The input AVPacket containing the input buffer.
+ *
+ * @deprecated Use the new decode API (avcodec_send_packet, avcodec_receive_frame) instead.
  */
 int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
                             int *got_sub_ptr,
@@ -3125,6 +3066,14 @@  void avcodec_flush_buffers(AVCodecContext *avctx);
  */
 int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes);
 
+/**
+ * Return subtitle format from a codec descriptor
+ *
+ * @param codec_descriptor codec descriptor
+ * @return                 the subtitle type (e.g. bitmap, text)
+ */
+enum AVSubtitleType av_get_subtitle_format_from_codecdesc(const AVCodecDescriptor *codec_descriptor);
+
 /* memory */
 
 /**
diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index c44724d150..788b043266 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -576,6 +576,34 @@  static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
     return ret;
 }
 
+static int decode_subtitle2_priv(AVCodecContext *avctx, AVSubtitle *sub,
+                                 int *got_sub_ptr, const AVPacket *avpkt);
+
+static int decode_subtitle_shim(AVCodecContext *avctx, AVFrame *frame, const AVPacket *avpkt)
+{
+    int ret, got_sub_ptr = 0;
+    AVSubtitle subtitle = { 0 };
+
+    av_frame_unref(frame);
+
+    ret = decode_subtitle2_priv(avctx, &subtitle, &got_sub_ptr, avpkt);
+
+    if (ret >= 0 && got_sub_ptr) {
+        frame->type = AVMEDIA_TYPE_SUBTITLE;
+        frame->format = subtitle.format;
+        ret = av_frame_get_buffer2(frame, 0);
+        if (ret < 0)
+            return ret;
+        av_frame_put_subtitle(frame, &subtitle);
+
+        frame->pkt_dts = avpkt->dts;
+    }
+
+    avsubtitle_free(&subtitle);
+
+    return ret;
+}
+
 int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt)
 {
     AVCodecInternal *avci = avctx->internal;
@@ -590,6 +618,9 @@  int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacke
     if (avpkt && !avpkt->size && avpkt->data)
         return AVERROR(EINVAL);
 
+    if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE)
+        return decode_subtitle_shim(avctx, avci->buffer_frame, avpkt);
+
     av_packet_unref(avci->buffer_pkt);
     if (avpkt && (avpkt->data || avpkt->side_data_elems)) {
         ret = av_packet_ref(avci->buffer_pkt, avpkt);
@@ -651,7 +682,9 @@  int attribute_align_arg avcodec_receive_frame(AVCodecContext *avctx, AVFrame *fr
 
     if (avci->buffer_frame->buf[0]) {
         av_frame_move_ref(frame, avci->buffer_frame);
-    } else {
+    } else if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE)
+        return AVERROR(EAGAIN);
+    else {
         ret = decode_receive_frame_internal(avctx, frame);
         if (ret < 0)
             return ret;
@@ -723,7 +756,7 @@  static void get_subtitle_defaults(AVSubtitle *sub)
 
 #define UTF8_MAX_BYTES 4 /* 5 and 6 bytes sequences should not be used */
 static int recode_subtitle(AVCodecContext *avctx, AVPacket **outpkt,
-                           AVPacket *inpkt, AVPacket *buf_pkt)
+                           const AVPacket *inpkt, AVPacket *buf_pkt)
 {
 #if CONFIG_ICONV
     iconv_t cd = (iconv_t)-1;
@@ -802,9 +835,8 @@  static int utf8_check(const uint8_t *str)
     return 1;
 }
 
-int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
-                             int *got_sub_ptr,
-                             AVPacket *avpkt)
+static int decode_subtitle2_priv(AVCodecContext *avctx, AVSubtitle *sub,
+                             int *got_sub_ptr, const AVPacket *avpkt)
 {
     int ret = 0;
 
@@ -844,10 +876,7 @@  int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
                                                  avctx->pkt_timebase, ms);
         }
 
-        if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB)
-            sub->format = 0;
-        else if (avctx->codec_descriptor->props & AV_CODEC_PROP_TEXT_SUB)
-            sub->format = 1;
+        sub->format = av_get_subtitle_format_from_codecdesc(avctx->codec_descriptor);
 
         for (unsigned i = 0; i < sub->num_rects; i++) {
             if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_IGNORE &&
@@ -871,6 +900,12 @@  int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
     return ret;
 }
 
+int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
+                             int *got_sub_ptr, AVPacket *avpkt)
+{
+    return decode_subtitle2_priv(avctx, sub, got_sub_ptr, avpkt);
+}
+
 enum AVPixelFormat avcodec_default_get_format(struct AVCodecContext *avctx,
                                               const enum AVPixelFormat *fmt)
 {
diff --git a/libavcodec/libzvbi-teletextdec.c b/libavcodec/libzvbi-teletextdec.c
index 1073d6a0bd..535c82d7a5 100644
--- a/libavcodec/libzvbi-teletextdec.c
+++ b/libavcodec/libzvbi-teletextdec.c
@@ -27,6 +27,7 @@ 
 #include "libavutil/intreadwrite.h"
 #include "libavutil/log.h"
 #include "libavutil/common.h"
+#include "libavutil/subfmt.h"
 
 #include <libzvbi.h>
 
diff --git a/libavcodec/pgssubdec.c b/libavcodec/pgssubdec.c
index 388639a110..5709058b1c 100644
--- a/libavcodec/pgssubdec.c
+++ b/libavcodec/pgssubdec.c
@@ -32,6 +32,7 @@ 
 #include "libavutil/colorspace.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
+#include "libavutil/subfmt.h"
 
 #define RGBA(r,g,b,a) (((unsigned)(a) << 24) | ((r) << 16) | ((g) << 8) | (b))
 #define MAX_EPOCH_PALETTES 8   // Max 8 allowed per PGS epoch
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index a91a54b0dc..efe14e5a43 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -824,6 +824,17 @@  int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes)
     return FFMAX(0, duration);
 }
 
+enum AVSubtitleType av_get_subtitle_format_from_codecdesc(const AVCodecDescriptor *codec_descriptor)
+{
+    if(codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB)
+        return AV_SUBTITLE_FMT_BITMAP;
+
+    if(codec_descriptor->props & AV_CODEC_PROP_TEXT_SUB)
+        return AV_SUBTITLE_FMT_ASS;
+
+    return AV_SUBTITLE_FMT_UNKNOWN;
+}
+
 int av_get_audio_frame_duration2(AVCodecParameters *par, int frame_bytes)
 {
     int duration = get_audio_frame_duration(par->codec_id, par->sample_rate,
diff --git a/libavfilter/vf_subtitles.c b/libavfilter/vf_subtitles.c
index 377160c72b..6b54cf669b 100644
--- a/libavfilter/vf_subtitles.c
+++ b/libavfilter/vf_subtitles.c
@@ -35,14 +35,12 @@ 
 # include "libavformat/avformat.h"
 #endif
 #include "libavutil/avstring.h"
-#include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
 #include "drawutils.h"
 #include "avfilter.h"
 #include "internal.h"
 #include "formats.h"
-#include "video.h"
 
 typedef struct AssContext {
     const AVClass *class;
@@ -292,6 +290,29 @@  static int attachment_is_font(AVStream * st)
     return 0;
 }
 
+static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame, AVPacket *pkt)
+{
+    int ret;
+
+    *got_frame = 0;
+
+    if (pkt) {
+        ret = avcodec_send_packet(avctx, pkt);
+        // In particular, we don't expect AVERROR(EAGAIN), because we read all
+        // decoded frames with avcodec_receive_frame() until done.
+        if (ret < 0 && ret != AVERROR_EOF)
+            return ret;
+    }
+
+    ret = avcodec_receive_frame(avctx, frame);
+    if (ret < 0 && ret != AVERROR(EAGAIN))
+        return ret;
+    if (ret >= 0)
+        *got_frame = 1;
+
+    return 0;
+}
+
 AVFILTER_DEFINE_CLASS(subtitles);
 
 static av_cold int init_subtitles(AVFilterContext *ctx)
@@ -306,6 +327,7 @@  static av_cold int init_subtitles(AVFilterContext *ctx)
     AVStream *st;
     AVPacket pkt;
     AssContext *ass = ctx->priv;
+    enum AVSubtitleType subtitle_format;
 
     /* Init libass */
     ret = init(ctx);
@@ -386,13 +408,17 @@  static av_cold int init_subtitles(AVFilterContext *ctx)
         ret = AVERROR_DECODER_NOT_FOUND;
         goto end;
     }
+
     dec_desc = avcodec_descriptor_get(st->codecpar->codec_id);
-    if (dec_desc && !(dec_desc->props & AV_CODEC_PROP_TEXT_SUB)) {
+    subtitle_format = av_get_subtitle_format_from_codecdesc(dec_desc);
+
+    if (subtitle_format != AV_SUBTITLE_FMT_ASS) {
         av_log(ctx, AV_LOG_ERROR,
-               "Only text based subtitles are currently supported\n");
-        ret = AVERROR_PATCHWELCOME;
+               "Only text based subtitles are supported by this filter\n");
+        ret = AVERROR_INVALIDDATA;
         goto end;
     }
+
     if (ass->charenc)
         av_dict_set(&codec_opts, "sub_charenc", ass->charenc, 0);
 
@@ -448,18 +474,18 @@  static av_cold int init_subtitles(AVFilterContext *ctx)
                                   dec_ctx->subtitle_header_size);
     while (av_read_frame(fmt, &pkt) >= 0) {
         int i, got_subtitle;
-        AVSubtitle sub = {0};
+        AVFrame *sub = av_frame_alloc();
 
         if (pkt.stream_index == sid) {
-            ret = avcodec_decode_subtitle2(dec_ctx, &sub, &got_subtitle, &pkt);
+            ret = decode(dec_ctx, sub, &got_subtitle, &pkt);
             if (ret < 0) {
                 av_log(ctx, AV_LOG_WARNING, "Error decoding: %s (ignored)\n",
                        av_err2str(ret));
             } else if (got_subtitle) {
-                const int64_t start_time = av_rescale_q(sub.pts, AV_TIME_BASE_Q, av_make_q(1, 1000));
-                const int64_t duration   = sub.end_display_time;
-                for (i = 0; i < sub.num_rects; i++) {
-                    char *ass_line = sub.rects[i]->ass;
+                const int64_t start_time = av_rescale_q(sub->subtitle_pts, AV_TIME_BASE_Q, av_make_q(1, 1000));
+                const int64_t duration   = sub->subtitle_end_time;
+                for (i = 0; i < sub->num_subtitle_areas; i++) {
+                    char *ass_line = sub->subtitle_areas[i]->ass;
                     if (!ass_line)
                         break;
                     ass_process_chunk(ass->track, ass_line, strlen(ass_line),
@@ -468,7 +494,7 @@  static av_cold int init_subtitles(AVFilterContext *ctx)
             }
         }
         av_packet_unref(&pkt);
-        avsubtitle_free(&sub);
+        av_frame_free(&sub);
     }
 
 end:
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 7840e8717c..d9b54c7725 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -32,6 +32,7 @@ 
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/pixfmt.h"
+#include "libavutil/subfmt.h"
 #include "libavutil/thread.h"
 #include "libavutil/time.h"
 
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 529046dbc8..7e79936876 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -74,6 +74,7 @@  HEADERS = adler32.h                                                     \
           sha512.h                                                      \
           spherical.h                                                   \
           stereo3d.h                                                    \
+          subfmt.h                                                      \
           threadmessage.h                                               \
           time.h                                                        \
           timecode.h                                                    \
@@ -159,6 +160,7 @@  OBJS = adler32.o                                                        \
        slicethread.o                                                    \
        spherical.o                                                      \
        stereo3d.o                                                       \
+       subfmt.o                                                         \
        threadmessage.o                                                  \
        time.o                                                           \
        timecode.o                                                       \
diff --git a/libavutil/frame.c b/libavutil/frame.c
index d4d3ad6988..dfe9c269a6 100644
--- a/libavutil/frame.c
+++ b/libavutil/frame.c
@@ -26,6 +26,7 @@ 
 #include "imgutils.h"
 #include "mem.h"
 #include "samplefmt.h"
+#include "subfmt.h"
 #include "hwcontext.h"
 
 #define CHECK_CHANNELS_CONSISTENCY(frame) \
@@ -50,6 +51,9 @@  const char *av_get_colorspace_name(enum AVColorSpace val)
     return name[val];
 }
 #endif
+
+static int frame_copy_subtitles(AVFrame *dst, const AVFrame *src, int copy_data);
+
 static void get_frame_defaults(AVFrame *frame)
 {
     if (frame->extended_data != frame->data)
@@ -72,7 +76,12 @@  static void get_frame_defaults(AVFrame *frame)
     frame->colorspace          = AVCOL_SPC_UNSPECIFIED;
     frame->color_range         = AVCOL_RANGE_UNSPECIFIED;
     frame->chroma_location     = AVCHROMA_LOC_UNSPECIFIED;
-    frame->flags               = 0;
+    frame->subtitle_start_time = 0;
+    frame->subtitle_end_time   = 0;
+    frame->num_subtitle_areas  = 0;
+    frame->subtitle_areas      = NULL;
+    frame->subtitle_pts        = 0;
+    frame->subtitle_header     = NULL;
 }
 
 static void free_side_data(AVFrameSideData **ptr_sd)
@@ -243,23 +252,55 @@  static int get_audio_buffer(AVFrame *frame, int align)
 
 }
 
+static int get_subtitle_buffer(AVFrame *frame)
+{
+    // Buffers in AVFrame->buf[] are not used in case of subtitle frames.
+    // To accomodate with existing code, checking ->buf[0] to determine
+    // whether a frame is ref-counted or has data, we're adding a 1-byte
+    // buffer here, which marks the subtitle frame to contain data.
+    frame->buf[0] = av_buffer_alloc(1);
+    if (!frame->buf[0]) {
+        av_frame_unref(frame);
+        return AVERROR(ENOMEM);
+    }
+
+    frame->extended_data = frame->data;
+
+    return 0;
+}
+
 int av_frame_get_buffer(AVFrame *frame, int align)
+{
+    if (frame->width > 0 && frame->height > 0)
+        frame->type = AVMEDIA_TYPE_VIDEO;
+    else if (frame->nb_samples > 0 && (frame->channel_layout || frame->channels > 0))
+        frame->type = AVMEDIA_TYPE_AUDIO;
+
+    return av_frame_get_buffer2(frame, align);
+}
+
+int av_frame_get_buffer2(AVFrame *frame, int align)
 {
     if (frame->format < 0)
         return AVERROR(EINVAL);
 
-    if (frame->width > 0 && frame->height > 0)
+    switch(frame->type) {
+    case AVMEDIA_TYPE_VIDEO:
         return get_video_buffer(frame, align);
-    else if (frame->nb_samples > 0 && (frame->channel_layout || frame->channels > 0))
+    case AVMEDIA_TYPE_AUDIO:
         return get_audio_buffer(frame, align);
-
-    return AVERROR(EINVAL);
+    case AVMEDIA_TYPE_SUBTITLE:
+        return get_subtitle_buffer(frame);
+    default:
+        return AVERROR(EINVAL);
+    }
 }
 
 static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy)
 {
     int ret, i;
 
+    dst->type                   = src->type;
     dst->key_frame              = src->key_frame;
     dst->pict_type              = src->pict_type;
     dst->sample_aspect_ratio    = src->sample_aspect_ratio;
@@ -290,6 +331,12 @@  static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy)
     dst->colorspace             = src->colorspace;
     dst->color_range            = src->color_range;
     dst->chroma_location        = src->chroma_location;
+    dst->subtitle_start_time    = src->subtitle_start_time;
+    dst->subtitle_end_time      = src->subtitle_end_time;
+    dst->subtitle_pts           = src->subtitle_pts;
+
+    if ((ret = av_buffer_replace(&dst->subtitle_header, src->subtitle_header)) < 0)
+        return ret;
 
     av_dict_copy(&dst->metadata, src->metadata, 0);
 
@@ -331,6 +378,7 @@  int av_frame_ref(AVFrame *dst, const AVFrame *src)
     av_assert1(dst->width == 0 && dst->height == 0);
     av_assert1(dst->channels == 0);
 
+    dst->type           = src->type;
     dst->format         = src->format;
     dst->width          = src->width;
     dst->height         = src->height;
@@ -344,7 +392,7 @@  int av_frame_ref(AVFrame *dst, const AVFrame *src)
 
     /* duplicate the frame data if it's not refcounted */
     if (!src->buf[0]) {
-        ret = av_frame_get_buffer(dst, 0);
+        ret = av_frame_get_buffer2(dst, 0);
         if (ret < 0)
             goto fail;
 
@@ -366,6 +414,10 @@  int av_frame_ref(AVFrame *dst, const AVFrame *src)
         }
     }
 
+    /* copy subtitle areas */
+    if (src->type == AVMEDIA_TYPE_SUBTITLE)
+        frame_copy_subtitles(dst, src, 0);
+
     if (src->extended_buf) {
         dst->extended_buf = av_calloc(src->nb_extended_buf,
                                       sizeof(*dst->extended_buf));
@@ -436,7 +488,7 @@  AVFrame *av_frame_clone(const AVFrame *src)
 
 void av_frame_unref(AVFrame *frame)
 {
-    int i;
+    unsigned i, n;
 
     if (!frame)
         return;
@@ -455,6 +507,21 @@  void av_frame_unref(AVFrame *frame)
     av_buffer_unref(&frame->opaque_ref);
     av_buffer_unref(&frame->private_ref);
 
+    av_buffer_unref(&frame->subtitle_header);
+
+    for (i = 0; i < frame->num_subtitle_areas; i++) {
+        AVSubtitleArea *area = frame->subtitle_areas[i];
+
+        for (n = 0; n < FF_ARRAY_ELEMS(area->buf); n++)
+            av_buffer_unref(&area->buf[n]);
+
+        av_freep(&area->text);
+        av_freep(&area->ass);
+        av_free(area);
+    }
+
+    av_freep(&frame->subtitle_areas);
+
     get_frame_defaults(frame);
 }
 
@@ -472,7 +539,8 @@  void av_frame_move_ref(AVFrame *dst, AVFrame *src)
 
 int av_frame_is_writable(AVFrame *frame)
 {
-    int i, ret = 1;
+    AVSubtitleArea *area;
+    unsigned i, n, ret = 1;
 
     /* assume non-refcounted frames are not writable */
     if (!frame->buf[0])
@@ -484,6 +552,15 @@  int av_frame_is_writable(AVFrame *frame)
     for (i = 0; i < frame->nb_extended_buf; i++)
         ret &= !!av_buffer_is_writable(frame->extended_buf[i]);
 
+    for (i = 0; i < frame->num_subtitle_areas; i++) {
+        area = frame->subtitle_areas[i];
+        if (area) {
+            for (n = 0; n < FF_ARRAY_ELEMS(area->buf); n++)
+                if (area->buf[n])
+                    ret &= !!av_buffer_is_writable(area->buf[n]);
+            }
+    }
+
     return ret;
 }
 
@@ -499,6 +576,7 @@  int av_frame_make_writable(AVFrame *frame)
         return 0;
 
     memset(&tmp, 0, sizeof(tmp));
+    tmp.type           = frame->type;
     tmp.format         = frame->format;
     tmp.width          = frame->width;
     tmp.height         = frame->height;
@@ -509,7 +587,7 @@  int av_frame_make_writable(AVFrame *frame)
     if (frame->hw_frames_ctx)
         ret = av_hwframe_get_buffer(frame->hw_frames_ctx, &tmp, 0);
     else
-        ret = av_frame_get_buffer(&tmp, 0);
+        ret = av_frame_get_buffer2(&tmp, 0);
     if (ret < 0)
         return ret;
 
@@ -544,14 +622,22 @@  AVBufferRef *av_frame_get_plane_buffer(AVFrame *frame, int plane)
     uint8_t *data;
     int planes, i;
 
-    if (frame->nb_samples) {
-        int channels = frame->channels;
-        if (!channels)
-            return NULL;
-        CHECK_CHANNELS_CONSISTENCY(frame);
-        planes = av_sample_fmt_is_planar(frame->format) ? channels : 1;
-    } else
+    switch(frame->type) {
+    case AVMEDIA_TYPE_VIDEO:
         planes = 4;
+        break;
+    case AVMEDIA_TYPE_AUDIO:
+        {
+            int channels = frame->channels;
+            if (!channels)
+                return NULL;
+            CHECK_CHANNELS_CONSISTENCY(frame);
+            planes = av_sample_fmt_is_planar(frame->format) ? channels : 1;
+            break;
+        }
+    default:
+        return NULL;
+    }
 
     if (plane < 0 || plane >= planes || !frame->extended_data[plane])
         return NULL;
@@ -675,17 +761,39 @@  static int frame_copy_audio(AVFrame *dst, const AVFrame *src)
     return 0;
 }
 
+static int frame_copy_subtitles(AVFrame *dst, const AVFrame *src, int copy_data)
+{
+    dst->format = src->format;
+
+    dst->num_subtitle_areas = src->num_subtitle_areas;
+
+    if (src->num_subtitle_areas) {
+        dst->subtitle_areas = av_malloc_array(src->num_subtitle_areas, sizeof(AVSubtitleArea *));
+
+        for (unsigned i = 0; i < src->num_subtitle_areas; i++) {
+            dst->subtitle_areas[i] = av_mallocz(sizeof(AVSubtitleArea));
+            av_subtitle_area2area(dst->subtitle_areas[i], src->subtitle_areas[i], copy_data);
+        }
+    }
+
+    return 0;
+}
+
 int av_frame_copy(AVFrame *dst, const AVFrame *src)
 {
     if (dst->format != src->format || dst->format < 0)
         return AVERROR(EINVAL);
 
-    if (dst->width > 0 && dst->height > 0)
+    switch(dst->type) {
+    case AVMEDIA_TYPE_VIDEO:
         return frame_copy_video(dst, src);
-    else if (dst->nb_samples > 0 && dst->channels > 0)
+    case AVMEDIA_TYPE_AUDIO:
         return frame_copy_audio(dst, src);
-
-    return AVERROR(EINVAL);
+    case AVMEDIA_TYPE_SUBTITLE:
+        return frame_copy_subtitles(dst, src, 1);
+    default:
+        return AVERROR(EINVAL);
+    }
 }
 
 void av_frame_remove_side_data(AVFrame *frame, enum AVFrameSideDataType type)
@@ -832,3 +940,49 @@  int av_frame_apply_cropping(AVFrame *frame, int flags)
 
     return 0;
 }
+
+/**
+ * Copies subtitle data from AVSubtitle (deprecated) to AVFrame
+ *
+ * @note This is a compatibility method for conversion to the legacy API
+ */
+void av_frame_put_subtitle(AVFrame *frame, const AVSubtitle *sub)
+{
+    frame->format              = sub->format;
+    frame->subtitle_start_time = sub->start_display_time;
+    frame->subtitle_end_time   = sub->end_display_time;
+    frame->subtitle_pts        = sub->pts;
+    frame->num_subtitle_areas  = sub->num_rects;
+
+    if (sub->num_rects) {
+        frame->subtitle_areas = av_malloc_array(sub->num_rects, sizeof(AVSubtitleArea *));
+
+        for (unsigned i = 0; i < sub->num_rects; i++) {
+            frame->subtitle_areas[i] = av_mallocz(sizeof(AVSubtitleArea));
+            av_subtitle_rect2area(frame->subtitle_areas[i], sub->rects[i]);
+        }
+    }
+}
+
+/**
+ * Copies subtitle data from AVFrame to AVSubtitle (deprecated)
+ *
+ * @note This is a compatibility method for conversion to the legacy API
+ */
+void av_frame_get_subtitle(AVSubtitle *sub, AVFrame *frame)
+{
+    sub->start_display_time = frame->subtitle_start_time;
+    sub->end_display_time   = frame->subtitle_end_time;
+    sub->pts                = frame->subtitle_pts;
+    sub->num_rects          = frame->num_subtitle_areas;
+
+    if (frame->num_subtitle_areas) {
+        sub->rects = av_malloc_array(frame->num_subtitle_areas, sizeof(AVSubtitle *));
+
+        for (unsigned i = 0; i < frame->num_subtitle_areas; i++) {
+            sub->rects[i] = av_mallocz(sizeof(AVSubtitleRect));
+            av_subtitle_area2rect(sub->rects[i], frame->subtitle_areas[i]);
+        }
+    }
+}
+
diff --git a/libavutil/frame.h b/libavutil/frame.h
index 753234792e..742f4ba07e 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -34,6 +34,7 @@ 
 #include "rational.h"
 #include "samplefmt.h"
 #include "pixfmt.h"
+#include "subfmt.h"
 #include "version.h"
 
 
@@ -278,7 +279,7 @@  typedef struct AVRegionOfInterest {
 } AVRegionOfInterest;
 
 /**
- * This structure describes decoded (raw) audio or video data.
+ * This structure describes decoded (raw) audio, video or subtitle data.
  *
  * AVFrame must be allocated using av_frame_alloc(). Note that this only
  * allocates the AVFrame itself, the buffers for the data must be managed
@@ -309,6 +310,13 @@  typedef struct AVRegionOfInterest {
  */
 typedef struct AVFrame {
 #define AV_NUM_DATA_POINTERS 8
+    /**
+     * Media type of the frame (audio, video, subtitles..)
+     *
+     * See AVMEDIA_TYPE_xxx
+     */
+    enum AVMediaType type;
+
     /**
      * pointer to the picture/channel planes.
      * This might be different from the first allocated byte. For video,
@@ -390,7 +398,7 @@  typedef struct AVFrame {
     /**
      * format of the frame, -1 if unknown or unset
      * Values correspond to enum AVPixelFormat for video frames,
-     * enum AVSampleFormat for audio)
+     * enum AVSampleFormat for audio, AVSubtitleType for subtitles)
      */
     int format;
 
@@ -481,6 +489,39 @@  typedef struct AVFrame {
      */
     uint64_t channel_layout;
 
+    /**
+     * Display start time, relative to packet pts, in ms.
+     */
+    uint32_t subtitle_start_time;
+
+    /**
+     * Display end time, relative to packet pts, in ms.
+     */
+    uint32_t subtitle_end_time;
+
+    /**
+     * Number of items in the @ref subtitle_areas array.
+     */
+    unsigned num_subtitle_areas;
+
+    /**
+     * Array of subtitle areas, may be empty.
+     */
+    AVSubtitleArea **subtitle_areas;
+
+    /**
+     * Usually the same as packet pts, in AV_TIME_BASE.
+     *
+     * @deprecated This is kept for compatibility reasons and corresponds to
+     * AVSubtitle->pts. Might be removed in the future.
+     */
+    int64_t subtitle_pts;
+
+    /**
+     * Header containing style information for text subtitles.
+     */
+    AVBufferRef *subtitle_header;
+
     /**
      * AVBuffer references backing the data for this frame. If all elements of
      * this array are NULL, then this frame is not reference counted. This array
@@ -740,6 +781,8 @@  void av_frame_move_ref(AVFrame *dst, AVFrame *src);
 /**
  * Allocate new buffer(s) for audio or video data.
  *
+ * Note: For subtitle data, use av_frame_get_buffer2
+ *
  * The following fields must be set on frame before calling this function:
  * - format (pixel format for video, sample format for audio)
  * - width and height for video
@@ -759,9 +802,39 @@  void av_frame_move_ref(AVFrame *dst, AVFrame *src);
  *              recommended to pass 0 here unless you know what you are doing.
  *
  * @return 0 on success, a negative AVERROR on error.
+ *
+ * @deprecated Use @ref av_frame_get_buffer2 instead and set @ref AVFrame.type
+ * before calling.
  */
+attribute_deprecated
 int av_frame_get_buffer(AVFrame *frame, int align);
 
+/**
+ * Allocate new buffer(s) for audio, video or subtitle data.
+ *
+ * The following fields must be set on frame before calling this function:
+ * - format (pixel format for video, sample format for audio)
+ * - width and height for video
+ * - nb_samples and channel_layout for audio
+ * - type (AVMediaType)
+ *
+ * This function will fill AVFrame.data and AVFrame.buf arrays and, if
+ * necessary, allocate and fill AVFrame.extended_data and AVFrame.extended_buf.
+ * For planar formats, one buffer will be allocated for each plane.
+ *
+ * @warning: if frame already has been allocated, calling this function will
+ *           leak memory. In addition, undefined behavior can occur in certain
+ *           cases.
+ *
+ * @param frame frame in which to store the new buffers.
+ * @param align Required buffer size alignment. If equal to 0, alignment will be
+ *              chosen automatically for the current CPU. It is highly
+ *              recommended to pass 0 here unless you know what you are doing.
+ *
+ * @return 0 on success, a negative AVERROR on error.
+ */
+int av_frame_get_buffer2(AVFrame *frame, int align);
+
 /**
  * Check if the frame data is writable.
  *
@@ -863,6 +936,22 @@  AVFrameSideData *av_frame_get_side_data(const AVFrame *frame,
  */
 void av_frame_remove_side_data(AVFrame *frame, enum AVFrameSideDataType type);
 
+/**
+ * Copies subtitle data from AVSubtitle to AVFrame.
+ *
+ * @deprecated This is a compatibility method for interoperability with
+ * the legacy subtitle API.
+ */
+void av_frame_put_subtitle(AVFrame *frame, const AVSubtitle *sub);
+
+/**
+ * Copies subtitle data from AVFrame to AVSubtitle.
+ *
+ * @deprecated This is a compatibility method for interoperability with
+ * the legacy subtitle API.
+ */
+void av_frame_get_subtitle(AVSubtitle *sub, AVFrame *frame);
+
 
 /**
  * Flags for frame cropping.
diff --git a/libavutil/subfmt.c b/libavutil/subfmt.c
new file mode 100644
index 0000000000..f68907a644
--- /dev/null
+++ b/libavutil/subfmt.c
@@ -0,0 +1,243 @@ 
+/*
+ * Copyright (c) 2021 softworkz
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+#include "common.h"
+#include "subfmt.h"
+
+typedef struct SubtitleFmtInfo {
+    enum AVSubtitleType fmt;
+    const char* name;
+} SubtitleFmtInfo;
+
+static SubtitleFmtInfo sub_fmt_info[AV_SUBTITLE_FMT_NB] = {
+    {.fmt = AV_SUBTITLE_FMT_UNKNOWN, .name = "Unknown subtitle format", },
+    {.fmt = AV_SUBTITLE_FMT_BITMAP,  .name = "Graphical subtitles",     },
+    {.fmt = AV_SUBTITLE_FMT_TEXT,    .name = "Text subtitles (plain)",  },
+    {.fmt = AV_SUBTITLE_FMT_ASS,     .name = "Text subtitles (ass)",    },
+};
+
+const char *av_get_subtitle_fmt_name(enum AVSubtitleType sub_fmt)
+{
+    if (sub_fmt < 0 || sub_fmt >= AV_SUBTITLE_FMT_NB)
+        return NULL;
+    return sub_fmt_info[sub_fmt].name;
+}
+
+enum AVSubtitleType av_get_subtitle_fmt(const char *name)
+{
+    for (int i = 0; i < AV_SUBTITLE_FMT_NB; i++)
+        if (!strcmp(sub_fmt_info[i].name, name))
+            return i;
+    return AV_SUBTITLE_FMT_NONE;
+}
+
+int av_subtitle_area2rect(AVSubtitleRect *dst, const AVSubtitleArea *src)
+{
+    dst->x         =  src->x;        
+    dst->y         =  src->y;        
+    dst->w         =  src->w;        
+    dst->h         =  src->h;        
+    dst->nb_colors =  src->nb_colors;
+    dst->type      =  src->type;                     
+    dst->flags     =  src->flags;
+
+    switch (dst->type) {
+    case AV_SUBTITLE_FMT_BITMAP:
+
+        if (src->h > 0 && src->w > 0 && src->buf[0]) {
+            uint32_t *pal;
+            AVBufferRef *buf = src->buf[0];
+            dst->data[0] = av_mallocz(buf->size);
+            memcpy(dst->data[0], buf->data, buf->size);
+            dst->linesize[0] = src->linesize[0];
+
+            dst->data[1] = av_mallocz(256 * 4);
+            pal = (uint32_t *)dst->data[1];
+
+            for (unsigned i = 0; i < 256; i++) {
+                pal[i] = src->pal[i];
+            }
+        }
+
+        break;
+    case AV_SUBTITLE_FMT_TEXT:
+
+        if (src->text)
+            dst->text = av_strdup(src->text);
+        else
+            dst->text = av_strdup("");
+
+        if (!dst->text)
+            return AVERROR(ENOMEM);
+
+        break;
+    case AV_SUBTITLE_FMT_ASS:
+
+        if (src->ass)
+            dst->ass = av_strdup(src->ass);
+        else
+            dst->ass = av_strdup("");
+
+        if (!dst->ass)
+            return AVERROR(ENOMEM);
+
+        break;
+    default:
+
+        av_log(NULL, AV_LOG_ERROR, "Subtitle rect has invalid format: %d", dst->type);
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+int av_subtitle_rect2area(AVSubtitleArea *dst, const AVSubtitleRect *src)
+{
+    dst->x         =  src->x;        
+    dst->y         =  src->y;        
+    dst->w         =  src->w;        
+    dst->h         =  src->h;        
+    dst->nb_colors =  src->nb_colors;
+    dst->type      =  src->type;                     
+    dst->flags     =  src->flags;
+
+    switch (dst->type) {
+    case AV_SUBTITLE_FMT_BITMAP:
+
+        if (src->h > 0 && src->w > 0 && src->data[0]) {
+            AVBufferRef *buf = av_buffer_allocz(src->h * src->linesize[0]);
+            memcpy(buf->data, src->data[0], buf->size);
+
+            dst->buf[0] = buf;
+            dst->linesize[0] = src->linesize[0];
+        }
+
+        if (src->data[1]) {
+            uint32_t *pal = (uint32_t *)src->data[1];
+
+            for (unsigned i = 0; i < 256; i++) {
+                dst->pal[i] = pal[i];
+            }
+        }
+
+        break;
+    case AV_SUBTITLE_FMT_TEXT:
+
+        if (src->text) {
+            dst->text = av_strdup(src->text);
+            if (!dst->text)
+                return AVERROR(ENOMEM);
+        }
+
+        break;
+    case AV_SUBTITLE_FMT_ASS:
+
+        if (src->ass) {
+            dst->ass = av_strdup(src->ass);
+            if (!dst->ass)
+                return AVERROR(ENOMEM);
+        }
+
+        break;
+    default:
+
+        av_log(NULL, AV_LOG_ERROR, "Subtitle area has invalid format: %d", dst->type);
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+int av_subtitle_area2area(AVSubtitleArea *dst, const AVSubtitleArea *src, int copy_data)
+{
+    dst->x         =  src->x;        
+    dst->y         =  src->y;        
+    dst->w         =  src->w;        
+    dst->h         =  src->h;        
+    dst->nb_colors =  src->nb_colors;
+    dst->type      =  src->type;                     
+    dst->flags     =  src->flags;
+
+    switch (dst->type) {
+    case AV_SUBTITLE_FMT_BITMAP:
+
+        if (src->h > 0 && src->w > 0 && src->buf[0]) {
+            dst->buf[0] = av_buffer_ref(src->buf[0]);
+            if (!dst->buf[0])
+                return AVERROR(ENOMEM);
+
+            if (copy_data) {
+                const int ret = av_buffer_make_writable(&dst->buf[0]);
+                if (ret < 0)
+                    return ret;
+            }
+
+            dst->linesize[0] = src->linesize[0];
+        }
+
+        for (unsigned i = 0; i < 256; i++)
+            dst->pal[i] = src->pal[i];
+
+        break;
+    case AV_SUBTITLE_FMT_TEXT:
+
+        if (src->text) {
+            dst->text = av_strdup(src->text);
+            if (!dst->text)
+                return AVERROR(ENOMEM);
+        }
+
+        break;
+    case AV_SUBTITLE_FMT_ASS:
+
+        if (src->ass) {
+            dst->ass = av_strdup(src->ass);
+            if (!dst->ass)
+                return AVERROR(ENOMEM);
+        }
+
+        break;
+    default:
+
+        av_log(NULL, AV_LOG_ERROR, "Subtitle area has invalid format: %d", dst->type);
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+void avsubtitle_free(AVSubtitle *sub)
+{
+    for (unsigned i = 0; i < sub->num_rects; i++) {
+        av_freep(&sub->rects[i]->data[0]);
+        av_freep(&sub->rects[i]->data[1]);
+        av_freep(&sub->rects[i]->data[2]);
+        av_freep(&sub->rects[i]->data[3]);
+        av_freep(&sub->rects[i]->text);
+        av_freep(&sub->rects[i]->ass);
+        av_freep(&sub->rects[i]);
+    }
+
+    av_freep(&sub->rects);
+
+    memset(sub, 0, sizeof(*sub));
+}
+
diff --git a/libavutil/subfmt.h b/libavutil/subfmt.h
new file mode 100644
index 0000000000..372e3db413
--- /dev/null
+++ b/libavutil/subfmt.h
@@ -0,0 +1,185 @@ 
+/*
+ * Copyright (c) 2021 softworkz
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_SUBFMT_H
+#define AVUTIL_SUBFMT_H
+
+#include <inttypes.h>
+
+#include "buffer.h"
+
+enum AVSubtitleType {
+
+    /**
+     * Subtitle format unknown.
+     */
+    AV_SUBTITLE_FMT_NONE = -1,
+
+    /**
+     * Subtitle format unknown.
+     */
+    AV_SUBTITLE_FMT_UNKNOWN = 0,
+
+    /**
+     * Bitmap area in AVSubtitleRect.data, pixfmt AV_PIX_FMT_PAL8.
+     */
+    AV_SUBTITLE_FMT_BITMAP = 1,
+    SUBTITLE_BITMAP = 1,        ///< Deprecated, use AV_SUBTITLE_FMT_BITMAP instead.
+
+    /**
+     * Plain text in AVSubtitleRect.text.
+     */
+    AV_SUBTITLE_FMT_TEXT = 2,
+    SUBTITLE_TEXT = 2,          ///< Deprecated, use AV_SUBTITLE_FMT_TEXT instead.
+
+    /**
+     * Text Formatted as per ASS specification, contained AVSubtitleRect.ass.
+     */
+    AV_SUBTITLE_FMT_ASS = 3,
+    SUBTITLE_ASS = 3,           ///< Deprecated, use AV_SUBTITLE_FMT_ASS instead.
+
+    AV_SUBTITLE_FMT_NB,
+};
+
+typedef struct AVSubtitleArea {
+#define AV_NUM_BUFFER_POINTERS 1
+
+    enum AVSubtitleType type;
+    int flags;
+
+    int x;         ///< top left corner  of area.
+    int y;         ///< top left corner  of area.
+    int w;         ///< width            of area.
+    int h;         ///< height           of area.
+    int nb_colors; ///< number of colors in bitmap palette (@ref pal).
+
+    /**
+     * Buffers and line sizes for the bitmap of this subtitle.
+     * 
+     * @{
+     */
+    AVBufferRef *buf[AV_NUM_BUFFER_POINTERS];
+    int linesize[AV_NUM_BUFFER_POINTERS];
+    /**
+     * @}
+     */
+
+    uint32_t pal[256]; ///< RGBA palette for the bitmap.
+
+    char *text;        ///< 0-terminated plain UTF-8 text
+    char *ass;         ///< 0-terminated ASS/SSA compatible event line.
+
+} AVSubtitleArea;
+
+typedef struct AVSubtitleRect {
+    unsigned nb_refs;
+    int x;         ///< top left corner  of pict, undefined when pict is not set
+    int y;         ///< top left corner  of pict, undefined when pict is not set
+    int w;         ///< width            of pict, undefined when pict is not set
+    int h;         ///< height           of pict, undefined when pict is not set
+    int nb_colors; ///< number of colors in pict, undefined when pict is not set
+
+    /**
+     * data+linesize for the bitmap of this subtitle.
+     */
+    uint8_t *data[4];
+    int linesize[4];
+
+    enum AVSubtitleType type;
+
+    char *text;                     ///< 0 terminated plain UTF-8 text
+
+    /**
+     * 0-terminated ASS/SSA compatible event line.
+     */
+    char *ass;
+
+    int flags;
+} AVSubtitleRect;
+
+typedef struct AVSubtitle {
+    uint16_t format; /* 0 = graphics */
+    uint32_t start_display_time; /* relative to packet pts, in ms */
+    uint32_t end_display_time; /* relative to packet pts, in ms */
+    unsigned num_rects;
+    AVSubtitleRect **rects;
+    int64_t pts;    ///< Same as packet pts, in AV_TIME_BASE
+} AVSubtitle;
+
+/**
+ * Return the name of sub_fmt, or NULL if sub_fmt is not
+ * recognized.
+ */
+const char *av_get_subtitle_fmt_name(enum AVSubtitleType sub_fmt);
+
+/**
+ * Return a subtitle format corresponding to name, or AV_SUBTITLE_FMT_NONE
+ * on error.
+ *
+ * @param name Subtitle format name.
+ */
+enum AVSubtitleType av_get_subtitle_fmt(const char *name);
+
+/**
+ * Copy a subtitle area.
+ *
+ * @param dst        The target area.
+ * @param src        The source area.
+ * @param copy_data  Determines whether to copy references or actual
+ *                   data from @ref AVSubtitleArea.buf.
+ *
+ * @return 0 on success.
+ */
+int av_subtitle_area2area(AVSubtitleArea *dst, const AVSubtitleArea *src, int copy_data);
+
+/**
+ * Copy data from @ref AVSubtitleArea to @ref AVSubtitleRect.
+ *
+ * @param dst        The target rect (@ref AVSubtitleRect).
+ * @param src        The source area (@ref AVSubtitleArea).
+ *
+ * @return 0 on success.
+ *
+ * @deprecated This is a compatibility method for interoperability with
+ * the legacy subtitle API.
+ */
+int av_subtitle_area2rect(AVSubtitleRect *dst, const AVSubtitleArea *src);
+
+/**
+ * Copy data from @ref AVSubtitleRect to @ref AVSubtitleArea.
+ *
+ * @param dst        The source area (@ref AVSubtitleArea).
+ * @param src        The target rect (@ref AVSubtitleRect).
+ *
+ * @return 0 on success.
+ *
+ * @deprecated This is a compatibility method for interoperability with
+ * the legacy subtitle API.
+ */
+int av_subtitle_rect2area(AVSubtitleArea *dst, const AVSubtitleRect *src);
+
+/**
+ * Free all allocated data in the given subtitle struct.
+ *
+ * @param sub AVSubtitle to free.
+ */
+void avsubtitle_free(AVSubtitle *sub);
+
+#endif /* AVUTIL_SUBFMT_H */