Message ID | DM8P223MB0365D2E18075BEB87B865FB0BA629@DM8P223MB0365.NAMP223.PROD.OUTLOOK.COM |
---|---|
State | Superseded, archived |
Headers | show |
Series | [FFmpeg-devel,v16,01/16] global: Prepare AVFrame for subtitle handling | expand |
Context | Check | Description |
---|---|---|
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
andriy/makex86 | warning | New warnings during build |
andriy/make_ppc | success | Make finished |
andriy/make_fate_ppc | success | Make fate finished |
andriy/makeppc | warning | New warnings during build |
Soft Works: > Root commit for adding subtitle filtering capabilities. > In detail: > > - Add type (AVMediaType) field to AVFrame > Replaces previous way of distinction which was based on checking > width and height to determine whether a frame is audio or video > - Add subtitle fields to AVFrame > - Add new struct AVSubtitleArea, similar to AVSubtitleRect, but different > allocation logic. Cannot and must not be used interchangeably, hence > the new struct > - Move enum AVSubtitleType, AVSubtitle and AVSubtitleRect to avutil > - Add public-named members to enum AVSubtitleType (AV_SUBTITLE_FMT_) > - Add avcodec_decode_subtitle3 which takes subtitle frames, > serving as compatibility shim to legacy subtitle decoding > - Add additional methods for conversion between old and new API > > Signed-off-by: softworkz <softworkz@hotmail.com> > --- > libavcodec/avcodec.c | 19 --- > libavcodec/avcodec.h | 75 ++-------- > libavcodec/decode.c | 53 +++++-- > libavcodec/libzvbi-teletextdec.c | 1 + > libavcodec/pgssubdec.c | 1 + > libavcodec/utils.c | 11 ++ > libavfilter/vf_subtitles.c | 50 +++++-- > libavformat/utils.c | 1 + > libavutil/Makefile | 2 + > libavutil/frame.c | 194 +++++++++++++++++++++--- > libavutil/frame.h | 93 +++++++++++- > libavutil/subfmt.c | 243 +++++++++++++++++++++++++++++++ > libavutil/subfmt.h | 185 +++++++++++++++++++++++ As has already been said by others, this should be split: One patch for the actual new libavutil additions, one patch for the lavc decoding API, one patch for the encoding API, one patch for every user that is made to use the new APIs. > 13 files changed, 803 insertions(+), 125 deletions(-) > create mode 100644 libavutil/subfmt.c > create mode 100644 libavutil/subfmt.h > > diff --git a/libavcodec/avcodec.c b/libavcodec/avcodec.c > index c00a9b2af8..13e3711b9c 100644 > --- a/libavcodec/avcodec.c > +++ b/libavcodec/avcodec.c > @@ -422,25 +422,6 @@ void avcodec_flush_buffers(AVCodecContext *avctx) > av_bsf_flush(avci->bsf); > } > > -void avsubtitle_free(AVSubtitle *sub) > -{ > - int i; > - > - for (i = 0; i < sub->num_rects; i++) { > - av_freep(&sub->rects[i]->data[0]); > - av_freep(&sub->rects[i]->data[1]); > - av_freep(&sub->rects[i]->data[2]); > - av_freep(&sub->rects[i]->data[3]); > - av_freep(&sub->rects[i]->text); > - av_freep(&sub->rects[i]->ass); > - av_freep(&sub->rects[i]); > - } > - > - av_freep(&sub->rects); > - > - memset(sub, 0, sizeof(*sub)); > -} > - > av_cold int avcodec_close(AVCodecContext *avctx) > { > int i; > diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h > index 7ee8bc2b7c..0c5819b116 100644 > --- a/libavcodec/avcodec.h > +++ b/libavcodec/avcodec.h > @@ -35,6 +35,7 @@ > #include "libavutil/frame.h" > #include "libavutil/log.h" > #include "libavutil/pixfmt.h" > +#include "libavutil/subfmt.h" > #include "libavutil/rational.h" > > #include "codec.h" > @@ -1674,7 +1675,7 @@ typedef struct AVCodecContext { > > /** > * Header containing style information for text subtitles. > - * For SUBTITLE_ASS subtitle type, it should contain the whole ASS > + * For AV_SUBTITLE_FMT_ASS subtitle type, it should contain the whole ASS > * [Script Info] and [V4+ Styles] section, plus the [Events] line and > * the Format line following. It shouldn't include any Dialogue line. > * - encoding: Set/allocated/freed by user (before avcodec_open2()) > @@ -2238,63 +2239,8 @@ typedef struct AVHWAccel { > * @} > */ > > -enum AVSubtitleType { > - SUBTITLE_NONE, > - > - SUBTITLE_BITMAP, ///< A bitmap, pict will be set > - > - /** > - * Plain text, the text field must be set by the decoder and is > - * authoritative. ass and pict fields may contain approximations. > - */ > - SUBTITLE_TEXT, > - > - /** > - * Formatted text, the ass field must be set by the decoder and is > - * authoritative. pict and text fields may contain approximations. > - */ > - SUBTITLE_ASS, > -}; > - > #define AV_SUBTITLE_FLAG_FORCED 0x00000001 > > -typedef struct AVSubtitleRect { > - int x; ///< top left corner of pict, undefined when pict is not set > - int y; ///< top left corner of pict, undefined when pict is not set > - int w; ///< width of pict, undefined when pict is not set > - int h; ///< height of pict, undefined when pict is not set > - int nb_colors; ///< number of colors in pict, undefined when pict is not set > - > - /** > - * data+linesize for the bitmap of this subtitle. > - * Can be set for text/ass as well once they are rendered. > - */ > - uint8_t *data[4]; > - int linesize[4]; > - > - enum AVSubtitleType type; > - > - char *text; ///< 0 terminated plain UTF-8 text > - > - /** > - * 0 terminated ASS/SSA compatible event line. > - * The presentation of this is unaffected by the other values in this > - * struct. > - */ > - char *ass; > - > - int flags; > -} AVSubtitleRect; > - > -typedef struct AVSubtitle { > - uint16_t format; /* 0 = graphics */ > - uint32_t start_display_time; /* relative to packet pts, in ms */ > - uint32_t end_display_time; /* relative to packet pts, in ms */ > - unsigned num_rects; > - AVSubtitleRect **rects; > - int64_t pts; ///< Same as packet pts, in AV_TIME_BASE > -} AVSubtitle; > - > /** > * Return the LIBAVCODEC_VERSION_INT constant. > */ > @@ -2430,13 +2376,6 @@ int avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **op > */ > int avcodec_close(AVCodecContext *avctx); > > -/** > - * Free all allocated data in the given subtitle struct. > - * > - * @param sub AVSubtitle to free. > - */ > -void avsubtitle_free(AVSubtitle *sub); > - > /** > * @} > */ > @@ -2527,6 +2466,8 @@ enum AVChromaLocation avcodec_chroma_pos_to_enum(int xpos, int ypos); > * must be freed with avsubtitle_free if *got_sub_ptr is set. > * @param[in,out] got_sub_ptr Zero if no subtitle could be decompressed, otherwise, it is nonzero. > * @param[in] avpkt The input AVPacket containing the input buffer. > + * > + * @deprecated Use the new decode API (avcodec_send_packet, avcodec_receive_frame) instead. This is not how you deprecate something: You have to add a @deprecated doxygen comment as well as the attribute_deprecated to make some noise. Actually, you should deprecate the whole AVSubtitle API. > */ > int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub, > int *got_sub_ptr, > @@ -3125,6 +3066,14 @@ void avcodec_flush_buffers(AVCodecContext *avctx); > */ > int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes); > > +/** > + * Return subtitle format from a codec descriptor > + * > + * @param codec_descriptor codec descriptor > + * @return the subtitle type (e.g. bitmap, text) > + */ > +enum AVSubtitleType av_get_subtitle_format_from_codecdesc(const AVCodecDescriptor *codec_descriptor); Do we need this function? It seems too trivial and as Anton has pointed out is flawed. And anyway, this would belong into codec_desc.h. > + > /* memory */ > > /** > diff --git a/libavcodec/decode.c b/libavcodec/decode.c > index c44724d150..788b043266 100644 > --- a/libavcodec/decode.c > +++ b/libavcodec/decode.c > @@ -576,6 +576,34 @@ static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame) > return ret; > } > > +static int decode_subtitle2_priv(AVCodecContext *avctx, AVSubtitle *sub, > + int *got_sub_ptr, const AVPacket *avpkt); > + > +static int decode_subtitle_shim(AVCodecContext *avctx, AVFrame *frame, const AVPacket *avpkt) > +{ > + int ret, got_sub_ptr = 0; > + AVSubtitle subtitle = { 0 }; > + > + av_frame_unref(frame); > + > + ret = decode_subtitle2_priv(avctx, &subtitle, &got_sub_ptr, avpkt); > + > + if (ret >= 0 && got_sub_ptr) { > + frame->type = AVMEDIA_TYPE_SUBTITLE; > + frame->format = subtitle.format; > + ret = av_frame_get_buffer2(frame, 0); > + if (ret < 0) > + return ret; > + av_frame_put_subtitle(frame, &subtitle); > + > + frame->pkt_dts = avpkt->dts; > + } > + > + avsubtitle_free(&subtitle); > + > + return ret; > +} > + > int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt) > { > AVCodecInternal *avci = avctx->internal; > @@ -590,6 +618,9 @@ int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacke > if (avpkt && !avpkt->size && avpkt->data) > return AVERROR(EINVAL); > > + if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE) > + return decode_subtitle_shim(avctx, avci->buffer_frame, avpkt); 1. This is missing a check whether buffer_frame is empty or not; in the latter case, you need to return AVERROR(EAGAIN); instead decode_subtitle_shim() destroys what is already there. (Said check is currently offloaded to the BSF API in the audio/video codepath. This is actually a violation of the BSF API.) 2. Flushing is not really implemented well: a) It is legal to call avcodec_send_packet() with avpkt == NULL to indicate EOF. If I am not mistaken, decode_subtitle2_priv() will just crash in this case. b) avcodec_receive_frame() only returns what is in buffer_frame; it never calls decode_subtitle2_priv() or the underlying decode function at all. This basically presumes that a subtitle decoder can only return one subtitle after flushing. I don't know whether this is true for our decoders with the delay cap. c) avcodec_receive_frame() seems to never return AVERROR_EOF after flushing, but always AVERROR(EAGAIN) (if the delay cap is set, the first call to avcodec_receive_frame() after flushing can also result in a frame being returned). Yet this makes no sense and is IMO an API violation on lavc's part. (While we have subtitle decoders with the delay cap, I don't know whether this is actually tested by fate and whether all code actually flushes subtitle decoders at all.) > + > av_packet_unref(avci->buffer_pkt); > if (avpkt && (avpkt->data || avpkt->side_data_elems)) { > ret = av_packet_ref(avci->buffer_pkt, avpkt); > @@ -651,7 +682,9 @@ int attribute_align_arg avcodec_receive_frame(AVCodecContext *avctx, AVFrame *fr > > if (avci->buffer_frame->buf[0]) { > av_frame_move_ref(frame, avci->buffer_frame); > - } else { > + } else if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE) > + return AVERROR(EAGAIN); > + else { > ret = decode_receive_frame_internal(avctx, frame); > if (ret < 0) > return ret; > @@ -723,7 +756,7 @@ static void get_subtitle_defaults(AVSubtitle *sub) > > #define UTF8_MAX_BYTES 4 /* 5 and 6 bytes sequences should not be used */ > static int recode_subtitle(AVCodecContext *avctx, AVPacket **outpkt, > - AVPacket *inpkt, AVPacket *buf_pkt) > + const AVPacket *inpkt, AVPacket *buf_pkt) *outpkt = inpkt; is now no longer const-correct. And if you make outpkt const AVPacket**, then avctx->codec->decode(avctx, sub, got_sub_ptr, pkt) is no longer const-correct (although no subtitle decoder modifies the packet it is given!). In other words: Been there, done that. > { > #if CONFIG_ICONV > iconv_t cd = (iconv_t)-1; > @@ -802,9 +835,8 @@ static int utf8_check(const uint8_t *str) > return 1; > } > > -int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub, > - int *got_sub_ptr, > - AVPacket *avpkt) > +static int decode_subtitle2_priv(AVCodecContext *avctx, AVSubtitle *sub, > + int *got_sub_ptr, const AVPacket *avpkt) > { > int ret = 0; > > @@ -844,10 +876,7 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub, > avctx->pkt_timebase, ms); > } > > - if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB) > - sub->format = 0; > - else if (avctx->codec_descriptor->props & AV_CODEC_PROP_TEXT_SUB) > - sub->format = 1; > + sub->format = av_get_subtitle_format_from_codecdesc(avctx->codec_descriptor); > > for (unsigned i = 0; i < sub->num_rects; i++) { > if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_IGNORE && > @@ -871,6 +900,12 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub, > return ret; > } > > +int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub, > + int *got_sub_ptr, AVPacket *avpkt) > +{ > + return decode_subtitle2_priv(avctx, sub, got_sub_ptr, avpkt); > +} > + > enum AVPixelFormat avcodec_default_get_format(struct AVCodecContext *avctx, > const enum AVPixelFormat *fmt) > { > diff --git a/libavcodec/libzvbi-teletextdec.c b/libavcodec/libzvbi-teletextdec.c > index 1073d6a0bd..535c82d7a5 100644 > --- a/libavcodec/libzvbi-teletextdec.c > +++ b/libavcodec/libzvbi-teletextdec.c > @@ -27,6 +27,7 @@ > #include "libavutil/intreadwrite.h" > #include "libavutil/log.h" > #include "libavutil/common.h" > +#include "libavutil/subfmt.h" If you need to add a new header here for this (which I don't think), then this means that you broke API. > > #include <libzvbi.h> > > diff --git a/libavcodec/pgssubdec.c b/libavcodec/pgssubdec.c > index 388639a110..5709058b1c 100644 > --- a/libavcodec/pgssubdec.c > +++ b/libavcodec/pgssubdec.c > @@ -32,6 +32,7 @@ > #include "libavutil/colorspace.h" > #include "libavutil/imgutils.h" > #include "libavutil/opt.h" > +#include "libavutil/subfmt.h" > > #define RGBA(r,g,b,a) (((unsigned)(a) << 24) | ((r) << 16) | ((g) << 8) | (b)) > #define MAX_EPOCH_PALETTES 8 // Max 8 allowed per PGS epoch > diff --git a/libavcodec/utils.c b/libavcodec/utils.c > index a91a54b0dc..efe14e5a43 100644 > --- a/libavcodec/utils.c > +++ b/libavcodec/utils.c > @@ -824,6 +824,17 @@ int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes) > return FFMAX(0, duration); > } > > +enum AVSubtitleType av_get_subtitle_format_from_codecdesc(const AVCodecDescriptor *codec_descriptor) > +{ > + if(codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB) > + return AV_SUBTITLE_FMT_BITMAP; > + > + if(codec_descriptor->props & AV_CODEC_PROP_TEXT_SUB) > + return AV_SUBTITLE_FMT_ASS; > + > + return AV_SUBTITLE_FMT_UNKNOWN; > +} > + > int av_get_audio_frame_duration2(AVCodecParameters *par, int frame_bytes) > { > int duration = get_audio_frame_duration(par->codec_id, par->sample_rate, > diff --git a/libavfilter/vf_subtitles.c b/libavfilter/vf_subtitles.c > index 377160c72b..6b54cf669b 100644 > --- a/libavfilter/vf_subtitles.c > +++ b/libavfilter/vf_subtitles.c > @@ -35,14 +35,12 @@ > # include "libavformat/avformat.h" > #endif > #include "libavutil/avstring.h" > -#include "libavutil/imgutils.h" > #include "libavutil/opt.h" > #include "libavutil/parseutils.h" > #include "drawutils.h" > #include "avfilter.h" > #include "internal.h" > #include "formats.h" > -#include "video.h" > > typedef struct AssContext { > const AVClass *class; > @@ -292,6 +290,29 @@ static int attachment_is_font(AVStream * st) > return 0; > } > > +static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame, AVPacket *pkt) > +{ > + int ret; > + > + *got_frame = 0; You don't really need this: You could just return 0 if no frame was returned, 1 if a frame was returned and < 0 on (actual, not EAGAIN) errors. > + > + if (pkt) { > + ret = avcodec_send_packet(avctx, pkt); > + // In particular, we don't expect AVERROR(EAGAIN), because we read all > + // decoded frames with avcodec_receive_frame() until done. Where do you do this? For this one would need to call avcodec_receive_frame() in a loop until it returns EAGAIN, but you don't do this. > + if (ret < 0 && ret != AVERROR_EOF) > + return ret; > + } > + > + ret = avcodec_receive_frame(avctx, frame); > + if (ret < 0 && ret != AVERROR(EAGAIN)) > + return ret; > + if (ret >= 0) > + *got_frame = 1; > + > + return 0; > +} > + > AVFILTER_DEFINE_CLASS(subtitles); > > static av_cold int init_subtitles(AVFilterContext *ctx) > @@ -306,6 +327,7 @@ static av_cold int init_subtitles(AVFilterContext *ctx) > AVStream *st; > AVPacket pkt; > AssContext *ass = ctx->priv; > + enum AVSubtitleType subtitle_format; > > /* Init libass */ > ret = init(ctx); > @@ -386,13 +408,17 @@ static av_cold int init_subtitles(AVFilterContext *ctx) > ret = AVERROR_DECODER_NOT_FOUND; > goto end; > } > + > dec_desc = avcodec_descriptor_get(st->codecpar->codec_id); > - if (dec_desc && !(dec_desc->props & AV_CODEC_PROP_TEXT_SUB)) { > + subtitle_format = av_get_subtitle_format_from_codecdesc(dec_desc); > + > + if (subtitle_format != AV_SUBTITLE_FMT_ASS) { > av_log(ctx, AV_LOG_ERROR, > - "Only text based subtitles are currently supported\n"); > - ret = AVERROR_PATCHWELCOME; > + "Only text based subtitles are supported by this filter\n"); > + ret = AVERROR_INVALIDDATA; > goto end; > } > + > if (ass->charenc) > av_dict_set(&codec_opts, "sub_charenc", ass->charenc, 0); > > @@ -448,18 +474,18 @@ static av_cold int init_subtitles(AVFilterContext *ctx) > dec_ctx->subtitle_header_size); > while (av_read_frame(fmt, &pkt) >= 0) { > int i, got_subtitle; > - AVSubtitle sub = {0}; > + AVFrame *sub = av_frame_alloc(); Unchecked allocation. And anyway, this should not be allocated once per loop iteration. > > if (pkt.stream_index == sid) { > - ret = avcodec_decode_subtitle2(dec_ctx, &sub, &got_subtitle, &pkt); > + ret = decode(dec_ctx, sub, &got_subtitle, &pkt); > if (ret < 0) { > av_log(ctx, AV_LOG_WARNING, "Error decoding: %s (ignored)\n", > av_err2str(ret)); > } else if (got_subtitle) { > - const int64_t start_time = av_rescale_q(sub.pts, AV_TIME_BASE_Q, av_make_q(1, 1000)); > - const int64_t duration = sub.end_display_time; > - for (i = 0; i < sub.num_rects; i++) { > - char *ass_line = sub.rects[i]->ass; > + const int64_t start_time = av_rescale_q(sub->subtitle_pts, AV_TIME_BASE_Q, av_make_q(1, 1000)); > + const int64_t duration = sub->subtitle_end_time; > + for (i = 0; i < sub->num_subtitle_areas; i++) { > + char *ass_line = sub->subtitle_areas[i]->ass; > if (!ass_line) > break; > ass_process_chunk(ass->track, ass_line, strlen(ass_line), > @@ -468,7 +494,7 @@ static av_cold int init_subtitles(AVFilterContext *ctx) > } > } > av_packet_unref(&pkt); > - avsubtitle_free(&sub); > + av_frame_free(&sub); > } > > end: > diff --git a/libavformat/utils.c b/libavformat/utils.c > index 7840e8717c..d9b54c7725 100644 > --- a/libavformat/utils.c > +++ b/libavformat/utils.c > @@ -32,6 +32,7 @@ > #include "libavutil/opt.h" > #include "libavutil/parseutils.h" > #include "libavutil/pixfmt.h" > +#include "libavutil/subfmt.h" > #include "libavutil/thread.h" > #include "libavutil/time.h" > > diff --git a/libavutil/Makefile b/libavutil/Makefile > index 529046dbc8..7e79936876 100644 > --- a/libavutil/Makefile > +++ b/libavutil/Makefile > @@ -74,6 +74,7 @@ HEADERS = adler32.h \ > sha512.h \ > spherical.h \ > stereo3d.h \ > + subfmt.h \ > threadmessage.h \ > time.h \ > timecode.h \ > @@ -159,6 +160,7 @@ OBJS = adler32.o \ > slicethread.o \ > spherical.o \ > stereo3d.o \ > + subfmt.o \ > threadmessage.o \ > time.o \ > timecode.o \ > diff --git a/libavutil/frame.c b/libavutil/frame.c > index d4d3ad6988..dfe9c269a6 100644 > --- a/libavutil/frame.c > +++ b/libavutil/frame.c > @@ -26,6 +26,7 @@ > #include "imgutils.h" > #include "mem.h" > #include "samplefmt.h" > +#include "subfmt.h" > #include "hwcontext.h" > > #define CHECK_CHANNELS_CONSISTENCY(frame) \ > @@ -50,6 +51,9 @@ const char *av_get_colorspace_name(enum AVColorSpace val) > return name[val]; > } > #endif > + > +static int frame_copy_subtitles(AVFrame *dst, const AVFrame *src, int copy_data); > + > static void get_frame_defaults(AVFrame *frame) > { > if (frame->extended_data != frame->data) > @@ -72,7 +76,12 @@ static void get_frame_defaults(AVFrame *frame) > frame->colorspace = AVCOL_SPC_UNSPECIFIED; > frame->color_range = AVCOL_RANGE_UNSPECIFIED; > frame->chroma_location = AVCHROMA_LOC_UNSPECIFIED; > - frame->flags = 0; > + frame->subtitle_start_time = 0; > + frame->subtitle_end_time = 0; > + frame->num_subtitle_areas = 0; > + frame->subtitle_areas = NULL; > + frame->subtitle_pts = 0; > + frame->subtitle_header = NULL; > } > > static void free_side_data(AVFrameSideData **ptr_sd) > @@ -243,23 +252,55 @@ static int get_audio_buffer(AVFrame *frame, int align) > > } > > +static int get_subtitle_buffer(AVFrame *frame) > +{ > + // Buffers in AVFrame->buf[] are not used in case of subtitle frames. > + // To accomodate with existing code, checking ->buf[0] to determine > + // whether a frame is ref-counted or has data, we're adding a 1-byte > + // buffer here, which marks the subtitle frame to contain data. This is a terrible hack that might be necessary for now. But I don't see anything > + frame->buf[0] = av_buffer_alloc(1); > + if (!frame->buf[0]) { > + av_frame_unref(frame); > + return AVERROR(ENOMEM); > + } > + > + frame->extended_data = frame->data; > + > + return 0; > +} > + > int av_frame_get_buffer(AVFrame *frame, int align) > +{ > + if (frame->width > 0 && frame->height > 0) > + frame->type = AVMEDIA_TYPE_VIDEO; > + else if (frame->nb_samples > 0 && (frame->channel_layout || frame->channels > 0)) > + frame->type = AVMEDIA_TYPE_AUDIO; > + > + return av_frame_get_buffer2(frame, align); > +} > + > +int av_frame_get_buffer2(AVFrame *frame, int align) > { > if (frame->format < 0) > return AVERROR(EINVAL); > > - if (frame->width > 0 && frame->height > 0) > + switch(frame->type) { > + case AVMEDIA_TYPE_VIDEO: > return get_video_buffer(frame, align); > - else if (frame->nb_samples > 0 && (frame->channel_layout || frame->channels > 0)) > + case AVMEDIA_TYPE_AUDIO: > return get_audio_buffer(frame, align); > - > - return AVERROR(EINVAL); > + case AVMEDIA_TYPE_SUBTITLE: > + return get_subtitle_buffer(frame); > + default: > + return AVERROR(EINVAL); > + } > } > > static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy) > { > int ret, i; > > + dst->type = src->type; > dst->key_frame = src->key_frame; > dst->pict_type = src->pict_type; > dst->sample_aspect_ratio = src->sample_aspect_ratio; > @@ -290,6 +331,12 @@ static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy) > dst->colorspace = src->colorspace; > dst->color_range = src->color_range; > dst->chroma_location = src->chroma_location; > + dst->subtitle_start_time = src->subtitle_start_time; > + dst->subtitle_end_time = src->subtitle_end_time; > + dst->subtitle_pts = src->subtitle_pts; > + > + if ((ret = av_buffer_replace(&dst->subtitle_header, src->subtitle_header)) < 0) > + return ret; > > av_dict_copy(&dst->metadata, src->metadata, 0); > > @@ -331,6 +378,7 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src) > av_assert1(dst->width == 0 && dst->height == 0); > av_assert1(dst->channels == 0); > > + dst->type = src->type; > dst->format = src->format; > dst->width = src->width; > dst->height = src->height; > @@ -344,7 +392,7 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src) > > /* duplicate the frame data if it's not refcounted */ > if (!src->buf[0]) { > - ret = av_frame_get_buffer(dst, 0); > + ret = av_frame_get_buffer2(dst, 0); > if (ret < 0) > goto fail; > > @@ -366,6 +414,10 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src) > } > } > > + /* copy subtitle areas */ > + if (src->type == AVMEDIA_TYPE_SUBTITLE) > + frame_copy_subtitles(dst, src, 0); > + > if (src->extended_buf) { > dst->extended_buf = av_calloc(src->nb_extended_buf, > sizeof(*dst->extended_buf)); > @@ -436,7 +488,7 @@ AVFrame *av_frame_clone(const AVFrame *src) > > void av_frame_unref(AVFrame *frame) > { > - int i; > + unsigned i, n; > > if (!frame) > return; > @@ -455,6 +507,21 @@ void av_frame_unref(AVFrame *frame) > av_buffer_unref(&frame->opaque_ref); > av_buffer_unref(&frame->private_ref); > > + av_buffer_unref(&frame->subtitle_header); > + > + for (i = 0; i < frame->num_subtitle_areas; i++) { > + AVSubtitleArea *area = frame->subtitle_areas[i]; > + > + for (n = 0; n < FF_ARRAY_ELEMS(area->buf); n++) > + av_buffer_unref(&area->buf[n]); > + > + av_freep(&area->text); > + av_freep(&area->ass); > + av_free(area); > + } > + > + av_freep(&frame->subtitle_areas); > + > get_frame_defaults(frame); > } > > @@ -472,7 +539,8 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src) > > int av_frame_is_writable(AVFrame *frame) > { > - int i, ret = 1; > + AVSubtitleArea *area; > + unsigned i, n, ret = 1; Use smaller scope for the variables that you add here (i.e. don't add them here). > > /* assume non-refcounted frames are not writable */ > if (!frame->buf[0]) > @@ -484,6 +552,15 @@ int av_frame_is_writable(AVFrame *frame) > for (i = 0; i < frame->nb_extended_buf; i++) > ret &= !!av_buffer_is_writable(frame->extended_buf[i]); > > + for (i = 0; i < frame->num_subtitle_areas; i++) { > + area = frame->subtitle_areas[i]; > + if (area) { Who sets an incorrect num_subtitle_areas? > + for (n = 0; n < FF_ARRAY_ELEMS(area->buf); n++) > + if (area->buf[n]) > + ret &= !!av_buffer_is_writable(area->buf[n]); > + } > + } > + > return ret; > } > > @@ -499,6 +576,7 @@ int av_frame_make_writable(AVFrame *frame) > return 0; > > memset(&tmp, 0, sizeof(tmp)); > + tmp.type = frame->type; > tmp.format = frame->format; > tmp.width = frame->width; > tmp.height = frame->height; > @@ -509,7 +587,7 @@ int av_frame_make_writable(AVFrame *frame) > if (frame->hw_frames_ctx) > ret = av_hwframe_get_buffer(frame->hw_frames_ctx, &tmp, 0); > else > - ret = av_frame_get_buffer(&tmp, 0); > + ret = av_frame_get_buffer2(&tmp, 0); > if (ret < 0) > return ret; > > @@ -544,14 +622,22 @@ AVBufferRef *av_frame_get_plane_buffer(AVFrame *frame, int plane) > uint8_t *data; > int planes, i; > > - if (frame->nb_samples) { > - int channels = frame->channels; > - if (!channels) > - return NULL; > - CHECK_CHANNELS_CONSISTENCY(frame); > - planes = av_sample_fmt_is_planar(frame->format) ? channels : 1; > - } else > + switch(frame->type) { > + case AVMEDIA_TYPE_VIDEO: > planes = 4; > + break; > + case AVMEDIA_TYPE_AUDIO: > + { > + int channels = frame->channels; > + if (!channels) > + return NULL; > + CHECK_CHANNELS_CONSISTENCY(frame); > + planes = av_sample_fmt_is_planar(frame->format) ? channels : 1; > + break; > + } > + default: > + return NULL; > + } > > if (plane < 0 || plane >= planes || !frame->extended_data[plane]) > return NULL; > @@ -675,17 +761,39 @@ static int frame_copy_audio(AVFrame *dst, const AVFrame *src) > return 0; > } > > +static int frame_copy_subtitles(AVFrame *dst, const AVFrame *src, int copy_data) > +{ > + dst->format = src->format; > + > + dst->num_subtitle_areas = src->num_subtitle_areas; > + > + if (src->num_subtitle_areas) { > + dst->subtitle_areas = av_malloc_array(src->num_subtitle_areas, sizeof(AVSubtitleArea *)); > + > + for (unsigned i = 0; i < src->num_subtitle_areas; i++) { > + dst->subtitle_areas[i] = av_mallocz(sizeof(AVSubtitleArea)); > + av_subtitle_area2area(dst->subtitle_areas[i], src->subtitle_areas[i], copy_data); > + } > + } > + > + return 0; > +} > + > int av_frame_copy(AVFrame *dst, const AVFrame *src) > { > if (dst->format != src->format || dst->format < 0) > return AVERROR(EINVAL); > > - if (dst->width > 0 && dst->height > 0) > + switch(dst->type) { > + case AVMEDIA_TYPE_VIDEO: > return frame_copy_video(dst, src); > - else if (dst->nb_samples > 0 && dst->channels > 0) > + case AVMEDIA_TYPE_AUDIO: > return frame_copy_audio(dst, src); > - > - return AVERROR(EINVAL); > + case AVMEDIA_TYPE_SUBTITLE: > + return frame_copy_subtitles(dst, src, 1); > + default: > + return AVERROR(EINVAL); > + } > } > > void av_frame_remove_side_data(AVFrame *frame, enum AVFrameSideDataType type) > @@ -832,3 +940,49 @@ int av_frame_apply_cropping(AVFrame *frame, int flags) > > return 0; > } > + > +/** > + * Copies subtitle data from AVSubtitle (deprecated) to AVFrame > + * > + * @note This is a compatibility method for conversion to the legacy API > + */ > +void av_frame_put_subtitle(AVFrame *frame, const AVSubtitle *sub) > +{ > + frame->format = sub->format; > + frame->subtitle_start_time = sub->start_display_time; > + frame->subtitle_end_time = sub->end_display_time; > + frame->subtitle_pts = sub->pts; > + frame->num_subtitle_areas = sub->num_rects; > + > + if (sub->num_rects) { > + frame->subtitle_areas = av_malloc_array(sub->num_rects, sizeof(AVSubtitleArea *)); > + > + for (unsigned i = 0; i < sub->num_rects; i++) { > + frame->subtitle_areas[i] = av_mallocz(sizeof(AVSubtitleArea)); > + av_subtitle_rect2area(frame->subtitle_areas[i], sub->rects[i]); > + } > + } > +} > + > +/** > + * Copies subtitle data from AVFrame to AVSubtitle (deprecated) > + * > + * @note This is a compatibility method for conversion to the legacy API > + */ > +void av_frame_get_subtitle(AVSubtitle *sub, AVFrame *frame) > +{ > + sub->start_display_time = frame->subtitle_start_time; > + sub->end_display_time = frame->subtitle_end_time; > + sub->pts = frame->subtitle_pts; > + sub->num_rects = frame->num_subtitle_areas; > + > + if (frame->num_subtitle_areas) { > + sub->rects = av_malloc_array(frame->num_subtitle_areas, sizeof(AVSubtitle *)); Unchecked. Furthermore, wrong sizeof operand. Notice that you have already set num_rects before the allocation, so on error, the AVSubtitle would be inconsistent if you simply returned AVERROR(ENOMEM). > + > + for (unsigned i = 0; i < frame->num_subtitle_areas; i++) { > + sub->rects[i] = av_mallocz(sizeof(AVSubtitleRect)); Unchecked allocations. > + av_subtitle_area2rect(sub->rects[i], frame->subtitle_areas[i]); Missing check. > + } > + } > +} > + > diff --git a/libavutil/frame.h b/libavutil/frame.h > index 753234792e..742f4ba07e 100644 > --- a/libavutil/frame.h > +++ b/libavutil/frame.h > @@ -34,6 +34,7 @@ > #include "rational.h" > #include "samplefmt.h" > #include "pixfmt.h" > +#include "subfmt.h" > #include "version.h" > > > @@ -278,7 +279,7 @@ typedef struct AVRegionOfInterest { > } AVRegionOfInterest; > > /** > - * This structure describes decoded (raw) audio or video data. > + * This structure describes decoded (raw) audio, video or subtitle data. > * > * AVFrame must be allocated using av_frame_alloc(). Note that this only > * allocates the AVFrame itself, the buffers for the data must be managed > @@ -309,6 +310,13 @@ typedef struct AVRegionOfInterest { > */ > typedef struct AVFrame { > #define AV_NUM_DATA_POINTERS 8 > + /** > + * Media type of the frame (audio, video, subtitles..) > + * > + * See AVMEDIA_TYPE_xxx > + */ > + enum AVMediaType type; > + > /** > * pointer to the picture/channel planes. > * This might be different from the first allocated byte. For video, > @@ -390,7 +398,7 @@ typedef struct AVFrame { > /** > * format of the frame, -1 if unknown or unset > * Values correspond to enum AVPixelFormat for video frames, > - * enum AVSampleFormat for audio) > + * enum AVSampleFormat for audio, AVSubtitleType for subtitles) > */ > int format; > > @@ -481,6 +489,39 @@ typedef struct AVFrame { > */ > uint64_t channel_layout; > > + /** > + * Display start time, relative to packet pts, in ms. > + */ > + uint32_t subtitle_start_time; > + > + /** > + * Display end time, relative to packet pts, in ms. > + */ > + uint32_t subtitle_end_time; Hardcoding a millisecond timestamp precision into the API doesn't seem good. As does using 32bit. > + > + /** > + * Number of items in the @ref subtitle_areas array. > + */ > + unsigned num_subtitle_areas; > + > + /** > + * Array of subtitle areas, may be empty. > + */ > + AVSubtitleArea **subtitle_areas; > + > + /** > + * Usually the same as packet pts, in AV_TIME_BASE. > + * > + * @deprecated This is kept for compatibility reasons and corresponds to > + * AVSubtitle->pts. Might be removed in the future. > + */ > + int64_t subtitle_pts; What exactly is the point of having two different timestamps? > + > + /** > + * Header containing style information for text subtitles. > + */ > + AVBufferRef *subtitle_header; > + > /** > * AVBuffer references backing the data for this frame. If all elements of > * this array are NULL, then this frame is not reference counted. This array > @@ -740,6 +781,8 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src); > /** > * Allocate new buffer(s) for audio or video data. > * > + * Note: For subtitle data, use av_frame_get_buffer2 > + * > * The following fields must be set on frame before calling this function: > * - format (pixel format for video, sample format for audio) > * - width and height for video > @@ -759,9 +802,39 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src); > * recommended to pass 0 here unless you know what you are doing. > * > * @return 0 on success, a negative AVERROR on error. > + * > + * @deprecated Use @ref av_frame_get_buffer2 instead and set @ref AVFrame.type > + * before calling. > */ > +attribute_deprecated > int av_frame_get_buffer(AVFrame *frame, int align); > > +/** > + * Allocate new buffer(s) for audio, video or subtitle data. > + * > + * The following fields must be set on frame before calling this function: > + * - format (pixel format for video, sample format for audio) > + * - width and height for video > + * - nb_samples and channel_layout for audio > + * - type (AVMediaType) > + * > + * This function will fill AVFrame.data and AVFrame.buf arrays and, if > + * necessary, allocate and fill AVFrame.extended_data and AVFrame.extended_buf. > + * For planar formats, one buffer will be allocated for each plane. > + * > + * @warning: if frame already has been allocated, calling this function will > + * leak memory. In addition, undefined behavior can occur in certain > + * cases. > + * > + * @param frame frame in which to store the new buffers. > + * @param align Required buffer size alignment. If equal to 0, alignment will be > + * chosen automatically for the current CPU. It is highly > + * recommended to pass 0 here unless you know what you are doing. > + * > + * @return 0 on success, a negative AVERROR on error. > + */ > +int av_frame_get_buffer2(AVFrame *frame, int align); > + > /** > * Check if the frame data is writable. > * > @@ -863,6 +936,22 @@ AVFrameSideData *av_frame_get_side_data(const AVFrame *frame, > */ > void av_frame_remove_side_data(AVFrame *frame, enum AVFrameSideDataType type); > > +/** > + * Copies subtitle data from AVSubtitle to AVFrame. > + * > + * @deprecated This is a compatibility method for interoperability with > + * the legacy subtitle API. > + */ > +void av_frame_put_subtitle(AVFrame *frame, const AVSubtitle *sub); > + > +/** > + * Copies subtitle data from AVFrame to AVSubtitle. > + * > + * @deprecated This is a compatibility method for interoperability with > + * the legacy subtitle API. > + */ > +void av_frame_get_subtitle(AVSubtitle *sub, AVFrame *frame); 1. Missing const. 2. Wrong return type. 3. I don't see a need for this compatibility at all. As explained below, the only user that needs something like this is libavcodec and then it should live in libavcodec. The last two points apply to both av_frame_get_subtitle and av_frame_put_subtitle. Notice that the only use of any of these functions outside of libavcodec is remove later again (patch #8 adds an av_frame_get_subtitle() in ffmpeg.c, #16 removes it again) which shows that such a compatibility stuff is not needed outside of libavcodec. > + > > /** > * Flags for frame cropping. > diff --git a/libavutil/subfmt.c b/libavutil/subfmt.c > new file mode 100644 > index 0000000000..f68907a644 > --- /dev/null > +++ b/libavutil/subfmt.c > @@ -0,0 +1,243 @@ > +/* > + * Copyright (c) 2021 softworkz > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#include <string.h> > +#include "common.h" > +#include "subfmt.h" > + > +typedef struct SubtitleFmtInfo { > + enum AVSubtitleType fmt; > + const char* name; > +} SubtitleFmtInfo; > + > +static SubtitleFmtInfo sub_fmt_info[AV_SUBTITLE_FMT_NB] = { > + {.fmt = AV_SUBTITLE_FMT_UNKNOWN, .name = "Unknown subtitle format", }, > + {.fmt = AV_SUBTITLE_FMT_BITMAP, .name = "Graphical subtitles", }, > + {.fmt = AV_SUBTITLE_FMT_TEXT, .name = "Text subtitles (plain)", }, > + {.fmt = AV_SUBTITLE_FMT_ASS, .name = "Text subtitles (ass)", }, > +}; There is no need to add an enum AVSubtitleType to the struct. After all, the valid range of enum AVSubtitleType will always be 0..(AV_SUBTITLE_FMT_NB - 1) as long as we don't shoot ourselves in the foot. Notice that av_get_subtitle_fmt_name() requires sub_fmt_info[i].fmt == i for i in 0..(AV_SUBTITLE_FMT_NB - 1), i.e. it both relies on there not being gaps (that's ok) and on the particular values of AV_SUBTITLE_FMT_* being set in the way they are. The latter is not ok, use [AV_SUBTITLE_FMT_UNKNOWN] = "Unknown subtitle format" etc. instead. Furthermore, these lengths of the names are so close to each other that it is advantageous to use an array of char[24] instead of an array of pointers. This also saves relocations. > + > +const char *av_get_subtitle_fmt_name(enum AVSubtitleType sub_fmt) > +{ > + if (sub_fmt < 0 || sub_fmt >= AV_SUBTITLE_FMT_NB) > + return NULL; > + return sub_fmt_info[sub_fmt].name; > +} > + > +enum AVSubtitleType av_get_subtitle_fmt(const char *name) > +{ > + for (int i = 0; i < AV_SUBTITLE_FMT_NB; i++) > + if (!strcmp(sub_fmt_info[i].name, name)) > + return i; > + return AV_SUBTITLE_FMT_NONE; > +} > + > +int av_subtitle_area2rect(AVSubtitleRect *dst, const AVSubtitleArea *src) > +{ > + dst->x = src->x; > + dst->y = src->y; > + dst->w = src->w; > + dst->h = src->h; > + dst->nb_colors = src->nb_colors; > + dst->type = src->type; > + dst->flags = src->flags; > + > + switch (dst->type) { > + case AV_SUBTITLE_FMT_BITMAP: > + > + if (src->h > 0 && src->w > 0 && src->buf[0]) { > + uint32_t *pal; > + AVBufferRef *buf = src->buf[0]; > + dst->data[0] = av_mallocz(buf->size); > + memcpy(dst->data[0], buf->data, buf->size); av_memdup(); also unchecked allocation and unnecessary zeroing. > + dst->linesize[0] = src->linesize[0]; > + > + dst->data[1] = av_mallocz(256 * 4); Unchecked allocation. > + pal = (uint32_t *)dst->data[1]; > + > + for (unsigned i = 0; i < 256; i++) { > + pal[i] = src->pal[i]; > + } This loop could be avoided by using av_memdup(). > + } > + > + break; > + case AV_SUBTITLE_FMT_TEXT: > + > + if (src->text) > + dst->text = av_strdup(src->text); > + else > + dst->text = av_strdup(""); > + > + if (!dst->text) > + return AVERROR(ENOMEM); > + > + break; > + case AV_SUBTITLE_FMT_ASS: > + > + if (src->ass) > + dst->ass = av_strdup(src->ass); > + else > + dst->ass = av_strdup(""); > + > + if (!dst->ass) > + return AVERROR(ENOMEM); > + > + break; > + default: > + > + av_log(NULL, AV_LOG_ERROR, "Subtitle rect has invalid format: %d", dst->type); > + return AVERROR(ENOMEM); > + } > + > + return 0; > +} > + > +int av_subtitle_rect2area(AVSubtitleArea *dst, const AVSubtitleRect *src) > +{ > + dst->x = src->x; > + dst->y = src->y; > + dst->w = src->w; > + dst->h = src->h; > + dst->nb_colors = src->nb_colors; > + dst->type = src->type; There seems to be an awful lot of trailing whitespace in the above lines (and probably many more lines). > + dst->flags = src->flags; > + > + switch (dst->type) { > + case AV_SUBTITLE_FMT_BITMAP: > + > + if (src->h > 0 && src->w > 0 && src->data[0]) { > + AVBufferRef *buf = av_buffer_allocz(src->h * src->linesize[0]); Unchecked allocation; also zeroing is unnecessary here. > + memcpy(buf->data, src->data[0], buf->size); > + > + dst->buf[0] = buf; > + dst->linesize[0] = src->linesize[0]; > + } > + > + if (src->data[1]) { > + uint32_t *pal = (uint32_t *)src->data[1]; > + > + for (unsigned i = 0; i < 256; i++) { > + dst->pal[i] = pal[i]; > + } > + } > + > + break; > + case AV_SUBTITLE_FMT_TEXT: > + > + if (src->text) { > + dst->text = av_strdup(src->text); > + if (!dst->text) > + return AVERROR(ENOMEM); > + } > + > + break; > + case AV_SUBTITLE_FMT_ASS: > + > + if (src->ass) { > + dst->ass = av_strdup(src->ass); > + if (!dst->ass) > + return AVERROR(ENOMEM); > + } > + > + break; > + default: > + > + av_log(NULL, AV_LOG_ERROR, "Subtitle area has invalid format: %d", dst->type); > + return AVERROR(ENOMEM); ENOMEM? > + } > + > + return 0; > +} > + > +int av_subtitle_area2area(AVSubtitleArea *dst, const AVSubtitleArea *src, int copy_data) > +{ > + dst->x = src->x; > + dst->y = src->y; > + dst->w = src->w; > + dst->h = src->h; > + dst->nb_colors = src->nb_colors; > + dst->type = src->type; > + dst->flags = src->flags; > + > + switch (dst->type) { > + case AV_SUBTITLE_FMT_BITMAP: > + > + if (src->h > 0 && src->w > 0 && src->buf[0]) { > + dst->buf[0] = av_buffer_ref(src->buf[0]); > + if (!dst->buf[0]) > + return AVERROR(ENOMEM); > + > + if (copy_data) { > + const int ret = av_buffer_make_writable(&dst->buf[0]); > + if (ret < 0) > + return ret; > + } > + > + dst->linesize[0] = src->linesize[0]; > + } This would actually need to be a loop (for (int i = 0; i < AV_NUM_BUFFER_POINTERS; i++)). Furthermore, is it intended that there is no cleanup code in case only a subset of the allocations succeed? > + > + for (unsigned i = 0; i < 256; i++) > + dst->pal[i] = src->pal[i]; memcpy? > + > + break; > + case AV_SUBTITLE_FMT_TEXT: > + > + if (src->text) { > + dst->text = av_strdup(src->text); > + if (!dst->text) > + return AVERROR(ENOMEM); > + } > + > + break; > + case AV_SUBTITLE_FMT_ASS: > + > + if (src->ass) { > + dst->ass = av_strdup(src->ass); > + if (!dst->ass) > + return AVERROR(ENOMEM); > + } > + > + break; > + default: > + > + av_log(NULL, AV_LOG_ERROR, "Subtitle area has invalid format: %d", dst->type); > + return AVERROR(ENOMEM); > + } > + > + return 0; > +} > + > +void avsubtitle_free(AVSubtitle *sub) > +{ > + for (unsigned i = 0; i < sub->num_rects; i++) { > + av_freep(&sub->rects[i]->data[0]); > + av_freep(&sub->rects[i]->data[1]); > + av_freep(&sub->rects[i]->data[2]); > + av_freep(&sub->rects[i]->data[3]); > + av_freep(&sub->rects[i]->text); > + av_freep(&sub->rects[i]->ass); > + av_freep(&sub->rects[i]); > + } > + > + av_freep(&sub->rects); > + > + memset(sub, 0, sizeof(*sub)); > +} > + > diff --git a/libavutil/subfmt.h b/libavutil/subfmt.h > new file mode 100644 > index 0000000000..372e3db413 > --- /dev/null > +++ b/libavutil/subfmt.h > @@ -0,0 +1,185 @@ > +/* > + * Copyright (c) 2021 softworkz > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#ifndef AVUTIL_SUBFMT_H > +#define AVUTIL_SUBFMT_H > + > +#include <inttypes.h> > + stdint is enough. > +#include "buffer.h" > + > +enum AVSubtitleType { > + > + /** > + * Subtitle format unknown. > + */ > + AV_SUBTITLE_FMT_NONE = -1, > + > + /** > + * Subtitle format unknown. > + */ > + AV_SUBTITLE_FMT_UNKNOWN = 0, > + > + /** > + * Bitmap area in AVSubtitleRect.data, pixfmt AV_PIX_FMT_PAL8. > + */ > + AV_SUBTITLE_FMT_BITMAP = 1, > + SUBTITLE_BITMAP = 1, ///< Deprecated, use AV_SUBTITLE_FMT_BITMAP instead. > + > + /** > + * Plain text in AVSubtitleRect.text. > + */ > + AV_SUBTITLE_FMT_TEXT = 2, > + SUBTITLE_TEXT = 2, ///< Deprecated, use AV_SUBTITLE_FMT_TEXT instead. > + > + /** > + * Text Formatted as per ASS specification, contained AVSubtitleRect.ass. > + */ > + AV_SUBTITLE_FMT_ASS = 3, > + SUBTITLE_ASS = 3, ///< Deprecated, use AV_SUBTITLE_FMT_ASS instead. > + > + AV_SUBTITLE_FMT_NB, > +}; > + > +typedef struct AVSubtitleArea { > +#define AV_NUM_BUFFER_POINTERS 1 > + > + enum AVSubtitleType type; > + int flags; > + > + int x; ///< top left corner of area. > + int y; ///< top left corner of area. > + int w; ///< width of area. > + int h; ///< height of area. > + int nb_colors; ///< number of colors in bitmap palette (@ref pal). > + > + /** > + * Buffers and line sizes for the bitmap of this subtitle. > + * > + * @{ > + */ > + AVBufferRef *buf[AV_NUM_BUFFER_POINTERS]; > + int linesize[AV_NUM_BUFFER_POINTERS]; > + /** > + * @} > + */ > + > + uint32_t pal[256]; ///< RGBA palette for the bitmap. > + > + char *text; ///< 0-terminated plain UTF-8 text > + char *ass; ///< 0-terminated ASS/SSA compatible event line. > + > +} AVSubtitleArea; I presume that sizeof(AVSubtitleArea) is not supposed to be part of the public API/ABI (after all, you used an array of pointers to AVSubtitleArea in AVFrame). This needs to be documented. > + > +typedef struct AVSubtitleRect { > + unsigned nb_refs; > + int x; ///< top left corner of pict, undefined when pict is not set > + int y; ///< top left corner of pict, undefined when pict is not set > + int w; ///< width of pict, undefined when pict is not set > + int h; ///< height of pict, undefined when pict is not set > + int nb_colors; ///< number of colors in pict, undefined when pict is not set > + > + /** > + * data+linesize for the bitmap of this subtitle. > + */ > + uint8_t *data[4]; > + int linesize[4]; > + > + enum AVSubtitleType type; > + > + char *text; ///< 0 terminated plain UTF-8 text > + > + /** > + * 0-terminated ASS/SSA compatible event line. > + */ > + char *ass; > + > + int flags; > +} AVSubtitleRect; > + > +typedef struct AVSubtitle { > + uint16_t format; /* 0 = graphics */ > + uint32_t start_display_time; /* relative to packet pts, in ms */ > + uint32_t end_display_time; /* relative to packet pts, in ms */ > + unsigned num_rects; > + AVSubtitleRect **rects; > + int64_t pts; ///< Same as packet pts, in AV_TIME_BASE > +} AVSubtitle; > + > +/** > + * Return the name of sub_fmt, or NULL if sub_fmt is not > + * recognized. > + */ > +const char *av_get_subtitle_fmt_name(enum AVSubtitleType sub_fmt); > + > +/** > + * Return a subtitle format corresponding to name, or AV_SUBTITLE_FMT_NONE > + * on error. > + * > + * @param name Subtitle format name. > + */ > +enum AVSubtitleType av_get_subtitle_fmt(const char *name); Do we need this? Given that there are many possible ways to name the subtitles, it doesn't seem useful to me. The only way to be sure to use the "authoritative" name for a subtitle format is by av_get_subtitle_fmt_name(). But then one doesn't need av_get_subtitle_fmt() at all. > + > +/** > + * Copy a subtitle area. > + * > + * @param dst The target area. > + * @param src The source area. > + * @param copy_data Determines whether to copy references or actual > + * data from @ref AVSubtitleArea.buf. > + * > + * @return 0 on success. Better make it "< 0 on error" to reserve positive return values in case we ever needed them. > + */ > +int av_subtitle_area2area(AVSubtitleArea *dst, const AVSubtitleArea *src, int copy_data); This documentation does not mention that dst should be "blank". But this actually poses a problem: How does the user know that an AVSubtitleArea is blank given that ? For AVFrames the answer is this: An AVFrame is blank if it comes directly from av_frame_alloc() or av_frame_unref(). But there is no equivalent for these functions here. Furthermore, is this API supposed to be a standalone API or is AVSubtitleArea something that is always supposed to be part of an AVFrame? The usage in this patchset suggests the latter: av_subtitle_area2area() is only used in frame.c. > + > +/** > + * Copy data from @ref AVSubtitleArea to @ref AVSubtitleRect. > + * > + * @param dst The target rect (@ref AVSubtitleRect). > + * @param src The source area (@ref AVSubtitleArea). > + * > + * @return 0 on success. > + * > + * @deprecated This is a compatibility method for interoperability with > + * the legacy subtitle API. > + */ > +int av_subtitle_area2rect(AVSubtitleRect *dst, const AVSubtitleArea *src); > + > +/** > + * Copy data from @ref AVSubtitleRect to @ref AVSubtitleArea. > + * > + * @param dst The source area (@ref AVSubtitleArea). > + * @param src The target rect (@ref AVSubtitleRect). > + * > + * @return 0 on success. > + * > + * @deprecated This is a compatibility method for interoperability with > + * the legacy subtitle API. > + */ > +int av_subtitle_rect2area(AVSubtitleArea *dst, const AVSubtitleRect *src); Do we need compatibility with the legacy API at all? I only see two usecases for this: 1. One has a huge project using AVSubtitles that uses AVSubtitle so pervasively that one can't transition to the new API in one step. But I doubt that that's a thing. 2. One is forced to provide compatibility with the legacy API while also providing an API based upon the new API. This is of course the situation with libavcodec, which for this reason needs such compatibility functions (in libavcodec!). But I don't see a second user with the same needs, so I don't see why these functions (which would actually need to be declared as deprecated from the very beginning if public) should be public. > + > +/** > + * Free all allocated data in the given subtitle struct. > + * > + * @param sub AVSubtitle to free. > + */ > +void avsubtitle_free(AVSubtitle *sub); > + > +#endif /* AVUTIL_SUBFMT_H */ >
You have completely disregarded my comments on the previous patch. Stop doing that, it is rude. Your patchset is unacceptable until you deal with them.
> -----Original Message----- > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Anton > Khirnov > Sent: Friday, November 26, 2021 5:21 PM > To: ffmpeg-devel@ffmpeg.org > Subject: Re: [FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for > subtitle handling > > You have completely disregarded my comments on the previous patch. Stop > doing that, it is rude. I value everyone's time taken to review the patches I'm submitting. Rest assured that I'm going over every single comment and try to address each one appropriately. I think you misunderstood because I had answered one of your comments immediately. Right now I'm re-working the patchset to address all suggestions and eventually I'll respond to all those that I haven't addressed, explaining why. Thank you very much for reviewing my code! softworkz
> -----Original Message----- > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Andreas > Rheinhardt > Sent: Friday, November 26, 2021 11:35 AM > To: ffmpeg-devel@ffmpeg.org > Subject: Re: [FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for > subtitle handling Hi Andreas, thanks for the detailed review. There was hardly anything to object or disagree. Everything I don't respond to is implemented as suggested. > As has already been said by others, this should be split: One patch for > the actual new libavutil additions, one patch for the lavc decoding API, > one patch for the encoding API, one patch for every user that is made to > use the new APIs. By keeping the AVSubtitle definition in its original location, it was possible to split this as suggested on IRC. Thanks. > This is not how you deprecate something: You have to add a @deprecated > doxygen comment as well as the attribute_deprecated to make some noise. I didn’t want the noise before things get serious ;-) > > Actually, you should deprecate the whole AVSubtitle API. Done. Do we deprecate structs as well? Also, there's a function avcodec_get_subtitle_rect_class(). I have no idea whether it ever had any practical use. A similar function above (avcodec_get_frame_class) is deprecated. > > +/** > > + * Return subtitle format from a codec descriptor > > + * > > + * @param codec_descriptor codec descriptor > > + * @return the subtitle type (e.g. bitmap, text) > > + */ > > +enum AVSubtitleType av_get_subtitle_format_from_codecdesc(const > AVCodecDescriptor *codec_descriptor); > > Do we need this function? It seems too trivial and as Anton has pointed > out is flawed. And anyway, this would belong into codec_desc.h. I had replied to Anton why it's not flawed. Moved it to codec_desc and named it as Anton had suggested. > > int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const > AVPacket *avpkt) > > { > > AVCodecInternal *avci = avctx->internal; > > @@ -590,6 +618,9 @@ int attribute_align_arg > avcodec_send_packet(AVCodecContext *avctx, const AVPacke > > if (avpkt && !avpkt->size && avpkt->data) > > return AVERROR(EINVAL); > > > > + if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE) > > + return decode_subtitle_shim(avctx, avci->buffer_frame, avpkt); > > 1. This is missing a check whether buffer_frame is empty or not; in the > latter case, you need to return AVERROR(EAGAIN); instead > decode_subtitle_shim() destroys what is already there. > (Said check is currently offloaded to the BSF API in the audio/video > codepath. This is actually a violation of the BSF API.) Added that check. > 2. Flushing is not really implemented well: > a) It is legal to call avcodec_send_packet() with avpkt == NULL to > indicate EOF. If I am not mistaken, decode_subtitle2_priv() will just > crash in this case. Like all subtitle decoders would when supplying a NULL packet. What should I return when a null packet is supplied? EAGAIN or EOF? > b) avcodec_receive_frame() only returns what is in buffer_frame; it > never calls decode_subtitle2_priv() or the underlying decode function at > all. This basically presumes that a subtitle decoder can only return one > subtitle after flushing. I don't know whether this is true for our > decoders with the delay cap. The only one I could see with that flag is cc_dec, which is a special case anyway as it doesn't work on regular streams. (even this one would crash when providing a NULL packet) > c) avcodec_receive_frame() seems to never return AVERROR_EOF after > flushing, but always AVERROR(EAGAIN) (if the delay cap is set, the first > call to avcodec_receive_frame() after flushing can also result in a > frame being returned). Yet this makes no sense and is IMO an API > violation on lavc's part. > (While we have subtitle decoders with the delay cap, I don't know > whether this is actually tested by fate and whether all code actually > flushes subtitle decoders at all.) I don't think so. > > diff --git a/libavfilter/vf_subtitles.c b/libavfilter/vf_subtitles.c > > index 377160c72b..6b54cf669b 100644 > > --- a/libavfilter/vf_subtitles.c > > +++ b/libavfilter/vf_subtitles.c > > @@ -35,14 +35,12 @@ > > # include "libavformat/avformat.h" > > #endif > > #include "libavutil/avstring.h" > > -#include "libavutil/imgutils.h" > > #include "libavutil/opt.h" > > #include "libavutil/parseutils.h" > > #include "drawutils.h" > > #include "avfilter.h" > > #include "internal.h" > > #include "formats.h" > > -#include "video.h" > > > > typedef struct AssContext { > > const AVClass *class; > > @@ -292,6 +290,29 @@ static int attachment_is_font(AVStream * st) > > return 0; > > } > > > > +static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame, > AVPacket *pkt) > > +{ > > + int ret; > > + > > + *got_frame = 0; > > You don't really need this: You could just return 0 if no frame was > returned, 1 if a frame was returned and < 0 on (actual, not EAGAIN) errors. > > > + > > + if (pkt) { > > + ret = avcodec_send_packet(avctx, pkt); > > + // In particular, we don't expect AVERROR(EAGAIN), because we read > all > > + // decoded frames with avcodec_receive_frame() until done. > > Where do you do this? For this one would need to call > avcodec_receive_frame() in a loop until it returns EAGAIN, but you don't > do this. This decode() function is copied from ffmpeg.c > > +static int get_subtitle_buffer(AVFrame *frame) > > +{ > > + // Buffers in AVFrame->buf[] are not used in case of subtitle frames. > > + // To accomodate with existing code, checking ->buf[0] to determine > > + // whether a frame is ref-counted or has data, we're adding a 1-byte > > + // buffer here, which marks the subtitle frame to contain data. > > This is a terrible hack that might be necessary for now. But I don't see > anything I'm not sure about the second half sentence, but what I can say is that I had taken the time to try and replace the checks for ->buf[0] to a frame field, but it turned out that this change would be so big that it cannot be part of the subtitle patchset. (also talked about this with Hendrik a while ago) > > + for (i = 0; i < frame->num_subtitle_areas; i++) { > > + area = frame->subtitle_areas[i]; > > + if (area) { > > Who sets an incorrect num_subtitle_areas? Maybe code that hasn't gotten reviewed by you ;-) > > + /** > > + * Display start time, relative to packet pts, in ms. > > + */ > > + uint32_t subtitle_start_time; > > + > > + /** > > + * Display end time, relative to packet pts, in ms. > > + */ > > + uint32_t subtitle_end_time; > > Hardcoding a millisecond timestamp precision into the API doesn't seem > good. As does using 32bit. Our internal text subtitle format is defined to be ASS and those variables are reflecting just that. > > +/** > > + * Copies subtitle data from AVFrame to AVSubtitle. > > + * > > + * @deprecated This is a compatibility method for interoperability with > > + * the legacy subtitle API. > > + */ > > +void av_frame_get_subtitle(AVSubtitle *sub, AVFrame *frame); > > 1. Missing const. > 2. Wrong return type. > 3. I don't see a need for this compatibility at all. As explained below, > the only user that needs something like this is libavcodec and then it > should live in libavcodec. > The last two points apply to both av_frame_get_subtitle and > av_frame_put_subtitle. Notice that the only use of any of these > functions outside of libavcodec is remove later again (patch #8 adds an > av_frame_get_subtitle() in ffmpeg.c, #16 removes it again) which shows > that such a compatibility stuff is not needed outside of libavcodec. Moved to avcodec as ff_ functions. > > + uint32_t pal[256]; ///< RGBA palette for the bitmap. > > + > > + char *text; ///< 0-terminated plain UTF-8 text > > + char *ass; ///< 0-terminated ASS/SSA compatible event line. > > + > > +} AVSubtitleArea; > > I presume that sizeof(AVSubtitleArea) is not supposed to be part of the > public API/ABI (after all, you used an array of pointers to > AVSubtitleArea in AVFrame). This needs to be documented. Makes sense, but I'm not sure how and where? > > +/** > > + * Return a subtitle format corresponding to name, or AV_SUBTITLE_FMT_NONE > > + * on error. > > + * > > + * @param name Subtitle format name. > > + */ > > +enum AVSubtitleType av_get_subtitle_fmt(const char *name); > > Do we need this? Given that there are many possible ways to name the > subtitles, it doesn't seem useful to me. The only way to be sure to use > the "authoritative" name for a subtitle format is by > av_get_subtitle_fmt_name(). But then one doesn't need > av_get_subtitle_fmt() at all. I've tried to implement everything analog to the functionality we have for audio and video. This corresponds to av_get_pix_fmt(). > > + */ > > +int av_subtitle_area2area(AVSubtitleArea *dst, const AVSubtitleArea *src, > int copy_data); > > This documentation does not mention that dst should be "blank". But this > actually poses a problem: How does the user know that an AVSubtitleArea > is blank given that ? For AVFrames the answer is this: An AVFrame is > blank if it comes directly from av_frame_alloc() or av_frame_unref(). > But there is no equivalent for these functions here. > Furthermore, is this API supposed to be a standalone API or is > AVSubtitleArea something that is always supposed to be part of an > AVFrame? The usage in this patchset suggests the latter: > av_subtitle_area2area() is only used in frame.c. AVSubtitleArea is always supposed to be part of an AVFrame. I had thought the copy function might be useful outside, but I didn't need it in any of the filter. Made it private in AVFrame now. > > +/** > > + * Copy data from @ref AVSubtitleArea to @ref AVSubtitleRect. > > + * > > + * @param dst The target rect (@ref AVSubtitleRect). > > + * @param src The source area (@ref AVSubtitleArea). > > + * > > + * @return 0 on success. > > + * > > + * @deprecated This is a compatibility method for interoperability with > > + * the legacy subtitle API. > > + */ > > +int av_subtitle_area2rect(AVSubtitleRect *dst, const AVSubtitleArea *src); > > + > > +/** > > + * Copy data from @ref AVSubtitleRect to @ref AVSubtitleArea. > > + * > > + * @param dst The source area (@ref AVSubtitleArea). > > + * @param src The target rect (@ref AVSubtitleRect). > > + * > > + * @return 0 on success. > > + * > > + * @deprecated This is a compatibility method for interoperability with > > + * the legacy subtitle API. > > + */ > > +int av_subtitle_rect2area(AVSubtitleArea *dst, const AVSubtitleRect *src); > > Do we need compatibility with the legacy API at all? I only see two > usecases for this: > > 1. One has a huge project using AVSubtitles that uses AVSubtitle so > pervasively that one can't transition to the new API in one step. But I > doubt that that's a thing. > 2. One is forced to provide compatibility with the legacy API while also > providing an API based upon the new API. This is of course the situation > with libavcodec, which for this reason needs such compatibility > functions (in libavcodec!). But I don't see a second user with the same > needs, so I don't see why these functions (which would actually need to > be declared as deprecated from the very beginning if public) should be > public. Made those private in avcodec. Thanks again, softworkz
Soft Works: > > >> -----Original Message----- >> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Andreas >> Rheinhardt >> Sent: Friday, November 26, 2021 11:35 AM >> To: ffmpeg-devel@ffmpeg.org >> Subject: Re: [FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for >> subtitle handling > > Hi Andreas, > > thanks for the detailed review. There was hardly anything to object or disagree. > Everything I don't respond to is implemented as suggested. > >> As has already been said by others, this should be split: One patch for >> the actual new libavutil additions, one patch for the lavc decoding API, >> one patch for the encoding API, one patch for every user that is made to >> use the new APIs. > > By keeping the AVSubtitle definition in its original location, it was > possible to split this as suggested on IRC. > Thanks. > >> This is not how you deprecate something: You have to add a @deprecated >> doxygen comment as well as the attribute_deprecated to make some noise. > > I didn’t want the noise before things get serious ;-) > >> >> Actually, you should deprecate the whole AVSubtitle API. > > Done. Do we deprecate structs as well? > > Also, there's a function avcodec_get_subtitle_rect_class(). > > I have no idea whether it ever had any practical use. A similar function > above (avcodec_get_frame_class) is deprecated. > It never had any practical use. AVSubtitleRect elements can just be set directly. I always pondered deprecating it, but was somehow too lazy. > >>> +/** >>> + * Return subtitle format from a codec descriptor >>> + * >>> + * @param codec_descriptor codec descriptor >>> + * @return the subtitle type (e.g. bitmap, text) >>> + */ >>> +enum AVSubtitleType av_get_subtitle_format_from_codecdesc(const >> AVCodecDescriptor *codec_descriptor); >> >> Do we need this function? It seems too trivial and as Anton has pointed >> out is flawed. And anyway, this would belong into codec_desc.h. > > I had replied to Anton why it's not flawed. Moved it to codec_desc and > named it as Anton had suggested. > > >>> int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const >> AVPacket *avpkt) >>> { >>> AVCodecInternal *avci = avctx->internal; >>> @@ -590,6 +618,9 @@ int attribute_align_arg >> avcodec_send_packet(AVCodecContext *avctx, const AVPacke >>> if (avpkt && !avpkt->size && avpkt->data) >>> return AVERROR(EINVAL); >>> >>> + if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE) >>> + return decode_subtitle_shim(avctx, avci->buffer_frame, avpkt); >> >> 1. This is missing a check whether buffer_frame is empty or not; in the >> latter case, you need to return AVERROR(EAGAIN); instead >> decode_subtitle_shim() destroys what is already there. >> (Said check is currently offloaded to the BSF API in the audio/video >> codepath. This is actually a violation of the BSF API.) > > Added that check. > >> 2. Flushing is not really implemented well: >> a) It is legal to call avcodec_send_packet() with avpkt == NULL to >> indicate EOF. If I am not mistaken, decode_subtitle2_priv() will just >> crash in this case. > > Like all subtitle decoders would when supplying a NULL packet. > What should I return when a null packet is supplied? > EAGAIN or EOF? > Treat is as an ordinary flush packet and abide by the docs of avcodec_send_packet(): "Sending the first flush packet will return success. Subsequent ones are unnecessary and will return AVERROR_EOF." (avcodec_send_packet() by the way does not abide by its own documentation: Packets with side data are never considered flush packets. And in case a BSF outputs multiple packets for an input packet or a decoder outputs multiple frames for an input packet, sending a flush packet can actually return errors; furthermore, I also don't see that subsequent flush packets always return EOF. Will look into this.) >> b) avcodec_receive_frame() only returns what is in buffer_frame; it >> never calls decode_subtitle2_priv() or the underlying decode function at >> all. This basically presumes that a subtitle decoder can only return one >> subtitle after flushing. I don't know whether this is true for our >> decoders with the delay cap. > > The only one I could see with that flag is cc_dec, which is a special case > anyway as it doesn't work on regular streams. You missed libzvbi. > (even this one would crash when providing a NULL packet) Of course they do. The subtitle API treats packets with size == 0 as flush packets. > >> c) avcodec_receive_frame() seems to never return AVERROR_EOF after >> flushing, but always AVERROR(EAGAIN) (if the delay cap is set, the first >> call to avcodec_receive_frame() after flushing can also result in a >> frame being returned). Yet this makes no sense and is IMO an API >> violation on lavc's part. >> (While we have subtitle decoders with the delay cap, I don't know >> whether this is actually tested by fate and whether all code actually >> flushes subtitle decoders at all.) > > I don't think so. > > >>> diff --git a/libavfilter/vf_subtitles.c b/libavfilter/vf_subtitles.c >>> index 377160c72b..6b54cf669b 100644 >>> --- a/libavfilter/vf_subtitles.c >>> +++ b/libavfilter/vf_subtitles.c >>> @@ -35,14 +35,12 @@ >>> # include "libavformat/avformat.h" >>> #endif >>> #include "libavutil/avstring.h" >>> -#include "libavutil/imgutils.h" >>> #include "libavutil/opt.h" >>> #include "libavutil/parseutils.h" >>> #include "drawutils.h" >>> #include "avfilter.h" >>> #include "internal.h" >>> #include "formats.h" >>> -#include "video.h" >>> >>> typedef struct AssContext { >>> const AVClass *class; >>> @@ -292,6 +290,29 @@ static int attachment_is_font(AVStream * st) >>> return 0; >>> } >>> >>> +static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame, >> AVPacket *pkt) >>> +{ >>> + int ret; >>> + >>> + *got_frame = 0; >> >> You don't really need this: You could just return 0 if no frame was >> returned, 1 if a frame was returned and < 0 on (actual, not EAGAIN) errors. >> >>> + >>> + if (pkt) { >>> + ret = avcodec_send_packet(avctx, pkt); >>> + // In particular, we don't expect AVERROR(EAGAIN), because we read >> all >>> + // decoded frames with avcodec_receive_frame() until done. >> >> Where do you do this? For this one would need to call >> avcodec_receive_frame() in a loop until it returns EAGAIN, but you don't >> do this. > > This decode() function is copied from ffmpeg.c > From the documentation of decode() in ffmpeg.c: "if you got a frame, you must call it again with pkt=NULL." Notice that transcode_subtitles() is not called in a loop, which means that the subtitle decoding API is not properly drained. The check would need to be modified to "if (repeating && pkt)". I wonder whether this would really change anything. > >>> +static int get_subtitle_buffer(AVFrame *frame) >>> +{ >>> + // Buffers in AVFrame->buf[] are not used in case of subtitle frames. >>> + // To accomodate with existing code, checking ->buf[0] to determine >>> + // whether a frame is ref-counted or has data, we're adding a 1-byte >>> + // buffer here, which marks the subtitle frame to contain data. >> >> This is a terrible hack that might be necessary for now. But I don't see >> anything > > I'm not sure about the second half sentence, but what I can say is that I > had taken the time to try and replace the checks for ->buf[0] to a frame > field, but it turned out that this change would be so big that it cannot > be part of the subtitle patchset. > (also talked about this with Hendrik a while ago) > > > >>> + for (i = 0; i < frame->num_subtitle_areas; i++) { >>> + area = frame->subtitle_areas[i]; >>> + if (area) { >> >> Who sets an incorrect num_subtitle_areas? > > Maybe code that hasn't gotten reviewed by you ;-) > >>> + /** >>> + * Display start time, relative to packet pts, in ms. >>> + */ >>> + uint32_t subtitle_start_time; >>> + >>> + /** >>> + * Display end time, relative to packet pts, in ms. >>> + */ >>> + uint32_t subtitle_end_time; >> >> Hardcoding a millisecond timestamp precision into the API doesn't seem >> good. As does using 32bit. > > Our internal text subtitle format is defined to be ASS and those > variables are reflecting just that. > But the timing of ASS (both when in AVPackets as well when in AVSubtitles) is external to the ASS data; so there is no need to impose this ms constraint on the whole API (even on non-ASS subtitles). > >>> +/** >>> + * Copies subtitle data from AVFrame to AVSubtitle. >>> + * >>> + * @deprecated This is a compatibility method for interoperability with >>> + * the legacy subtitle API. >>> + */ >>> +void av_frame_get_subtitle(AVSubtitle *sub, AVFrame *frame); >> >> 1. Missing const. >> 2. Wrong return type. >> 3. I don't see a need for this compatibility at all. As explained below, >> the only user that needs something like this is libavcodec and then it >> should live in libavcodec. >> The last two points apply to both av_frame_get_subtitle and >> av_frame_put_subtitle. Notice that the only use of any of these >> functions outside of libavcodec is remove later again (patch #8 adds an >> av_frame_get_subtitle() in ffmpeg.c, #16 removes it again) which shows >> that such a compatibility stuff is not needed outside of libavcodec. > > Moved to avcodec as ff_ functions. > > >>> + uint32_t pal[256]; ///< RGBA palette for the bitmap. >>> + >>> + char *text; ///< 0-terminated plain UTF-8 text >>> + char *ass; ///< 0-terminated ASS/SSA compatible event line. >>> + >>> +} AVSubtitleArea; >> >> I presume that sizeof(AVSubtitleArea) is not supposed to be part of the >> public API/ABI (after all, you used an array of pointers to >> AVSubtitleArea in AVFrame). This needs to be documented. > > Makes sense, but I'm not sure how and where? > Whether sizeof(struct foo) is public or not is typically documented directly before the definition of said struct. Notice that latter patches of yours put AVSubtitleArea on the stack in lavc, so you seem to want to make its sizeof fixed. Or you just didn't know what doing this implied. > > >>> +/** >>> + * Return a subtitle format corresponding to name, or AV_SUBTITLE_FMT_NONE >>> + * on error. >>> + * >>> + * @param name Subtitle format name. >>> + */ >>> +enum AVSubtitleType av_get_subtitle_fmt(const char *name); >> >> Do we need this? Given that there are many possible ways to name the >> subtitles, it doesn't seem useful to me. The only way to be sure to use >> the "authoritative" name for a subtitle format is by >> av_get_subtitle_fmt_name(). But then one doesn't need >> av_get_subtitle_fmt() at all. > > I've tried to implement everything analog to the functionality we have > for audio and video. This corresponds to av_get_pix_fmt(). > > >>> + */ >>> +int av_subtitle_area2area(AVSubtitleArea *dst, const AVSubtitleArea *src, >> int copy_data); >> >> This documentation does not mention that dst should be "blank". But this >> actually poses a problem: How does the user know that an AVSubtitleArea >> is blank given that ? For AVFrames the answer is this: An AVFrame is >> blank if it comes directly from av_frame_alloc() or av_frame_unref(). >> But there is no equivalent for these functions here. >> Furthermore, is this API supposed to be a standalone API or is >> AVSubtitleArea something that is always supposed to be part of an >> AVFrame? The usage in this patchset suggests the latter: >> av_subtitle_area2area() is only used in frame.c. > > AVSubtitleArea is always supposed to be part of an AVFrame. Then you should have a chat with the author of patch 15 that puts an AVSubtitleArea on the stack in dvdsubenc.c. More seriously, this is an important point. > I had thought the copy function might be useful outside, but I didn't > need it in any of the filter. > Made it private in AVFrame now. > > >>> +/** >>> + * Copy data from @ref AVSubtitleArea to @ref AVSubtitleRect. >>> + * >>> + * @param dst The target rect (@ref AVSubtitleRect). >>> + * @param src The source area (@ref AVSubtitleArea). >>> + * >>> + * @return 0 on success. >>> + * >>> + * @deprecated This is a compatibility method for interoperability with >>> + * the legacy subtitle API. >>> + */ >>> +int av_subtitle_area2rect(AVSubtitleRect *dst, const AVSubtitleArea *src); >>> + >>> +/** >>> + * Copy data from @ref AVSubtitleRect to @ref AVSubtitleArea. >>> + * >>> + * @param dst The source area (@ref AVSubtitleArea). >>> + * @param src The target rect (@ref AVSubtitleRect). >>> + * >>> + * @return 0 on success. >>> + * >>> + * @deprecated This is a compatibility method for interoperability with >>> + * the legacy subtitle API. >>> + */ >>> +int av_subtitle_rect2area(AVSubtitleArea *dst, const AVSubtitleRect *src); >> >> Do we need compatibility with the legacy API at all? I only see two >> usecases for this: >> >> 1. One has a huge project using AVSubtitles that uses AVSubtitle so >> pervasively that one can't transition to the new API in one step. But I >> doubt that that's a thing. >> 2. One is forced to provide compatibility with the legacy API while also >> providing an API based upon the new API. This is of course the situation >> with libavcodec, which for this reason needs such compatibility >> functions (in libavcodec!). But I don't see a second user with the same >> needs, so I don't see why these functions (which would actually need to >> be declared as deprecated from the very beginning if public) should be >> public. > > Made those private in avcodec. >
Quoting Soft Works (2021-11-25 18:53:24) > @@ -759,9 +802,39 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src); > * recommended to pass 0 here unless you know what you are doing. > * > * @return 0 on success, a negative AVERROR on error. > + * > + * @deprecated Use @ref av_frame_get_buffer2 instead and set @ref AVFrame.type > + * before calling. > */ > +attribute_deprecated > int av_frame_get_buffer(AVFrame *frame, int align); > > +/** > + * Allocate new buffer(s) for audio, video or subtitle data. > + * > + * The following fields must be set on frame before calling this function: > + * - format (pixel format for video, sample format for audio) > + * - width and height for video > + * - nb_samples and channel_layout for audio > + * - type (AVMediaType) > + * > + * This function will fill AVFrame.data and AVFrame.buf arrays and, if > + * necessary, allocate and fill AVFrame.extended_data and AVFrame.extended_buf. > + * For planar formats, one buffer will be allocated for each plane. > + * > + * @warning: if frame already has been allocated, calling this function will > + * leak memory. In addition, undefined behavior can occur in certain > + * cases. > + * > + * @param frame frame in which to store the new buffers. > + * @param align Required buffer size alignment. If equal to 0, alignment will be > + * chosen automatically for the current CPU. It is highly > + * recommended to pass 0 here unless you know what you are doing. > + * > + * @return 0 on success, a negative AVERROR on error. > + */ > +int av_frame_get_buffer2(AVFrame *frame, int align); Not sure whether this was asked already - why do we need this new function? Seems to me you can accomplish the same thing by just adding the type field to AVFrame. Then - if type is AVMEDIA_TYPE_SUBTITLE -> allocate a subtitle - if type is AVMEDIA_TYPE_{VIDEO,AUDIO} -> allocate video/audio - otherwise detect video/audio as we do now
Quoting Soft Works (2021-11-26 19:25:17) > > > > -----Original Message----- > > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Anton > > Khirnov > > Sent: Friday, November 26, 2021 5:21 PM > > To: ffmpeg-devel@ffmpeg.org > > Subject: Re: [FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for > > subtitle handling > > > > You have completely disregarded my comments on the previous patch. Stop > > doing that, it is rude. > > I value everyone's time taken to review the patches I'm submitting. > Rest assured that I'm going over every single comment and try to address > each one appropriately. > > I think you misunderstood because I had answered one of your comments > immediately. Right now I'm re-working the patchset to address all suggestions > and eventually I'll respond to all those that I haven't addressed, > explaining why. Okay, but in the interest of preventing such misunderstandings in the future you could mention these facts somewhere. E.g. right after the commit message, and/or put [RFC]/[WIP] in the subject. Otherwise people assume that every patch is intended to be pushed to master as-is.
Anton Khirnov: > Quoting Soft Works (2021-11-25 18:53:24) >> @@ -759,9 +802,39 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src); >> * recommended to pass 0 here unless you know what you are doing. >> * >> * @return 0 on success, a negative AVERROR on error. >> + * >> + * @deprecated Use @ref av_frame_get_buffer2 instead and set @ref AVFrame.type >> + * before calling. >> */ >> +attribute_deprecated >> int av_frame_get_buffer(AVFrame *frame, int align); >> >> +/** >> + * Allocate new buffer(s) for audio, video or subtitle data. >> + * >> + * The following fields must be set on frame before calling this function: >> + * - format (pixel format for video, sample format for audio) >> + * - width and height for video >> + * - nb_samples and channel_layout for audio >> + * - type (AVMediaType) >> + * >> + * This function will fill AVFrame.data and AVFrame.buf arrays and, if >> + * necessary, allocate and fill AVFrame.extended_data and AVFrame.extended_buf. >> + * For planar formats, one buffer will be allocated for each plane. >> + * >> + * @warning: if frame already has been allocated, calling this function will >> + * leak memory. In addition, undefined behavior can occur in certain >> + * cases. >> + * >> + * @param frame frame in which to store the new buffers. >> + * @param align Required buffer size alignment. If equal to 0, alignment will be >> + * chosen automatically for the current CPU. It is highly >> + * recommended to pass 0 here unless you know what you are doing. >> + * >> + * @return 0 on success, a negative AVERROR on error. >> + */ >> +int av_frame_get_buffer2(AVFrame *frame, int align); > > Not sure whether this was asked already - why do we need this new > function? Seems to me you can accomplish the same thing by just adding > the type field to AVFrame. Then > - if type is AVMEDIA_TYPE_SUBTITLE -> allocate a subtitle > - if type is AVMEDIA_TYPE_{VIDEO,AUDIO} -> allocate video/audio > - otherwise detect video/audio as we do now > https://ffmpeg.org/pipermail/ffmpeg-devel/2021-September/285185.html - Andreas
> -----Original Message----- > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Anton > Khirnov > Sent: Saturday, November 27, 2021 9:52 AM > To: ffmpeg-devel@ffmpeg.org > Subject: Re: [FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for > subtitle handling > > Quoting Soft Works (2021-11-25 18:53:24) > > @@ -759,9 +802,39 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src); > > * recommended to pass 0 here unless you know what you are > doing. > > * > > * @return 0 on success, a negative AVERROR on error. > > + * > > + * @deprecated Use @ref av_frame_get_buffer2 instead and set @ref > AVFrame.type > > + * before calling. > > */ > > +attribute_deprecated > > int av_frame_get_buffer(AVFrame *frame, int align); > > > > +/** > > + * Allocate new buffer(s) for audio, video or subtitle data. > > + * > > + * The following fields must be set on frame before calling this function: > > + * - format (pixel format for video, sample format for audio) > > + * - width and height for video > > + * - nb_samples and channel_layout for audio > > + * - type (AVMediaType) > > + * > > + * This function will fill AVFrame.data and AVFrame.buf arrays and, if > > + * necessary, allocate and fill AVFrame.extended_data and > AVFrame.extended_buf. > > + * For planar formats, one buffer will be allocated for each plane. > > + * > > + * @warning: if frame already has been allocated, calling this function > will > > + * leak memory. In addition, undefined behavior can occur in > certain > > + * cases. > > + * > > + * @param frame frame in which to store the new buffers. > > + * @param align Required buffer size alignment. If equal to 0, alignment > will be > > + * chosen automatically for the current CPU. It is highly > > + * recommended to pass 0 here unless you know what you are > doing. > > + * > > + * @return 0 on success, a negative AVERROR on error. > > + */ > > +int av_frame_get_buffer2(AVFrame *frame, int align); > > Not sure whether this was asked already - why do we need this new > function? Seems to me you can accomplish the same thing by just adding > the type field to AVFrame. Then > - if type is AVMEDIA_TYPE_SUBTITLE -> allocate a subtitle > - if type is AVMEDIA_TYPE_{VIDEO,AUDIO} -> allocate video/audio > - otherwise detect video/audio as we do now` I can't find it on mail archive, so here are some quotes from the e-mails about it: (originally I had av_frame_get_buffer2 with a AVMediaType parameter) softworkz: The problem is that the current implementation is inferring the media type by checking width and height (=> video, otherwise audio). Subtitle frames can have w+h or not, so it can't continue to work this way. The idea of av_frame_get_buffer2 is to have an explicit AVMediaType parameter to specify. An alternative would be to require the (new) type property to be set explicitly in case of subtitles and continue the current way of inference when it's not set. That would work but it wouldn't be a clean API design either. What would you suggest? Andreas: Add an av_frame_get_buffer2 that requires the media type of the frame to be set (on the frame, not as an additional parameter). Paul: why? AVFrame is supposed to have media type in itself. Andreas: Yes, and therefore the media type should be read from the AVFrame, not from an additional parameter to av_frame_get_buffer2. As I said. softworkz: > Are you sure? The default after av_frame_alloc is 0 (AVMEDIA_TYPE_VIDEO), > so it's not possible to check whether the value 0 was set intentionally > or not..? Andreas: Of course av_frame_alloc (or rather get_frame_defaults) should initialize the media type to AVMEDIA_TYPE_UNKNOWN. And adding av_frame_get_buffer2 is done precisely because of this: Any user of it is by definition aware of the existence of the new AVFrame mediatype field and therefore has to make sure that it is really set in accordance with his intentions. On the other hand, if one reused av_frame_get_buffer and used the media type if it is set and the current rules if it is AVMEDIA_TYPE_UNKNOWN, then there is a risk that a user not knowing the new field gets surprised.
Quoting Andreas Rheinhardt (2021-11-27 10:06:35) > Anton Khirnov: > > > > Not sure whether this was asked already - why do we need this new > > function? Seems to me you can accomplish the same thing by just adding > > the type field to AVFrame. Then > > - if type is AVMEDIA_TYPE_SUBTITLE -> allocate a subtitle > > - if type is AVMEDIA_TYPE_{VIDEO,AUDIO} -> allocate video/audio > > - otherwise detect video/audio as we do now > > > > https://ffmpeg.org/pipermail/ffmpeg-devel/2021-September/285185.html So IIUC the only concern is that a user might "manually" unref the frame without calling av_frame_unref(). I would say that this is already illegal, because we can (and did) add new allocated objects to AVFrame that would break things if you just kept them from one frame to the other (especially of a different type), e.g. hw_frames_ctx.
> -----Original Message----- > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Anton > Khirnov > Sent: Saturday, November 27, 2021 10:01 AM > To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> > Subject: Re: [FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for > subtitle handling > > Quoting Soft Works (2021-11-26 19:25:17) > > > > > > > -----Original Message----- > > > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Anton > > > Khirnov > > > Sent: Friday, November 26, 2021 5:21 PM > > > To: ffmpeg-devel@ffmpeg.org > > > Subject: Re: [FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for > > > subtitle handling > > > > > > You have completely disregarded my comments on the previous patch. Stop > > > doing that, it is rude. > > > > I value everyone's time taken to review the patches I'm submitting. > > Rest assured that I'm going over every single comment and try to address > > each one appropriately. > > > > I think you misunderstood because I had answered one of your comments > > immediately. Right now I'm re-working the patchset to address all > suggestions > > and eventually I'll respond to all those that I haven't addressed, > > explaining why. > > Okay, but in the interest of preventing such misunderstandings in the > future you could mention these facts somewhere. E.g. right after the > commit message, and/or put [RFC]/[WIP] in the subject. Otherwise people > assume that every patch is intended to be pushed to master as-is. That wasn't the point. All of the recent releases were intended to be pushed as-is. I had quickly responded to one part of your comment, even before I got through all in detail. That's all ;-) I apologize when it has created a bad impression for you. It wasn't intended. sw
> -----Original Message----- > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Anton > Khirnov > Sent: Saturday, November 27, 2021 10:25 AM > To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> > Subject: Re: [FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for > subtitle handling > > Quoting Andreas Rheinhardt (2021-11-27 10:06:35) > > Anton Khirnov: > > > > > > Not sure whether this was asked already - why do we need this new > > > function? Seems to me you can accomplish the same thing by just adding > > > the type field to AVFrame. Then > > > - if type is AVMEDIA_TYPE_SUBTITLE -> allocate a subtitle > > > - if type is AVMEDIA_TYPE_{VIDEO,AUDIO} -> allocate video/audio > > > - otherwise detect video/audio as we do now > > > > > > > https://ffmpeg.org/pipermail/ffmpeg-devel/2021-September/285185.html > > So IIUC the only concern is that a user might "manually" unref the frame > without calling av_frame_unref(). I would say that this is already > illegal, because we can (and did) add new allocated objects to AVFrame > that would break things if you just kept them from one frame to the > other (especially of a different type), e.g. hw_frames_ctx. > The earlier conversation (from v3 of the patchset) explains it better. This is what I quoted. Here's the link to it on patchwork: https://patchwork.ffmpeg.org/project/ffmpeg/patch/MN2PR04MB5981A454384CFC369522E875BAD79@MN2PR04MB5981.namprd04.prod.outlook.com/ sw
Anton Khirnov: > Quoting Andreas Rheinhardt (2021-11-27 10:06:35) >> Anton Khirnov: >>> >>> Not sure whether this was asked already - why do we need this new >>> function? Seems to me you can accomplish the same thing by just adding >>> the type field to AVFrame. Then >>> - if type is AVMEDIA_TYPE_SUBTITLE -> allocate a subtitle >>> - if type is AVMEDIA_TYPE_{VIDEO,AUDIO} -> allocate video/audio >>> - otherwise detect video/audio as we do now >>> >> >> https://ffmpeg.org/pipermail/ffmpeg-devel/2021-September/285185.html > > So IIUC the only concern is that a user might "manually" unref the frame > without calling av_frame_unref(). I would say that this is already > illegal, because we can (and did) add new allocated objects to AVFrame > that would break things if you just kept them from one frame to the > other (especially of a different type), e.g. hw_frames_ctx. > And how is a user then supposed to know which modifications of an AVFrame are legal or not? I think the onus is on us not to break user apps and not on the user to behave in such a way that it is compatible with all possible future additions to AVFrames (which would be impossible anyway). - Andreas
Quoting Andreas Rheinhardt (2021-11-27 10:42:31) > Anton Khirnov: > > Quoting Andreas Rheinhardt (2021-11-27 10:06:35) > >> Anton Khirnov: > >>> > >>> Not sure whether this was asked already - why do we need this new > >>> function? Seems to me you can accomplish the same thing by just adding > >>> the type field to AVFrame. Then > >>> - if type is AVMEDIA_TYPE_SUBTITLE -> allocate a subtitle > >>> - if type is AVMEDIA_TYPE_{VIDEO,AUDIO} -> allocate video/audio > >>> - otherwise detect video/audio as we do now > >>> > >> > >> https://ffmpeg.org/pipermail/ffmpeg-devel/2021-September/285185.html > > > > So IIUC the only concern is that a user might "manually" unref the frame > > without calling av_frame_unref(). I would say that this is already > > illegal, because we can (and did) add new allocated objects to AVFrame > > that would break things if you just kept them from one frame to the > > other (especially of a different type), e.g. hw_frames_ctx. > > > > And how is a user then supposed to know which modifications of an > AVFrame are legal or not? I think the onus is on us not to break user > apps and not on the user to behave in such a way that it is compatible > with all possible future additions to AVFrames (which would be > impossible anyway). We explicitly allow adding new fields to AVFrame, so valid user code has to consider the possibility. So any "substracting" operations on frames it does not fully control (e.g. those it received from decoders or filters) are potentially broken. I believe we have to assume this, otherwise we cannot extend AVFrame or other structs like we've been doing so far.
> -----Original Message----- > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Anton > Khirnov > Sent: Saturday, November 27, 2021 11:33 AM > To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> > Subject: Re: [FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for > subtitle handling > > Quoting Andreas Rheinhardt (2021-11-27 10:42:31) > > Anton Khirnov: > > > Quoting Andreas Rheinhardt (2021-11-27 10:06:35) > > >> Anton Khirnov: > > >>> > > >>> Not sure whether this was asked already - why do we need this new > > >>> function? Seems to me you can accomplish the same thing by just adding > > >>> the type field to AVFrame. Then > > >>> - if type is AVMEDIA_TYPE_SUBTITLE -> allocate a subtitle > > >>> - if type is AVMEDIA_TYPE_{VIDEO,AUDIO} -> allocate video/audio > > >>> - otherwise detect video/audio as we do now > > >>> > > >> > > >> https://ffmpeg.org/pipermail/ffmpeg-devel/2021-September/285185.html > > > > > > So IIUC the only concern is that a user might "manually" unref the frame > > > without calling av_frame_unref(). I would say that this is already > > > illegal, because we can (and did) add new allocated objects to AVFrame > > > that would break things if you just kept them from one frame to the > > > other (especially of a different type), e.g. hw_frames_ctx. > > > > > > > And how is a user then supposed to know which modifications of an > > AVFrame are legal or not? I think the onus is on us not to break user > > apps and not on the user to behave in such a way that it is compatible > > with all possible future additions to AVFrames (which would be > > impossible anyway). > > We explicitly allow adding new fields to AVFrame, so valid user code has > to consider the possibility. So any "substracting" operations on frames > it does not fully control (e.g. those it received from decoders or > filters) are potentially broken. > > I believe we have to assume this, otherwise we cannot extend AVFrame or > other structs like we've been doing so far. Back to the subtitles patch: I have added the AVMediaType field to the start of the struct. Once, because it seems too elementary to come at position sixty-something and also because I was assuming it would cause a break anway. Leaving the AVSubtitle and AVSubtitleRect structs inside libavcodec has definitely reduced the list of breakage candidates. Next candidate to consider regarding breakage would probably be the promotion of those ass functions from avcodec to avutil.. sw
diff --git a/libavcodec/avcodec.c b/libavcodec/avcodec.c index c00a9b2af8..13e3711b9c 100644 --- a/libavcodec/avcodec.c +++ b/libavcodec/avcodec.c @@ -422,25 +422,6 @@ void avcodec_flush_buffers(AVCodecContext *avctx) av_bsf_flush(avci->bsf); } -void avsubtitle_free(AVSubtitle *sub) -{ - int i; - - for (i = 0; i < sub->num_rects; i++) { - av_freep(&sub->rects[i]->data[0]); - av_freep(&sub->rects[i]->data[1]); - av_freep(&sub->rects[i]->data[2]); - av_freep(&sub->rects[i]->data[3]); - av_freep(&sub->rects[i]->text); - av_freep(&sub->rects[i]->ass); - av_freep(&sub->rects[i]); - } - - av_freep(&sub->rects); - - memset(sub, 0, sizeof(*sub)); -} - av_cold int avcodec_close(AVCodecContext *avctx) { int i; diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 7ee8bc2b7c..0c5819b116 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -35,6 +35,7 @@ #include "libavutil/frame.h" #include "libavutil/log.h" #include "libavutil/pixfmt.h" +#include "libavutil/subfmt.h" #include "libavutil/rational.h" #include "codec.h" @@ -1674,7 +1675,7 @@ typedef struct AVCodecContext { /** * Header containing style information for text subtitles. - * For SUBTITLE_ASS subtitle type, it should contain the whole ASS + * For AV_SUBTITLE_FMT_ASS subtitle type, it should contain the whole ASS * [Script Info] and [V4+ Styles] section, plus the [Events] line and * the Format line following. It shouldn't include any Dialogue line. * - encoding: Set/allocated/freed by user (before avcodec_open2()) @@ -2238,63 +2239,8 @@ typedef struct AVHWAccel { * @} */ -enum AVSubtitleType { - SUBTITLE_NONE, - - SUBTITLE_BITMAP, ///< A bitmap, pict will be set - - /** - * Plain text, the text field must be set by the decoder and is - * authoritative. ass and pict fields may contain approximations. - */ - SUBTITLE_TEXT, - - /** - * Formatted text, the ass field must be set by the decoder and is - * authoritative. pict and text fields may contain approximations. - */ - SUBTITLE_ASS, -}; - #define AV_SUBTITLE_FLAG_FORCED 0x00000001 -typedef struct AVSubtitleRect { - int x; ///< top left corner of pict, undefined when pict is not set - int y; ///< top left corner of pict, undefined when pict is not set - int w; ///< width of pict, undefined when pict is not set - int h; ///< height of pict, undefined when pict is not set - int nb_colors; ///< number of colors in pict, undefined when pict is not set - - /** - * data+linesize for the bitmap of this subtitle. - * Can be set for text/ass as well once they are rendered. - */ - uint8_t *data[4]; - int linesize[4]; - - enum AVSubtitleType type; - - char *text; ///< 0 terminated plain UTF-8 text - - /** - * 0 terminated ASS/SSA compatible event line. - * The presentation of this is unaffected by the other values in this - * struct. - */ - char *ass; - - int flags; -} AVSubtitleRect; - -typedef struct AVSubtitle { - uint16_t format; /* 0 = graphics */ - uint32_t start_display_time; /* relative to packet pts, in ms */ - uint32_t end_display_time; /* relative to packet pts, in ms */ - unsigned num_rects; - AVSubtitleRect **rects; - int64_t pts; ///< Same as packet pts, in AV_TIME_BASE -} AVSubtitle; - /** * Return the LIBAVCODEC_VERSION_INT constant. */ @@ -2430,13 +2376,6 @@ int avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **op */ int avcodec_close(AVCodecContext *avctx); -/** - * Free all allocated data in the given subtitle struct. - * - * @param sub AVSubtitle to free. - */ -void avsubtitle_free(AVSubtitle *sub); - /** * @} */ @@ -2527,6 +2466,8 @@ enum AVChromaLocation avcodec_chroma_pos_to_enum(int xpos, int ypos); * must be freed with avsubtitle_free if *got_sub_ptr is set. * @param[in,out] got_sub_ptr Zero if no subtitle could be decompressed, otherwise, it is nonzero. * @param[in] avpkt The input AVPacket containing the input buffer. + * + * @deprecated Use the new decode API (avcodec_send_packet, avcodec_receive_frame) instead. */ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub, int *got_sub_ptr, @@ -3125,6 +3066,14 @@ void avcodec_flush_buffers(AVCodecContext *avctx); */ int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes); +/** + * Return subtitle format from a codec descriptor + * + * @param codec_descriptor codec descriptor + * @return the subtitle type (e.g. bitmap, text) + */ +enum AVSubtitleType av_get_subtitle_format_from_codecdesc(const AVCodecDescriptor *codec_descriptor); + /* memory */ /** diff --git a/libavcodec/decode.c b/libavcodec/decode.c index c44724d150..788b043266 100644 --- a/libavcodec/decode.c +++ b/libavcodec/decode.c @@ -576,6 +576,34 @@ static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame) return ret; } +static int decode_subtitle2_priv(AVCodecContext *avctx, AVSubtitle *sub, + int *got_sub_ptr, const AVPacket *avpkt); + +static int decode_subtitle_shim(AVCodecContext *avctx, AVFrame *frame, const AVPacket *avpkt) +{ + int ret, got_sub_ptr = 0; + AVSubtitle subtitle = { 0 }; + + av_frame_unref(frame); + + ret = decode_subtitle2_priv(avctx, &subtitle, &got_sub_ptr, avpkt); + + if (ret >= 0 && got_sub_ptr) { + frame->type = AVMEDIA_TYPE_SUBTITLE; + frame->format = subtitle.format; + ret = av_frame_get_buffer2(frame, 0); + if (ret < 0) + return ret; + av_frame_put_subtitle(frame, &subtitle); + + frame->pkt_dts = avpkt->dts; + } + + avsubtitle_free(&subtitle); + + return ret; +} + int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt) { AVCodecInternal *avci = avctx->internal; @@ -590,6 +618,9 @@ int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacke if (avpkt && !avpkt->size && avpkt->data) return AVERROR(EINVAL); + if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE) + return decode_subtitle_shim(avctx, avci->buffer_frame, avpkt); + av_packet_unref(avci->buffer_pkt); if (avpkt && (avpkt->data || avpkt->side_data_elems)) { ret = av_packet_ref(avci->buffer_pkt, avpkt); @@ -651,7 +682,9 @@ int attribute_align_arg avcodec_receive_frame(AVCodecContext *avctx, AVFrame *fr if (avci->buffer_frame->buf[0]) { av_frame_move_ref(frame, avci->buffer_frame); - } else { + } else if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE) + return AVERROR(EAGAIN); + else { ret = decode_receive_frame_internal(avctx, frame); if (ret < 0) return ret; @@ -723,7 +756,7 @@ static void get_subtitle_defaults(AVSubtitle *sub) #define UTF8_MAX_BYTES 4 /* 5 and 6 bytes sequences should not be used */ static int recode_subtitle(AVCodecContext *avctx, AVPacket **outpkt, - AVPacket *inpkt, AVPacket *buf_pkt) + const AVPacket *inpkt, AVPacket *buf_pkt) { #if CONFIG_ICONV iconv_t cd = (iconv_t)-1; @@ -802,9 +835,8 @@ static int utf8_check(const uint8_t *str) return 1; } -int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub, - int *got_sub_ptr, - AVPacket *avpkt) +static int decode_subtitle2_priv(AVCodecContext *avctx, AVSubtitle *sub, + int *got_sub_ptr, const AVPacket *avpkt) { int ret = 0; @@ -844,10 +876,7 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub, avctx->pkt_timebase, ms); } - if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB) - sub->format = 0; - else if (avctx->codec_descriptor->props & AV_CODEC_PROP_TEXT_SUB) - sub->format = 1; + sub->format = av_get_subtitle_format_from_codecdesc(avctx->codec_descriptor); for (unsigned i = 0; i < sub->num_rects; i++) { if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_IGNORE && @@ -871,6 +900,12 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub, return ret; } +int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub, + int *got_sub_ptr, AVPacket *avpkt) +{ + return decode_subtitle2_priv(avctx, sub, got_sub_ptr, avpkt); +} + enum AVPixelFormat avcodec_default_get_format(struct AVCodecContext *avctx, const enum AVPixelFormat *fmt) { diff --git a/libavcodec/libzvbi-teletextdec.c b/libavcodec/libzvbi-teletextdec.c index 1073d6a0bd..535c82d7a5 100644 --- a/libavcodec/libzvbi-teletextdec.c +++ b/libavcodec/libzvbi-teletextdec.c @@ -27,6 +27,7 @@ #include "libavutil/intreadwrite.h" #include "libavutil/log.h" #include "libavutil/common.h" +#include "libavutil/subfmt.h" #include <libzvbi.h> diff --git a/libavcodec/pgssubdec.c b/libavcodec/pgssubdec.c index 388639a110..5709058b1c 100644 --- a/libavcodec/pgssubdec.c +++ b/libavcodec/pgssubdec.c @@ -32,6 +32,7 @@ #include "libavutil/colorspace.h" #include "libavutil/imgutils.h" #include "libavutil/opt.h" +#include "libavutil/subfmt.h" #define RGBA(r,g,b,a) (((unsigned)(a) << 24) | ((r) << 16) | ((g) << 8) | (b)) #define MAX_EPOCH_PALETTES 8 // Max 8 allowed per PGS epoch diff --git a/libavcodec/utils.c b/libavcodec/utils.c index a91a54b0dc..efe14e5a43 100644 --- a/libavcodec/utils.c +++ b/libavcodec/utils.c @@ -824,6 +824,17 @@ int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes) return FFMAX(0, duration); } +enum AVSubtitleType av_get_subtitle_format_from_codecdesc(const AVCodecDescriptor *codec_descriptor) +{ + if(codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB) + return AV_SUBTITLE_FMT_BITMAP; + + if(codec_descriptor->props & AV_CODEC_PROP_TEXT_SUB) + return AV_SUBTITLE_FMT_ASS; + + return AV_SUBTITLE_FMT_UNKNOWN; +} + int av_get_audio_frame_duration2(AVCodecParameters *par, int frame_bytes) { int duration = get_audio_frame_duration(par->codec_id, par->sample_rate, diff --git a/libavfilter/vf_subtitles.c b/libavfilter/vf_subtitles.c index 377160c72b..6b54cf669b 100644 --- a/libavfilter/vf_subtitles.c +++ b/libavfilter/vf_subtitles.c @@ -35,14 +35,12 @@ # include "libavformat/avformat.h" #endif #include "libavutil/avstring.h" -#include "libavutil/imgutils.h" #include "libavutil/opt.h" #include "libavutil/parseutils.h" #include "drawutils.h" #include "avfilter.h" #include "internal.h" #include "formats.h" -#include "video.h" typedef struct AssContext { const AVClass *class; @@ -292,6 +290,29 @@ static int attachment_is_font(AVStream * st) return 0; } +static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame, AVPacket *pkt) +{ + int ret; + + *got_frame = 0; + + if (pkt) { + ret = avcodec_send_packet(avctx, pkt); + // In particular, we don't expect AVERROR(EAGAIN), because we read all + // decoded frames with avcodec_receive_frame() until done. + if (ret < 0 && ret != AVERROR_EOF) + return ret; + } + + ret = avcodec_receive_frame(avctx, frame); + if (ret < 0 && ret != AVERROR(EAGAIN)) + return ret; + if (ret >= 0) + *got_frame = 1; + + return 0; +} + AVFILTER_DEFINE_CLASS(subtitles); static av_cold int init_subtitles(AVFilterContext *ctx) @@ -306,6 +327,7 @@ static av_cold int init_subtitles(AVFilterContext *ctx) AVStream *st; AVPacket pkt; AssContext *ass = ctx->priv; + enum AVSubtitleType subtitle_format; /* Init libass */ ret = init(ctx); @@ -386,13 +408,17 @@ static av_cold int init_subtitles(AVFilterContext *ctx) ret = AVERROR_DECODER_NOT_FOUND; goto end; } + dec_desc = avcodec_descriptor_get(st->codecpar->codec_id); - if (dec_desc && !(dec_desc->props & AV_CODEC_PROP_TEXT_SUB)) { + subtitle_format = av_get_subtitle_format_from_codecdesc(dec_desc); + + if (subtitle_format != AV_SUBTITLE_FMT_ASS) { av_log(ctx, AV_LOG_ERROR, - "Only text based subtitles are currently supported\n"); - ret = AVERROR_PATCHWELCOME; + "Only text based subtitles are supported by this filter\n"); + ret = AVERROR_INVALIDDATA; goto end; } + if (ass->charenc) av_dict_set(&codec_opts, "sub_charenc", ass->charenc, 0); @@ -448,18 +474,18 @@ static av_cold int init_subtitles(AVFilterContext *ctx) dec_ctx->subtitle_header_size); while (av_read_frame(fmt, &pkt) >= 0) { int i, got_subtitle; - AVSubtitle sub = {0}; + AVFrame *sub = av_frame_alloc(); if (pkt.stream_index == sid) { - ret = avcodec_decode_subtitle2(dec_ctx, &sub, &got_subtitle, &pkt); + ret = decode(dec_ctx, sub, &got_subtitle, &pkt); if (ret < 0) { av_log(ctx, AV_LOG_WARNING, "Error decoding: %s (ignored)\n", av_err2str(ret)); } else if (got_subtitle) { - const int64_t start_time = av_rescale_q(sub.pts, AV_TIME_BASE_Q, av_make_q(1, 1000)); - const int64_t duration = sub.end_display_time; - for (i = 0; i < sub.num_rects; i++) { - char *ass_line = sub.rects[i]->ass; + const int64_t start_time = av_rescale_q(sub->subtitle_pts, AV_TIME_BASE_Q, av_make_q(1, 1000)); + const int64_t duration = sub->subtitle_end_time; + for (i = 0; i < sub->num_subtitle_areas; i++) { + char *ass_line = sub->subtitle_areas[i]->ass; if (!ass_line) break; ass_process_chunk(ass->track, ass_line, strlen(ass_line), @@ -468,7 +494,7 @@ static av_cold int init_subtitles(AVFilterContext *ctx) } } av_packet_unref(&pkt); - avsubtitle_free(&sub); + av_frame_free(&sub); } end: diff --git a/libavformat/utils.c b/libavformat/utils.c index 7840e8717c..d9b54c7725 100644 --- a/libavformat/utils.c +++ b/libavformat/utils.c @@ -32,6 +32,7 @@ #include "libavutil/opt.h" #include "libavutil/parseutils.h" #include "libavutil/pixfmt.h" +#include "libavutil/subfmt.h" #include "libavutil/thread.h" #include "libavutil/time.h" diff --git a/libavutil/Makefile b/libavutil/Makefile index 529046dbc8..7e79936876 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -74,6 +74,7 @@ HEADERS = adler32.h \ sha512.h \ spherical.h \ stereo3d.h \ + subfmt.h \ threadmessage.h \ time.h \ timecode.h \ @@ -159,6 +160,7 @@ OBJS = adler32.o \ slicethread.o \ spherical.o \ stereo3d.o \ + subfmt.o \ threadmessage.o \ time.o \ timecode.o \ diff --git a/libavutil/frame.c b/libavutil/frame.c index d4d3ad6988..dfe9c269a6 100644 --- a/libavutil/frame.c +++ b/libavutil/frame.c @@ -26,6 +26,7 @@ #include "imgutils.h" #include "mem.h" #include "samplefmt.h" +#include "subfmt.h" #include "hwcontext.h" #define CHECK_CHANNELS_CONSISTENCY(frame) \ @@ -50,6 +51,9 @@ const char *av_get_colorspace_name(enum AVColorSpace val) return name[val]; } #endif + +static int frame_copy_subtitles(AVFrame *dst, const AVFrame *src, int copy_data); + static void get_frame_defaults(AVFrame *frame) { if (frame->extended_data != frame->data) @@ -72,7 +76,12 @@ static void get_frame_defaults(AVFrame *frame) frame->colorspace = AVCOL_SPC_UNSPECIFIED; frame->color_range = AVCOL_RANGE_UNSPECIFIED; frame->chroma_location = AVCHROMA_LOC_UNSPECIFIED; - frame->flags = 0; + frame->subtitle_start_time = 0; + frame->subtitle_end_time = 0; + frame->num_subtitle_areas = 0; + frame->subtitle_areas = NULL; + frame->subtitle_pts = 0; + frame->subtitle_header = NULL; } static void free_side_data(AVFrameSideData **ptr_sd) @@ -243,23 +252,55 @@ static int get_audio_buffer(AVFrame *frame, int align) } +static int get_subtitle_buffer(AVFrame *frame) +{ + // Buffers in AVFrame->buf[] are not used in case of subtitle frames. + // To accomodate with existing code, checking ->buf[0] to determine + // whether a frame is ref-counted or has data, we're adding a 1-byte + // buffer here, which marks the subtitle frame to contain data. + frame->buf[0] = av_buffer_alloc(1); + if (!frame->buf[0]) { + av_frame_unref(frame); + return AVERROR(ENOMEM); + } + + frame->extended_data = frame->data; + + return 0; +} + int av_frame_get_buffer(AVFrame *frame, int align) +{ + if (frame->width > 0 && frame->height > 0) + frame->type = AVMEDIA_TYPE_VIDEO; + else if (frame->nb_samples > 0 && (frame->channel_layout || frame->channels > 0)) + frame->type = AVMEDIA_TYPE_AUDIO; + + return av_frame_get_buffer2(frame, align); +} + +int av_frame_get_buffer2(AVFrame *frame, int align) { if (frame->format < 0) return AVERROR(EINVAL); - if (frame->width > 0 && frame->height > 0) + switch(frame->type) { + case AVMEDIA_TYPE_VIDEO: return get_video_buffer(frame, align); - else if (frame->nb_samples > 0 && (frame->channel_layout || frame->channels > 0)) + case AVMEDIA_TYPE_AUDIO: return get_audio_buffer(frame, align); - - return AVERROR(EINVAL); + case AVMEDIA_TYPE_SUBTITLE: + return get_subtitle_buffer(frame); + default: + return AVERROR(EINVAL); + } } static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy) { int ret, i; + dst->type = src->type; dst->key_frame = src->key_frame; dst->pict_type = src->pict_type; dst->sample_aspect_ratio = src->sample_aspect_ratio; @@ -290,6 +331,12 @@ static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy) dst->colorspace = src->colorspace; dst->color_range = src->color_range; dst->chroma_location = src->chroma_location; + dst->subtitle_start_time = src->subtitle_start_time; + dst->subtitle_end_time = src->subtitle_end_time; + dst->subtitle_pts = src->subtitle_pts; + + if ((ret = av_buffer_replace(&dst->subtitle_header, src->subtitle_header)) < 0) + return ret; av_dict_copy(&dst->metadata, src->metadata, 0); @@ -331,6 +378,7 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src) av_assert1(dst->width == 0 && dst->height == 0); av_assert1(dst->channels == 0); + dst->type = src->type; dst->format = src->format; dst->width = src->width; dst->height = src->height; @@ -344,7 +392,7 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src) /* duplicate the frame data if it's not refcounted */ if (!src->buf[0]) { - ret = av_frame_get_buffer(dst, 0); + ret = av_frame_get_buffer2(dst, 0); if (ret < 0) goto fail; @@ -366,6 +414,10 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src) } } + /* copy subtitle areas */ + if (src->type == AVMEDIA_TYPE_SUBTITLE) + frame_copy_subtitles(dst, src, 0); + if (src->extended_buf) { dst->extended_buf = av_calloc(src->nb_extended_buf, sizeof(*dst->extended_buf)); @@ -436,7 +488,7 @@ AVFrame *av_frame_clone(const AVFrame *src) void av_frame_unref(AVFrame *frame) { - int i; + unsigned i, n; if (!frame) return; @@ -455,6 +507,21 @@ void av_frame_unref(AVFrame *frame) av_buffer_unref(&frame->opaque_ref); av_buffer_unref(&frame->private_ref); + av_buffer_unref(&frame->subtitle_header); + + for (i = 0; i < frame->num_subtitle_areas; i++) { + AVSubtitleArea *area = frame->subtitle_areas[i]; + + for (n = 0; n < FF_ARRAY_ELEMS(area->buf); n++) + av_buffer_unref(&area->buf[n]); + + av_freep(&area->text); + av_freep(&area->ass); + av_free(area); + } + + av_freep(&frame->subtitle_areas); + get_frame_defaults(frame); } @@ -472,7 +539,8 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src) int av_frame_is_writable(AVFrame *frame) { - int i, ret = 1; + AVSubtitleArea *area; + unsigned i, n, ret = 1; /* assume non-refcounted frames are not writable */ if (!frame->buf[0]) @@ -484,6 +552,15 @@ int av_frame_is_writable(AVFrame *frame) for (i = 0; i < frame->nb_extended_buf; i++) ret &= !!av_buffer_is_writable(frame->extended_buf[i]); + for (i = 0; i < frame->num_subtitle_areas; i++) { + area = frame->subtitle_areas[i]; + if (area) { + for (n = 0; n < FF_ARRAY_ELEMS(area->buf); n++) + if (area->buf[n]) + ret &= !!av_buffer_is_writable(area->buf[n]); + } + } + return ret; } @@ -499,6 +576,7 @@ int av_frame_make_writable(AVFrame *frame) return 0; memset(&tmp, 0, sizeof(tmp)); + tmp.type = frame->type; tmp.format = frame->format; tmp.width = frame->width; tmp.height = frame->height; @@ -509,7 +587,7 @@ int av_frame_make_writable(AVFrame *frame) if (frame->hw_frames_ctx) ret = av_hwframe_get_buffer(frame->hw_frames_ctx, &tmp, 0); else - ret = av_frame_get_buffer(&tmp, 0); + ret = av_frame_get_buffer2(&tmp, 0); if (ret < 0) return ret; @@ -544,14 +622,22 @@ AVBufferRef *av_frame_get_plane_buffer(AVFrame *frame, int plane) uint8_t *data; int planes, i; - if (frame->nb_samples) { - int channels = frame->channels; - if (!channels) - return NULL; - CHECK_CHANNELS_CONSISTENCY(frame); - planes = av_sample_fmt_is_planar(frame->format) ? channels : 1; - } else + switch(frame->type) { + case AVMEDIA_TYPE_VIDEO: planes = 4; + break; + case AVMEDIA_TYPE_AUDIO: + { + int channels = frame->channels; + if (!channels) + return NULL; + CHECK_CHANNELS_CONSISTENCY(frame); + planes = av_sample_fmt_is_planar(frame->format) ? channels : 1; + break; + } + default: + return NULL; + } if (plane < 0 || plane >= planes || !frame->extended_data[plane]) return NULL; @@ -675,17 +761,39 @@ static int frame_copy_audio(AVFrame *dst, const AVFrame *src) return 0; } +static int frame_copy_subtitles(AVFrame *dst, const AVFrame *src, int copy_data) +{ + dst->format = src->format; + + dst->num_subtitle_areas = src->num_subtitle_areas; + + if (src->num_subtitle_areas) { + dst->subtitle_areas = av_malloc_array(src->num_subtitle_areas, sizeof(AVSubtitleArea *)); + + for (unsigned i = 0; i < src->num_subtitle_areas; i++) { + dst->subtitle_areas[i] = av_mallocz(sizeof(AVSubtitleArea)); + av_subtitle_area2area(dst->subtitle_areas[i], src->subtitle_areas[i], copy_data); + } + } + + return 0; +} + int av_frame_copy(AVFrame *dst, const AVFrame *src) { if (dst->format != src->format || dst->format < 0) return AVERROR(EINVAL); - if (dst->width > 0 && dst->height > 0) + switch(dst->type) { + case AVMEDIA_TYPE_VIDEO: return frame_copy_video(dst, src); - else if (dst->nb_samples > 0 && dst->channels > 0) + case AVMEDIA_TYPE_AUDIO: return frame_copy_audio(dst, src); - - return AVERROR(EINVAL); + case AVMEDIA_TYPE_SUBTITLE: + return frame_copy_subtitles(dst, src, 1); + default: + return AVERROR(EINVAL); + } } void av_frame_remove_side_data(AVFrame *frame, enum AVFrameSideDataType type) @@ -832,3 +940,49 @@ int av_frame_apply_cropping(AVFrame *frame, int flags) return 0; } + +/** + * Copies subtitle data from AVSubtitle (deprecated) to AVFrame + * + * @note This is a compatibility method for conversion to the legacy API + */ +void av_frame_put_subtitle(AVFrame *frame, const AVSubtitle *sub) +{ + frame->format = sub->format; + frame->subtitle_start_time = sub->start_display_time; + frame->subtitle_end_time = sub->end_display_time; + frame->subtitle_pts = sub->pts; + frame->num_subtitle_areas = sub->num_rects; + + if (sub->num_rects) { + frame->subtitle_areas = av_malloc_array(sub->num_rects, sizeof(AVSubtitleArea *)); + + for (unsigned i = 0; i < sub->num_rects; i++) { + frame->subtitle_areas[i] = av_mallocz(sizeof(AVSubtitleArea)); + av_subtitle_rect2area(frame->subtitle_areas[i], sub->rects[i]); + } + } +} + +/** + * Copies subtitle data from AVFrame to AVSubtitle (deprecated) + * + * @note This is a compatibility method for conversion to the legacy API + */ +void av_frame_get_subtitle(AVSubtitle *sub, AVFrame *frame) +{ + sub->start_display_time = frame->subtitle_start_time; + sub->end_display_time = frame->subtitle_end_time; + sub->pts = frame->subtitle_pts; + sub->num_rects = frame->num_subtitle_areas; + + if (frame->num_subtitle_areas) { + sub->rects = av_malloc_array(frame->num_subtitle_areas, sizeof(AVSubtitle *)); + + for (unsigned i = 0; i < frame->num_subtitle_areas; i++) { + sub->rects[i] = av_mallocz(sizeof(AVSubtitleRect)); + av_subtitle_area2rect(sub->rects[i], frame->subtitle_areas[i]); + } + } +} + diff --git a/libavutil/frame.h b/libavutil/frame.h index 753234792e..742f4ba07e 100644 --- a/libavutil/frame.h +++ b/libavutil/frame.h @@ -34,6 +34,7 @@ #include "rational.h" #include "samplefmt.h" #include "pixfmt.h" +#include "subfmt.h" #include "version.h" @@ -278,7 +279,7 @@ typedef struct AVRegionOfInterest { } AVRegionOfInterest; /** - * This structure describes decoded (raw) audio or video data. + * This structure describes decoded (raw) audio, video or subtitle data. * * AVFrame must be allocated using av_frame_alloc(). Note that this only * allocates the AVFrame itself, the buffers for the data must be managed @@ -309,6 +310,13 @@ typedef struct AVRegionOfInterest { */ typedef struct AVFrame { #define AV_NUM_DATA_POINTERS 8 + /** + * Media type of the frame (audio, video, subtitles..) + * + * See AVMEDIA_TYPE_xxx + */ + enum AVMediaType type; + /** * pointer to the picture/channel planes. * This might be different from the first allocated byte. For video, @@ -390,7 +398,7 @@ typedef struct AVFrame { /** * format of the frame, -1 if unknown or unset * Values correspond to enum AVPixelFormat for video frames, - * enum AVSampleFormat for audio) + * enum AVSampleFormat for audio, AVSubtitleType for subtitles) */ int format; @@ -481,6 +489,39 @@ typedef struct AVFrame { */ uint64_t channel_layout; + /** + * Display start time, relative to packet pts, in ms. + */ + uint32_t subtitle_start_time; + + /** + * Display end time, relative to packet pts, in ms. + */ + uint32_t subtitle_end_time; + + /** + * Number of items in the @ref subtitle_areas array. + */ + unsigned num_subtitle_areas; + + /** + * Array of subtitle areas, may be empty. + */ + AVSubtitleArea **subtitle_areas; + + /** + * Usually the same as packet pts, in AV_TIME_BASE. + * + * @deprecated This is kept for compatibility reasons and corresponds to + * AVSubtitle->pts. Might be removed in the future. + */ + int64_t subtitle_pts; + + /** + * Header containing style information for text subtitles. + */ + AVBufferRef *subtitle_header; + /** * AVBuffer references backing the data for this frame. If all elements of * this array are NULL, then this frame is not reference counted. This array @@ -740,6 +781,8 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src); /** * Allocate new buffer(s) for audio or video data. * + * Note: For subtitle data, use av_frame_get_buffer2 + * * The following fields must be set on frame before calling this function: * - format (pixel format for video, sample format for audio) * - width and height for video @@ -759,9 +802,39 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src); * recommended to pass 0 here unless you know what you are doing. * * @return 0 on success, a negative AVERROR on error. + * + * @deprecated Use @ref av_frame_get_buffer2 instead and set @ref AVFrame.type + * before calling. */ +attribute_deprecated int av_frame_get_buffer(AVFrame *frame, int align); +/** + * Allocate new buffer(s) for audio, video or subtitle data. + * + * The following fields must be set on frame before calling this function: + * - format (pixel format for video, sample format for audio) + * - width and height for video + * - nb_samples and channel_layout for audio + * - type (AVMediaType) + * + * This function will fill AVFrame.data and AVFrame.buf arrays and, if + * necessary, allocate and fill AVFrame.extended_data and AVFrame.extended_buf. + * For planar formats, one buffer will be allocated for each plane. + * + * @warning: if frame already has been allocated, calling this function will + * leak memory. In addition, undefined behavior can occur in certain + * cases. + * + * @param frame frame in which to store the new buffers. + * @param align Required buffer size alignment. If equal to 0, alignment will be + * chosen automatically for the current CPU. It is highly + * recommended to pass 0 here unless you know what you are doing. + * + * @return 0 on success, a negative AVERROR on error. + */ +int av_frame_get_buffer2(AVFrame *frame, int align); + /** * Check if the frame data is writable. * @@ -863,6 +936,22 @@ AVFrameSideData *av_frame_get_side_data(const AVFrame *frame, */ void av_frame_remove_side_data(AVFrame *frame, enum AVFrameSideDataType type); +/** + * Copies subtitle data from AVSubtitle to AVFrame. + * + * @deprecated This is a compatibility method for interoperability with + * the legacy subtitle API. + */ +void av_frame_put_subtitle(AVFrame *frame, const AVSubtitle *sub); + +/** + * Copies subtitle data from AVFrame to AVSubtitle. + * + * @deprecated This is a compatibility method for interoperability with + * the legacy subtitle API. + */ +void av_frame_get_subtitle(AVSubtitle *sub, AVFrame *frame); + /** * Flags for frame cropping. diff --git a/libavutil/subfmt.c b/libavutil/subfmt.c new file mode 100644 index 0000000000..f68907a644 --- /dev/null +++ b/libavutil/subfmt.c @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2021 softworkz + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <string.h> +#include "common.h" +#include "subfmt.h" + +typedef struct SubtitleFmtInfo { + enum AVSubtitleType fmt; + const char* name; +} SubtitleFmtInfo; + +static SubtitleFmtInfo sub_fmt_info[AV_SUBTITLE_FMT_NB] = { + {.fmt = AV_SUBTITLE_FMT_UNKNOWN, .name = "Unknown subtitle format", }, + {.fmt = AV_SUBTITLE_FMT_BITMAP, .name = "Graphical subtitles", }, + {.fmt = AV_SUBTITLE_FMT_TEXT, .name = "Text subtitles (plain)", }, + {.fmt = AV_SUBTITLE_FMT_ASS, .name = "Text subtitles (ass)", }, +}; + +const char *av_get_subtitle_fmt_name(enum AVSubtitleType sub_fmt) +{ + if (sub_fmt < 0 || sub_fmt >= AV_SUBTITLE_FMT_NB) + return NULL; + return sub_fmt_info[sub_fmt].name; +} + +enum AVSubtitleType av_get_subtitle_fmt(const char *name) +{ + for (int i = 0; i < AV_SUBTITLE_FMT_NB; i++) + if (!strcmp(sub_fmt_info[i].name, name)) + return i; + return AV_SUBTITLE_FMT_NONE; +} + +int av_subtitle_area2rect(AVSubtitleRect *dst, const AVSubtitleArea *src) +{ + dst->x = src->x; + dst->y = src->y; + dst->w = src->w; + dst->h = src->h; + dst->nb_colors = src->nb_colors; + dst->type = src->type; + dst->flags = src->flags; + + switch (dst->type) { + case AV_SUBTITLE_FMT_BITMAP: + + if (src->h > 0 && src->w > 0 && src->buf[0]) { + uint32_t *pal; + AVBufferRef *buf = src->buf[0]; + dst->data[0] = av_mallocz(buf->size); + memcpy(dst->data[0], buf->data, buf->size); + dst->linesize[0] = src->linesize[0]; + + dst->data[1] = av_mallocz(256 * 4); + pal = (uint32_t *)dst->data[1]; + + for (unsigned i = 0; i < 256; i++) { + pal[i] = src->pal[i]; + } + } + + break; + case AV_SUBTITLE_FMT_TEXT: + + if (src->text) + dst->text = av_strdup(src->text); + else + dst->text = av_strdup(""); + + if (!dst->text) + return AVERROR(ENOMEM); + + break; + case AV_SUBTITLE_FMT_ASS: + + if (src->ass) + dst->ass = av_strdup(src->ass); + else + dst->ass = av_strdup(""); + + if (!dst->ass) + return AVERROR(ENOMEM); + + break; + default: + + av_log(NULL, AV_LOG_ERROR, "Subtitle rect has invalid format: %d", dst->type); + return AVERROR(ENOMEM); + } + + return 0; +} + +int av_subtitle_rect2area(AVSubtitleArea *dst, const AVSubtitleRect *src) +{ + dst->x = src->x; + dst->y = src->y; + dst->w = src->w; + dst->h = src->h; + dst->nb_colors = src->nb_colors; + dst->type = src->type; + dst->flags = src->flags; + + switch (dst->type) { + case AV_SUBTITLE_FMT_BITMAP: + + if (src->h > 0 && src->w > 0 && src->data[0]) { + AVBufferRef *buf = av_buffer_allocz(src->h * src->linesize[0]); + memcpy(buf->data, src->data[0], buf->size); + + dst->buf[0] = buf; + dst->linesize[0] = src->linesize[0]; + } + + if (src->data[1]) { + uint32_t *pal = (uint32_t *)src->data[1]; + + for (unsigned i = 0; i < 256; i++) { + dst->pal[i] = pal[i]; + } + } + + break; + case AV_SUBTITLE_FMT_TEXT: + + if (src->text) { + dst->text = av_strdup(src->text); + if (!dst->text) + return AVERROR(ENOMEM); + } + + break; + case AV_SUBTITLE_FMT_ASS: + + if (src->ass) { + dst->ass = av_strdup(src->ass); + if (!dst->ass) + return AVERROR(ENOMEM); + } + + break; + default: + + av_log(NULL, AV_LOG_ERROR, "Subtitle area has invalid format: %d", dst->type); + return AVERROR(ENOMEM); + } + + return 0; +} + +int av_subtitle_area2area(AVSubtitleArea *dst, const AVSubtitleArea *src, int copy_data) +{ + dst->x = src->x; + dst->y = src->y; + dst->w = src->w; + dst->h = src->h; + dst->nb_colors = src->nb_colors; + dst->type = src->type; + dst->flags = src->flags; + + switch (dst->type) { + case AV_SUBTITLE_FMT_BITMAP: + + if (src->h > 0 && src->w > 0 && src->buf[0]) { + dst->buf[0] = av_buffer_ref(src->buf[0]); + if (!dst->buf[0]) + return AVERROR(ENOMEM); + + if (copy_data) { + const int ret = av_buffer_make_writable(&dst->buf[0]); + if (ret < 0) + return ret; + } + + dst->linesize[0] = src->linesize[0]; + } + + for (unsigned i = 0; i < 256; i++) + dst->pal[i] = src->pal[i]; + + break; + case AV_SUBTITLE_FMT_TEXT: + + if (src->text) { + dst->text = av_strdup(src->text); + if (!dst->text) + return AVERROR(ENOMEM); + } + + break; + case AV_SUBTITLE_FMT_ASS: + + if (src->ass) { + dst->ass = av_strdup(src->ass); + if (!dst->ass) + return AVERROR(ENOMEM); + } + + break; + default: + + av_log(NULL, AV_LOG_ERROR, "Subtitle area has invalid format: %d", dst->type); + return AVERROR(ENOMEM); + } + + return 0; +} + +void avsubtitle_free(AVSubtitle *sub) +{ + for (unsigned i = 0; i < sub->num_rects; i++) { + av_freep(&sub->rects[i]->data[0]); + av_freep(&sub->rects[i]->data[1]); + av_freep(&sub->rects[i]->data[2]); + av_freep(&sub->rects[i]->data[3]); + av_freep(&sub->rects[i]->text); + av_freep(&sub->rects[i]->ass); + av_freep(&sub->rects[i]); + } + + av_freep(&sub->rects); + + memset(sub, 0, sizeof(*sub)); +} + diff --git a/libavutil/subfmt.h b/libavutil/subfmt.h new file mode 100644 index 0000000000..372e3db413 --- /dev/null +++ b/libavutil/subfmt.h @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2021 softworkz + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_SUBFMT_H +#define AVUTIL_SUBFMT_H + +#include <inttypes.h> + +#include "buffer.h" + +enum AVSubtitleType { + + /** + * Subtitle format unknown. + */ + AV_SUBTITLE_FMT_NONE = -1, + + /** + * Subtitle format unknown. + */ + AV_SUBTITLE_FMT_UNKNOWN = 0, + + /** + * Bitmap area in AVSubtitleRect.data, pixfmt AV_PIX_FMT_PAL8. + */ + AV_SUBTITLE_FMT_BITMAP = 1, + SUBTITLE_BITMAP = 1, ///< Deprecated, use AV_SUBTITLE_FMT_BITMAP instead. + + /** + * Plain text in AVSubtitleRect.text. + */ + AV_SUBTITLE_FMT_TEXT = 2, + SUBTITLE_TEXT = 2, ///< Deprecated, use AV_SUBTITLE_FMT_TEXT instead. + + /** + * Text Formatted as per ASS specification, contained AVSubtitleRect.ass. + */ + AV_SUBTITLE_FMT_ASS = 3, + SUBTITLE_ASS = 3, ///< Deprecated, use AV_SUBTITLE_FMT_ASS instead. + + AV_SUBTITLE_FMT_NB, +}; + +typedef struct AVSubtitleArea { +#define AV_NUM_BUFFER_POINTERS 1 + + enum AVSubtitleType type; + int flags; + + int x; ///< top left corner of area. + int y; ///< top left corner of area. + int w; ///< width of area. + int h; ///< height of area. + int nb_colors; ///< number of colors in bitmap palette (@ref pal). + + /** + * Buffers and line sizes for the bitmap of this subtitle. + * + * @{ + */ + AVBufferRef *buf[AV_NUM_BUFFER_POINTERS]; + int linesize[AV_NUM_BUFFER_POINTERS]; + /** + * @} + */ + + uint32_t pal[256]; ///< RGBA palette for the bitmap. + + char *text; ///< 0-terminated plain UTF-8 text + char *ass; ///< 0-terminated ASS/SSA compatible event line. + +} AVSubtitleArea; + +typedef struct AVSubtitleRect { + unsigned nb_refs; + int x; ///< top left corner of pict, undefined when pict is not set + int y; ///< top left corner of pict, undefined when pict is not set + int w; ///< width of pict, undefined when pict is not set + int h; ///< height of pict, undefined when pict is not set + int nb_colors; ///< number of colors in pict, undefined when pict is not set + + /** + * data+linesize for the bitmap of this subtitle. + */ + uint8_t *data[4]; + int linesize[4]; + + enum AVSubtitleType type; + + char *text; ///< 0 terminated plain UTF-8 text + + /** + * 0-terminated ASS/SSA compatible event line. + */ + char *ass; + + int flags; +} AVSubtitleRect; + +typedef struct AVSubtitle { + uint16_t format; /* 0 = graphics */ + uint32_t start_display_time; /* relative to packet pts, in ms */ + uint32_t end_display_time; /* relative to packet pts, in ms */ + unsigned num_rects; + AVSubtitleRect **rects; + int64_t pts; ///< Same as packet pts, in AV_TIME_BASE +} AVSubtitle; + +/** + * Return the name of sub_fmt, or NULL if sub_fmt is not + * recognized. + */ +const char *av_get_subtitle_fmt_name(enum AVSubtitleType sub_fmt); + +/** + * Return a subtitle format corresponding to name, or AV_SUBTITLE_FMT_NONE + * on error. + * + * @param name Subtitle format name. + */ +enum AVSubtitleType av_get_subtitle_fmt(const char *name); + +/** + * Copy a subtitle area. + * + * @param dst The target area. + * @param src The source area. + * @param copy_data Determines whether to copy references or actual + * data from @ref AVSubtitleArea.buf. + * + * @return 0 on success. + */ +int av_subtitle_area2area(AVSubtitleArea *dst, const AVSubtitleArea *src, int copy_data); + +/** + * Copy data from @ref AVSubtitleArea to @ref AVSubtitleRect. + * + * @param dst The target rect (@ref AVSubtitleRect). + * @param src The source area (@ref AVSubtitleArea). + * + * @return 0 on success. + * + * @deprecated This is a compatibility method for interoperability with + * the legacy subtitle API. + */ +int av_subtitle_area2rect(AVSubtitleRect *dst, const AVSubtitleArea *src); + +/** + * Copy data from @ref AVSubtitleRect to @ref AVSubtitleArea. + * + * @param dst The source area (@ref AVSubtitleArea). + * @param src The target rect (@ref AVSubtitleRect). + * + * @return 0 on success. + * + * @deprecated This is a compatibility method for interoperability with + * the legacy subtitle API. + */ +int av_subtitle_rect2area(AVSubtitleArea *dst, const AVSubtitleRect *src); + +/** + * Free all allocated data in the given subtitle struct. + * + * @param sub AVSubtitle to free. + */ +void avsubtitle_free(AVSubtitle *sub); + +#endif /* AVUTIL_SUBFMT_H */
Root commit for adding subtitle filtering capabilities. In detail: - Add type (AVMediaType) field to AVFrame Replaces previous way of distinction which was based on checking width and height to determine whether a frame is audio or video - Add subtitle fields to AVFrame - Add new struct AVSubtitleArea, similar to AVSubtitleRect, but different allocation logic. Cannot and must not be used interchangeably, hence the new struct - Move enum AVSubtitleType, AVSubtitle and AVSubtitleRect to avutil - Add public-named members to enum AVSubtitleType (AV_SUBTITLE_FMT_) - Add avcodec_decode_subtitle3 which takes subtitle frames, serving as compatibility shim to legacy subtitle decoding - Add additional methods for conversion between old and new API Signed-off-by: softworkz <softworkz@hotmail.com> --- libavcodec/avcodec.c | 19 --- libavcodec/avcodec.h | 75 ++-------- libavcodec/decode.c | 53 +++++-- libavcodec/libzvbi-teletextdec.c | 1 + libavcodec/pgssubdec.c | 1 + libavcodec/utils.c | 11 ++ libavfilter/vf_subtitles.c | 50 +++++-- libavformat/utils.c | 1 + libavutil/Makefile | 2 + libavutil/frame.c | 194 +++++++++++++++++++++--- libavutil/frame.h | 93 +++++++++++- libavutil/subfmt.c | 243 +++++++++++++++++++++++++++++++ libavutil/subfmt.h | 185 +++++++++++++++++++++++ 13 files changed, 803 insertions(+), 125 deletions(-) create mode 100644 libavutil/subfmt.c create mode 100644 libavutil/subfmt.h