diff mbox series

[FFmpeg-devel,v6,03/25] avcodec/subtitles: Introduce new frame-based subtitle decoding API

Message ID 0a685a6b19a6e0dd8bd0d4f5dd8ceff4d8a1561f.1656261322.git.ffmpegagent@gmail.com
State New
Headers show
Series Subtitle Filtering 2022 | expand

Checks

Context Check Description
yinshiyou/make_fate_loongarch64 success Make fate finished
yinshiyou/make_loongarch64 warning New warnings during build
andriy/make_fate_x86 success Make fate finished
andriy/make_x86 warning New warnings during build

Commit Message

Aman Karmani June 26, 2022, 4:35 p.m. UTC
From: softworkz <softworkz@hotmail.com>

- Modify avcodec_send_packet() to support subtitles via the regular
  frame based decoding API
- Add decode_subtitle_shim() which takes subtitle frames,
  and serves as a compatibility shim to the legacy subtitle decoding
  API until all subtitle decoders are migrated to the frame-based API
- Add additional methods for conversion between old and new API

Signed-off-by: softworkz <softworkz@hotmail.com>
---
 libavcodec/avcodec.c  |   8 ++
 libavcodec/avcodec.h  |  10 ++-
 libavcodec/decode.c   |  60 ++++++++++++--
 libavcodec/internal.h |  16 ++++
 libavcodec/utils.c    | 184 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 269 insertions(+), 9 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/avcodec.c b/libavcodec/avcodec.c
index 5f6e71a39e..0a1d961fc6 100644
--- a/libavcodec/avcodec.c
+++ b/libavcodec/avcodec.c
@@ -358,6 +358,14 @@  FF_DISABLE_DEPRECATION_WARNINGS
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
+        // Set the subtitle type from the codec descriptor in case the decoder hasn't done itself
+        if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE && avctx->subtitle_type == AV_SUBTITLE_FMT_UNKNOWN) {
+            if(avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB)
+                avctx->subtitle_type = AV_SUBTITLE_FMT_BITMAP;
+            if(avctx->codec_descriptor->props & AV_CODEC_PROP_TEXT_SUB)
+                 avctx->subtitle_type = AV_SUBTITLE_FMT_ASS;
+        }
+
 #if FF_API_AVCTX_TIMEBASE
         if (avctx->framerate.num > 0 && avctx->framerate.den > 0)
             avctx->time_base = av_inv_q(av_mul_q(avctx->framerate, (AVRational){avctx->ticks_per_frame, 1}));
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 56d551f92d..de87b0406b 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -1698,7 +1698,7 @@  typedef struct AVCodecContext {
 
     /**
      * Header containing style information for text subtitles.
-     * For SUBTITLE_ASS subtitle type, it should contain the whole ASS
+     * For AV_SUBTITLE_FMT_ASS subtitle type, it should contain the whole ASS
      * [Script Info] and [V4+ Styles] section, plus the [Events] line and
      * the Format line following. It shouldn't include any Dialogue line.
      * - encoding: Set/allocated/freed by user (before avcodec_open2())
@@ -2056,6 +2056,8 @@  typedef struct AVCodecContext {
      *             The decoder can then override during decoding as needed.
      */
     AVChannelLayout ch_layout;
+
+    enum AVSubtitleType subtitle_type;
 } AVCodecContext;
 
 /**
@@ -2432,7 +2434,10 @@  int avcodec_close(AVCodecContext *avctx);
  * Free all allocated data in the given subtitle struct.
  *
  * @param sub AVSubtitle to free.
+ *
+ * @deprecated Use the regular frame based encode and decode APIs instead.
  */
+attribute_deprecated
 void avsubtitle_free(AVSubtitle *sub);
 
 /**
@@ -2525,7 +2530,10 @@  enum AVChromaLocation avcodec_chroma_pos_to_enum(int xpos, int ypos);
  *                 must be freed with avsubtitle_free if *got_sub_ptr is set.
  * @param[in,out] got_sub_ptr Zero if no subtitle could be decompressed, otherwise, it is nonzero.
  * @param[in] avpkt The input AVPacket containing the input buffer.
+ *
+ * @deprecated Use the new decode API (avcodec_send_packet, avcodec_receive_frame) instead.
  */
+attribute_deprecated
 int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
                             int *got_sub_ptr,
                             AVPacket *avpkt);
diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index 1893caa6a6..e8ca7b6da4 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -573,6 +573,39 @@  static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
     return ret;
 }
 
+static int decode_subtitle2_priv(AVCodecContext *avctx, AVSubtitle *sub,
+                                 int *got_sub_ptr, AVPacket *avpkt);
+
+static int decode_subtitle_shim(AVCodecContext *avctx, AVFrame *frame, AVPacket *avpkt)
+{
+    int ret, got_sub_ptr = 0;
+    AVSubtitle subtitle = { 0 };
+
+    if (frame->buf[0])
+        return AVERROR(EAGAIN);
+
+    av_frame_unref(frame);
+
+    ret = decode_subtitle2_priv(avctx, &subtitle, &got_sub_ptr, avpkt);
+
+    if (ret >= 0 && got_sub_ptr) {
+        frame->type = AVMEDIA_TYPE_SUBTITLE;
+        frame->format = subtitle.format;
+        ret = av_frame_get_buffer2(frame, 0);
+
+        if (ret >= 0)
+            ret = ff_frame_put_subtitle(frame, &subtitle);
+
+        frame->width = avctx->width;
+        frame->height = avctx->height;
+        frame->pkt_dts = avpkt->dts;
+    }
+
+    avsubtitle_free(&subtitle);
+
+    return ret;
+}
+
 int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt)
 {
     AVCodecInternal *avci = avctx->internal;
@@ -587,6 +620,13 @@  int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacke
     if (avpkt && !avpkt->size && avpkt->data)
         return AVERROR(EINVAL);
 
+    if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE)
+		// this does not exactly implement the avcodec_send_packet/avcodec_receive_frame API
+	    // but we know that no subtitle decoder produces multiple AVSubtitles per packet through
+		// the legacy API, and this will be changed when migrating the subtitle decoders
+		// to the frame based decoding api
+        return decode_subtitle_shim(avctx, avci->buffer_frame, avpkt);
+
     av_packet_unref(avci->buffer_pkt);
     if (avpkt && (avpkt->data || avpkt->side_data_elems)) {
         ret = av_packet_ref(avci->buffer_pkt, avpkt);
@@ -648,7 +688,9 @@  int attribute_align_arg avcodec_receive_frame(AVCodecContext *avctx, AVFrame *fr
 
     if (avci->buffer_frame->buf[0]) {
         av_frame_move_ref(frame, avci->buffer_frame);
-    } else {
+    } else if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE)
+        return AVERROR(EAGAIN);
+    else {
         ret = decode_receive_frame_internal(avctx, frame);
         if (ret < 0)
             return ret;
@@ -813,9 +855,8 @@  static int utf8_check(const uint8_t *str)
     return 1;
 }
 
-int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
-                             int *got_sub_ptr,
-                             AVPacket *avpkt)
+static int decode_subtitle2_priv(AVCodecContext *avctx, AVSubtitle *sub,
+                             int *got_sub_ptr, AVPacket *avpkt)
 {
     int ret = 0;
 
@@ -861,10 +902,7 @@  int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
                                                  avctx->pkt_timebase, ms);
         }
 
-        if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB)
-            sub->format = 0;
-        else if (avctx->codec_descriptor->props & AV_CODEC_PROP_TEXT_SUB)
-            sub->format = 1;
+        sub->format = (uint16_t)avctx->subtitle_type;
 
         for (unsigned i = 0; i < sub->num_rects; i++) {
             if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_IGNORE &&
@@ -885,6 +923,12 @@  int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
     return ret;
 }
 
+int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
+                             int *got_sub_ptr, AVPacket *avpkt)
+{
+    return decode_subtitle2_priv(avctx, sub, got_sub_ptr, avpkt);
+}
+
 enum AVPixelFormat avcodec_default_get_format(struct AVCodecContext *avctx,
                                               const enum AVPixelFormat *fmt)
 {
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index 17e1de8127..69656729d8 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@@ -290,4 +290,20 @@  int ff_int_from_list_or_default(void *ctx, const char * val_name, int val,
 
 void ff_dvdsub_parse_palette(uint32_t *palette, const char *p);
 
+/**
+ * Copies subtitle data from AVSubtitle to AVFrame.
+ *
+ * @deprecated This is a compatibility method for interoperability with
+ * the legacy subtitle API.
+ */
+int ff_frame_put_subtitle(AVFrame* frame, const AVSubtitle* sub);
+
+/**
+ * Copies subtitle data from AVFrame to AVSubtitle.
+ *
+ * @deprecated This is a compatibility method for interoperability with
+ * the legacy subtitle API.
+ */
+int ff_frame_get_subtitle(AVSubtitle* sub, AVFrame* frame);
+
 #endif /* AVCODEC_INTERNAL_H */
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index eb7e505a62..b67b6b6122 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -827,6 +827,190 @@  FF_ENABLE_DEPRECATION_WARNINGS
     return FFMAX(0, duration);
 }
 
+static int subtitle_area2rect(AVSubtitleRect *dst, const AVSubtitleArea *src)
+{
+    dst->x         =  src->x;
+    dst->y         =  src->y;
+    dst->w         =  src->w;
+    dst->h         =  src->h;
+    dst->nb_colors =  src->nb_colors;
+    dst->type      =  src->type;
+    dst->flags     =  src->flags;
+
+    switch (dst->type) {
+    case AV_SUBTITLE_FMT_BITMAP:
+
+        if (src->h > 0 && src->w > 0 && src->buf[0]) {
+            uint32_t *pal;
+            AVBufferRef *buf = src->buf[0];
+            dst->data[0] = av_mallocz(buf->size);
+            memcpy(dst->data[0], buf->data, buf->size);
+            dst->linesize[0] = src->linesize[0];
+
+            dst->data[1] = av_mallocz(256 * 4);
+            pal = (uint32_t *)dst->data[1];
+
+            for (unsigned i = 0; i < 256; i++) {
+                pal[i] = src->pal[i];
+            }
+        }
+
+        break;
+    case AV_SUBTITLE_FMT_TEXT:
+
+        if (src->text)
+            dst->text = av_strdup(src->text);
+        else
+            dst->text = av_strdup("");
+
+        if (!dst->text)
+            return AVERROR(ENOMEM);
+
+        break;
+    case AV_SUBTITLE_FMT_ASS:
+
+        if (src->ass)
+            dst->ass = av_strdup(src->ass);
+        else
+            dst->ass = av_strdup("");
+
+        if (!dst->ass)
+            return AVERROR(ENOMEM);
+
+        break;
+    default:
+
+        av_log(NULL, AV_LOG_ERROR, "Subtitle rect has invalid format: %d", dst->type);
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+static int subtitle_rect2area(AVSubtitleArea *dst, const AVSubtitleRect *src)
+{
+    dst->x         =  src->x;
+    dst->y         =  src->y;
+    dst->w         =  src->w;
+    dst->h         =  src->h;
+    dst->nb_colors =  src->nb_colors;
+    dst->type      =  src->type;
+    dst->flags     =  src->flags;
+
+    switch (dst->type) {
+    case AV_SUBTITLE_FMT_BITMAP:
+
+        if (src->h > 0 && src->w > 0 && src->data[0]) {
+            AVBufferRef *buf = av_buffer_allocz(src->h * src->linesize[0]);
+            memcpy(buf->data, src->data[0], buf->size);
+
+            dst->buf[0] = buf;
+            dst->linesize[0] = src->linesize[0];
+        }
+
+        if (src->data[1]) {
+            uint32_t *pal = (uint32_t *)src->data[1];
+
+            for (unsigned i = 0; i < 256; i++) {
+                dst->pal[i] = pal[i];
+            }
+        }
+
+        break;
+    case AV_SUBTITLE_FMT_TEXT:
+
+        if (src->text) {
+            dst->text = av_strdup(src->text);
+            if (!dst->text)
+                return AVERROR(ENOMEM);
+        }
+
+        break;
+    case AV_SUBTITLE_FMT_ASS:
+
+        if (src->ass) {
+            dst->ass = av_strdup(src->ass);
+            if (!dst->ass)
+                return AVERROR(ENOMEM);
+        }
+
+        break;
+    default:
+
+        av_log(NULL, AV_LOG_ERROR, "Subtitle area has invalid format: %d", dst->type);
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+/**
+ * Copies subtitle data from AVSubtitle (deprecated) to AVFrame
+ *
+ * @note This is a compatibility method for conversion to the legacy API
+ */
+int ff_frame_put_subtitle(AVFrame *frame, const AVSubtitle *sub)
+{
+    frame->format = sub->format;
+    frame->subtitle_timing.start_pts = sub->pts;
+    frame->subtitle_timing.start_pts += av_rescale_q(sub->start_display_time, (AVRational){ 1, 1000 }, AV_TIME_BASE_Q);
+    frame->subtitle_timing.duration = av_rescale_q(sub->end_display_time - sub->start_display_time, (AVRational){ 1, 1000 }, AV_TIME_BASE_Q);
+
+    if (sub->num_rects) {
+        frame->subtitle_areas = av_malloc_array(sub->num_rects, sizeof(AVSubtitleArea*));
+        if (!frame->subtitle_areas)
+            return AVERROR(ENOMEM);
+
+        for (unsigned i = 0; i < sub->num_rects; i++) {
+            int ret;
+            frame->subtitle_areas[i] = av_mallocz(sizeof(AVSubtitleArea));
+            if (!frame->subtitle_areas[i])
+                return AVERROR(ENOMEM);
+            ret = subtitle_rect2area(frame->subtitle_areas[i], sub->rects[i]);
+            if (ret < 0) {
+                frame->num_subtitle_areas = i;
+                return ret;
+            }
+        }
+    }
+
+    frame->num_subtitle_areas = sub->num_rects;
+    return 0;
+}
+
+/**
+ * Copies subtitle data from AVFrame to AVSubtitle (deprecated)
+ *
+ * @note This is a compatibility method for conversion to the legacy API
+ */
+int ff_frame_get_subtitle(AVSubtitle *sub, AVFrame *frame)
+{
+    const int64_t duration_ms = av_rescale_q(frame->subtitle_timing.duration, AV_TIME_BASE_Q, (AVRational){ 1, 1000 });
+
+    sub->start_display_time = 0;
+    sub->end_display_time = (int32_t)duration_ms;
+    sub->pts = frame->subtitle_timing.start_pts;
+
+    if (frame->num_subtitle_areas) {
+        sub->rects = av_malloc_array(frame->num_subtitle_areas, sizeof(AVSubtitleRect*));
+        if (!sub->rects)
+            return AVERROR(ENOMEM);
+
+        for (unsigned i = 0; i < frame->num_subtitle_areas; i++) {
+            int ret;
+            sub->rects[i] = av_mallocz(sizeof(AVSubtitleRect));
+            ret = subtitle_area2rect(sub->rects[i], frame->subtitle_areas[i]);
+            if (ret < 0) {
+                sub->num_rects = i;
+                return ret;
+            }
+        }
+    }
+
+    sub->num_rects = frame->num_subtitle_areas;
+    return 0;
+}
+
 int av_get_audio_frame_duration2(AVCodecParameters *par, int frame_bytes)
 {
    int channels = par->ch_layout.nb_channels;