diff mbox series

[FFmpeg-devel,v3,02/26] avutil/frame: Prepare AVFrame for subtitle handling

Message ID e922f141ba1fc076cd6881844b2f62145c50dfc6.1642649134.git.ffmpegagent@gmail.com
State New
Headers show
Series Subtitle Filtering 2022 | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/makex86 warning New warnings during build
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished
andriy/makeppc warning New warnings during build
andriy/make_fate_aarch64_jetson success Make fate finished
andriy/make_aarch64_jetson warning New warnings during build

Commit Message

softworkz Jan. 20, 2022, 3:25 a.m. UTC
From: softworkz <softworkz@hotmail.com>

Root commit for adding subtitle filtering capabilities.
In detail:

- Add type (AVMediaType) field to AVFrame
  Replaces previous way of distinction which was based on checking
  width and height to determine whether a frame is audio or video
- Add subtitle fields to AVFrame
- Add new struct AVSubtitleArea, similar to AVSubtitleRect, but
  different allocation logic. Cannot and must not be used
  interchangeably, hence the new struct

Signed-off-by: softworkz <softworkz@hotmail.com>
---
 libavutil/Makefile |   1 +
 libavutil/frame.c  | 211 ++++++++++++++++++++++++++++++++++++++++-----
 libavutil/frame.h  |  85 +++++++++++++++++-
 libavutil/subfmt.c |  45 ++++++++++
 libavutil/subfmt.h |  47 ++++++++++
 5 files changed, 364 insertions(+), 25 deletions(-)
 create mode 100644 libavutil/subfmt.c
diff mbox series

Patch

diff --git a/libavutil/Makefile b/libavutil/Makefile
index ce644f4d48..8bc0a14942 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -160,6 +160,7 @@  OBJS = adler32.o                                                        \
        slicethread.o                                                    \
        spherical.o                                                      \
        stereo3d.o                                                       \
+       subfmt.o                                                         \
        threadmessage.o                                                  \
        time.o                                                           \
        timecode.o                                                       \
diff --git a/libavutil/frame.c b/libavutil/frame.c
index 8997c85e35..2b95830b6f 100644
--- a/libavutil/frame.c
+++ b/libavutil/frame.c
@@ -26,6 +26,7 @@ 
 #include "imgutils.h"
 #include "mem.h"
 #include "samplefmt.h"
+#include "subfmt.h"
 #include "hwcontext.h"
 
 #define CHECK_CHANNELS_CONSISTENCY(frame) \
@@ -50,6 +51,9 @@  const char *av_get_colorspace_name(enum AVColorSpace val)
     return name[val];
 }
 #endif
+
+static int frame_copy_subtitles(AVFrame *dst, const AVFrame *src, int copy_data);
+
 static void get_frame_defaults(AVFrame *frame)
 {
     memset(frame, 0, sizeof(*frame));
@@ -70,7 +74,12 @@  static void get_frame_defaults(AVFrame *frame)
     frame->colorspace          = AVCOL_SPC_UNSPECIFIED;
     frame->color_range         = AVCOL_RANGE_UNSPECIFIED;
     frame->chroma_location     = AVCHROMA_LOC_UNSPECIFIED;
-    frame->flags               = 0;
+    frame->num_subtitle_areas  = 0;
+    frame->subtitle_areas      = NULL;
+    frame->subtitle_header     = NULL;
+    frame->repeat_sub          = 0;
+    frame->subtitle_timing.start_pts = 0;
+    frame->subtitle_timing.duration  = 0;
 }
 
 static void free_side_data(AVFrameSideData **ptr_sd)
@@ -240,23 +249,55 @@  static int get_audio_buffer(AVFrame *frame, int align)
 
 }
 
+static int get_subtitle_buffer(AVFrame *frame)
+{
+    // Buffers in AVFrame->buf[] are not used in case of subtitle frames.
+    // To accomodate with existing code, checking ->buf[0] to determine
+    // whether a frame is ref-counted or has data, we're adding a 1-byte
+    // buffer here, which marks the subtitle frame to contain data.
+    frame->buf[0] = av_buffer_alloc(1);
+    if (!frame->buf[0]) {
+        av_frame_unref(frame);
+        return AVERROR(ENOMEM);
+    }
+
+    frame->extended_data = frame->data;
+
+    return 0;
+}
+
 int av_frame_get_buffer(AVFrame *frame, int align)
+{
+    if (frame->width > 0 && frame->height > 0)
+        frame->type = AVMEDIA_TYPE_VIDEO;
+    else if (frame->nb_samples > 0 && (frame->channel_layout || frame->channels > 0))
+        frame->type = AVMEDIA_TYPE_AUDIO;
+
+    return av_frame_get_buffer2(frame, align);
+}
+
+int av_frame_get_buffer2(AVFrame *frame, int align)
 {
     if (frame->format < 0)
         return AVERROR(EINVAL);
 
-    if (frame->width > 0 && frame->height > 0)
+    switch(frame->type) {
+    case AVMEDIA_TYPE_VIDEO:
         return get_video_buffer(frame, align);
-    else if (frame->nb_samples > 0 && (frame->channel_layout || frame->channels > 0))
+    case AVMEDIA_TYPE_AUDIO:
         return get_audio_buffer(frame, align);
-
-    return AVERROR(EINVAL);
+    case AVMEDIA_TYPE_SUBTITLE:
+        return get_subtitle_buffer(frame);
+    default:
+        return AVERROR(EINVAL);
+    }
 }
 
 static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy)
 {
     int ret, i;
 
+    dst->type                   = src->type;
     dst->key_frame              = src->key_frame;
     dst->pict_type              = src->pict_type;
     dst->sample_aspect_ratio    = src->sample_aspect_ratio;
@@ -288,6 +329,12 @@  static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy)
     dst->colorspace             = src->colorspace;
     dst->color_range            = src->color_range;
     dst->chroma_location        = src->chroma_location;
+    dst->repeat_sub             = src->repeat_sub;
+    dst->subtitle_timing.start_pts = src->subtitle_timing.start_pts;
+    dst->subtitle_timing.duration  = src->subtitle_timing.duration;
+
+    if ((ret = av_buffer_replace(&dst->subtitle_header, src->subtitle_header)) < 0)
+        return ret;
 
     av_dict_copy(&dst->metadata, src->metadata, 0);
 
@@ -329,6 +376,7 @@  int av_frame_ref(AVFrame *dst, const AVFrame *src)
     av_assert1(dst->width == 0 && dst->height == 0);
     av_assert1(dst->channels == 0);
 
+    dst->type           = src->type;
     dst->format         = src->format;
     dst->width          = src->width;
     dst->height         = src->height;
@@ -342,7 +390,7 @@  int av_frame_ref(AVFrame *dst, const AVFrame *src)
 
     /* duplicate the frame data if it's not refcounted */
     if (!src->buf[0]) {
-        ret = av_frame_get_buffer(dst, 0);
+        ret = av_frame_get_buffer2(dst, 0);
         if (ret < 0)
             goto fail;
 
@@ -364,6 +412,10 @@  int av_frame_ref(AVFrame *dst, const AVFrame *src)
         }
     }
 
+    /* copy subtitle areas */
+    if (src->type == AVMEDIA_TYPE_SUBTITLE)
+        frame_copy_subtitles(dst, src, 0);
+
     if (src->extended_buf) {
         dst->extended_buf = av_calloc(src->nb_extended_buf,
                                       sizeof(*dst->extended_buf));
@@ -434,7 +486,7 @@  AVFrame *av_frame_clone(const AVFrame *src)
 
 void av_frame_unref(AVFrame *frame)
 {
-    int i;
+    unsigned i, n;
 
     if (!frame)
         return;
@@ -453,6 +505,21 @@  void av_frame_unref(AVFrame *frame)
     av_buffer_unref(&frame->opaque_ref);
     av_buffer_unref(&frame->private_ref);
 
+    av_buffer_unref(&frame->subtitle_header);
+
+    for (i = 0; i < frame->num_subtitle_areas; i++) {
+        AVSubtitleArea *area = frame->subtitle_areas[i];
+
+        for (n = 0; n < FF_ARRAY_ELEMS(area->buf); n++)
+            av_buffer_unref(&area->buf[n]);
+
+        av_freep(&area->text);
+        av_freep(&area->ass);
+        av_free(area);
+    }
+
+    av_freep(&frame->subtitle_areas);
+
     if (frame->extended_data != frame->data)
         av_freep(&frame->extended_data);
 
@@ -472,18 +539,28 @@  void av_frame_move_ref(AVFrame *dst, AVFrame *src)
 
 int av_frame_is_writable(AVFrame *frame)
 {
-    int i, ret = 1;
+    AVSubtitleArea *area;
+    int ret = 1;
 
     /* assume non-refcounted frames are not writable */
     if (!frame->buf[0])
         return 0;
 
-    for (i = 0; i < FF_ARRAY_ELEMS(frame->buf); i++)
+    for (unsigned i = 0; i < FF_ARRAY_ELEMS(frame->buf); i++)
         if (frame->buf[i])
             ret &= !!av_buffer_is_writable(frame->buf[i]);
-    for (i = 0; i < frame->nb_extended_buf; i++)
+    for (unsigned i = 0; i < frame->nb_extended_buf; i++)
         ret &= !!av_buffer_is_writable(frame->extended_buf[i]);
 
+    for (unsigned i = 0; i < frame->num_subtitle_areas; i++) {
+        area = frame->subtitle_areas[i];
+        if (area) {
+            for (unsigned n = 0; n < FF_ARRAY_ELEMS(area->buf); n++)
+                if (area->buf[n])
+                    ret &= !!av_buffer_is_writable(area->buf[n]);
+            }
+    }
+
     return ret;
 }
 
@@ -499,6 +576,7 @@  int av_frame_make_writable(AVFrame *frame)
         return 0;
 
     memset(&tmp, 0, sizeof(tmp));
+    tmp.type           = frame->type;
     tmp.format         = frame->format;
     tmp.width          = frame->width;
     tmp.height         = frame->height;
@@ -509,7 +587,7 @@  int av_frame_make_writable(AVFrame *frame)
     if (frame->hw_frames_ctx)
         ret = av_hwframe_get_buffer(frame->hw_frames_ctx, &tmp, 0);
     else
-        ret = av_frame_get_buffer(&tmp, 0);
+        ret = av_frame_get_buffer2(&tmp, 0);
     if (ret < 0)
         return ret;
 
@@ -544,14 +622,22 @@  AVBufferRef *av_frame_get_plane_buffer(AVFrame *frame, int plane)
     uint8_t *data;
     int planes, i;
 
-    if (frame->nb_samples) {
-        int channels = frame->channels;
-        if (!channels)
-            return NULL;
-        CHECK_CHANNELS_CONSISTENCY(frame);
-        planes = av_sample_fmt_is_planar(frame->format) ? channels : 1;
-    } else
+    switch(frame->type) {
+    case AVMEDIA_TYPE_VIDEO:
         planes = 4;
+        break;
+    case AVMEDIA_TYPE_AUDIO:
+        {
+            int channels = frame->channels;
+            if (!channels)
+                return NULL;
+            CHECK_CHANNELS_CONSISTENCY(frame);
+            planes = av_sample_fmt_is_planar(frame->format) ? channels : 1;
+            break;
+        }
+    default:
+        return NULL;
+    }
 
     if (plane < 0 || plane >= planes || !frame->extended_data[plane])
         return NULL;
@@ -675,17 +761,98 @@  static int frame_copy_audio(AVFrame *dst, const AVFrame *src)
     return 0;
 }
 
+static int av_subtitle_area2area(AVSubtitleArea *dst, const AVSubtitleArea *src, int copy_data)
+{
+    dst->x         =  src->x;
+    dst->y         =  src->y;
+    dst->w         =  src->w;
+    dst->h         =  src->h;
+    dst->nb_colors =  src->nb_colors;
+    dst->type      =  src->type;
+    dst->flags     =  src->flags;
+
+    switch (dst->type) {
+    case AV_SUBTITLE_FMT_BITMAP:
+
+        for (unsigned i = 0; i < AV_NUM_BUFFER_POINTERS; i++) {
+            if (src->h > 0 && src->w > 0 && src->buf[i]) {
+                dst->buf[0] = av_buffer_ref(src->buf[i]);
+                if (!dst->buf[i])
+                    return AVERROR(ENOMEM);
+
+                if (copy_data) {
+                    const int ret = av_buffer_make_writable(&dst->buf[i]);
+                    if (ret < 0)
+                        return ret;
+                }
+
+                dst->linesize[i] = src->linesize[i];
+            }
+        }
+
+        memcpy(&dst->pal[0], &src->pal[0], sizeof(src->pal[0]) * 256);
+
+        break;
+    case AV_SUBTITLE_FMT_TEXT:
+
+        if (src->text) {
+            dst->text = av_strdup(src->text);
+            if (!dst->text)
+                return AVERROR(ENOMEM);
+        }
+
+        break;
+    case AV_SUBTITLE_FMT_ASS:
+
+        if (src->ass) {
+            dst->ass = av_strdup(src->ass);
+            if (!dst->ass)
+                return AVERROR(ENOMEM);
+        }
+
+        break;
+    default:
+
+        av_log(NULL, AV_LOG_ERROR, "Subtitle area has invalid format: %d", dst->type);
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+static int frame_copy_subtitles(AVFrame *dst, const AVFrame *src, int copy_data)
+{
+    dst->format = src->format;
+
+    dst->num_subtitle_areas = src->num_subtitle_areas;
+
+    if (src->num_subtitle_areas) {
+        dst->subtitle_areas = av_malloc_array(src->num_subtitle_areas, sizeof(AVSubtitleArea *));
+
+        for (unsigned i = 0; i < src->num_subtitle_areas; i++) {
+            dst->subtitle_areas[i] = av_mallocz(sizeof(AVSubtitleArea));
+            av_subtitle_area2area(dst->subtitle_areas[i], src->subtitle_areas[i], copy_data);
+        }
+    }
+
+    return 0;
+}
+
 int av_frame_copy(AVFrame *dst, const AVFrame *src)
 {
     if (dst->format != src->format || dst->format < 0)
         return AVERROR(EINVAL);
 
-    if (dst->width > 0 && dst->height > 0)
+    switch(dst->type) {
+    case AVMEDIA_TYPE_VIDEO:
         return frame_copy_video(dst, src);
-    else if (dst->nb_samples > 0 && dst->channels > 0)
+    case AVMEDIA_TYPE_AUDIO:
         return frame_copy_audio(dst, src);
-
-    return AVERROR(EINVAL);
+    case AVMEDIA_TYPE_SUBTITLE:
+        return frame_copy_subtitles(dst, src, 1);
+    default:
+        return AVERROR(EINVAL);
+    }
 }
 
 void av_frame_remove_side_data(AVFrame *frame, enum AVFrameSideDataType type)
diff --git a/libavutil/frame.h b/libavutil/frame.h
index 18e239f870..ed519c5e2b 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -25,7 +25,6 @@ 
 #ifndef AVUTIL_FRAME_H
 #define AVUTIL_FRAME_H
 
-#include <stddef.h>
 #include <stdint.h>
 
 #include "avutil.h"
@@ -34,6 +33,7 @@ 
 #include "rational.h"
 #include "samplefmt.h"
 #include "pixfmt.h"
+#include "subfmt.h"
 #include "version.h"
 
 
@@ -285,7 +285,7 @@  typedef struct AVRegionOfInterest {
 } AVRegionOfInterest;
 
 /**
- * This structure describes decoded (raw) audio or video data.
+ * This structure describes decoded (raw) audio, video or subtitle data.
  *
  * AVFrame must be allocated using av_frame_alloc(). Note that this only
  * allocates the AVFrame itself, the buffers for the data must be managed
@@ -399,7 +399,7 @@  typedef struct AVFrame {
     /**
      * format of the frame, -1 if unknown or unset
      * Values correspond to enum AVPixelFormat for video frames,
-     * enum AVSampleFormat for audio)
+     * enum AVSampleFormat for audio, AVSubtitleType for subtitles)
      */
     int format;
 
@@ -681,6 +681,53 @@  typedef struct AVFrame {
      * for the target frame's private_ref field.
      */
     AVBufferRef *private_ref;
+
+    /**
+     * Media type of the frame (audio, video, subtitles..)
+     *
+     * See AVMEDIA_TYPE_xxx
+     */
+    enum AVMediaType type;
+
+    /**
+     * Number of items in the @ref subtitle_areas array.
+     */
+    unsigned num_subtitle_areas;
+
+    /**
+     * Array of subtitle areas, may be empty.
+     */
+    AVSubtitleArea **subtitle_areas;
+
+    /**
+     * Header containing style information for text subtitles.
+     */
+    AVBufferRef *subtitle_header;
+
+    /**
+     * Indicates that a subtitle frame is a repeated frame for arbitrating flow
+     * in a filter graph.
+     * The field subtitle_timing.start_pts always indicates the original presentation
+     * time, while the frame's pts field may be different.
+     */
+    int repeat_sub;
+
+    struct SubtitleTiming
+    {
+        /**
+         * The display start time, in AV_TIME_BASE.
+         *
+         * For subtitle frames, AVFrame.pts is populated from the packet pts value,
+         * which is not always the same as this value.
+         */
+        int64_t start_pts;
+
+        /**
+         * Display duration, in AV_TIME_BASE.
+         */
+        int64_t duration;
+
+    } subtitle_timing;
 } AVFrame;
 
 
@@ -757,6 +804,8 @@  void av_frame_move_ref(AVFrame *dst, AVFrame *src);
 /**
  * Allocate new buffer(s) for audio or video data.
  *
+ * Note: For subtitle data, use av_frame_get_buffer2
+ *
  * The following fields must be set on frame before calling this function:
  * - format (pixel format for video, sample format for audio)
  * - width and height for video
@@ -776,9 +825,39 @@  void av_frame_move_ref(AVFrame *dst, AVFrame *src);
  *              recommended to pass 0 here unless you know what you are doing.
  *
  * @return 0 on success, a negative AVERROR on error.
+ *
+ * @deprecated Use @ref av_frame_get_buffer2 instead and set @ref AVFrame.type
+ * before calling.
  */
+attribute_deprecated
 int av_frame_get_buffer(AVFrame *frame, int align);
 
+/**
+ * Allocate new buffer(s) for audio, video or subtitle data.
+ *
+ * The following fields must be set on frame before calling this function:
+ * - format (pixel format for video, sample format for audio)
+ * - width and height for video
+ * - nb_samples and channel_layout for audio
+ * - type (AVMediaType)
+ *
+ * This function will fill AVFrame.data and AVFrame.buf arrays and, if
+ * necessary, allocate and fill AVFrame.extended_data and AVFrame.extended_buf.
+ * For planar formats, one buffer will be allocated for each plane.
+ *
+ * @warning: if frame already has been allocated, calling this function will
+ *           leak memory. In addition, undefined behavior can occur in certain
+ *           cases.
+ *
+ * @param frame frame in which to store the new buffers.
+ * @param align Required buffer size alignment. If equal to 0, alignment will be
+ *              chosen automatically for the current CPU. It is highly
+ *              recommended to pass 0 here unless you know what you are doing.
+ *
+ * @return 0 on success, a negative AVERROR on error.
+ */
+int av_frame_get_buffer2(AVFrame *frame, int align);
+
 /**
  * Check if the frame data is writable.
  *
diff --git a/libavutil/subfmt.c b/libavutil/subfmt.c
new file mode 100644
index 0000000000..c72ebe2a43
--- /dev/null
+++ b/libavutil/subfmt.c
@@ -0,0 +1,45 @@ 
+/*
+ * Copyright (c) 2021 softworkz
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+#include "common.h"
+#include "subfmt.h"
+
+static const char sub_fmt_info[AV_SUBTITLE_FMT_NB][24] = {
+    [AV_SUBTITLE_FMT_UNKNOWN] = "Unknown subtitle format",
+    [AV_SUBTITLE_FMT_BITMAP]  = "Graphical subtitles",
+    [AV_SUBTITLE_FMT_TEXT]    = "Text subtitles (plain)",
+    [AV_SUBTITLE_FMT_ASS]     = "Text subtitles (ass)",
+};
+
+const char *av_get_subtitle_fmt_name(enum AVSubtitleType sub_fmt)
+{
+    if (sub_fmt < 0 || sub_fmt >= AV_SUBTITLE_FMT_NB)
+        return NULL;
+    return sub_fmt_info[sub_fmt];
+}
+
+enum AVSubtitleType av_get_subtitle_fmt(const char *name)
+{
+    for (int i = 0; i < AV_SUBTITLE_FMT_NB; i++)
+        if (!strcmp(sub_fmt_info[i], name))
+            return i;
+    return AV_SUBTITLE_FMT_NONE;
+}
diff --git a/libavutil/subfmt.h b/libavutil/subfmt.h
index 791b45519f..3b8a0613b2 100644
--- a/libavutil/subfmt.h
+++ b/libavutil/subfmt.h
@@ -21,6 +21,9 @@ 
 #ifndef AVUTIL_SUBFMT_H
 #define AVUTIL_SUBFMT_H
 
+#include <stdint.h>
+
+#include "buffer.h"
 #include "version.h"
 
 enum AVSubtitleType {
@@ -65,4 +68,48 @@  enum AVSubtitleType {
     AV_SUBTITLE_FMT_NB,         ///< number of subtitle formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions.
 };
 
+typedef struct AVSubtitleArea {
+#define AV_NUM_BUFFER_POINTERS 1
+
+    enum AVSubtitleType type;
+    int flags;
+
+    int x;         ///< top left corner  of area.
+    int y;         ///< top left corner  of area.
+    int w;         ///< width            of area.
+    int h;         ///< height           of area.
+    int nb_colors; ///< number of colors in bitmap palette (@ref pal).
+
+    /**
+     * Buffers and line sizes for the bitmap of this subtitle.
+     *
+     * @{
+     */
+    AVBufferRef *buf[AV_NUM_BUFFER_POINTERS];
+    int linesize[AV_NUM_BUFFER_POINTERS];
+    /**
+     * @}
+     */
+
+    uint32_t pal[256]; ///< RGBA palette for the bitmap.
+
+    char *text;        ///< 0-terminated plain UTF-8 text
+    char *ass;         ///< 0-terminated ASS/SSA compatible event line.
+
+} AVSubtitleArea;
+
+/**
+ * Return the name of sub_fmt, or NULL if sub_fmt is not
+ * recognized.
+ */
+const char *av_get_subtitle_fmt_name(enum AVSubtitleType sub_fmt);
+
+/**
+ * Return a subtitle format corresponding to name, or AV_SUBTITLE_FMT_NONE
+ * on error.
+ *
+ * @param name Subtitle format name.
+ */
+enum AVSubtitleType av_get_subtitle_fmt(const char *name);
+
 #endif /* AVUTIL_SUBFMT_H */