diff mbox series

[FFmpeg-devel,v5,1/4] avformat/rcwtdec: add RCWT Closed Captions demuxer

Message ID 20240319173913.2754690-1-marth64@proxyid.net
State New
Headers show
Series [FFmpeg-devel,v5,1/4] avformat/rcwtdec: add RCWT Closed Captions demuxer | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Marth64 March 19, 2024, 5:39 p.m. UTC
Signed-off-by: Marth64 <marth64@proxyid.net>
---
 Changelog                |   2 +-
 doc/demuxers.texi        |  29 ++++++++
 libavformat/Makefile     |   1 +
 libavformat/allformats.c |   1 +
 libavformat/rcwtdec.c    | 148 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 180 insertions(+), 1 deletion(-)
 create mode 100644 libavformat/rcwtdec.c

Comments

Michael Niedermayer March 19, 2024, 9:41 p.m. UTC | #1
On Tue, Mar 19, 2024 at 12:39:10PM -0500, Marth64 wrote:
> Signed-off-by: Marth64 <marth64@proxyid.net>
> ---
>  Changelog                |   2 +-
>  doc/demuxers.texi        |  29 ++++++++
>  libavformat/Makefile     |   1 +
>  libavformat/allformats.c |   1 +
>  libavformat/rcwtdec.c    | 148 +++++++++++++++++++++++++++++++++++++++
>  5 files changed, 180 insertions(+), 1 deletion(-)
>  create mode 100644 libavformat/rcwtdec.c
[...]

> +static int rcwt_probe(const AVProbeData *p)
> +{
> +    return p->buf_size > RCWT_HEADER_SIZE &&
> +           AV_RB16(p->buf) == 0xCCCC && AV_RB8(p->buf + 2) == 0xED ? 50 : 0;
> +}

this fails

tools/probetest 256 4096

Failure of rcwt probing code with score=50 type=2 p=F01 size=16

can you make the test more robust ?

thx

[...]
Marth64 March 19, 2024, 10:07 p.m. UTC | #2
Yes, v6 is on the way with fix (validated working with probetest parameters
above). I can check the format version, of which there is only 1 known
version.

Thanks for catching this.
Stefano Sabatini March 20, 2024, 2:11 p.m. UTC | #3
On date Tuesday 2024-03-19 12:39:10 -0500, Marth64 wrote:
> Signed-off-by: Marth64 <marth64@proxyid.net>
> ---
>  Changelog                |   2 +-
>  doc/demuxers.texi        |  29 ++++++++
>  libavformat/Makefile     |   1 +
>  libavformat/allformats.c |   1 +
>  libavformat/rcwtdec.c    | 148 +++++++++++++++++++++++++++++++++++++++
>  5 files changed, 180 insertions(+), 1 deletion(-)
>  create mode 100644 libavformat/rcwtdec.c
> 
> diff --git a/Changelog b/Changelog
> index e3ca52430c..be871f75cd 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -19,7 +19,7 @@ version <next>:
>  - lavu/eval: introduce randomi() function in expressions
>  - VVC decoder
>  - fsync filter
> -- Raw Captions with Time (RCWT) closed caption muxer
> +- Raw Captions with Time (RCWT) closed caption muxer and demuxer
>  - ffmpeg CLI -bsf option may now be used for input as well as output
>  - ffmpeg CLI options may now be used as -/opt <path>, which is equivalent
>    to -opt <contents of file <path>>
> diff --git a/doc/demuxers.texi b/doc/demuxers.texi
> index b70f3a38d7..26d4ba18a1 100644
> --- a/doc/demuxers.texi
> +++ b/doc/demuxers.texi
> @@ -1038,6 +1038,35 @@ the command:
>  ffplay -f rawvideo -pixel_format rgb24 -video_size 320x240 -framerate 10 input.raw
>  @end example
>  
> +@anchor{rcwtdec}
> +@section rcwt
> +
> +RCWT (Raw Captions With Time) is a format native to ccextractor, a commonly
> +used open source tool for processing 608/708 Closed Captions (CC) sources.
> +It can be used to archive the original, raw CC bitstream and to produce
> +a source file for later CC processing or conversion. This demuxer can process
> +RCWT sources created by ccextractor or FFmpeg. For more information on the format,
> +see @ref{rcwtenc,,,ffmpeg-formats}.
> +
> +This demuxer implements the specification as of March 2024, which has
> +been stable and unchanged since April 2014.
> +
> +@subsection Examples
> +
> +@itemize
> +@item

> +Render Closed Captions to ASSA (using FFmpeg's CC decoder):

ASS ??

> +@example
> +ffmpeg -i CC.rcwt.bin CC.ass
> +@end example
> +
> +@item

> +Convert an RCWT backup to SCC:

for the sake of explicitness, I'd avoid more acronyms; I'd guess this
is source Closed Caption?

[...]

Looks good to me otherwise, thanks.
diff mbox series

Patch

diff --git a/Changelog b/Changelog
index e3ca52430c..be871f75cd 100644
--- a/Changelog
+++ b/Changelog
@@ -19,7 +19,7 @@  version <next>:
 - lavu/eval: introduce randomi() function in expressions
 - VVC decoder
 - fsync filter
-- Raw Captions with Time (RCWT) closed caption muxer
+- Raw Captions with Time (RCWT) closed caption muxer and demuxer
 - ffmpeg CLI -bsf option may now be used for input as well as output
 - ffmpeg CLI options may now be used as -/opt <path>, which is equivalent
   to -opt <contents of file <path>>
diff --git a/doc/demuxers.texi b/doc/demuxers.texi
index b70f3a38d7..26d4ba18a1 100644
--- a/doc/demuxers.texi
+++ b/doc/demuxers.texi
@@ -1038,6 +1038,35 @@  the command:
 ffplay -f rawvideo -pixel_format rgb24 -video_size 320x240 -framerate 10 input.raw
 @end example
 
+@anchor{rcwtdec}
+@section rcwt
+
+RCWT (Raw Captions With Time) is a format native to ccextractor, a commonly
+used open source tool for processing 608/708 Closed Captions (CC) sources.
+It can be used to archive the original, raw CC bitstream and to produce
+a source file for later CC processing or conversion. This demuxer can process
+RCWT sources created by ccextractor or FFmpeg. For more information on the format,
+see @ref{rcwtenc,,,ffmpeg-formats}.
+
+This demuxer implements the specification as of March 2024, which has
+been stable and unchanged since April 2014.
+
+@subsection Examples
+
+@itemize
+@item
+Render Closed Captions to ASS (Advanced SubStation Alpha) using FFmpeg's CC decoder:
+@example
+ffmpeg -i CC.rcwt.bin CC.ass
+@end example
+
+@item
+Convert an RCWT backup to Scenarist Closed Captions (SCC) format:
+@example
+ffmpeg -i CC.rcwt.bin -c:s copy CC.scc
+@end example
+@end itemize
+
 @section sbg
 
 SBaGen script demuxer.
diff --git a/libavformat/Makefile b/libavformat/Makefile
index 94a949f555..a6de720d8c 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -493,6 +493,7 @@  OBJS-$(CONFIG_QOA_DEMUXER)               += qoadec.o
 OBJS-$(CONFIG_R3D_DEMUXER)               += r3d.o
 OBJS-$(CONFIG_RAWVIDEO_DEMUXER)          += rawvideodec.o
 OBJS-$(CONFIG_RAWVIDEO_MUXER)            += rawenc.o
+OBJS-$(CONFIG_RCWT_DEMUXER)              += rcwtdec.o subtitles.o
 OBJS-$(CONFIG_RCWT_MUXER)                += rcwtenc.o subtitles.o
 OBJS-$(CONFIG_REALTEXT_DEMUXER)          += realtextdec.o subtitles.o
 OBJS-$(CONFIG_REDSPARK_DEMUXER)          += redspark.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index e15d0fa6d7..3140018f8d 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -391,6 +391,7 @@  extern const FFInputFormat  ff_qoa_demuxer;
 extern const FFInputFormat  ff_r3d_demuxer;
 extern const FFInputFormat  ff_rawvideo_demuxer;
 extern const FFOutputFormat ff_rawvideo_muxer;
+extern const FFInputFormat  ff_rcwt_demuxer;
 extern const FFOutputFormat ff_rcwt_muxer;
 extern const FFInputFormat  ff_realtext_demuxer;
 extern const FFInputFormat  ff_redspark_demuxer;
diff --git a/libavformat/rcwtdec.c b/libavformat/rcwtdec.c
new file mode 100644
index 0000000000..1baa571231
--- /dev/null
+++ b/libavformat/rcwtdec.c
@@ -0,0 +1,148 @@ 
+/*
+ * RCWT (Raw Captions With Time) demuxer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * RCWT (Raw Captions With Time) is a format native to ccextractor, a commonly
+ * used open source tool for processing 608/708 Closed Captions (CC) sources.
+ *
+ * This demuxer implements the specification as of March 2024, which has
+ * been stable and unchanged since April 2014.
+ *
+ * A free specification of RCWT can be found here:
+ * @url{https://github.com/CCExtractor/ccextractor/blob/master/docs/BINARY_FILE_FORMAT.TXT}
+ */
+
+#include <inttypes.h>
+
+#include "avformat.h"
+#include "demux.h"
+#include "internal.h"
+#include "subtitles.h"
+#include "libavutil/intreadwrite.h"
+
+#define RCWT_CLUSTER_MAX_BLOCKS             65535 /* 16-bit maximum; not referenced in the code visible here */
+#define RCWT_BLOCK_SIZE                     3 /* bytes per block; cluster payload size = block count * this */
+#define RCWT_HEADER_SIZE                    11 /* bytes read from the start of the input as the file header */
+
+typedef struct RCWTContext { /* private demuxer state, sized via .priv_data_size */
+    FFDemuxSubtitlesQueue q; /* packets queued by rcwt_read_header() */
+} RCWTContext;
+
+/**
+ * Read the RCWT header and load every caption cluster into the subtitles queue.
+ *
+ * The RCWT_HEADER_SIZE-byte header is read first; its format version (the
+ * big-endian 16-bit value at offset 6) must be 0x0001. One EIA-608 subtitle
+ * stream with a 1/1000 time base is then created and the rest of the input
+ * is parsed as clusters: a little-endian 64-bit timestamp, a little-endian
+ * 16-bit block count, then block count * RCWT_BLOCK_SIZE payload bytes.
+ * Every non-empty cluster is inserted into the queue as one packet.
+ *
+ * @param avf demuxer context; avf->priv_data is used as an RCWTContext
+ * @return 0 on success, AVERROR_INVALIDDATA for a short or incompatible
+ *         header or a truncated cluster, AVERROR(ENOMEM) when an allocation
+ *         or queue insertion fails, or the negative value returned by
+ *         avio_read() while reading a cluster
+ */
+static int rcwt_read_header(AVFormatContext *avf)
+{
+    RCWTContext *rcwt = avf->priv_data;
+
+    AVPacket      *sub = NULL;
+    AVStream      *st;
+    uint8_t       header[RCWT_HEADER_SIZE] = {0};
+    int           nb_bytes = 0;
+
+    /* validate the header */
+    nb_bytes = avio_read(avf->pb, header, RCWT_HEADER_SIZE);
+    if (nb_bytes != RCWT_HEADER_SIZE) {
+        av_log(avf, AV_LOG_ERROR, "Header does not have the expected size "
+                                  "(expected=%d actual=%d)\n",
+                                  RCWT_HEADER_SIZE, nb_bytes);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (AV_RB16(header + 6) != 0x0001) {
+        av_log(avf, AV_LOG_ERROR, "RCWT format version is not compatible "
+                                  "(only version 0.001 is known)\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    av_log(avf, AV_LOG_DEBUG, "RCWT writer application: %02X version: %02x\n",
+                              header[3], header[5]);
+
+    /* setup AVStream */
+    st = avformat_new_stream(avf, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+
+    st->codecpar->codec_type = AVMEDIA_TYPE_SUBTITLE;
+    st->codecpar->codec_id   = AV_CODEC_ID_EIA_608;
+
+    avpriv_set_pts_info(st, 64, 1, 1000);
+
+    /* demux */
+    while (!avio_feof(avf->pb)) {
+        int64_t       cluster_pts       = AV_NOPTS_VALUE;
+        int           cluster_nb_blocks = 0;
+        int           cluster_size      = 0;
+        uint8_t       *cluster_buf;
+
+        cluster_pts       = avio_rl64(avf->pb);
+        cluster_nb_blocks = avio_rl16(avf->pb);
+        /* a cluster without blocks carries no payload: nothing to queue */
+        if (cluster_nb_blocks == 0)
+            continue;
+
+        cluster_size      = cluster_nb_blocks * RCWT_BLOCK_SIZE;
+        cluster_buf       = av_malloc(cluster_size);
+        if (!cluster_buf)
+            return AVERROR(ENOMEM);
+
+        nb_bytes          = avio_read(avf->pb, cluster_buf, cluster_size);
+        if (nb_bytes < 0) {
+            /* the temporary buffer is only referenced here, so it must be
+             * released on this error path too */
+            av_freep(&cluster_buf);
+            return nb_bytes;
+        }
+
+        if (nb_bytes != cluster_size) {
+            av_freep(&cluster_buf);
+            /* avio_tell() returns int64_t, which is not "long" everywhere */
+            av_log(avf, AV_LOG_ERROR, "Cluster does not have the expected size "
+                                      "(expected=%d actual=%d pos=%"PRId64")\n",
+                                      cluster_size, nb_bytes, avio_tell(avf->pb));
+            return AVERROR_INVALIDDATA;
+        }
+
+        sub = ff_subtitles_queue_insert(&rcwt->q, cluster_buf, cluster_size, 0);
+        if (!sub) {
+            av_freep(&cluster_buf);
+            return AVERROR(ENOMEM);
+        }
+
+        /* position recorded is the offset right after the cluster payload */
+        sub->pos = avio_tell(avf->pb);
+        sub->pts = cluster_pts;
+
+        /* the buffer is released once the cluster is queued; av_freep()
+         * also resets the pointer, so no explicit NULL assignment is needed */
+        av_freep(&cluster_buf);
+    }
+
+    ff_subtitles_queue_finalize(avf, &rcwt->q);
+
+    return 0;
+}
+
+/**
+ * Probe a buffer for the RCWT signature.
+ *
+ * The 3-byte magic (0xCC 0xCC 0xED) on its own is too weak and yields false
+ * positives under tools/probetest, so the format version at offset 6 is
+ * checked as well. It must be 0x0001, the same value rcwt_read_header()
+ * requires before accepting a file.
+ *
+ * @param p probe data; p->buf must hold more than RCWT_HEADER_SIZE bytes
+ *          for a match to be reported
+ * @return 50 when the magic and the version both match, 0 otherwise
+ */
+static int rcwt_probe(const AVProbeData *p)
+{
+    if (p->buf_size <= RCWT_HEADER_SIZE)
+        return 0;
+
+    /* file magic */
+    if (AV_RB16(p->buf) != 0xCCCC || AV_RB8(p->buf + 2) != 0xED)
+        return 0;
+
+    /* format version: only one value is accepted by rcwt_read_header() */
+    if (AV_RB16(p->buf + 6) != 0x0001)
+        return 0;
+
+    return 50;
+}
+
+const FFInputFormat ff_rcwt_demuxer = { /* demuxer definition for the "rcwt" format */
+    .p.name         = "rcwt",
+    .p.long_name    = NULL_IF_CONFIG_SMALL("RCWT (Raw Captions With Time)"),
+    .p.extensions   = "bin",
+    .p.flags        = AVFMT_TS_DISCONT,
+    .priv_data_size = sizeof(RCWTContext),
+    .flags_internal = FF_FMT_INIT_CLEANUP, /* NOTE(review): presumably makes read_close run when read_header fails -- confirm */
+    .read_probe     = rcwt_probe,
+    .read_header    = rcwt_read_header, /* parses the whole input into RCWTContext.q */
+    .read_packet    = ff_subtitles_read_packet, /* packet/seek/close use the ff_subtitles_* helpers, presumably declared in subtitles.h */
+    .read_seek2     = ff_subtitles_read_seek,
+    .read_close     = ff_subtitles_read_close
+};