diff mbox series

[FFmpeg-devel,v3,2/4] avformat/webpdec: WebP demuxer implementation

Message ID 20210912202010.1542872-3-yakoyoku@gmail.com
State New
Headers show
Series [FFmpeg-devel,v3,1/4] avcodec/webp: compatibilize with avformat/webpdec | expand

Checks

Context Check Description
andriy/commit_msg_x86 warning Please wrap lines in the body of the commit message between 60 and 72 characters.
andriy/make_x86 success Make finished
andriy/make_fate_x86 fail Make fate failed
andriy/makex86 warning New warnings during build
andriy/commit_msg_ppc warning Please wrap lines in the body of the commit message between 60 and 72 characters.
andriy/make_ppc success Make finished
andriy/make_fate_ppc fail Make fate failed
andriy/makeppc warning New warnings during build

Commit Message

Martin Reboredo Sept. 12, 2021, 8:20 p.m. UTC
FFmpeg has the ability to mux encoded WebP packets, but it cannot demux the format.
The purpose of this patch is to add a way to extract pictures from a WebP stream.
Any other side data processing (mainly ICC profiles) is left up for later work.
Although `image2` already provides a demuxer for WebP, it does not support animated frames, which this patch does.

The WebP format is based on RIFF, and due to the characteristics of the latter, I've taken advantage of its chunking for processing purposes.
Packet reading is done by taking chunks in a specific order. It starts by splitting off the `RIFF`/`WEBP` header, then continues with any of the three
`VP8 ` (lossy)/`VP8L` (lossless)/`VP8X` (extended format) chunks. In the case of a `VP8X` chunk we check for the relevant flags. We then follow by grabbing the
`VP8 `/`ALPH` (alpha frame) + `VP8 `/`VP8L` chunks accordingly. If the container specifies that it is an animated file we take `ANIM` for the animation
parameters and the many `ANMF` animation frames, each of which contains an image chunk (`VP8 `/`ALPH` + `VP8 `/`VP8L`). Otherwise, if an unknown
chunk is found, we simply ignore it.

Tested by remuxing WebP images (using `ffmpeg -i testa.webp -codec:v copy testb.webp`), viewed the images in my browser and compared the checksums.

Mostly followed the WebP container specification [1] for the implementation, the VP8 bitstream [2] and the WebP lossless [3] specs were used too.

Partially fixes #4907.

[1]: https://developers.google.com/speed/webp/docs/riff_container
[2]: https://datatracker.ietf.org/doc/html/rfc6386
[3]: https://developers.google.com/speed/webp/docs/webp_lossless_bitstream_specification

Signed-off-by: Martin Reboredo <yakoyoku@gmail.com>
---
 MAINTAINERS              |   1 +
 libavformat/Makefile     |   1 +
 libavformat/allformats.c |   1 +
 libavformat/riff.c       |   1 +
 libavformat/webpdec.c    | 326 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 330 insertions(+)
 create mode 100644 libavformat/webpdec.c
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index dcac46003e..f2d8f5eb17 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -505,6 +505,7 @@  Muxers/Demuxers:
   wav.c                                 Michael Niedermayer
   wc3movie.c                            Mike Melanson
   webm dash (matroskaenc.c)             Vignesh Venkatasubramanian
+  webp*.c                               Martin Reboredo
   webvtt*                               Matthew J Heaney
   westwood.c                            Mike Melanson
   wtv.c                                 Peter Ross
diff --git a/libavformat/Makefile b/libavformat/Makefile
index f7e47563da..aec2833c52 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -581,6 +581,7 @@  OBJS-$(CONFIG_WEBM_MUXER)                += matroskaenc.o matroska.o \
 OBJS-$(CONFIG_WEBM_DASH_MANIFEST_MUXER)  += webmdashenc.o
 OBJS-$(CONFIG_WEBM_CHUNK_MUXER)          += webm_chunk.o
 OBJS-$(CONFIG_WEBP_MUXER)                += webpenc.o
+OBJS-$(CONFIG_WEBP_DEMUXER)              += webpdec.o
 OBJS-$(CONFIG_WEBVTT_DEMUXER)            += webvttdec.o subtitles.o
 OBJS-$(CONFIG_WEBVTT_MUXER)              += webvttenc.o
 OBJS-$(CONFIG_WSAUD_DEMUXER)             += westwood_aud.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index 5471f7c16f..55f3c9a956 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -473,6 +473,7 @@  extern const AVOutputFormat ff_webm_muxer;
 extern const AVInputFormat  ff_webm_dash_manifest_demuxer;
 extern const AVOutputFormat ff_webm_dash_manifest_muxer;
 extern const AVOutputFormat ff_webm_chunk_muxer;
+extern const AVInputFormat  ff_webp_demuxer;
 extern const AVOutputFormat ff_webp_muxer;
 extern const AVInputFormat  ff_webvtt_demuxer;
 extern const AVOutputFormat ff_webvtt_muxer;
diff --git a/libavformat/riff.c b/libavformat/riff.c
index 27a9706510..9bd940ba52 100644
--- a/libavformat/riff.c
+++ b/libavformat/riff.c
@@ -321,6 +321,7 @@  const AVCodecTag ff_codec_bmp_tags[] = {
     { AV_CODEC_ID_VP7,          MKTAG('V', 'P', '7', '1') },
     { AV_CODEC_ID_VP8,          MKTAG('V', 'P', '8', '0') },
     { AV_CODEC_ID_VP9,          MKTAG('V', 'P', '9', '0') },
+    { AV_CODEC_ID_WEBP,         MKTAG('W', 'E', 'B', 'P') },
     { AV_CODEC_ID_ASV1,         MKTAG('A', 'S', 'V', '1') },
     { AV_CODEC_ID_ASV2,         MKTAG('A', 'S', 'V', '2') },
     { AV_CODEC_ID_VCR1,         MKTAG('V', 'C', 'R', '1') },
diff --git a/libavformat/webpdec.c b/libavformat/webpdec.c
new file mode 100644
index 0000000000..e5c27231be
--- /dev/null
+++ b/libavformat/webpdec.c
@@ -0,0 +1,326 @@ 
+/*
+ * webp demuxer
+ * Copyright (c) 2021 Martin Reboredo <yakoyoku@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <limits.h>
+
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/opt.h"
+#include "avformat.h"
+#include "internal.h"
+
+/**
+ * Private state of the WebP demuxer. It is filled in while the file header
+ * is parsed and consulted afterwards when the file is split into packets.
+ */
+typedef struct WebpDemuxContext {
+    AVClass *class;              // class for AVOptions
+    int width;                   // picture width taken from the first chunk (-1 until the header is parsed)
+    int height;                  // picture height taken from the first chunk (-1 until the header is parsed)
+    int size;                    // size of the entire WebP file: RIFF size field plus the 8 byte RIFF header
+    int loop;                    // number of times to loop all the pictures (0 means indefinitely), -1 until an ANIM chunk is read
+    int read_webp_header;        // set once the first packet (the one carrying the RIFF header) has been returned
+    int using_webp_anim_decoder; // input WebP is animated (VP8X flag 0x02)
+    int vp8x;                    // first chunk of the file is a VP8X chunk
+    int lossless;                // first chunk of the file is a VP8L chunk
+    int alpha;                   // input contains alpha (VP8X flag 0x10 or the alpha bit of the VP8L header)
+    int iccp;                    // file contains an ICC profile (VP8X flag 0x20); only recorded, not used yet
+} WebpDemuxContext;
+
+/**
+ * Probe callback: recognise a WebP file by its RIFF container signature.
+ *
+ * @param p probe data whose buffer holds the start of the file
+ * @return AVPROBE_SCORE_MAX when the buffer starts with "RIFF" and carries
+ *         the "WEBP" form type at offset 8, 0 otherwise
+ */
+static int webpdec_read_probe(const AVProbeData * p)
+{
+    /* "RIFF" <32-bit size> "WEBP": the form type follows the size field */
+    if (AV_RL32(p->buf) == AV_RL32("RIFF") &&
+        AV_RL32(p->buf + 8) == AV_RL32("WEBP"))
+        return AVPROBE_SCORE_MAX;
+
+    return 0;
+}
+
+/**
+ * Set the duration of a packet from the frame duration stored in the ANMF
+ * chunk it carries.
+ *
+ * @param s   demuxer context; the time base of stream 0 is the target unit
+ * @param pkt packet whose data starts at the 8 byte header of an ANMF chunk
+ * @return 0 on success, AVERROR_INVALIDDATA when the packet is too short to
+ *         contain the duration field
+ */
+static int parse_animation_frame_duration(AVFormatContext * s, AVPacket * pkt)
+{
+    /* Per the WebP container specification the 24-bit duration follows the
+     * 8 byte chunk header and the frame X, Y, width and height fields
+     * (3 bytes each). */
+    const int duration_offset = 20;
+
+    /* A truncated read may hand us fewer bytes than the chunk announces;
+     * do not derive a duration from bytes that were never read. */
+    if (pkt->size < duration_offset + 3)
+        return AVERROR_INVALIDDATA;
+
+    /* The stored value is in milliseconds. */
+    pkt->duration = av_rescale_q(AV_RL24(pkt->data + duration_offset),
+                                 (AVRational) { 1, 1000 },
+                                 s->streams[0]->time_base);
+
+    return 0;
+}
+
+/**
+ * Read the next packet of a file in the extended (VP8X) format.
+ *
+ * Chunks are consumed one at a time starting at the current position:
+ *  - ALPH is appended to the packet and reading continues with the next chunk;
+ *  - VP8 / VP8L is appended to the packet and terminates it;
+ *  - the first ANIM chunk is returned as a packet of its own and its loop
+ *    count is stored in the demuxer context;
+ *  - ANMF is returned whole (including the image chunks nested in it) and
+ *    provides the packet duration;
+ *  - any other chunk is skipped.
+ *
+ * @param s   demuxer context
+ * @param pkt packet that receives the chunk data
+ * @return a non-negative value on success, AVERROR_EOF when a chunk with a
+ *         zero size is read (which is also what reading past the end of the
+ *         input yields), AVERROR_INVALIDDATA on a misplaced chunk or a chunk
+ *         size that cannot be represented, or another negative AVERROR code
+ *         from the I/O layer
+ */
+static int parse_vp8x_chunk(AVFormatContext * s, AVPacket * pkt)
+{
+    WebpDemuxContext *w = s->priv_data;
+    AVIOContext *pb = s->pb;
+    int cont = 1, alpha_frame = 0;
+    int64_t ret = 0;
+
+    s->packet_size = 0;
+
+    while (cont && ret >= 0) {
+        int skip = 0, rewind = 1;
+        unsigned int fourcc = avio_rl32(pb);
+        unsigned int size = avio_rl32(pb);
+        int padded_size, chunk_size;
+
+        /* The size comes straight from the file. Reject values for which
+         * the padded size plus the chunk header, or the accumulated packet
+         * size, would not fit in an int. */
+        if (size > INT_MAX - 9)
+            return AVERROR_INVALIDDATA;
+        padded_size = size + (size & 1);    /* chunks are padded to an even size */
+        chunk_size = padded_size + 8;       /* fourcc and size field included */
+        if (s->packet_size > (unsigned int)(INT_MAX - chunk_size))
+            return AVERROR_INVALIDDATA;
+        s->packet_size += chunk_size;
+
+        if (padded_size == 0)
+            return AVERROR_EOF;
+
+        switch (fourcc) {
+        case MKTAG('V', 'P', '8', 'X'):
+            /* the only expected VP8X chunk was delivered with the header */
+            return AVERROR_INVALIDDATA;
+        case MKTAG('A', 'L', 'P', 'H'):
+            if (!w->alpha || alpha_frame)
+                return AVERROR_INVALIDDATA;
+            /* in an animation, image data is only accepted as part of an
+             * ANMF chunk, which is returned whole and never parsed here */
+            if (w->using_webp_anim_decoder)
+                return AVERROR_INVALIDDATA;
+
+            alpha_frame = 1;
+            break;
+        case MKTAG('V', 'P', '8', 'L'):
+            /* a lossless bitstream does not need a preceding ALPH chunk */
+            alpha_frame = 1;
+            /* fallthrough */
+        case MKTAG('V', 'P', '8', ' '):
+            if (w->alpha && !alpha_frame)
+                return AVERROR_INVALIDDATA;
+            if (w->using_webp_anim_decoder)
+                return AVERROR_INVALIDDATA;
+
+            cont = 0;
+            break;
+        case MKTAG('A', 'N', 'I', 'M'):
+            if (w->loop == -1) {
+                avio_rl32(pb);              /* background color, not used */
+                w->loop = avio_rl16(pb);
+
+                /* go back over the 8 byte chunk header and the 6 bytes
+                 * just read so that the packet holds the whole chunk */
+                ret = avio_seek(pb, -14, SEEK_CUR);
+                if (ret < 0)
+                    return ret;
+
+                ret = av_get_packet(pb, pkt, s->packet_size);
+                if (ret < 0)
+                    return ret;
+
+                return 0;
+            }
+            cont = 0;
+            break;
+        case MKTAG('A', 'N', 'M', 'F'):
+            if (!w->using_webp_anim_decoder)
+                return AVERROR_INVALIDDATA;
+
+            ret = avio_seek(pb, -8, SEEK_CUR);
+            if (ret < 0)
+                return ret;
+
+            ret = av_get_packet(pb, pkt, s->packet_size);
+            if (ret < 0)
+                return ret;
+
+            ret = parse_animation_frame_duration(s, pkt);
+            if (ret < 0)
+                return ret;
+
+            return 0;
+        default:
+            /* unknown chunk: it is not part of the packet */
+            s->packet_size -= chunk_size;
+            skip = 1;
+            rewind = 0;
+            break;
+        }
+
+        if (skip) {
+            ret = avio_skip(pb, padded_size);
+        }
+        if (rewind) {
+            /* step back over the chunk header and append the whole chunk */
+            ret = avio_seek(pb, -8, SEEK_CUR);
+            if (ret < 0)
+                return ret;
+            ret = av_append_packet(pb, pkt, chunk_size);
+        }
+    }
+
+    return (int)ret;
+}
+
+/**
+ * Parse the RIFF/WEBP file header and the beginning of the first chunk.
+ *
+ * Fills in the file size, the kind of the first chunk (VP8X, VP8L or VP8),
+ * the feature flags and the picture dimensions in the demuxer context, then
+ * seeks back by the number of bytes consumed so that the caller finds the
+ * stream where it was before the call.
+ *
+ * @param s demuxer context
+ * @return 0 on success, AVERROR_INVALIDDATA when the signature, the first
+ *         chunk or the RIFF size is not acceptable, or a negative AVERROR
+ *         code from the I/O layer
+ */
+static int parse_header(AVFormatContext * s)
+{
+    WebpDemuxContext *w = s->priv_data;
+    AVIOContext *pb = s->pb;
+    unsigned int riff_size, flags;
+    int64_t ret;
+
+    if (avio_rl32(pb) != AV_RL32("RIFF"))
+        return AVERROR_INVALIDDATA;
+    /* The file size is kept in an int and later used as a packet size:
+     * refuse RIFF sizes that would overflow it. */
+    riff_size = avio_rl32(pb);
+    if (riff_size > INT_MAX - 8)
+        return AVERROR_INVALIDDATA;
+    w->size = riff_size + 8;
+    if (avio_rl32(pb) != AV_RL32("WEBP"))
+        return AVERROR_INVALIDDATA;
+
+    if (avio_rl24(pb) != AV_RL24("VP8"))
+        return AVERROR_INVALIDDATA;
+    switch (avio_r8(pb)) {
+    case 'X':
+        w->vp8x = 1;
+        break;
+    case 'L':
+        w->lossless = 1;
+        break;
+    case ' ':
+        break;
+    default:
+        return AVERROR_INVALIDDATA;
+    }
+    avio_rl32(pb);                          /* chunk size, not needed here */
+
+    if (w->vp8x) {
+        flags = avio_r8(pb);
+
+        if (flags & 0x02)
+            w->using_webp_anim_decoder = 1;
+        if (flags & 0x10)
+            w->alpha = 1;
+        if (flags & 0x20)
+            w->iccp = 1;
+
+        ret = avio_skip(pb, 3);
+        if (ret < 0)
+            return ret;
+
+        /* canvas dimensions are stored minus one */
+        w->width = avio_rl24(pb) + 1;
+        w->height = avio_rl24(pb) + 1;
+
+        /* 12 (RIFF header) + 8 (chunk header) + 10 (VP8X payload) */
+        ret = avio_seek(pb, -30, SEEK_CUR);
+    } else if (w->lossless) {
+        avio_r8(pb);                        /* signature byte, value not checked here */
+        flags = avio_rl32(pb);
+        /* 14 bits width - 1, 14 bits height - 1, 1 bit alpha */
+        w->width = (flags & 0x3FFF) + 1;
+        w->height = ((flags >> 14) & 0x3FFF) + 1;
+        w->alpha = (flags >> 28) & 0x01;
+
+        /* 12 (RIFF header) + 8 (chunk header) + 5 (bytes read above) */
+        ret = avio_seek(pb, -25, SEEK_CUR);
+    } else {
+        /* skip the 6 bytes preceding the dimensions in the lossy bitstream */
+        ret = avio_skip(pb, 6);
+        if (ret < 0)
+            return ret;
+
+        /* the two upper bits of each field are not part of the dimension */
+        w->width = (avio_rl16(pb) & 0x3FFF);
+        w->height = (avio_rl16(pb) & 0x3FFF);
+
+        /* 12 (RIFF header) + 8 (chunk header) + 10 (bytes read above) */
+        ret = avio_seek(pb, -30, SEEK_CUR);
+    }
+
+    /* avio_seek() returns a 64-bit position on success; callers of this
+     * function only distinguish failure from success. */
+    return ret < 0 ? (int)ret : 0;
+}
+
+/**
+ * read_header callback: parse the file header and create the single video
+ * stream that carries the WebP data.
+ *
+ * @param s demuxer context
+ * @return 0 on success, a negative AVERROR code when the header cannot be
+ *         parsed or the stream cannot be allocated
+ */
+static int webpdec_read_header(AVFormatContext * s)
+{
+    WebpDemuxContext *w = s->priv_data;
+    AVStream *st;
+    int ret;
+
+    /* "not known yet" markers, replaced while the file is parsed */
+    w->read_webp_header = 0;
+    w->loop = -1;
+    w->height = -1;
+    w->width = -1;
+
+    if ((ret = parse_header(s)) < 0)
+        return ret;
+
+    if (!(st = avformat_new_stream(s, NULL)))
+        return AVERROR(ENOMEM);
+
+    st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
+    st->codecpar->codec_id = AV_CODEC_ID_WEBP;
+    st->codecpar->width = w->width;
+    st->codecpar->height = w->height;
+    if (w->alpha)
+        st->codecpar->format = AV_PIX_FMT_YUVA420P;
+    else
+        st->codecpar->format = AV_PIX_FMT_YUV420P;
+
+    st->start_time = 0;
+
+    /* 24-bit timestamps in units of 1/1000 second */
+    avpriv_set_pts_info(st, 24, 1, 1000);
+
+    return 0;
+}
+
+/**
+ * read_packet callback.
+ *
+ * The first call returns the start of the file: the whole file for a still
+ * picture, or the 30 bytes made of the RIFF header and the VP8X chunk for
+ * an animation. Later calls return the following chunks, one packet at a
+ * time.
+ *
+ * @param s   demuxer context
+ * @param pkt packet to fill
+ * @return 0 on success, AVERROR_EOF at the end of the input,
+ *         AVERROR_INVALIDDATA on an unexpected chunk or chunk size, or
+ *         another negative AVERROR code from the I/O layer
+ */
+static int webpdec_read_packet(AVFormatContext * s, AVPacket * pkt)
+{
+    WebpDemuxContext *w = s->priv_data;
+    AVIOContext *pb = s->pb;
+    int ret;
+
+    ret = avio_feof(pb);
+    if (ret < 0)
+        return ret;
+    else if (ret > 0)
+        return AVERROR_EOF;
+
+    if (!w->read_webp_header) {
+        /* 30 = 12 bytes of RIFF header + 8 bytes of chunk header + 10 bytes
+         * of VP8X payload */
+        s->packet_size = w->using_webp_anim_decoder ? 30 : w->size;
+
+        ret = av_get_packet(pb, pkt, s->packet_size);
+        if (ret < 0)
+            return ret;
+
+        /* av_get_packet() initializes the packet fields with default
+         * values, so the duration has to be set after it, not before. */
+        if (!w->using_webp_anim_decoder)
+            pkt->duration =
+                av_rescale_q(33, (AVRational) { 1, 1000 }, s->streams[0]->time_base);
+        pkt->stream_index = 0;
+
+        w->read_webp_header = 1;
+
+        return 0;
+    }
+
+    if (w->vp8x) {
+        ret = parse_vp8x_chunk(s, pkt);
+        if (ret < 0)
+            return ret;
+    } else {
+        unsigned int fourcc = avio_rl32(pb);
+        unsigned int size = avio_rl32(pb);
+
+        /* Reading past the end yields zeroes; report that as the end of
+         * the stream rather than as a corrupt chunk. */
+        if (avio_feof(pb))
+            return AVERROR_EOF;
+        if (fourcc != AV_RL32("VP8 ") && fourcc != AV_RL32("VP8L"))
+            return AVERROR_INVALIDDATA;
+        /* chunk header and padding byte must still fit in an int */
+        if (size > INT_MAX - 9)
+            return AVERROR_INVALIDDATA;
+        size += 8;
+        size += size & 1;
+
+        ret = avio_seek(pb, -8, SEEK_CUR);
+        if (ret < 0)
+            return ret;
+        ret = av_get_packet(pb, pkt, size);
+        if (ret < 0)
+            return ret;
+    }
+
+    pkt->stream_index = 0;
+
+    return 0;
+}
+
+/**
+ * Demuxer description: ties the name, file extension and MIME type of WebP
+ * to the probe, header and packet callbacks defined in this file.
+ */
+const AVInputFormat ff_webp_demuxer = {
+    .name           = "webp",
+    .long_name      = NULL_IF_CONFIG_SMALL("WebP"),
+    .extensions     = "webp",
+    .mime_type      = "image/webp",
+    .priv_data_size = sizeof(WebpDemuxContext),
+    .read_probe     = webpdec_read_probe,
+    .read_header    = webpdec_read_header,
+    .read_packet    = webpdec_read_packet,
+    .flags          = AVFMT_VARIABLE_FPS,
+};