diff mbox series

[FFmpeg-devel,1/2] libavcodec: add support for animated WebP decoding

Message ID 20200708052824.18582-1-josef@pex.com
State Superseded
Series [FFmpeg-devel,1/2] libavcodec: add support for animated WebP decoding

Checks

Context Check Description
andriy/default pending
andriy/make success Make finished
andriy/make_fate fail Make fate failed

Commit Message

Zlomek, Josef July 8, 2020, 5:28 a.m. UTC
Fixes: 4907

Adds support for decoding of animated WebP.

The WebP parser now splits the input stream into packets containing one frame.

The WebP decoder adds the animation related features according to the specs:
https://developers.google.com/speed/webp/docs/riff_container#animation
The frames of the animation may be smaller than the image canvas.
Therefore, the frame is decoded to a temporary frame,
then it is blended into the canvas, the canvas is copied to the output frame,
and finally the frame is disposed from the canvas.

The output to AV_PIX_FMT_YUVA420P/AV_PIX_FMT_YUV420P is still supported.
The background color is specified only as BGRA in the WebP file,
so it is converted to YUVA if YUV formats are output.
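The decode-blend-copy-dispose pipeline described above can be sketched as follows. This is a minimal standalone illustration on a bare byte buffer, not the patch's actual code; the helper names (`blit_subframe`, `dispose_subframe`) are hypothetical, and the real implementation operates on AVFrame planes.

```c
#include <string.h>

/* Hypothetical sketch of the canvas handling described in the commit
 * message: a frame covering only a sub-rectangle is written into the
 * canvas, and disposal refills that sub-rectangle with the background. */

typedef struct Canvas {
    unsigned char *data;   /* packed 8-bit samples, one byte per pixel */
    int width, height;
} Canvas;

/* Overwrite a sub-rectangle of the canvas with frame data. */
static void blit_subframe(Canvas *c, const unsigned char *frame,
                          int fw, int fh, int pos_x, int pos_y)
{
    for (int y = 0; y < fh; y++)
        memcpy(c->data + (pos_y + y) * c->width + pos_x,
               frame + y * fw, fw);
}

/* Dispose: refill the sub-rectangle with the background value. */
static void dispose_subframe(Canvas *c, unsigned char background,
                             int fw, int fh, int pos_x, int pos_y)
{
    for (int y = 0; y < fh; y++)
        memset(c->data + (pos_y + y) * c->width + pos_x, background, fw);
}
```

The canvas persists across packets; each output picture is a copy of the canvas taken between the blit and the dispose steps.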

Signed-off-by: Josef Zlomek <josef@pex.com>
---
 Changelog                |   1 +
 libavcodec/version.h     |   2 +-
 libavcodec/webp.c        | 548 ++++++++++++++++++++++++++++++++++++---
 libavcodec/webp_parser.c | 116 ++++++---
 4 files changed, 583 insertions(+), 84 deletions(-)

Comments

Lynne July 8, 2020, 9:57 a.m. UTC | #1
Jul 8, 2020, 06:28 by josef@pex.com:

> Fixes: 4907
>
> Adds support for decoding of animated WebP.
>
> The WebP parser now splits the input stream into packets containing one frame.
>
> The WebP decoder adds the animation related features according to the specs:
> https://developers.google.com/speed/webp/docs/riff_container#animation
> The frames of the animation may be smaller than the image canvas.
> Therefore, the frame is decoded to a temporary frame,
> then it is blended into the canvas, the canvas is copied to the output frame,
> and finally the frame is disposed from the canvas.
>
> The output to AV_PIX_FMT_YUVA420P/AV_PIX_FMT_YUV420P is still supported.
> The background color is specified only as BGRA in the WebP file
> so it is converted to YUVA if YUV formats are output.
>

We don't convert pixel formats in decoders, and I wouldn't want to have libavcodec
depend on libswscale. I wouldn't trust libswscale to make accurate conversions either.

Can you use the macros in libavutil/colorspace.h to convert the BGRA value to YUVA
and then just memcpy it across the frame?
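The suggested approach could look roughly like this. The arithmetic below is the BT.601 limited-range conversion that the fixed-point RGB_TO_Y_CCIR()/RGB_TO_U_CCIR()/RGB_TO_V_CCIR() macros in libavutil/colorspace.h implement, written in floating point for readability; the helper name is hypothetical and exact rounding may differ by one from the macros.

```c
#include <stdint.h>

/* Convert one BGRA background pixel to YUVA without swscale
 * (hypothetical sketch; the real code would use the CCIR macros). */
static void bgra_to_yuva(const uint8_t bgra[4], uint8_t yuva[4])
{
    double b = bgra[0], g = bgra[1], r = bgra[2];

    /* Y in [16,235], U/V in [16,240] (limited range); alpha untouched */
    yuva[0] = (uint8_t)(16.0  + (219.0 / 255.0) * ( 0.299 * r + 0.587 * g + 0.114 * b) + 0.5);
    yuva[1] = (uint8_t)(128.0 + (224.0 / 255.0) * (-0.169 * r - 0.331 * g + 0.500 * b) + 0.5);
    yuva[2] = (uint8_t)(128.0 + (224.0 / 255.0) * ( 0.500 * r - 0.419 * g - 0.081 * b) + 0.5);
    yuva[3] = bgra[3];
}
```

The four resulting component values can then be memset/memcpy'd across the canvas planes.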

Also, there are a lot of frame memcpys in the code. Could you get rid of most of them
by refcounting?


> -    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
> +    .capabilities   = AV_CODEC_CAP_DR1,

Why?


> +            if (component == 1 || component == 2) {
> +                height = AV_CEIL_RSHIFT(height, desc->log2_chroma_h);
> +            }
We don't wrap 1-line if statements in brackets.

Zlomek, Josef July 8, 2020, 10:40 a.m. UTC | #2
On Wed, Jul 8, 2020 at 11:57 AM Lynne <dev@lynne.ee> wrote:

> Jul 8, 2020, 06:28 by josef@pex.com:
>
> > Fixes: 4907
> >
> > Adds support for decoding of animated WebP.
> >
> > The WebP parser now splits the input stream into packets containing one
> frame.
> >
> > The WebP decoder adds the animation related features according to the
> specs:
> > https://developers.google.com/speed/webp/docs/riff_container#animation
> > The frames of the animation may be smaller than the image canvas.
> > Therefore, the frame is decoded to a temporary frame,
> > then it is blended into the canvas, the canvas is copied to the output
> frame,
> > and finally the frame is disposed from the canvas.
> >
> > The output to AV_PIX_FMT_YUVA420P/AV_PIX_FMT_YUV420P is still supported.
> > The background color is specified only as BGRA in the WebP file
> > so it is converted to YUVA if YUV formats are output.
> >
>
> We don't convert pixel formats in decoders, and I wouldn't want to have
> libavcodec
> depend on libswscale. I wouldn't trust libswscale to make accurate
> conversions either.
>
> Can you use the macros in libavutil/colorspace.h to convert the BGRA value
> to YUVA
> and then just memcpy it across the frame?
>

Thank you for the tip!
I could not find a better way to do it than this.

> Also, there are a lot of frame memcpys in the code. Could you get rid of
> most of them
> by refcounting?
>

The memcpy is needed. The frames of the WebP animation do not cover the
whole canvas of the picture,
i.e. the decoded VP8 frame is just a sub-rectangle of the canvas. The frame
changes just a part of the canvas.

The frame needs to be copied to the proper position in the canvas, or
merged into the canvas using alpha blending.
The canvas is then copied to the output frame, and we need to keep it
because it will be partially overwritten by subsequent frames.
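The per-pixel merge step is a straight-alpha "source over" composite. A standalone sketch of the same arithmetic as the patch's ARGB path (FFmpeg's ROUNDED_DIV is inlined here as a plain rounding division for non-negative operands; the function name is hypothetical):

```c
/* Rounding division for non-negative operands, as ROUNDED_DIV does. */
static int rounded_div(int a, int b)
{
    return (a + b / 2) / b;
}

/* Blend one source channel over one destination channel.
 * src/dst: channel values 0..255; *_alpha: 0..255.
 * Returns the blended channel; *out_alpha receives the blended alpha. */
static unsigned char blend_over(int src, int src_alpha,
                                int dst, int dst_alpha, int *out_alpha)
{
    int dst_alpha2  = dst_alpha - rounded_div(src_alpha * dst_alpha, 255);
    int blend_alpha = src_alpha + dst_alpha2;

    *out_alpha = blend_alpha;
    if (blend_alpha == 0)
        return 0;  /* fully transparent result */
    return (unsigned char)rounded_div(src * src_alpha + dst * dst_alpha2,
                                      blend_alpha);
}
```

A fully opaque source (src_alpha == 255) reduces to a plain overwrite, which is why the non-blending path can use memcpy directly.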


> -    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
> +    .capabilities   = AV_CODEC_CAP_DR1,
>
> Why?
>

As described before, each subsequent frame depends on the previous ones.
With threading enabled, only some frames are decoded and many errors are
printed.
I will experiment with marking partial frames as not key frames; maybe that
will fix the problems with threading.

> +            if (component == 1 || component == 2) {
> +                height = AV_CEIL_RSHIFT(height, desc->log2_chroma_h);
> +            }
> We don't wrap 1-line if statements in brackets.
>

I will fix such occurrences in my code.

Thank you!

Josef
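One subtle point in the patch's YUVA420P blending path: U and V are subsampled, so the full-resolution alpha plane is averaged over each chroma tile (2x2 for 4:2:0) before blending a chroma sample. A standalone sketch of that averaging (hypothetical helper; the patch inlines the loop, and the rounding matches FFmpeg's RSHIFT for non-negative sums):

```c
/* Average the full-resolution alpha plane over the chroma tile that
 * corresponds to chroma sample (x, y). */
static int average_tile_alpha(const unsigned char *alpha, int linesize,
                              int x, int y, int log2_w, int log2_h)
{
    int tile_w = 1 << log2_w, tile_h = 1 << log2_h;
    int shift  = log2_w + log2_h;
    int sum    = 0;

    for (int yy = 0; yy < tile_h; yy++)
        for (int xx = 0; xx < tile_w; xx++)
            sum += alpha[(y * tile_h + yy) * linesize + (x * tile_w + xx)];

    /* rounded average, like RSHIFT() on a non-negative sum */
    return shift ? (sum + (1 << (shift - 1))) >> shift : sum;
}
```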

Patch

diff --git a/Changelog b/Changelog
index 1bb9931c0d..1e41040a8e 100644
--- a/Changelog
+++ b/Changelog
@@ -5,6 +5,7 @@  version <next>:
 - AudioToolbox output device
 - MacCaption demuxer
 - PGX decoder
+- animated WebP parser/decoder
 
 
 version 4.3:
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 482cc6d6ba..e75891d463 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -28,7 +28,7 @@ 
 #include "libavutil/version.h"
 
 #define LIBAVCODEC_VERSION_MAJOR  58
-#define LIBAVCODEC_VERSION_MINOR  94
+#define LIBAVCODEC_VERSION_MINOR  95
 #define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
diff --git a/libavcodec/webp.c b/libavcodec/webp.c
index c6d0206846..e0eb076cd1 100644
--- a/libavcodec/webp.c
+++ b/libavcodec/webp.c
@@ -41,6 +41,7 @@ 
  */
 
 #include "libavutil/imgutils.h"
+#include "libswscale/swscale.h"
 
 #define BITSTREAM_READER_LE
 #include "avcodec.h"
@@ -57,6 +58,9 @@ 
 #define VP8X_FLAG_ALPHA                 0x10
 #define VP8X_FLAG_ICC                   0x20
 
+#define ANMF_DISPOSAL_METHOD            0x01
+#define ANMF_BLENDING_METHOD            0x02
+
 #define MAX_PALETTE_SIZE                256
 #define MAX_CACHE_BITS                  11
 #define NUM_CODE_LENGTH_CODES           19
@@ -188,19 +192,30 @@  typedef struct ImageContext {
 typedef struct WebPContext {
     VP8Context v;                       /* VP8 Context used for lossy decoding */
     GetBitContext gb;                   /* bitstream reader for main image chunk */
+    AVFrame *canvas_frame;              /* AVFrame for canvas */
+    AVFrame *frame;                     /* AVFrame for decoded frame */
     AVFrame *alpha_frame;               /* AVFrame for alpha data decompressed from VP8L */
     AVCodecContext *avctx;              /* parent AVCodecContext */
     int initialized;                    /* set once the VP8 context is initialized */
     int has_alpha;                      /* has a separate alpha chunk */
     enum AlphaCompression alpha_compression; /* compression type for alpha chunk */
     enum AlphaFilter alpha_filter;      /* filtering method for alpha chunk */
+    AVPacket alpha_packet;              /* alpha chunk */
     uint8_t *alpha_data;                /* alpha chunk data */
     int alpha_data_size;                /* alpha chunk data size */
     int has_exif;                       /* set after an EXIF chunk has been processed */
     int has_iccp;                       /* set after an ICCP chunk has been processed */
-    int width;                          /* image width */
-    int height;                         /* image height */
+    int vp8x_flags;                     /* flags from VP8X chunk */
+    int anmf_flags;                     /* flags from ANMF chunk */
+    int canvas_width;                   /* canvas width */
+    int canvas_height;                  /* canvas height */
+    int width;                          /* frame width */
+    int height;                         /* frame height */
+    int pos_x;                          /* frame position X */
+    int pos_y;                          /* frame position Y */
     int lossless;                       /* indicates lossless or lossy */
+    uint32_t background_argb;           /* background color in ARGB format */
+    uint8_t background_yuva[4];         /* background color in YUVA format */
 
     int nb_transforms;                  /* number of transforms */
     enum TransformType transforms[4];   /* transformations used in the image, in order */
@@ -1100,7 +1115,7 @@  static int apply_color_indexing_transform(WebPContext *s)
     return 0;
 }
 
-static void update_canvas_size(AVCodecContext *avctx, int w, int h)
+static void update_frame_size(AVCodecContext *avctx, int w, int h)
 {
     WebPContext *s = avctx->priv_data;
     if (s->width && s->width != w) {
@@ -1140,7 +1155,7 @@  static int vp8_lossless_decode_frame(AVCodecContext *avctx, AVFrame *p,
         w = get_bits(&s->gb, 14) + 1;
         h = get_bits(&s->gb, 14) + 1;
 
-        update_canvas_size(avctx, w, h);
+        update_frame_size(avctx, w, h);
 
         ret = ff_set_dimensions(avctx, s->width, s->height);
         if (ret < 0)
@@ -1356,7 +1371,7 @@  static int vp8_lossy_decode_frame(AVCodecContext *avctx, AVFrame *p,
     if (!*got_frame)
         return AVERROR_INVALIDDATA;
 
-    update_canvas_size(avctx, avctx->width, avctx->height);
+    update_frame_size(avctx, avctx->width, avctx->height);
 
     if (s->has_alpha) {
         ret = vp8_lossy_decode_alpha(avctx, p, s->alpha_data,
@@ -1367,6 +1382,392 @@  static int vp8_lossy_decode_frame(AVCodecContext *avctx, AVFrame *p,
     return ret;
 }
 
+static int convert_background_color(AVCodecContext *avctx)
+{
+    WebPContext *s = avctx->priv_data;
+    int i;
+    int ret;
+
+    uint32_t src_frame[2][2] = { {s->background_argb, s->background_argb},
+                                 {s->background_argb, s->background_argb} };
+    uint8_t dst_frames[4][2][2];
+
+    const uint8_t * const src_data[4] = { (const uint8_t *) &src_frame[0][0],
+                                          NULL,
+                                          NULL,
+                                          NULL };
+    uint8_t * const dst_data[4] = { &dst_frames[0][0][0],
+                                    &dst_frames[1][0][0],
+                                    &dst_frames[2][0][0],
+                                    &dst_frames[3][0][0] };
+    int src_linesize[4] = { 4, 0, 0, 0 };
+    int dst_linesize[4] = { 2, 1, 1, 2 };
+
+    struct SwsContext *ctx = sws_getContext(2, 2, AV_PIX_FMT_ARGB,
+                                            2, 2, AV_PIX_FMT_YUVA420P,
+                                            0, 0, 0, 0);
+    if (!ctx)
+        return AVERROR(EINVAL);
+
+    ret = sws_scale(ctx, src_data, src_linesize, 0, 2, dst_data, dst_linesize);
+
+    if (ret >= 0) {
+        for (i = 0; i < 4; i++)
+            s->background_yuva[i] = dst_frames[i][0][0];
+    }
+
+    sws_freeContext(ctx);
+    return ret;
+}
+
+static int init_canvas_frame(AVCodecContext *avctx)
+{
+    WebPContext *s = avctx->priv_data;
+    AVFrame *canvas;
+    AVFrame *frame = s->frame;
+    int x, y;
+    int width, height;
+    int ret;
+
+    canvas = av_frame_alloc();
+    if (!canvas)
+        return AVERROR(ENOMEM);
+    s->canvas_frame = canvas;
+
+    /* let canvas always have alpha */
+    canvas->format = frame->format == AV_PIX_FMT_YUV420P ? AV_PIX_FMT_YUVA420P : frame->format;
+    canvas->width = s->canvas_width;
+    canvas->height = s->canvas_height;
+
+    ret = av_frame_copy_props(canvas, frame);
+    if (ret < 0)
+        return ret;
+
+    ret = av_frame_get_buffer(canvas, 1);
+    if (ret < 0)
+        return ret;
+
+    if (canvas->format == AV_PIX_FMT_ARGB) {
+        width  = canvas->width;
+        height = canvas->height;
+
+        for (y = 0; y < height; y++) {
+            uint32_t *dst = (uint32_t *) (canvas->data[0] + y * canvas->linesize[0]);
+            for (x = 0; x < width; x++)
+                dst[x] = s->background_argb;
+        }
+    } else /* if (canvas->format == AV_PIX_FMT_YUVA420P) */ {
+        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(canvas->format);
+        int component;
+        int plane;
+
+        ret = convert_background_color(avctx);
+        if (ret < 0)
+            return ret;
+
+        for (component = 0; component < desc->nb_components; component++) {
+            plane  = desc->comp[component].plane;
+            height = canvas->height;
+
+            if (component == 1 || component == 2) {
+                height = AV_CEIL_RSHIFT(height, desc->log2_chroma_h);
+            }
+
+            memset(canvas->data[plane], s->background_yuva[component],
+                   height * canvas->linesize[plane]);
+        }
+    }
+
+    return 0;
+}
+
+static int blend_frame_into_canvas(AVCodecContext *avctx)
+{
+    WebPContext *s = avctx->priv_data;
+    AVFrame *canvas = s->canvas_frame;
+    AVFrame *frame = s->frame;
+    int x, y;
+    int width, height;
+    int pos_x, pos_y;
+
+    if ((s->anmf_flags & ANMF_BLENDING_METHOD) || frame->format == AV_PIX_FMT_YUV420P) {
+        /* do not blend, overwrite */
+
+        if (canvas->format == AV_PIX_FMT_ARGB) {
+            width  = s->width;
+            height = s->height;
+            pos_x  = s->pos_x;
+            pos_y  = s->pos_y;
+
+            for (y = 0; y < height; y++) {
+                const uint32_t *src = (uint32_t *) (frame->data[0] + y * frame->linesize[0]);
+                uint32_t *dst = (uint32_t *) (canvas->data[0] + (y + pos_y) * canvas->linesize[0]) + pos_x;
+                memcpy(dst, src, width * 4);
+            }
+        } else /* if (canvas->format == AV_PIX_FMT_YUVA420P) */ {
+            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
+            int component;
+            int plane;
+
+            for (component = 0; component < desc->nb_components; component++) {
+                plane  = desc->comp[component].plane;
+                width  = s->width;
+                height = s->height;
+                pos_x  = s->pos_x;
+                pos_y  = s->pos_y;
+                if (component == 1 || component == 2) {
+                    width  = AV_CEIL_RSHIFT(width,  desc->log2_chroma_w);
+                    height = AV_CEIL_RSHIFT(height, desc->log2_chroma_h);
+                    pos_x  = AV_CEIL_RSHIFT(pos_x,  desc->log2_chroma_w);
+                    pos_y  = AV_CEIL_RSHIFT(pos_y,  desc->log2_chroma_h);
+                }
+
+                for (y = 0; y < height; y++) {
+                    const uint8_t *src = frame->data[plane] + y * frame->linesize[plane];
+                    uint8_t *dst = canvas->data[plane] + (y + pos_y) * canvas->linesize[plane] + pos_x;
+                    memcpy(dst, src, width);
+                }
+            }
+
+            if (desc->nb_components < 4) {
+                /* frame does not have alpha, set alpha to 255 */
+                desc = av_pix_fmt_desc_get(canvas->format);
+                plane  = desc->comp[3].plane;
+                width  = s->width;
+                height = s->height;
+                pos_x  = s->pos_x;
+                pos_y  = s->pos_y;
+
+                for (y = 0; y < height; y++) {
+                    uint8_t *dst = canvas->data[plane] + (y + pos_y) * canvas->linesize[plane] + pos_x;
+                    memset(dst, 255, width);
+                }
+            }
+        }
+    } else {
+        /* alpha blending */
+
+        if (canvas->format == AV_PIX_FMT_ARGB) {
+            width  = s->width;
+            height = s->height;
+            pos_x  = s->pos_x;
+            pos_y  = s->pos_y;
+
+            for (y = 0; y < height; y++) {
+                const uint8_t *src = frame->data[0] + y * frame->linesize[0];
+                uint8_t *dst = canvas->data[0] + (y + pos_y) * canvas->linesize[0] + pos_x * 4;
+                for (x = 0; x < width; x++) {
+                    int src_alpha = src[0];
+                    int dst_alpha = dst[0];
+                    int dst_alpha2 = dst_alpha - ROUNDED_DIV(src_alpha * dst_alpha, 255);
+                    int blend_alpha = src_alpha + dst_alpha2;
+
+                    if (blend_alpha == 0) {
+                        dst[0] = 0;
+                        dst[1] = 0;
+                        dst[2] = 0;
+                        dst[3] = 0;
+                    } else {
+                        dst[0] = blend_alpha;
+                        dst[1] = ROUNDED_DIV(src[1] * src_alpha + dst[1] * dst_alpha2, blend_alpha);
+                        dst[2] = ROUNDED_DIV(src[2] * src_alpha + dst[2] * dst_alpha2, blend_alpha);
+                        dst[3] = ROUNDED_DIV(src[3] * src_alpha + dst[3] * dst_alpha2, blend_alpha);
+                    }
+                    src += 4;
+                    dst += 4;
+                }
+            }
+        } else /* if (canvas->format == AV_PIX_FMT_YUVA420P) */ {
+            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
+            int plane_y, plane_u, plane_v, plane_a;
+            int tile_w;
+            int tile_h;
+            int src_alpha;
+            int dst_alpha;
+            int dst_alpha2;
+            int blend_alpha;
+
+            av_assert0(desc->nb_components >= 4);
+            plane_y = desc->comp[0].plane;
+            plane_u = desc->comp[1].plane;
+            plane_v = desc->comp[2].plane;
+            plane_a = desc->comp[3].plane;
+
+            // first, blend U & V planes, because the later step modifies alpha plane
+            width  = AV_CEIL_RSHIFT(s->width,  desc->log2_chroma_w);
+            height = AV_CEIL_RSHIFT(s->height, desc->log2_chroma_h);
+            pos_x  = AV_CEIL_RSHIFT(s->pos_x,  desc->log2_chroma_w);
+            pos_y  = AV_CEIL_RSHIFT(s->pos_y,  desc->log2_chroma_h);
+            tile_w = 1 << desc->log2_chroma_w;
+            tile_h = 1 << desc->log2_chroma_h;
+
+            for (y = 0; y < height; y++) {
+                uint8_t *src_u = frame->data[plane_u] + y * frame->linesize[plane_u];
+                uint8_t *src_v = frame->data[plane_v] + y * frame->linesize[plane_v];
+                uint8_t *dst_u = canvas->data[plane_u] + (y + pos_y) * canvas->linesize[plane_u] + pos_x;
+                uint8_t *dst_v = canvas->data[plane_v] + (y + pos_y) * canvas->linesize[plane_v] + pos_x;
+                for (x = 0; x < width; x++) {
+                    int xx, yy;
+
+                    src_alpha = 0;
+                    dst_alpha = 0;
+                    for (yy = 0; yy < tile_h; yy++) {
+                        for (xx = 0; xx < tile_w; xx++) {
+                            src_alpha += frame->data[plane_a][(y * tile_h + yy) * frame->linesize[plane_a] + (x * tile_w + xx)];
+                            dst_alpha += canvas->data[plane_a][((y + pos_y) * tile_h + yy) * canvas->linesize[plane_a] + ((x + pos_x) * tile_w + xx)];
+                        }
+                    }
+                    src_alpha = RSHIFT(src_alpha, desc->log2_chroma_w + desc->log2_chroma_h);
+                    dst_alpha = RSHIFT(dst_alpha, desc->log2_chroma_w + desc->log2_chroma_h);
+                    dst_alpha2 = dst_alpha - ROUNDED_DIV(src_alpha * dst_alpha, 255);
+                    blend_alpha = src_alpha + dst_alpha2;
+
+                    if (blend_alpha == 0) {
+                        *dst_u = 0;
+                        *dst_v = 0;
+                    } else {
+                        *dst_u = ROUNDED_DIV(*src_u * src_alpha + *dst_u * dst_alpha2, blend_alpha);
+                        *dst_v = ROUNDED_DIV(*src_v * src_alpha + *dst_v * dst_alpha2, blend_alpha);
+                    }
+
+                    src_u++;
+                    src_v++;
+                    dst_u++;
+                    dst_v++;
+                }
+            }
+
+            // then blend Y & A planes
+            width  = s->width;
+            height = s->height;
+            pos_x  = s->pos_x;
+            pos_y  = s->pos_y;
+
+            for (y = 0; y < height; y++) {
+                const uint8_t *src_y = frame->data[plane_y] + y * frame->linesize[plane_y];
+                const uint8_t *src_a = frame->data[plane_a] + y * frame->linesize[plane_a];
+                uint8_t *dst_y = canvas->data[plane_y] + (y + pos_y) * canvas->linesize[plane_y] + pos_x;
+                uint8_t *dst_a = canvas->data[plane_a] + (y + pos_y) * canvas->linesize[plane_a] + pos_x;
+                for (x = 0; x < width; x++) {
+                    src_alpha = *src_a;
+                    dst_alpha = *dst_a;
+                    dst_alpha2 = dst_alpha - ROUNDED_DIV(src_alpha * dst_alpha, 255);
+                    blend_alpha = src_alpha + dst_alpha2;
+
+                    if (blend_alpha == 0) {
+                        *dst_y = 0;
+                        *dst_a = 0;
+                    } else {
+                        *dst_y = ROUNDED_DIV(*src_y * src_alpha + *dst_y * dst_alpha2, blend_alpha);
+                        *dst_a = blend_alpha;
+                    }
+
+                    src_y++;
+                    src_a++;
+                    dst_y++;
+                    dst_a++;
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int copy_canvas_to_frame(AVCodecContext *avctx, AVFrame *frame)
+{
+    WebPContext *s = avctx->priv_data;
+    AVFrame *canvas = s->canvas_frame;
+    int ret;
+
+    avctx->pix_fmt = canvas->format;
+    frame->format  = canvas->format;
+    frame->width   = canvas->width;
+    frame->height  = canvas->height;
+
+    ret = av_frame_get_buffer(frame, 1);
+    if (ret < 0)
+        return ret;
+
+    ret = av_frame_copy_props(frame, canvas);
+    if (ret < 0)
+        return ret;
+
+    ret = av_frame_copy(frame, canvas);
+    if (ret < 0)
+        return ret;
+
+    /* VP8 decoder changed the width and height in AVCodecContext.
+     * Change it back to the canvas size. */
+    ret = ff_set_dimensions(avctx, canvas->width, canvas->height);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+static int dispose_frame_in_canvas(AVCodecContext *avctx)
+{
+    WebPContext *s = avctx->priv_data;
+    AVFrame *canvas = s->canvas_frame;
+    int x, y;
+    int width, height;
+    int pos_x, pos_y;
+
+    if (s->anmf_flags & ANMF_DISPOSAL_METHOD) {
+        /* dispose to background color */
+
+        if (canvas->format == AV_PIX_FMT_ARGB) {
+            width  = s->width;
+            height = s->height;
+            pos_x  = s->pos_x;
+            pos_y  = s->pos_y;
+
+            for (y = 0; y < height; y++) {
+                uint32_t *dst = (uint32_t *) (canvas->data[0] + (y + pos_y) * canvas->linesize[0]) + pos_x;
+                for (x = 0; x < width; x++)
+                    dst[x] = s->background_argb;
+            }
+        } else /* if (canvas->format == AV_PIX_FMT_YUVA420P) */ {
+            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(canvas->format);
+            int component;
+            int plane;
+
+            for (component = 0; component < desc->nb_components; component++) {
+                plane  = desc->comp[component].plane;
+                width  = s->width;
+                height = s->height;
+                pos_x  = s->pos_x;
+                pos_y  = s->pos_y;
+                if (component == 1 || component == 2) {
+                    width  = AV_CEIL_RSHIFT(width,  desc->log2_chroma_w);
+                    height = AV_CEIL_RSHIFT(height, desc->log2_chroma_h);
+                    pos_x  = AV_CEIL_RSHIFT(pos_x,  desc->log2_chroma_w);
+                    pos_y  = AV_CEIL_RSHIFT(pos_y,  desc->log2_chroma_h);
+                }
+
+                for (y = 0; y < height; y++) {
+                    uint8_t *dst = canvas->data[plane] + (y + pos_y) * canvas->linesize[plane] + pos_x;
+                    memset(dst, s->background_yuva[component], width);
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+static av_cold int webp_decode_init(AVCodecContext *avctx)
+{
+    WebPContext *s = avctx->priv_data;
+
+    s->frame = av_frame_alloc();
+    if (!s->frame)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
 static int webp_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                              AVPacket *avpkt)
 {
@@ -1375,33 +1776,13 @@  static int webp_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     GetByteContext gb;
     int ret;
     uint32_t chunk_type, chunk_size;
-    int vp8x_flags = 0;
 
     s->avctx     = avctx;
-    s->width     = 0;
-    s->height    = 0;
     *got_frame   = 0;
-    s->has_alpha = 0;
-    s->has_exif  = 0;
-    s->has_iccp  = 0;
     bytestream2_init(&gb, avpkt->data, avpkt->size);
 
-    if (bytestream2_get_bytes_left(&gb) < 12)
-        return AVERROR_INVALIDDATA;
-
-    if (bytestream2_get_le32(&gb) != MKTAG('R', 'I', 'F', 'F')) {
-        av_log(avctx, AV_LOG_ERROR, "missing RIFF tag\n");
-        return AVERROR_INVALIDDATA;
-    }
-
-    chunk_size = bytestream2_get_le32(&gb);
-    if (bytestream2_get_bytes_left(&gb) < chunk_size)
-        return AVERROR_INVALIDDATA;
-
-    if (bytestream2_get_le32(&gb) != MKTAG('W', 'E', 'B', 'P')) {
-        av_log(avctx, AV_LOG_ERROR, "missing WEBP tag\n");
-        return AVERROR_INVALIDDATA;
-    }
+    // clear the previous frame
+    av_frame_unref(s->frame);
 
     while (bytestream2_get_bytes_left(&gb) > 8) {
         char chunk_str[5] = { 0 };
@@ -1412,6 +1793,10 @@  static int webp_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             return AVERROR_INVALIDDATA;
         chunk_size += chunk_size & 1;
 
+        /* we need to dive into RIFF chunk */
+        if (chunk_type == MKTAG('R', 'I', 'F', 'F'))
+            chunk_size = 4;
+
         if (bytestream2_get_bytes_left(&gb) < chunk_size) {
            /* we seem to be running out of data, but it could also be that the
               bitstream has trailing junk leading to bogus chunk_size. */
@@ -1419,9 +1804,27 @@  static int webp_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         }
 
         switch (chunk_type) {
+        case MKTAG('R', 'I', 'F', 'F'):
+            if (bytestream2_get_le32(&gb) != MKTAG('W', 'E', 'B', 'P')) {
+                av_log(avctx, AV_LOG_ERROR, "missing WEBP tag\n");
+                return AVERROR_INVALIDDATA;
+            }
+            s->vp8x_flags    = 0;
+            s->anmf_flags    = 0;
+            s->canvas_width  = 0;
+            s->canvas_height = 0;
+            s->width         = 0;
+            s->height        = 0;
+            s->pos_x         = 0;
+            s->pos_y         = 0;
+            s->has_alpha     = 0;
+            s->has_exif      = 0;
+            s->has_iccp      = 0;
+            av_packet_unref(&s->alpha_packet);
+            break;
         case MKTAG('V', 'P', '8', ' '):
             if (!*got_frame) {
-                ret = vp8_lossy_decode_frame(avctx, p, got_frame,
+                ret = vp8_lossy_decode_frame(avctx, s->frame, got_frame,
                                              avpkt->data + bytestream2_tell(&gb),
                                              chunk_size);
                 if (ret < 0)
@@ -1431,7 +1834,7 @@  static int webp_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             break;
         case MKTAG('V', 'P', '8', 'L'):
             if (!*got_frame) {
-                ret = vp8_lossless_decode_frame(avctx, p, got_frame,
+                ret = vp8_lossless_decode_frame(avctx, s->frame, got_frame,
                                                 avpkt->data + bytestream2_tell(&gb),
                                                 chunk_size, 0);
                 if (ret < 0)
@@ -1441,14 +1844,16 @@  static int webp_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             bytestream2_skip(&gb, chunk_size);
             break;
         case MKTAG('V', 'P', '8', 'X'):
-            if (s->width || s->height || *got_frame) {
+            if (s->canvas_width || s->canvas_height || *got_frame) {
                 av_log(avctx, AV_LOG_ERROR, "Canvas dimensions are already set\n");
                 return AVERROR_INVALIDDATA;
             }
-            vp8x_flags = bytestream2_get_byte(&gb);
+            s->vp8x_flags = bytestream2_get_byte(&gb);
             bytestream2_skip(&gb, 3);
             s->width  = bytestream2_get_le24(&gb) + 1;
             s->height = bytestream2_get_le24(&gb) + 1;
+            s->canvas_width  = s->width;
+            s->canvas_height = s->height;
             ret = av_image_check_size(s->width, s->height, 0, avctx);
             if (ret < 0)
                 return ret;
@@ -1456,7 +1861,7 @@  static int webp_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         case MKTAG('A', 'L', 'P', 'H'): {
             int alpha_header, filter_m, compression;
 
-            if (!(vp8x_flags & VP8X_FLAG_ALPHA)) {
+            if (!(s->vp8x_flags & VP8X_FLAG_ALPHA)) {
                 av_log(avctx, AV_LOG_WARNING,
                        "ALPHA chunk present, but alpha bit not set in the "
                        "VP8X header\n");
@@ -1465,8 +1870,12 @@  static int webp_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 av_log(avctx, AV_LOG_ERROR, "invalid ALPHA chunk size\n");
                 return AVERROR_INVALIDDATA;
             }
+            av_packet_unref(&s->alpha_packet);
+            ret = av_packet_ref(&s->alpha_packet, avpkt);
+            if (ret < 0)
+                return ret;
             alpha_header       = bytestream2_get_byte(&gb);
-            s->alpha_data      = avpkt->data + bytestream2_tell(&gb);
+            s->alpha_data      = s->alpha_packet.data + bytestream2_tell(&gb);
             s->alpha_data_size = chunk_size - 1;
             bytestream2_skip(&gb, s->alpha_data_size);
 
@@ -1493,7 +1902,7 @@  static int webp_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 av_log(avctx, AV_LOG_VERBOSE, "Ignoring extra EXIF chunk\n");
                 goto exif_end;
             }
-            if (!(vp8x_flags & VP8X_FLAG_EXIF_METADATA))
+            if (!(s->vp8x_flags & VP8X_FLAG_EXIF_METADATA))
                 av_log(avctx, AV_LOG_WARNING,
                        "EXIF chunk present, but Exif bit not set in the "
                        "VP8X header\n");
@@ -1528,13 +1937,13 @@  exif_end:
                 bytestream2_skip(&gb, chunk_size);
                 break;
             }
-            if (!(vp8x_flags & VP8X_FLAG_ICC))
+            if (!(s->vp8x_flags & VP8X_FLAG_ICC))
                 av_log(avctx, AV_LOG_WARNING,
                        "ICCP chunk present, but ICC Profile bit not set in the "
                        "VP8X header\n");
 
             s->has_iccp = 1;
-            sd = av_frame_new_side_data(p, AV_FRAME_DATA_ICC_PROFILE, chunk_size);
+            sd = av_frame_new_side_data(s->frame, AV_FRAME_DATA_ICC_PROFILE, chunk_size);
             if (!sd)
                 return AVERROR(ENOMEM);
 
@@ -1542,7 +1951,39 @@  exif_end:
             break;
         }
         case MKTAG('A', 'N', 'I', 'M'):
+            if (!(s->vp8x_flags & VP8X_FLAG_ANIMATION)) {
+                av_log(avctx, AV_LOG_WARNING,
+                       "ANIM chunk present, but animation bit not set in the "
+                       "VP8X header\n");
+            }
+            /* background is stored as BGRA, we need ARGB in native endian */
+            s->background_argb = av_bswap32(bytestream2_get_ne32u(&gb));
+            bytestream2_skip(&gb, 2); /* loop count is ignored */
+            break;
         case MKTAG('A', 'N', 'M', 'F'):
+            av_packet_unref(&s->alpha_packet);
+            s->has_alpha = 0;
+
+            if (!(s->vp8x_flags & VP8X_FLAG_ANIMATION)) {
+                av_log(avctx, AV_LOG_WARNING,
+                       "ANMF chunk present, but animation bit not set in the "
+                       "VP8X header\n");
+                s->vp8x_flags |= VP8X_FLAG_ANIMATION;
+            }
+            s->pos_x      = bytestream2_get_le24(&gb) * 2;
+            s->pos_y      = bytestream2_get_le24(&gb) * 2;
+            s->width      = bytestream2_get_le24(&gb) + 1;
+            s->height     = bytestream2_get_le24(&gb) + 1;
+            bytestream2_skip(&gb, 3);   /* duration */
+            s->anmf_flags = bytestream2_get_byte(&gb);
+
+            if (s->width  + s->pos_x > s->canvas_width ||
+                s->height + s->pos_y > s->canvas_height) {
+                av_log(avctx, AV_LOG_ERROR,
+                       "frame does not fit into canvas\n");
+                return AVERROR_INVALIDDATA;
+            }
+            break;
         case MKTAG('X', 'M', 'P', ' '):
             AV_WL32(chunk_str, chunk_type);
             av_log(avctx, AV_LOG_WARNING, "skipping unsupported chunk: %s\n",
@@ -1558,9 +1999,31 @@  exif_end:
         }
     }
 
-    if (!*got_frame) {
-        av_log(avctx, AV_LOG_ERROR, "image data not found\n");
-        return AVERROR_INVALIDDATA;
+    if (*got_frame) {
+        if (!(s->vp8x_flags & VP8X_FLAG_ANIMATION)) {
+            /* no animation, output the decoded frame */
+            av_frame_move_ref(p, s->frame);
+        } else {
+            if (!s->canvas_frame) {
+                ret = init_canvas_frame(avctx);
+                if (ret < 0)
+                    return ret;
+            }
+
+            ret = blend_frame_into_canvas(avctx);
+            if (ret < 0)
+                return ret;
+
+            ret = copy_canvas_to_frame(avctx, p);
+            if (ret < 0)
+                return ret;
+
+            ret = dispose_frame_in_canvas(avctx);
+            if (ret < 0)
+                return ret;
+        }
+
+        p->pts = avpkt->pts;
     }
 
     return avpkt->size;
@@ -1570,6 +2033,10 @@  static av_cold int webp_decode_close(AVCodecContext *avctx)
 {
     WebPContext *s = avctx->priv_data;
 
+    av_frame_free(&s->canvas_frame);
+    av_frame_free(&s->frame);
+    av_packet_unref(&s->alpha_packet);
+
     if (s->initialized)
         return ff_vp8_decode_free(avctx);
 
@@ -1582,7 +2049,8 @@  AVCodec ff_webp_decoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_WEBP,
     .priv_data_size = sizeof(WebPContext),
+    .init           = webp_decode_init,
     .decode         = webp_decode_frame,
     .close          = webp_decode_close,
-    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/webp_parser.c b/libavcodec/webp_parser.c
index fdb7c38350..f959be8520 100644
--- a/libavcodec/webp_parser.c
+++ b/libavcodec/webp_parser.c
@@ -25,13 +25,16 @@ 
 
 #include "libavutil/bswap.h"
 #include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
 
 #include "parser.h"
 
 typedef struct WebPParseContext {
     ParseContext pc;
+    int frame;
     uint32_t fsize;
-    uint32_t remaining_size;
+    uint32_t remaining_file_size;
+    uint32_t remaining_tag_size;
 } WebPParseContext;
 
 static int webp_parse(AVCodecParserContext *s, AVCodecContext *avctx,
@@ -41,62 +44,89 @@  static int webp_parse(AVCodecParserContext *s, AVCodecContext *avctx,
     WebPParseContext *ctx = s->priv_data;
     uint64_t state = ctx->pc.state64;
     int next = END_NOT_FOUND;
-    int i = 0;
+    int i, len;
 
-    *poutbuf      = NULL;
-    *poutbuf_size = 0;
-
-restart:
-    if (ctx->pc.frame_start_found <= 8) {
-        for (; i < buf_size; i++) {
+    for (i = 0; i < buf_size;) {
+        if (ctx->remaining_tag_size) {
+            /* consuming tag */
+            len = FFMIN(ctx->remaining_tag_size, buf_size - i);
+            i += len;
+            ctx->remaining_tag_size -= len;
+            ctx->remaining_file_size -= len;
+        } else if (ctx->frame) {
+            /* consumed tag containing frame, flush it */
+            next = i;
+            ctx->frame = 0;
+            break;
+        } else {
+            /* scan for the next tag or file */
             state = (state << 8) | buf[i];
-            if (ctx->pc.frame_start_found == 0) {
-                if ((state >> 32) == MKBETAG('R', 'I', 'F', 'F')) {
-                    ctx->fsize = av_bswap32(state);
-                    if (ctx->fsize > 15 && ctx->fsize <= UINT32_MAX - 10) {
-                        ctx->pc.frame_start_found = 1;
-                        ctx->fsize += 8;
+            i++;
+
+            if (!ctx->remaining_file_size) {
+                /* scan for the next file */
+                if (ctx->pc.frame_start_found == 4) {
+                    ctx->pc.frame_start_found = 0;
+                    if ((uint32_t) state == MKBETAG('W', 'E', 'B', 'P')) {
+                        if (i != 12) {
+                            next = i - 12;
+                            state = 0;
+                            break;
+                        }
+                        ctx->remaining_file_size = ctx->fsize - 4;
+                        continue;
                     }
                 }
-            } else if (ctx->pc.frame_start_found == 8) {
-                if ((state >> 32) != MKBETAG('W', 'E', 'B', 'P')) {
+                if (ctx->pc.frame_start_found == 0) {
+                    if ((state >> 32) == MKBETAG('R', 'I', 'F', 'F')) {
+                        ctx->fsize = av_bswap32(state);
+                        if (ctx->fsize > 15 && ctx->fsize <= UINT32_MAX - 10) {
+                            ctx->fsize += (ctx->fsize & 1);
+                            ctx->pc.frame_start_found = 1;
+                        }
+                    }
+                } else
+                    ctx->pc.frame_start_found++;
+            } else {
+                /* read the next tag */
+                ctx->remaining_file_size--;
+                if (ctx->remaining_file_size == 0) {
                     ctx->pc.frame_start_found = 0;
                     continue;
                 }
                 ctx->pc.frame_start_found++;
-                ctx->remaining_size = ctx->fsize + i - 15;
-                if (ctx->pc.index + i > 15) {
-                    next = i - 15;
-                    state = 0;
-                    break;
-                } else {
-                    ctx->pc.state64 = 0;
-                    goto restart;
+                if (ctx->pc.frame_start_found < 8)
+                    continue;
+
+                switch (state >> 32) {
+                    case MKBETAG('A', 'N', 'M', 'F'):
+                    case MKBETAG('V', 'P', '8', ' '):
+                    case MKBETAG('V', 'P', '8', 'L'):
+                        ctx->frame = 1;
+                        break;
+                    default:
+                        ctx->frame = 0;
+                        break;
                 }
-            } else if (ctx->pc.frame_start_found)
-                ctx->pc.frame_start_found++;
-        }
-        ctx->pc.state64 = state;
-    } else {
-        if (ctx->remaining_size) {
-            i = FFMIN(ctx->remaining_size, buf_size);
-            ctx->remaining_size -= i;
-            if (ctx->remaining_size)
-                goto flush;
 
-            ctx->pc.frame_start_found = 0;
-            goto restart;
+                ctx->remaining_tag_size = av_bswap32(state);
+                ctx->remaining_tag_size += ctx->remaining_tag_size & 1;
+                if (ctx->remaining_tag_size > ctx->remaining_file_size) {
+                    /* this is probably trash at the end of file */
+                    ctx->remaining_tag_size = ctx->remaining_file_size;
+                }
+                ctx->pc.frame_start_found = 0;
+                state = 0;
+            }
         }
     }
+    ctx->pc.state64 = state;
 
-flush:
-    if (ff_combine_frame(&ctx->pc, next, &buf, &buf_size) < 0)
+    if (ff_combine_frame(&ctx->pc, next, &buf, &buf_size) < 0) {
+        *poutbuf      = NULL;
+        *poutbuf_size = 0;
         return buf_size;
-
-    if (next != END_NOT_FOUND && next < 0)
-        ctx->pc.frame_start_found = FFMAX(ctx->pc.frame_start_found - i - 1, 0);
-    else
-        ctx->pc.frame_start_found = 0;
+    }
 
     *poutbuf      = buf;
     *poutbuf_size = buf_size;