Message ID | 20220107090111.243853-2-leo.izen@gmail.com |
---|---|
State | New |
Headers | show |
Series | jpegxl patchset | expand |
Context | Check | Description |
---|---|---|
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
andriy/make_ppc | success | Make finished |
andriy/make_fate_ppc | success | Make fate finished |
Leo Izen: > This commit adds support to libavcodec to read and parse > encoded Jpeg XL images. Jpeg XL is intended to be an > extended-life replacement to legacy mjpeg. > --- > MAINTAINERS | 2 + > libavcodec/Makefile | 1 + > libavcodec/codec_desc.c | 9 + > libavcodec/codec_id.h | 1 + > libavcodec/jpegxl.h | 206 ++++++++++ > libavcodec/jpegxl_parser.c | 809 +++++++++++++++++++++++++++++++++++++ > libavcodec/parsers.c | 1 + > libavcodec/version.h | 2 +- > 8 files changed, 1030 insertions(+), 1 deletion(-) > create mode 100644 libavcodec/jpegxl.h > create mode 100644 libavcodec/jpegxl_parser.c > > diff --git a/MAINTAINERS b/MAINTAINERS > index c065e94498..17c0104672 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -187,6 +187,7 @@ Codecs: > interplayvideo.c Mike Melanson > jni*, ffjni* Matthieu Bouron > jpeg2000* Nicolas Bertrand > + jpegxl.h, jpegxl_parser.c Leo Izen > jvdec.c Peter Ross > lcl*.c Roberto Togni, Reimar Doeffinger > libcelt_dec.c Nicolas George > @@ -615,6 +616,7 @@ Haihao Xiang (haihao) 1F0C 31E8 B4FE F7A4 4DC1 DC99 E0F5 76D4 76FC 437F > Jaikrishnan Menon 61A1 F09F 01C9 2D45 78E1 C862 25DC 8831 AF70 D368 > James Almer 7751 2E8C FD94 A169 57E6 9A7A 1463 01AD 7376 59E0 > Jean Delvare 7CA6 9F44 60F1 BDC4 1FD2 C858 A552 6B9B B3CD 4E6A > +Leo Izen (thebombzen) B6FD 3CFC 7ACF 83FC 9137 6945 5A71 C331 FD2F A19A > Loren Merritt ABD9 08F4 C920 3F65 D8BE 35D7 1540 DAA7 060F 56DE > Lynne FE50 139C 6805 72CA FD52 1F8D A2FE A5F0 3F03 4464 > Michael Niedermayer 9FF2 128B 147E F673 0BAD F133 611E C787 040B 0FAB > diff --git a/libavcodec/Makefile b/libavcodec/Makefile > index cfc70a3eaf..423022e714 100644 > --- a/libavcodec/Makefile > +++ b/libavcodec/Makefile > @@ -43,6 +43,7 @@ OBJS = ac3_parser.o \ > dv_profile.o \ > encode.o \ > imgconvert.o \ > + jpegxl_parser.o \ > jni.o \ > mathtables.o \ > mediacodec.o \ > diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c > index 0974ee03de..0f3d0f910b 100644 > --- a/libavcodec/codec_desc.c > +++ b/libavcodec/codec_desc.c > @@ -1862,6 +1862,15 @@ static const AVCodecDescriptor codec_descriptors[] = { > .long_name = NULL_IF_CONFIG_SMALL("GEM Raster image"), > .props = AV_CODEC_PROP_LOSSY, > }, > + { > + .id = AV_CODEC_ID_JPEGXL, > + .type = AVMEDIA_TYPE_VIDEO, > + .name = "jpegxl", > + .long_name = NULL_IF_CONFIG_SMALL("JPEG XL"), > + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | > + AV_CODEC_PROP_LOSSLESS, > + .mime_types= MT("image/jxl"), > + }, > > /* various PCM "codecs" */ > { > diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h > index ab265ec584..551a516446 100644 > --- a/libavcodec/codec_id.h > +++ b/libavcodec/codec_id.h > @@ -308,6 +308,7 @@ enum AVCodecID { > AV_CODEC_ID_SIMBIOSIS_IMX, > AV_CODEC_ID_SGA_VIDEO, > AV_CODEC_ID_GEM, > + AV_CODEC_ID_JPEGXL, > > /* various PCM "codecs" */ > AV_CODEC_ID_FIRST_AUDIO = 0x10000, ///< A dummy id pointing at the start of audio codecs > diff --git a/libavcodec/jpegxl.h b/libavcodec/jpegxl.h > new file mode 100644 > index 0000000000..cbfb33f74c > --- /dev/null > +++ b/libavcodec/jpegxl.h > @@ -0,0 +1,206 @@ > +/* > + * JPEG XL header > + * Copyright (c) 2021 Leo Izen <leo.izen@gmail.com> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +/** > + * @file > + * JPEG XL header > + */ > + > +#ifndef AVCODEC_JPEGXL_H > +#define AVCODEC_JPEGXL_H > + > +#include <stdint.h> > + > +#define FF_JPEGXL_CODESTREAM_SIGNATURE_LE 0x0aff > +#define FF_JPEGXL_CODESTREAM_SIGNATURE_BE 0xff0a > +#define FF_JPEGXL_CONTAINER_SIGNATURE_LE 0x204c584a0c000000 > +#define FF_JPEGXL_CONTAINER_SIGNATURE_BE 0x0000000c4a584c20 > + > +enum JpegXLExtraChannelType { > + FF_JPEGXL_CT_ALPHA = 0, > + FF_JPEGXL_CT_DEPTH, > + FF_JPEGXL_CT_SPOT_COLOR, > + FF_JPEGXL_CT_SELECTION_MASK, > + FF_JPEGXL_CT_BLACK, > + FF_JPEGXL_CT_CFA, > + FF_JPEGXL_CT_THERMAL, > + FF_JPEGXL_CT_NON_OPTIONAL = 15, > + FF_JPEGXL_CT_OPTIONAL > +}; > + > +enum JpegXLColorSpace { > + FF_JPEGXL_CS_RGB = 0, > + FF_JPEGXL_CS_GRAY, > + FF_JPEGXL_CS_XYB, > + FF_JPEGXL_CS_UNKNOWN > +}; > + > +enum JpegXLWhitePoint { > + FF_JPEGXL_WP_D65 = 1, > + FF_JPEGXL_WP_CUSTOM, > + FF_JPEGXL_WP_E = 10, > + FF_JPEGXL_WP_DCI = 11 > +}; > + > +enum JpegXLPrimaries { > + FF_JPEGXL_PR_SRGB = 1, > + FF_JPEGXL_PR_CUSTOM, > + FF_JPEGXL_PR_2100 = 9, > + FF_JPEGXL_PR_P3 = 11, > +}; > + > +enum JpegXLTransferFunction { > + FF_JPEGXL_TF_709 = 1, > + FF_JPEGXL_TF_UNKNOWN, > + FF_JPEGXL_TF_LINEAR = 8, > + FF_JPEGXL_TF_SRGB = 13, > + FF_JPEGXL_TF_PQ = 16, > + FF_JPEGXL_TF_DCI, > + FF_JPEGXL_TF_HLG > +}; > + > +enum JpegXLRenderingIntent { > + FF_JPEGXL_RI_PERCEPTUAL = 0, > + FF_JPEGXL_RI_RELATIVE, > + FF_JPEGXL_RI_SATURATION, > + FF_JPEGXL_RI_ABSOLUTE > +}; > + > +typedef struct JpegXLExtraChannelInfo { > + enum JpegXLExtraChannelType type; > + uint32_t bits_per_sample; > + uint32_t exp_bits_per_sample; > + uint32_t dim_shift; > + size_t name_len; > + /* utf-8 */ > + char *name; > + int alpha_associated; > + float red; > + float green; > + float blue; > + float solidity; > + uint32_t cfa_channel; > +} JpegXLExtraChannelInfo; > + > +typedef struct JpegXLHeader { > + uint32_t width; > + uint32_t height; > + int orientation; > + /* zero if not present */ > + uint32_t intrinsic_width; > + uint32_t intrinsic_height; > + uint32_t preview_width; > + uint32_t preview_height; > + /* BEGIN animation header */ > + uint32_t anim_tb_num; > + uint32_t anim_tb_denom; > + uint32_t anim_loop_count; > + int anim_have_pts; > + /* END animation header */ > + > + uint32_t bits_per_sample; > + uint32_t exp_bits_per_sample; > + > + int modular_16bit_buffers; > + > + uint32_t num_extra_channels; > + > + /* > + * NULL if no extra channels > + * otherwise an array of extra channel info > + * with length num_extra_channels > + */ > + JpegXLExtraChannelInfo *extra_channel_info; > + > + int xyb_encoded; > + > + /* BEGIN color encoding bundle */ > + int have_icc_profile; > + enum JpegXLColorSpace color_space; > + enum JpegXLWhitePoint white_point; > + uint32_t white_ux; > + uint32_t white_uy; > + enum JpegXLPrimaries primaries; > + uint32_t red_ux; > + uint32_t red_uy; > + uint32_t green_ux; > + uint32_t green_uy; > + uint32_t blue_ux; > + uint32_t blue_uy; > + /* > + * if this is less than 1 << 24, > + * then interpret it as a gamma value > + * If this is greater than or equal to 1 << 24, > + * then subtract 1 << 24 and interpret it as a > + * an enum JpegXLTransferFunction > + */ > + int have_gamma; > + uint32_t transfer_function; > + enum JpegXLRenderingIntent rendering_intent; > + /* END color encoding bundle */ > + > + /* BEGIN tone mapping bundle */ > + float intensity_target; > + float min_nits; > + int relative_to_max_display; > + float linear_below; > + /* END tone mapping bundle */ > + > + uint64_t extensions; > + /* if extensions is nonzero, this will be length 64 */ > + /* otherwise it will be NULL */ > + uint64_t *extension_bits; > + > + int default_transform; > + > + /* if present, an array of length 16 */ > + /* NULL if not present */ > + float *opsin_inverse_matrix; > + > + uint32_t cw_mask; > + > + /* if these are not present > + * use NULL for these pointers > + * otherwise up2_weight is an > + * array of length 15, up4_weight > + * is length 55, and up8_weight is > + * length 210 > + */ > + float *up2_weight; > + float *up4_weight; > + float *up8_weight; > + > + /* > + * this is not provided by the header, > + * but rather, by the container > + * raw Jpeg XL Codestreams are level 5 > + * the container can choose to up it to 10 > + */ > + int level; > + > +} JpegXLHeader; Why is this structure exported in the header when it is only used by the parser? > + > +/** > + * @return 0 upon valid, nonzero upon some parse error > + */ > +int avpriv_jpegxl_verify_codestream_header(void *avctx, uint8_t *buf, size_t buflen, int level); > + > +#endif /* AVCODEC_JPEGXL_H */ > diff --git a/libavcodec/jpegxl_parser.c b/libavcodec/jpegxl_parser.c > new file mode 100644 > index 0000000000..3355ca603e > --- /dev/null > +++ b/libavcodec/jpegxl_parser.c > @@ -0,0 +1,809 @@ > +/* > + * JPEG XL parser > + * Copyright (c) 2021 Leo Izen <leo.izen@gmail.com> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +/** > + * @file > + * JPEG XL parser > + */ > + > +#include <stdint.h> Unnecessary, as inttypes.h is guaranteed by the spec to include stdint.h. > +#include <inttypes.h> > +#include <stdlib.h> > + > +#ifndef BITSTREAM_READER_LE > +#define BITSTREAM_READER_LE > +#endif The check here is nonsense. > +#ifdef CACHED_BITSTREAM_READER > +#undef CACHED_BITSTREAM_READER > +#endif > +#define CACHED_BITSTREAM_READER 1 Is there a reason you insist on the cached bitstream reader? > + > +#include "libavutil/error.h" > +#include "libavutil/intreadwrite.h" > +#include "libavutil/mem.h" > + > +#include "codec_id.h" > +#include "config.h" > +#include "get_bits.h" > +#include "jpegxl.h" > +#include "parser.h" > + > +#if CONFIG_JPEGXL_PARSER > + > +typedef struct JpegXLParseContext { > + ParseContext pc; > + GetBitContext gb; > + const uint8_t *buf; > + size_t buflen; > + size_t bits_read; What exactly is the point of duplicating the GetBitContext's internal status? > +} JpegXLParseContext; > + > +#define jxl_bits(n) jpegxl_get_bits(jxlr, (n)) > +#define jxl_enum() jpegxl_u32(jxlr, (uint32_t[]){0, 1, 2, 18}, (uint32_t[]){0, 0, 4, 6}) > + > +#define jxl_parse_errv(type, value) av_log(avctx, AV_LOG_DEBUG, \ > + "Invalid " type " at position: %zu", \ > + value, jxlr->bits_read) > + > +#define jxl_parse_errvv(type, v1, v2) av_log(avctx, AV_LOG_DEBUG, \ > + "Invalid " type " at position: %zu", \ > + v1, v2, jxlr->bits_read) > + > +#define jxl_parse_err(type) jxl_parse_errv("%s", type) > + > +static void jpegxl_reset_pc(JpegXLParseContext *jxlr) > +{ > + memset(&jxlr->gb, 0, sizeof(GetBitContext)); Unnecessary, as you reset gb below. > + jxlr->bits_read = 0; > + init_get_bits8(&jxlr->gb, jxlr->buf, jxlr->buflen); > +} > + > +static void jpegxl_init_pc(JpegXLParseContext *jxlr, const uint8_t *buf, size_t buflen) > +{ > + memset(&jxlr->pc, 0, sizeof(ParseContext)); > + memset(&jxlr->gb, 0, sizeof(GetBitContext)); Unnecessary, as you are initializing gb below. > + jxlr->buf = buf; > + jxlr->buflen = buflen; > + jxlr->bits_read = 0; > + init_get_bits8(&jxlr->gb, buf, buflen); > +} > + > +static uint64_t jpegxl_get_bits(JpegXLParseContext *jxlr, size_t bits) > +{ > + if (!bits) > + return 0; > + if (bits > INT_MAX) > + /* what are you doing with all those bits... */ > + return 0; > + jxlr->bits_read += bits; > + /* extra buffer size of 8 bytes */ > + if ((jxlr->bits_read + 1) / 8 + 8 > jxlr->buflen - 1) This check looks weird. E.g. why (jxlr->bits_read + 1) / 8 and not (jxlr->bits_read + 7) / 8? And why the extra buffer size of eight bytes? > + /* overflowing buffer */ > + return 0; > + while (bits > 64) { I do not see anything where you would read more than 64 bits at a time. It would also make no sense (the format would be wasting bits in this case). > + size_t bitcount = (bits - 1) % 64 + 1; > + get_bits64(&jxlr->gb, bitcount); > + bits -= bitcount; > + } > + return get_bits64(&jxlr->gb, bits); > +} > + > +static uint32_t jpegxl_u32(JpegXLParseContext *jxlr, > + uint32_t *constants, uint32_t *ubits) Both arrays can be made const. And actually, both arrays should be declared as const uint32_t [4]. > +{ > + uint32_t ret, choice = jxl_bits(2); > + ret = constants[choice]; > + if (ubits[choice]) > + ret += jxl_bits(ubits[choice]); > + return ret; > +} > + > +static uint64_t jpegxl_u64(JpegXLParseContext *jxlr) > +{ > + uint64_t shift = 12, ret; > + switch (jxl_bits(2)) { > + case 0: > + ret = 0; > + break; > + case 1: > + ret = 1 + jxl_bits(4); > + break; > + case 2: > + ret = 17 + jxl_bits(8); > + break; > + case 3: > + ret = jxl_bits(12); > + while (jxl_bits(1)){ > + if (shift < 60) { > + ret |= jxl_bits(8) << shift; > + shift += 8; > + } else { > + ret |= jxl_bits(4) << shift; > + break; > + } > + } > + break; > + } > + return ret; > +} > + > +static float jpegxl_f16(JpegXLParseContext *jxlr) > +{ > + float ret; > + uint32_t mantissa = jxl_bits(10) << 13; > + uint32_t biased_exponent = jxl_bits(5); > + if (biased_exponent == 31) > + mantissa |= 0xFF << 23; > + else > + mantissa |= ((biased_exponent - 15 + 127) & 0xFF) << 23; > + memcpy(&ret, &mantissa, sizeof(float)); return av_int2float(mantissa); > + return ret; > +} > + > +static uint32_t jpegxl_width_from_ratio(uint32_t height, int ratio) > +{ > + switch (ratio){ > + case 1: > + return height; > + case 2: > + return (height * 12) / 10; > + case 3: > + return (height * 4) / 3; > + case 4: > + return (height * 3) / 2; > + case 5: > + return (height * 16) / 9; > + case 6: > + return (height * 5) / 4; > + case 7: > + return height * 2; > + default: > + /* manual width */ > + return 0; > + } > +} > + > + > +static int jpegxl_parse_size_header(JpegXLParseContext *jxlr, > + uint32_t *width, uint32_t *height) > +{ > + uint32_t w, h; > + if (jxl_bits(1)) { > + /* small size header */ > + h = (jxl_bits(5) + 1) << 3; > + w = jpegxl_width_from_ratio(h, jxl_bits(3)); > + if (!w) > + w = (jxl_bits(5) + 1) << 3; > + } else { > + /* large size header */ > + h = 1 + jpegxl_u32(jxlr, > + (uint32_t[]){0, 0, 0, 0}, (uint32_t[]){9, 13, 18, 30}); h can be up to 2^30 here. This means that the multiplications in jpegxl_width_from_ratio() can overflow and need to be performed in 64bit (although the end result always fits into an uint32_t). (E.g. if h were 2^30, then jpegxl_width_from_ratio() will return 0 in cases 2, 3 and 5.) > + w = jpegxl_width_from_ratio(h, jxl_bits(3)); > + if (!w) > + w = 1 + jpegxl_u32(jxlr, > + (uint32_t[]){0, 0, 0, 0}, (uint32_t[]){9, 13, 18, 30}); > + } > + *width = w, *height = h; > + return 0; > +} > + > +static int jpegxl_parse_preview_header(JpegXLParseContext *jxlr, > + uint32_t *width, uint32_t *height) > +{ > + uint32_t w, h; > + if (jxl_bits(1)) { > + /* div8 */ > + h = jpegxl_u32(jxlr, > + (uint32_t[]){16, 32, 1, 33}, (uint32_t[]){0, 0, 5, 9}) << 3; > + w = jpegxl_width_from_ratio(h, jxl_bits(3)); > + if (!w) > + w = jpegxl_u32(jxlr, > + (uint32_t[]){16, 32, 1, 33}, (uint32_t[]){0, 0, 5, 9}) << 3; > + } else { > + /* full */ > + h = jpegxl_u32(jxlr, > + (uint32_t[]){1, 65, 321, 1345}, (uint32_t[]){6, 8, 10, 12}); > + w = jpegxl_width_from_ratio(h, jxl_bits(3)); > + if (!w) > + w = jpegxl_u32(jxlr, > + (uint32_t[]){1, 65, 321, 1345}, (uint32_t[]){6, 8, 10, 12}); > + } > + *width = w, *height = h; > + return 0; > +} > + > +static int jpegxl_parse_animation_header(JpegXLParseContext *jxlr, > + uint32_t *num, uint32_t *denom, uint32_t *count, int *have_pts) > +{ > + uint32_t n, d, c; > + int p; > + n = jpegxl_u32(jxlr, > + (uint32_t[]){100, 1000, 1, 1}, (uint32_t[]){0, 0, 10, 30}); > + d = jpegxl_u32(jxlr, > + (uint32_t[]){1, 1001, 1, 1}, (uint32_t[]){0, 0, 8, 10}); > + c = jpegxl_u32(jxlr, > + (uint32_t[]){0, 0, 0, 0}, (uint32_t[]){0, 3, 16, 32}); > + p = jxl_bits(1); > + *num = n, *denom = d, *count = c, *have_pts = p; > + return 0; > +} > + > +static int jpegxl_parse_bit_depth(JpegXLParseContext *jxlr, > + uint32_t *depth, uint32_t *exp_depth) > +{ > + uint32_t d, e; > + if (jxl_bits(1)) { > + /* float samples */ > + d = jpegxl_u32(jxlr, > + (uint32_t[]){32, 16, 24, 1}, (uint32_t[]){0, 0, 0, 6}); > + e = jxl_bits(4) + 1; > + } else { > + /* integer samples */ > + d = jpegxl_u32(jxlr, > + (uint32_t[]){8, 10, 12, 1}, (uint32_t[]){0, 0, 0, 6}); > + e = 0; > + } > + *depth = d, *exp_depth = e; > + return 0; > +} > + > +static int jpegxl_parse_extra_channel_info(JpegXLParseContext *jxlr, > + JpegXLExtraChannelInfo *info, int level) > +{ > + int status = 0; > + int all_default = jxl_bits(1); > + > + if (!all_default) { > + info->type = jxl_enum(); > + if (info->type > 63) > + /* enum types cannot be 64+ */ > + return 1; > + status = jpegxl_parse_bit_depth(jxlr, &info->bits_per_sample, &info->exp_bits_per_sample); > + if (!status) > + return status; > + info->dim_shift = jpegxl_u32(jxlr, (uint32_t[]){0, 3, 4, 1}, (uint32_t[]){0, 0, 0, 3}); > + info->name_len = jpegxl_u32(jxlr, (uint32_t[]){0, 0, 16, 48}, (uint32_t[]){0, 4, 5, 10}); > + } else { > + info->type = FF_JPEGXL_CT_ALPHA; > + info->bits_per_sample = 8; > + info->exp_bits_per_sample = 0; > + } > + > + info->name = av_malloc(info->name_len + 1); > + if (!info->name) > + return AVERROR(ENOMEM); > + > + for (uint32_t i = 0; i < info->name_len; i++) > + /* there is no byte-alignment guarantee so no memcpy */ > + info->name[i] = jxl_bits(8); > + > + /* null-terminate it for string operations */ > + /* even though we have name_len */ > + info->name[info->name_len] = '\0'; > + > + info->alpha_associated = > + !all_default && info->type == FF_JPEGXL_CT_ALPHA && jxl_bits(1); > + > + if (info->type == FF_JPEGXL_CT_SPOT_COLOR) { > + info->red = jpegxl_f16(jxlr); > + info->green = jpegxl_f16(jxlr); > + info->blue = jpegxl_f16(jxlr); > + info->solidity = jpegxl_f16(jxlr); > + } > + > + if (info->type == FF_JPEGXL_CT_CFA) > + info->cfa_channel = jpegxl_u32(jxlr, (uint32_t[]){1, 0, 3, 19}, (uint32_t[]){0, 2, 4, 8}); > + else > + info->cfa_channel = 1; > + > + if (info->type == FF_JPEGXL_CT_BLACK && level < 10) > + return 1; > + > + return 0; > +} > + > +static void jpegxl_free_header(JpegXLHeader *header) > +{ > + if (header) { > + if (header->extra_channel_info) { > + for (uint32_t i = 0; i < header->num_extra_channels; i++) { > + if (header->extra_channel_info + i) > + av_freep(&header->extra_channel_info[i].name); > + } > + av_freep(&header->extra_channel_info); > + } > + if (header->extension_bits) > + av_freep(&header->extension_bits); > + if (header->opsin_inverse_matrix) > + av_freep(&header->opsin_inverse_matrix); > + if (header->up2_weight) > + av_freep(&header->up2_weight); > + if (header->up4_weight) > + av_freep(&header->up4_weight); > + if (header->up8_weight) > + av_freep(&header->up8_weight); > + av_freep(&header); > + } > +} > + > +/** > + * Parse a JpegXL Codestream Header and read it into the argument Header > + * @param level Codestream level provided by the container, 5 if raw codestream > + * @return 0 upon success, negative upon error, and positive if the buffer overran > + */ > +static int jpegxl_parse_codestream_header(void *avctx, JpegXLParseContext *jxlr, JpegXLHeader **headerp, int level) > +{ > + JpegXLHeader *header = NULL; > + int all_default, extra_fields = 0, status; > + > + header = av_mallocz(sizeof(JpegXLHeader)); This allocation is completely unnecessary, just put a JpegXLHeader on the stack in avpriv_jpegxl_verify_codestream_header() and jpegxl_parse(). > + if (!header) { > + av_log(avctx, AV_LOG_ERROR, "Could not allocate JpegXLHeader"); > + status = AVERROR(ENOMEM); > + goto fail; > + } > + > + /* signature check */ > + if (jxl_bits(16) != FF_JPEGXL_CODESTREAM_SIGNATURE_LE) { > + av_log(avctx, AV_LOG_DEBUG, "Failed JPEG XL Signature Check"); > + goto fail; > + } > + > + status = jpegxl_parse_size_header(jxlr, > + &header->width, &header->height); > + if (status) { > + jxl_parse_err("size header"); > + goto fail; > + } > + > + /* level 5 codestream */ > + if (level < 10) { > + if (header->width > (1 << 18) || header->height > (1 << 18) > + || (header->width >> 4) * (header->height >> 4) > (1 << 20)) { > + jxl_parse_err("width or height or both"); > + goto fail; > + } > + header->level = 5; > + } else { > + if (header->width > (1 << 30) || header->height > (1 << 30) > + || (header->width >> 14) * (header->height >> 14) > (1 << 12)) { > + jxl_parse_err("width or height or both"); > + goto fail; > + } > + header->level = 10; > + } > + > + all_default = jxl_bits(1); > + > + if (!all_default) > + extra_fields = jxl_bits(1); > + > + if (extra_fields) { > + header->orientation = jxl_bits(3); > + /* intrinstic size */ > + if (jxl_bits(1)) { > + status = jpegxl_parse_size_header(jxlr, > + &header->intrinsic_width, &header->intrinsic_height); > + if (status) { > + jxl_parse_err("intrinstic size header"); > + goto fail; > + } > + } > + > + /* preview header */ > + if (jxl_bits(1)) { > + status = jpegxl_parse_preview_header(jxlr, > + &header->preview_width, &header->preview_height); > + if (status) { > + jxl_parse_err("preview header"); > + goto fail; > + } > + if (header->preview_width > 4096 || header->preview_height > 4096) { > + jxl_parse_errvv("preview header size %" PRIu32 ", %" PRIu32, > + header->preview_width, header->preview_height); > + goto fail; > + } > + } > + > + /* animation header */ > + if (jxl_bits(1)) { > + status = jpegxl_parse_animation_header(jxlr, > + &header->anim_tb_num, &header->anim_tb_denom, > + &header->anim_loop_count, &header->anim_have_pts); > + if (status) { > + jxl_parse_err("animation header"); > + goto fail; > + } > + } > + > + } > + > + if (!all_default) { > + status = jpegxl_parse_bit_depth(jxlr, > + &header->bits_per_sample, &header->exp_bits_per_sample); > + if (status) { > + jxl_parse_err("bit depth header"); > + goto fail; > + } > + > + header->modular_16bit_buffers = jxl_bits(1); > + > + if (!header->modular_16bit_buffers && level < 10) { > + jxl_parse_err("modular 16bit buffers"); > + goto fail; > + } > + > + header->num_extra_channels = jpegxl_u32(jxlr, > + (uint32_t[]){0, 1, 2, 1}, (uint32_t[]){0, 0, 4, 12}); > + if (header->num_extra_channels > 256 || > + level < 10 && header->num_extra_channels > 4) { > + jxl_parse_err("too many extra channels"); > + goto fail; > + } > + if (header->num_extra_channels) { > + header->extra_channel_info = > + av_calloc(header->num_extra_channels + 1, sizeof(JpegXLExtraChannelInfo)); > + for (uint32_t i = 0; i < header->num_extra_channels; i++) { > + status = jpegxl_parse_extra_channel_info(jxlr, header->extra_channel_info + i, level); > + if (status) { > + jxl_parse_errv("extra channel number %" PRIu32, i); > + goto fail; > + } > + } > + } > + > + header->xyb_encoded = jxl_bits(1); > + > + if (jxl_bits(1)) { > + /* all_default for color encoding */ > + header->have_icc_profile = 0; > + header->color_space = FF_JPEGXL_CS_RGB; > + header->white_point = FF_JPEGXL_WP_D65; > + header->primaries = FF_JPEGXL_PR_SRGB; > + header->transfer_function = (1 << 24) + FF_JPEGXL_TF_SRGB; > + header->rendering_intent = FF_JPEGXL_RI_RELATIVE; > + } else { > + header->have_icc_profile = jxl_bits(1); > + header->color_space = jxl_enum(); > + if (header->color_space > 63) { > + jxl_parse_errv("color space enum %" PRIu32, header->white_point); > + goto fail; > + } > + if (header->color_space != FF_JPEGXL_CS_XYB > + && !header->have_icc_profile) { > + header->white_point = jxl_enum(); > + if (header->white_point > 63) { > + jxl_parse_errv("white point enum %" PRIu32, header->white_point); > + goto fail; > + } > + } else { > + header->white_point = FF_JPEGXL_WP_D65; > + } > + if (header->white_point == FF_JPEGXL_WP_CUSTOM) { > + header->white_ux = jpegxl_u32(jxlr, (uint32_t[]){0, 524288, 1048576, 2097152}, (uint32_t[]){19, 19, 20, 21}); > + header->white_uy = jpegxl_u32(jxlr, (uint32_t[]){0, 524288, 1048576, 2097152}, (uint32_t[]){19, 19, 20, 21}); > + } > + if (header->color_space != FF_JPEGXL_CS_XYB > + && header->color_space != FF_JPEGXL_CS_GRAY > + && !header->have_icc_profile) { > + header->primaries = jxl_enum(); > + if (header->primaries > 63) { > + jxl_parse_errv("primaries enum %" PRIu32, header->primaries); > + goto fail; > + } > + } else { > + header->primaries = FF_JPEGXL_PR_SRGB; > + } > + if (header->primaries == FF_JPEGXL_PR_CUSTOM) { > + header->red_ux = jpegxl_u32(jxlr, (uint32_t[]){0, 524288, 1048576, 2097152}, (uint32_t[]){19, 19, 20, 21}); > + header->red_uy = jpegxl_u32(jxlr, (uint32_t[]){0, 524288, 1048576, 2097152}, (uint32_t[]){19, 19, 20, 21}); > + header->green_ux = jpegxl_u32(jxlr, (uint32_t[]){0, 524288, 1048576, 2097152}, (uint32_t[]){19, 19, 20, 21}); > + header->green_uy = jpegxl_u32(jxlr, (uint32_t[]){0, 524288, 1048576, 2097152}, (uint32_t[]){19, 19, 20, 21}); > + header->blue_ux = jpegxl_u32(jxlr, (uint32_t[]){0, 524288, 1048576, 2097152}, (uint32_t[]){19, 19, 20, 21}); > + header->blue_uy = jpegxl_u32(jxlr, (uint32_t[]){0, 524288, 1048576, 2097152}, (uint32_t[]){19, 19, 20, 21}); > + } > + if (!header->have_icc_profile) { > + if (jxl_bits(1)) { > + /* this is gamma */ > + header->transfer_function = jxl_bits(24); > + } else { > + header->transfer_function = jxl_enum(); > + if (header->transfer_function > 63) { > + jxl_parse_errv("transfer function enum %" PRIu32, header->transfer_function); > + goto fail; > + } > + /* > + * higher than the highest possible gamma value > + * marks it as an enum isntead of gamma > + */ > + header->transfer_function += 1 << 24; > + } > + header->rendering_intent = jxl_enum(); > + if (header->rendering_intent > 63) { > + jxl_parse_errv("rendering intent enum %" PRIu32, header->rendering_intent); > + goto fail; > + } > + } else { > + header->transfer_function = (1 << 24) + FF_JPEGXL_TF_SRGB; > + header->rendering_intent = FF_JPEGXL_RI_RELATIVE; > + } > + } > + > + /* lazy && works with this macro */ > + if (extra_fields && !jxl_bits(1)) { > + header->intensity_target = jpegxl_f16(jxlr); > + header->min_nits = jpegxl_f16(jxlr); > + header->relative_to_max_display = jxl_bits(1); > + header->linear_below = jpegxl_f16(jxlr); > + } else { > + header->intensity_target = 255; > + } > + > + header->extensions = jpegxl_u64(jxlr); > + if (header->extensions) { > + header->extension_bits = av_calloc(64, sizeof(uint64_t)); This array is write-only. > + if (!header->extension_bits) { > + av_log(avctx, AV_LOG_ERROR, "Could not allocate extension bit array"); > + status = AVERROR(ENOMEM); > + goto fail; > + } > + for (int i = 0; i < 64; i++) { > + if (header->extensions & (UINT64_C(1) << i)) > + header->extension_bits[i] = jpegxl_u64(jxlr); > + } > + } > + > + } else { > + header->modular_16bit_buffers = 1; > + header->xyb_encoded = 1; > + } > + > + header->default_transform = jxl_bits(1); > + > + /* lazy && works with this macro */ > + if (!header->default_transform && header->xyb_encoded && !jxl_bits(1)) { > + header->opsin_inverse_matrix = av_malloc_array(16, sizeof(float)); > + if (!header->opsin_inverse_matrix) { > + av_log(avctx, AV_LOG_ERROR, "Could not allocate Opsin Inverse Matrix"); > + status = AVERROR(ENOMEM); > + goto fail; > + } > + for (int i = 0; i < 16; i++) { > + header->opsin_inverse_matrix[i] = jpegxl_f16(jxlr); > + } > + } > + > + if (!header->default_transform) { > + header->cw_mask = jxl_bits(3); > + } > + > + if (header->cw_mask & 1) { > + header->up2_weight = av_malloc_array(15, sizeof(float)); > + if (!header->up2_weight) { > + av_log(avctx, AV_LOG_ERROR, "Could not allocate up2_weight"); > + status = AVERROR(ENOMEM); > + goto fail; > + } > + for (int i = 0; i < 15; i++) { > + header->up2_weight[i] = jpegxl_f16(jxlr); > + } > + } > + if (header->cw_mask & 2) { > + header->up4_weight = av_malloc_array(55, sizeof(float)); > + if (!header->up4_weight) { > + av_log(avctx, AV_LOG_ERROR, "Could not allocate up4_weight"); > + status = AVERROR(ENOMEM); > + goto fail; > + } > + for (int i = 0; i < 55; i++) { > + header->up4_weight[i] = jpegxl_f16(jxlr); > + } > + } > + if (header->cw_mask & 4) { > + header->up8_weight = av_malloc_array(210, sizeof(float)); These arrays are never used at all, so you can just avoid the allocation. In fact I wonder whether it is necessary to parse this stuff here at all (AFAIK no other parser does such deep parsing). > + if (!header->up8_weight) { > + av_log(avctx, AV_LOG_ERROR, "Could not allocate up8_weight"); > + status = AVERROR(ENOMEM); > + goto fail; > + } > + for (int i = 0; i < 210; i++) { > + header->up8_weight[i] = jpegxl_f16(jxlr); > + } > + } > + > + /* zero pad to byte */ > + if (jxl_bits(7 - ((jxlr->bits_read - 1) % 8))) { > + jxl_parse_err("zero padding to byte"); > + goto fail; > + } > + > + /* bytes consumed > buflen */ > + if ((jxlr->bits_read + 1) / 8 - 1 > jxlr->buflen) { > + jxl_parse_err("unexpected end of file"); > + goto fail; > + } > + > + *headerp = header; > + return 0; > + > +fail: > + jpegxl_free_header(header); > + if (status >= 0 && (jxlr->bits_read + 1) / 8 - 1 > jxlr->buflen) > + return FFMIN(jxlr->bits_read, INT_MAX); > + if (status > 0) > + return -status; > + return status || -1; This is equivalent to "return 1;" > +} > + > +static int jpegxl_parse_header(void *avctx, JpegXLParseContext *jxlr, JpegXLHeader **headerp, int level) > +{ > + uint64_t sig = jxl_bits(64); > + jpegxl_reset_pc(jxlr); > + if (sig == FF_JPEGXL_CONTAINER_SIGNATURE_LE) { > + for (;;) { > + uint64_t size = 0; > + uint32_t tag = 0; > + for (int k = 0; k < 4; k++) > + size = (size << 8) | jxl_bits(8); > + for (int k = 0; k < 4; k++) > + tag = (tag << 8) | jxl_bits(8); > + if (tag == MKBETAG('j','x','l','p')) { > + jxl_bits(32); > + break; > + } > + if (tag == MKBETAG('j','x','l','c')) > + break; > + if (size == 1) { > + size = 0; > + for (int k = 0; k < 8; k++) > + size = (size << 8) | jxl_bits(8); > + if (size > INT_MAX) > + break; > + size -= 8; > + } > + if (jxlr->bits_read / 8 > jxlr->buflen) > + break; This code looks weird: You read the size of an isobmff-style box, but then you actually ignore the size. (Apart from that: You are always byte-aligned here, so you could just as well read the stuff bytewise and skip the corresponding number of bits lateron.) > + } > + } > + return jpegxl_parse_codestream_header(avctx, jxlr, headerp, level); > +} > + > +int avpriv_jpegxl_verify_codestream_header(void *avctx, uint8_t *buf, size_t buflen, int level) > +{ > + JpegXLParseContext jxlri; > + JpegXLHeader *header = NULL; > + int status; > + jpegxl_init_pc(&jxlri, buf, buflen); > + status = jpegxl_parse_codestream_header(avctx, &jxlri, &header, level); > + if (header) > + jpegxl_free_header(header); > + return status; > +} > + > +static enum AVPixelFormat jpegxl_header_get_pixfmt(JpegXLHeader *header) { > + int alpha = 0; > + for (int i = 0; i < header->num_extra_channels; i++) { > + if ((header->extra_channel_info + i)->type == FF_JPEGXL_CT_ALPHA) { > + alpha = 1; > + break; > + } > + } > + if (header->color_space == FF_JPEGXL_CS_GRAY) { > + if (header->bits_per_sample <= 8) > + return alpha ? AV_PIX_FMT_YA8 : AV_PIX_FMT_GRAY8; > + if (header->bits_per_sample > 16 || header->exp_bits_per_sample) > + return alpha ? AV_PIX_FMT_NONE : AV_PIX_FMT_GRAYF32LE; > + return alpha ? AV_PIX_FMT_YA16LE : AV_PIX_FMT_GRAY16LE; > + } else if (header->color_space == FF_JPEGXL_CS_RGB > + || header->color_space == FF_JPEGXL_CS_XYB) { > + if (header->bits_per_sample <= 8) > + return alpha ? AV_PIX_FMT_RGBA : AV_PIX_FMT_RGB24; > + if (header->bits_per_sample > 16 || header->exp_bits_per_sample) > + return alpha ? AV_PIX_FMT_GBRAPF32LE : AV_PIX_FMT_GBRPF32LE; > + return alpha ? AV_PIX_FMT_RGBA64LE : AV_PIX_FMT_RGB48LE; > + } > + return AV_PIX_FMT_NONE; > +} > + > +static av_cold int jpegxl_parse_init(AVCodecParserContext *s1) > +{ > + s1->pict_type = AV_PICTURE_TYPE_NONE; > + return 0; > +} > + > +static int jpegxl_parse(AVCodecParserContext *s1, > + AVCodecContext *avctx, > + const uint8_t **poutbuf, int *poutbuf_size, > + const uint8_t *buf, int buf_size) > +{ > + JpegXLParseContext *jxlr = s1->priv_data; > + JpegXLHeader *header = NULL; > + int next = END_NOT_FOUND, status = 0; > + size_t i = 0; > + > + *poutbuf_size = 0; > + *poutbuf = NULL; > + > + if (buf_size == 0 || s1->flags & PARSER_FLAG_COMPLETE_FRAMES) { > + /* eof is a frame boundary */ > + next = buf_size; > + } else if (!jxlr->pc.frame_start_found) { > + /* look for stream signature */ > + uint64_t state64 = jxlr->pc.state64; > + for (; i < buf_size; i++) { > + state64 = (state64 << 8) | buf[i]; > + if ((state64 & 0xFFFF) == FF_JPEGXL_CODESTREAM_SIGNATURE_BE > + || state64 == FF_JPEGXL_CONTAINER_SIGNATURE_BE) { > + jxlr->pc.frame_start_found = 1; > + break; > + } > + } > + jxlr->pc.state64 = state64; Did you test this part? Did it work? I am wondering because you are actually supposed to indicate how many bytes of input you have consumed, yet you always indicate END_NOT_FOUND. > + } > + > + if (jxlr->pc.frame_start_found && s1->pict_type == AV_PICTURE_TYPE_NONE) { > + jpegxl_init_pc(jxlr, buf, buf_size); > + status = jpegxl_parse_header(NULL, jxlr, &header, 5); Why don't you forward the logcontext? > + if (status == 0) { > + /* parsed successfully */ > + s1->pict_type = AV_PICTURE_TYPE_I; > + s1->key_frame = 1; > + s1->width = avctx->width = avctx->coded_width = header->width; > + s1->height = avctx->height = avctx->coded_height = header->height; > + s1->format = avctx->pix_fmt = jpegxl_header_get_pixfmt(header); > + jxlr->pc.frame_start_found = 1; > + } > + if (header) > + jpegxl_free_header(header); > + header = NULL; > + } > + > + if (ff_combine_frame(&jxlr->pc, next, &buf, &buf_size) < 0) { > + *poutbuf = NULL; > + *poutbuf_size = 0; > + return buf_size; > + } > + > + jxlr->pc.frame_start_found = 0; > + > + *poutbuf = buf; > + *poutbuf_size = buf_size; > + > + return next; > +} > + > +const AVCodecParser ff_jpegxl_parser = { > + .codec_ids = { AV_CODEC_ID_JPEGXL }, > + .priv_data_size = sizeof(JpegXLParseContext), > + .parser_init = jpegxl_parse_init, > + .parser_parse = jpegxl_parse, > + .parser_close = ff_parse_close, > +}; > + > +#else /* CONFIG_JPEGXL_PARSER */ > + > +int avpriv_jpegxl_verify_codestream_header(void *avctx, uint8_t *buf, size_t buflen, int level) > +{ We don't provide fallback-stubs for avpriv functions; instead we just add configure/Makefile depencies. > + /* parser disabled at compile-time */ > + return AVERROR(ENOSYS); > +} > + > +#endif /* CONFIG_JPEGXL_PARSER */ > diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c > index 6b40c18d80..18a40eceea 100644 > --- a/libavcodec/parsers.c > +++ b/libavcodec/parsers.c > @@ -52,6 +52,7 @@ extern const AVCodecParser ff_h264_parser; > extern const AVCodecParser ff_hevc_parser; > extern const AVCodecParser ff_ipu_parser; > extern const AVCodecParser ff_jpeg2000_parser; > +extern const AVCodecParser ff_jpegxl_parser; > extern const AVCodecParser ff_mjpeg_parser; > extern const AVCodecParser ff_mlp_parser; > extern const AVCodecParser ff_mpeg4video_parser; > diff --git a/libavcodec/version.h b/libavcodec/version.h > index a46fb05f1a..b5867ad041 100644 > --- a/libavcodec/version.h > +++ b/libavcodec/version.h > @@ -28,7 +28,7 @@ > #include "libavutil/version.h" > > #define LIBAVCODEC_VERSION_MAJOR 59 > -#define LIBAVCODEC_VERSION_MINOR 20 > +#define LIBAVCODEC_VERSION_MINOR 21 > #define LIBAVCODEC_VERSION_MICRO 100 > > #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ >
On 1/7/22 08:51, Andreas Rheinhardt wrote: > Leo Izen: >> + int level; >> + >> +} JpegXLHeader; > Why is this structure exported in the header when it is only used by the > parser? This was a relic from before I was told to remove the struct from the exported function signature. I'll move it over to the parser. > >> +#include <stdint.h> > Unnecessary, as inttypes.h is guaranteed by the spec to include stdint.h. Will remove. > +#include <inttypes.h> > +#include <stdlib.h> > + > +#ifndef BITSTREAM_READER_LE > +#define BITSTREAM_READER_LE > +#endif > > The check here is nonsense. Will remove. >> +#ifdef CACHED_BITSTREAM_READER >> +#undef CACHED_BITSTREAM_READER >> +#endif >> +#define CACHED_BITSTREAM_READER 1 > Is there a reason you insist on the cached bitstream reader? I was under the impression it was needed for get_bits64. Is this not the case? >> + >> +#include "libavutil/error.h" >> +#include "libavutil/intreadwrite.h" >> +#include "libavutil/mem.h" >> + >> +#include "codec_id.h" >> +#include "config.h" >> +#include "get_bits.h" >> +#include "jpegxl.h" >> +#include "parser.h" >> + >> +#if CONFIG_JPEGXL_PARSER >> + >> +typedef struct JpegXLParseContext { >> + ParseContext pc; >> + GetBitContext gb; >> + const uint8_t *buf; >> + size_t buflen; >> + size_t bits_read; > What exactly is the point of duplicating the GetBitContext's internal > status? I wasn't sure if any of the code ever added to gb->buffer. If it doesn't, I can remove the duplication. > +{ > + memset(&jxlr->gb, 0, sizeof(GetBitContext)); > Unnecessary, as you reset gb below. Will remove. > + memset(&jxlr->gb, 0, sizeof(GetBitContext)); > Unnecessary, as you are initializing gb below. Will remove. > >> + if ((jxlr->bits_read + 1) / 8 + 8 > jxlr->buflen - 1) > This check looks weird. E.g. why (jxlr->bits_read + 1) / 8 and not > (jxlr->bits_read + 7) / 8? And why the extra buffer size of eight bytes? I'm pretty sure this is a relic from before I was using get_bits.h and simply called AV_RL64() on the buffer, and I wanted to make sure that would not overflow. I missed the change here, and I should fix this. > >> + /* overflowing buffer */ >> + return 0; >> + while (bits > 64) { > I do not see anything where you would read more than 64 bits at a time. > It would also make no sense (the format would be wasting bits in this case). When parsing the ISOBMFF-like container, I call jxl_bits() to skip boxes I do not intend to parse after determining their size. >> +} >> + >> +static uint32_t jpegxl_u32(JpegXLParseContext *jxlr, >> + uint32_t *constants, uint32_t *ubits) > Both arrays can be made const. And actually, both arrays should be > declared as const uint32_t [4]. Will change. > >> + memcpy(&ret, &mantissa, sizeof(float)); > return av_int2float(mantissa); Huh, so that's a thing. Will change. >> + (uint32_t[]){0, 0, 0, 0}, (uint32_t[]){9, 13, 18, 30}); > h can be up to 2^30 here. This means that the multiplications in > jpegxl_width_from_ratio() can overflow and need to be performed in 64bit > (although the end result always fits into an uint32_t). > (E.g. if h were 2^30, then jpegxl_width_from_ratio() will return 0 in > cases 2, 3 and 5.) Good catch, I missed this overflow issue. Will fix. >> + header = av_mallocz(sizeof(JpegXLHeader)); > This allocation is completely unnecessary, just put a JpegXLHeader on > the stack in avpriv_jpegxl_verify_codestream_header() and jpegxl_parse(). The parse function requires the header to be zeroed out. I could stack allocate it and assign { 0 } to it. I'm pretty sure this was from when the header used to be exported, before I was told it should be hidden, making this now unnecessary, although it sifted through. Thanks, will change. >> + header->extensions = jpegxl_u64(jxlr); >> + if (header->extensions) { >> + header->extension_bits = av_calloc(64, sizeof(uint64_t)); > This array is write-only. There's many fields I parse and assign to the header, but do not do anything with because the eventual goal was to integrate this into an internal decoder. If I simply skipped them, then they'd just need to be added later. > >> + if (!header->extension_bits) { >> + av_log(avctx, AV_LOG_ERROR, "Could not allocate extension bit array"); >> + status = AVERROR(ENOMEM); >> + goto fail; >> + } >> + for (int i = 0; i < 64; i++) { >> + if (header->extensions & (UINT64_C(1) << i)) >> + header->extension_bits[i] = jpegxl_u64(jxlr); >> + } >> + } >> + >> + } else { >> + header->modular_16bit_buffers = 1; >> + header->xyb_encoded = 1; >> + } >> + >> + header->default_transform = jxl_bits(1); >> + >> + /* lazy && works with this macro */ >> + if (!header->default_transform && header->xyb_encoded && !jxl_bits(1)) { >> + header->opsin_inverse_matrix = av_malloc_array(16, sizeof(float)); >> + if (!header->opsin_inverse_matrix) { >> + av_log(avctx, AV_LOG_ERROR, "Could not allocate Opsin Inverse Matrix"); >> + status = AVERROR(ENOMEM); >> + goto fail; >> + } >> + for (int i = 0; i < 16; i++) { >> + header->opsin_inverse_matrix[i] = jpegxl_f16(jxlr); >> + } >> + } >> + >> + if (!header->default_transform) { >> + header->cw_mask = jxl_bits(3); >> + } >> + >> + if (header->cw_mask & 1) { >> + header->up2_weight = av_malloc_array(15, sizeof(float)); >> + if (!header->up2_weight) { >> + av_log(avctx, AV_LOG_ERROR, "Could not allocate up2_weight"); >> + status = AVERROR(ENOMEM); >> + goto fail; >> + } >> + for (int i = 0; i < 15; i++) { >> + header->up2_weight[i] = jpegxl_f16(jxlr); >> + } >> + } >> + if (header->cw_mask & 2) { >> + header->up4_weight = av_malloc_array(55, sizeof(float)); >> + if (!header->up4_weight) { >> + av_log(avctx, AV_LOG_ERROR, "Could not allocate up4_weight"); >> + status = AVERROR(ENOMEM); >> + goto fail; >> + } >> + for (int i = 0; i < 55; i++) { >> + header->up4_weight[i] = jpegxl_f16(jxlr); >> + } >> + } >> + if (header->cw_mask & 4) { >> + header->up8_weight = av_malloc_array(210, sizeof(float)); > These arrays are never used at all, so you can just avoid the > allocation. In fact I wonder whether it is necessary to parse this stuff > here at all (AFAIK no other parser does such deep parsing). The parser parses pretty deeply in order to check for validity. Without very deep parsing, it fails the probetest fuzzer because it gives too many false positives. The codestream format is very permissive in this regard. >> + if (status >= 0 && (jxlr->bits_read + 1) / 8 - 1 > jxlr->buflen) >> + return FFMIN(jxlr->bits_read, INT_MAX); >> + if (status > 0) >> + return -status; >> + return status || -1; > This is equivalent to "return 1;" The goal was to use the return value to distinguish between a parser error caused by an unexpected end of codestream (by how many bytes) verses a particular error. I don't believe I ended up using it anywhere, so I suppose I could change this to "return 1;" >> +} >> + >> +static int jpegxl_parse_header(void *avctx, JpegXLParseContext *jxlr, JpegXLHeader **headerp, int level) >> +{ >> + uint64_t sig = jxl_bits(64); >> + jpegxl_reset_pc(jxlr); >> + if (sig == FF_JPEGXL_CONTAINER_SIGNATURE_LE) { >> + for (;;) { >> + uint64_t size = 0; >> + uint32_t tag = 0; >> + for (int k = 0; k < 4; k++) >> + size = (size << 8) | jxl_bits(8); >> + for (int k = 0; k < 4; k++) >> + tag = (tag << 8) | jxl_bits(8); >> + if (tag == MKBETAG('j','x','l','p')) { >> + jxl_bits(32); >> + break; >> + } >> + if (tag == MKBETAG('j','x','l','c')) >> + break; >> + if (size == 1) { >> + size = 0; >> + for (int k = 0; k < 8; k++) >> + size = (size << 8) | jxl_bits(8); >> + if (size > INT_MAX) >> + break; >> + size -= 8; >> + } >> + if (jxlr->bits_read / 8 > jxlr->buflen) >> + break; > This code looks weird: You read the size of an isobmff-style box, but > then you actually ignore the size. > (Apart from that: You are always byte-aligned here, so you could just as > well read the stuff bytewise and skip the corresponding number of bits > lateron.) > Uh, there's supposed to be a jxl_bits(size * 8) that somehow fell through the cracks (with appropriate overflow checking). I'm not sure why it still works. Perhaps I got lucky on my samples. See earlier comment about jxl_bits(n) for n > 64. >> + if (buf_size == 0 || s1->flags & PARSER_FLAG_COMPLETE_FRAMES) { >> + /* eof is a frame boundary */ >> + next = buf_size; >> + } else if (!jxlr->pc.frame_start_found) { >> + /* look for stream signature */ >> + uint64_t state64 = jxlr->pc.state64; >> + for (; i < buf_size; i++) { >> + state64 = (state64 << 8) | buf[i]; >> + if ((state64 & 0xFFFF) == FF_JPEGXL_CODESTREAM_SIGNATURE_BE >> + || state64 == FF_JPEGXL_CONTAINER_SIGNATURE_BE) { >> + jxlr->pc.frame_start_found = 1; >> + break; >> + } >> + } >> + jxlr->pc.state64 = state64; > Did you test this part? Did it work? I am wondering because you are > actually supposed to indicate how many bytes of input you have consumed, > yet you always indicate END_NOT_FOUND. I did, and it did, although I suppose I did not test against things like junk in front of a valid file. > >> + } >> + >> + if (jxlr->pc.frame_start_found && s1->pict_type == AV_PICTURE_TYPE_NONE) { >> + jpegxl_init_pc(jxlr, buf, buf_size); >> + status = jpegxl_parse_header(NULL, jxlr, &header, 5); > Why don't you forward the logcontext? Will do. Also, the init line should read buf + i, and buf_size - i, per earlier comment. >> +const AVCodecParser ff_jpegxl_parser = { >> + .codec_ids = { AV_CODEC_ID_JPEGXL }, >> + .priv_data_size = sizeof(JpegXLParseContext), >> + .parser_init = jpegxl_parse_init, >> + .parser_parse = jpegxl_parse, >> + .parser_close = ff_parse_close, >> +}; >> + >> +#else /* CONFIG_JPEGXL_PARSER */ >> + >> +int avpriv_jpegxl_verify_codestream_header(void *avctx, uint8_t *buf, size_t buflen, int level) >> +{ > We don't provide fallback-stubs for avpriv functions; instead we just > add configure/Makefile depencies. I'm not sure how to do that for this specific task, so I'll ask on IRC. Also, thanks for taking the time to do a thorough code review, I very much appreciate it. -Leo Izen
diff --git a/MAINTAINERS b/MAINTAINERS index c065e94498..17c0104672 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -187,6 +187,7 @@ Codecs: interplayvideo.c Mike Melanson jni*, ffjni* Matthieu Bouron jpeg2000* Nicolas Bertrand + jpegxl.h, jpegxl_parser.c Leo Izen jvdec.c Peter Ross lcl*.c Roberto Togni, Reimar Doeffinger libcelt_dec.c Nicolas George @@ -615,6 +616,7 @@ Haihao Xiang (haihao) 1F0C 31E8 B4FE F7A4 4DC1 DC99 E0F5 76D4 76FC 437F Jaikrishnan Menon 61A1 F09F 01C9 2D45 78E1 C862 25DC 8831 AF70 D368 James Almer 7751 2E8C FD94 A169 57E6 9A7A 1463 01AD 7376 59E0 Jean Delvare 7CA6 9F44 60F1 BDC4 1FD2 C858 A552 6B9B B3CD 4E6A +Leo Izen (thebombzen) B6FD 3CFC 7ACF 83FC 9137 6945 5A71 C331 FD2F A19A Loren Merritt ABD9 08F4 C920 3F65 D8BE 35D7 1540 DAA7 060F 56DE Lynne FE50 139C 6805 72CA FD52 1F8D A2FE A5F0 3F03 4464 Michael Niedermayer 9FF2 128B 147E F673 0BAD F133 611E C787 040B 0FAB diff --git a/libavcodec/Makefile b/libavcodec/Makefile index cfc70a3eaf..423022e714 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -43,6 +43,7 @@ OBJS = ac3_parser.o \ dv_profile.o \ encode.o \ imgconvert.o \ + jpegxl_parser.o \ jni.o \ mathtables.o \ mediacodec.o \ diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c index 0974ee03de..0f3d0f910b 100644 --- a/libavcodec/codec_desc.c +++ b/libavcodec/codec_desc.c @@ -1862,6 +1862,15 @@ static const AVCodecDescriptor codec_descriptors[] = { .long_name = NULL_IF_CONFIG_SMALL("GEM Raster image"), .props = AV_CODEC_PROP_LOSSY, }, + { + .id = AV_CODEC_ID_JPEGXL, + .type = AVMEDIA_TYPE_VIDEO, + .name = "jpegxl", + .long_name = NULL_IF_CONFIG_SMALL("JPEG XL"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | + AV_CODEC_PROP_LOSSLESS, + .mime_types= MT("image/jxl"), + }, /* various PCM "codecs" */ { diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h index ab265ec584..551a516446 100644 --- a/libavcodec/codec_id.h +++ b/libavcodec/codec_id.h @@ -308,6 +308,7 @@ enum AVCodecID { AV_CODEC_ID_SIMBIOSIS_IMX, AV_CODEC_ID_SGA_VIDEO, AV_CODEC_ID_GEM, + AV_CODEC_ID_JPEGXL, /* various PCM "codecs" */ AV_CODEC_ID_FIRST_AUDIO = 0x10000, ///< A dummy id pointing at the start of audio codecs diff --git a/libavcodec/jpegxl.h b/libavcodec/jpegxl.h new file mode 100644 index 0000000000..cbfb33f74c --- /dev/null +++ b/libavcodec/jpegxl.h @@ -0,0 +1,206 @@ +/* + * JPEG XL header + * Copyright (c) 2021 Leo Izen <leo.izen@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * JPEG XL header + */ + +#ifndef AVCODEC_JPEGXL_H +#define AVCODEC_JPEGXL_H + +#include <stdint.h> + +#define FF_JPEGXL_CODESTREAM_SIGNATURE_LE 0x0aff +#define FF_JPEGXL_CODESTREAM_SIGNATURE_BE 0xff0a +#define FF_JPEGXL_CONTAINER_SIGNATURE_LE 0x204c584a0c000000 +#define FF_JPEGXL_CONTAINER_SIGNATURE_BE 0x0000000c4a584c20 + +enum JpegXLExtraChannelType { + FF_JPEGXL_CT_ALPHA = 0, + FF_JPEGXL_CT_DEPTH, + FF_JPEGXL_CT_SPOT_COLOR, + FF_JPEGXL_CT_SELECTION_MASK, + FF_JPEGXL_CT_BLACK, + FF_JPEGXL_CT_CFA, + FF_JPEGXL_CT_THERMAL, + FF_JPEGXL_CT_NON_OPTIONAL = 15, + FF_JPEGXL_CT_OPTIONAL +}; + +enum JpegXLColorSpace { + FF_JPEGXL_CS_RGB = 0, + FF_JPEGXL_CS_GRAY, + FF_JPEGXL_CS_XYB, + FF_JPEGXL_CS_UNKNOWN +}; + +enum JpegXLWhitePoint { + FF_JPEGXL_WP_D65 = 1, + FF_JPEGXL_WP_CUSTOM, + FF_JPEGXL_WP_E = 10, + FF_JPEGXL_WP_DCI = 11 +}; + +enum JpegXLPrimaries { + FF_JPEGXL_PR_SRGB = 1, + FF_JPEGXL_PR_CUSTOM, + FF_JPEGXL_PR_2100 = 9, + FF_JPEGXL_PR_P3 = 11, +}; + +enum JpegXLTransferFunction { + FF_JPEGXL_TF_709 = 1, + FF_JPEGXL_TF_UNKNOWN, + FF_JPEGXL_TF_LINEAR = 8, + FF_JPEGXL_TF_SRGB = 13, + FF_JPEGXL_TF_PQ = 16, + FF_JPEGXL_TF_DCI, + FF_JPEGXL_TF_HLG +}; + +enum JpegXLRenderingIntent { + FF_JPEGXL_RI_PERCEPTUAL = 0, + FF_JPEGXL_RI_RELATIVE, + FF_JPEGXL_RI_SATURATION, + FF_JPEGXL_RI_ABSOLUTE +}; + +typedef struct JpegXLExtraChannelInfo { + enum JpegXLExtraChannelType type; + uint32_t bits_per_sample; + uint32_t exp_bits_per_sample; + uint32_t dim_shift; + size_t name_len; + /* utf-8 */ + char *name; + int alpha_associated; + float red; + float green; + float blue; + float solidity; + uint32_t cfa_channel; +} JpegXLExtraChannelInfo; + +typedef struct JpegXLHeader { + uint32_t width; + uint32_t height; + int orientation; + /* zero if not present */ + uint32_t intrinsic_width; + uint32_t intrinsic_height; + uint32_t preview_width; + uint32_t preview_height; + /* BEGIN animation header */ + uint32_t anim_tb_num; + uint32_t anim_tb_denom; + uint32_t anim_loop_count; + int anim_have_pts; + /* END animation header */ + + uint32_t bits_per_sample; + uint32_t exp_bits_per_sample; + + int modular_16bit_buffers; + + uint32_t num_extra_channels; + + /* + * NULL if no extra channels + * otherwise an array of extra channel info + * with length num_extra_channels + */ + JpegXLExtraChannelInfo *extra_channel_info; + + int xyb_encoded; + + /* BEGIN color encoding bundle */ + int have_icc_profile; + enum JpegXLColorSpace color_space; + enum JpegXLWhitePoint white_point; + uint32_t white_ux; + uint32_t white_uy; + enum JpegXLPrimaries primaries; + uint32_t red_ux; + uint32_t red_uy; + uint32_t green_ux; + uint32_t green_uy; + uint32_t blue_ux; + uint32_t blue_uy; + /* + * if this is less than 1 << 24, + * then interpret it as a gamma value + * If this is greater than or equal to 1 << 24, + * then subtract 1 << 24 and interpret it as a + * an enum JpegXLTransferFunction + */ + int have_gamma; + uint32_t transfer_function; + enum JpegXLRenderingIntent rendering_intent; + /* END color encoding bundle */ + + /* BEGIN tone mapping bundle */ + float intensity_target; + float min_nits; + int relative_to_max_display; + float linear_below; + /* END tone mapping bundle */ + + uint64_t extensions; + /* if extensions is nonzero, this will be length 64 */ + /* otherwise it will be NULL */ + uint64_t *extension_bits; + + int default_transform; + + /* if present, an array of length 16 */ + /* NULL if not present */ + float *opsin_inverse_matrix; + + uint32_t cw_mask; + + /* if these are not present + * use NULL for these pointers + * otherwise up2_weight is an + * array of length 15, up4_weight + * is length 55, and up8_weight is + * length 210 + */ + float *up2_weight; + float *up4_weight; + float *up8_weight; + + /* + * this is not provided by the header, + * but rather, by the container + * raw Jpeg XL Codestreams are level 5 + * the container can choose to up it to 10 + */ + int level; + +} JpegXLHeader; + +/** + * @return 0 upon valid, nonzero upon some parse error + */ +int avpriv_jpegxl_verify_codestream_header(void *avctx, uint8_t *buf, size_t buflen, int level); + +#endif /* AVCODEC_JPEGXL_H */ diff --git a/libavcodec/jpegxl_parser.c b/libavcodec/jpegxl_parser.c new file mode 100644 index 0000000000..3355ca603e --- /dev/null +++ b/libavcodec/jpegxl_parser.c @@ -0,0 +1,809 @@ +/* + * JPEG XL parser + * Copyright (c) 2021 Leo Izen <leo.izen@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * JPEG XL parser + */ + +#include <stdint.h> +#include <inttypes.h> +#include <stdlib.h> + +#ifndef BITSTREAM_READER_LE +#define BITSTREAM_READER_LE +#endif +#ifdef CACHED_BITSTREAM_READER +#undef CACHED_BITSTREAM_READER +#endif +#define CACHED_BITSTREAM_READER 1 + +#include "libavutil/error.h" +#include "libavutil/intreadwrite.h" +#include "libavutil/mem.h" + +#include "codec_id.h" +#include "config.h" +#include "get_bits.h" +#include "jpegxl.h" +#include "parser.h" + +#if CONFIG_JPEGXL_PARSER + +typedef struct JpegXLParseContext { + ParseContext pc; + GetBitContext gb; + const uint8_t *buf; + size_t buflen; + size_t bits_read; +} JpegXLParseContext; + +#define jxl_bits(n) jpegxl_get_bits(jxlr, (n)) +#define jxl_enum() jpegxl_u32(jxlr, (uint32_t[]){0, 1, 2, 18}, (uint32_t[]){0, 0, 4, 6}) + +#define jxl_parse_errv(type, value) av_log(avctx, AV_LOG_DEBUG, \ + "Invalid " type " at position: %zu", \ + value, jxlr->bits_read) + +#define jxl_parse_errvv(type, v1, v2) av_log(avctx, AV_LOG_DEBUG, \ + "Invalid " type " at position: %zu", \ + v1, v2, jxlr->bits_read) + +#define jxl_parse_err(type) jxl_parse_errv("%s", type) + +static void jpegxl_reset_pc(JpegXLParseContext *jxlr) +{ + memset(&jxlr->gb, 0, sizeof(GetBitContext)); + jxlr->bits_read = 0; + init_get_bits8(&jxlr->gb, jxlr->buf, jxlr->buflen); +} + +static void jpegxl_init_pc(JpegXLParseContext *jxlr, const uint8_t *buf, size_t buflen) +{ + memset(&jxlr->pc, 0, sizeof(ParseContext)); + memset(&jxlr->gb, 0, sizeof(GetBitContext)); + jxlr->buf = buf; + jxlr->buflen = buflen; + jxlr->bits_read = 0; + init_get_bits8(&jxlr->gb, buf, buflen); +} + +static uint64_t jpegxl_get_bits(JpegXLParseContext *jxlr, size_t bits) +{ + if (!bits) + return 0; + if (bits > INT_MAX) + /* what are you doing with all those bits... */ + return 0; + jxlr->bits_read += bits; + /* extra buffer size of 8 bytes */ + if ((jxlr->bits_read + 1) / 8 + 8 > jxlr->buflen - 1) + /* overflowing buffer */ + return 0; + while (bits > 64) { + size_t bitcount = (bits - 1) % 64 + 1; + get_bits64(&jxlr->gb, bitcount); + bits -= bitcount; + } + return get_bits64(&jxlr->gb, bits); +} + +static uint32_t jpegxl_u32(JpegXLParseContext *jxlr, + uint32_t *constants, uint32_t *ubits) +{ + uint32_t ret, choice = jxl_bits(2); + ret = constants[choice]; + if (ubits[choice]) + ret += jxl_bits(ubits[choice]); + return ret; +} + +static uint64_t jpegxl_u64(JpegXLParseContext *jxlr) +{ + uint64_t shift = 12, ret; + switch (jxl_bits(2)) { + case 0: + ret = 0; + break; + case 1: + ret = 1 + jxl_bits(4); + break; + case 2: + ret = 17 + jxl_bits(8); + break; + case 3: + ret = jxl_bits(12); + while (jxl_bits(1)){ + if (shift < 60) { + ret |= jxl_bits(8) << shift; + shift += 8; + } else { + ret |= jxl_bits(4) << shift; + break; + } + } + break; + } + return ret; +} + +static float jpegxl_f16(JpegXLParseContext *jxlr) +{ + float ret; + uint32_t mantissa = jxl_bits(10) << 13; + uint32_t biased_exponent = jxl_bits(5); + if (biased_exponent == 31) + mantissa |= 0xFF << 23; + else + mantissa |= ((biased_exponent - 15 + 127) & 0xFF) << 23; + memcpy(&ret, &mantissa, sizeof(float)); + return ret; +} + +static uint32_t jpegxl_width_from_ratio(uint32_t height, int ratio) +{ + switch (ratio){ + case 1: + return height; + case 2: + return (height * 12) / 10; + case 3: + return (height * 4) / 3; + case 4: + return (height * 3) / 2; + case 5: + return (height * 16) / 9; + case 6: + return (height * 5) / 4; + case 7: + return height * 2; + default: + /* manual width */ + return 0; + } +} + + +static int jpegxl_parse_size_header(JpegXLParseContext *jxlr, + uint32_t *width, uint32_t *height) +{ + uint32_t w, h; + if (jxl_bits(1)) { + /* small size header */ + h = (jxl_bits(5) + 1) << 3; + w = jpegxl_width_from_ratio(h, jxl_bits(3)); + if (!w) + w = (jxl_bits(5) + 1) << 3; + } else { + /* large size header */ + h = 1 + jpegxl_u32(jxlr, + (uint32_t[]){0, 0, 0, 0}, (uint32_t[]){9, 13, 18, 30}); + w = jpegxl_width_from_ratio(h, jxl_bits(3)); + if (!w) + w = 1 + jpegxl_u32(jxlr, + (uint32_t[]){0, 0, 0, 0}, (uint32_t[]){9, 13, 18, 30}); + } + *width = w, *height = h; + return 0; +} + +static int jpegxl_parse_preview_header(JpegXLParseContext *jxlr, + uint32_t *width, uint32_t *height) +{ + uint32_t w, h; + if (jxl_bits(1)) { + /* div8 */ + h = jpegxl_u32(jxlr, + (uint32_t[]){16, 32, 1, 33}, (uint32_t[]){0, 0, 5, 9}) << 3; + w = jpegxl_width_from_ratio(h, jxl_bits(3)); + if (!w) + w = jpegxl_u32(jxlr, + (uint32_t[]){16, 32, 1, 33}, (uint32_t[]){0, 0, 5, 9}) << 3; + } else { + /* full */ + h = jpegxl_u32(jxlr, + (uint32_t[]){1, 65, 321, 1345}, (uint32_t[]){6, 8, 10, 12}); + w = jpegxl_width_from_ratio(h, jxl_bits(3)); + if (!w) + w = jpegxl_u32(jxlr, + (uint32_t[]){1, 65, 321, 1345}, (uint32_t[]){6, 8, 10, 12}); + } + *width = w, *height = h; + return 0; +} + +static int jpegxl_parse_animation_header(JpegXLParseContext *jxlr, + uint32_t *num, uint32_t *denom, uint32_t *count, int *have_pts) +{ + uint32_t n, d, c; + int p; + n = jpegxl_u32(jxlr, + (uint32_t[]){100, 1000, 1, 1}, (uint32_t[]){0, 0, 10, 30}); + d = jpegxl_u32(jxlr, + (uint32_t[]){1, 1001, 1, 1}, (uint32_t[]){0, 0, 8, 10}); + c = jpegxl_u32(jxlr, + (uint32_t[]){0, 0, 0, 0}, (uint32_t[]){0, 3, 16, 32}); + p = jxl_bits(1); + *num = n, *denom = d, *count = c, *have_pts = p; + return 0; +} + +static int jpegxl_parse_bit_depth(JpegXLParseContext *jxlr, + uint32_t *depth, uint32_t *exp_depth) +{ + uint32_t d, e; + if (jxl_bits(1)) { + /* float samples */ + d = jpegxl_u32(jxlr, + (uint32_t[]){32, 16, 24, 1}, (uint32_t[]){0, 0, 0, 6}); + e = jxl_bits(4) + 1; + } else { + /* integer samples */ + d = jpegxl_u32(jxlr, + (uint32_t[]){8, 10, 12, 1}, (uint32_t[]){0, 0, 0, 6}); + e = 0; + } + *depth = d, *exp_depth = e; + return 0; +} + +static int jpegxl_parse_extra_channel_info(JpegXLParseContext *jxlr, + JpegXLExtraChannelInfo *info, int level) +{ + int status = 0; + int all_default = jxl_bits(1); + + if (!all_default) { + info->type = jxl_enum(); + if (info->type > 63) + /* enum types cannot be 64+ */ + return 1; + status = jpegxl_parse_bit_depth(jxlr, &info->bits_per_sample, &info->exp_bits_per_sample); + if (!status) + return status; + info->dim_shift = jpegxl_u32(jxlr, (uint32_t[]){0, 3, 4, 1}, (uint32_t[]){0, 0, 0, 3}); + info->name_len = jpegxl_u32(jxlr, (uint32_t[]){0, 0, 16, 48}, (uint32_t[]){0, 4, 5, 10}); + } else { + info->type = FF_JPEGXL_CT_ALPHA; + info->bits_per_sample = 8; + info->exp_bits_per_sample = 0; + } + + info->name = av_malloc(info->name_len + 1); + if (!info->name) + return AVERROR(ENOMEM); + + for (uint32_t i = 0; i < info->name_len; i++) + /* there is no byte-alignment guarantee so no memcpy */ + info->name[i] = jxl_bits(8); + + /* null-terminate it for string operations */ + /* even though we have name_len */ + info->name[info->name_len] = '\0'; + + info->alpha_associated = + !all_default && info->type == FF_JPEGXL_CT_ALPHA && jxl_bits(1); + + if (info->type == FF_JPEGXL_CT_SPOT_COLOR) { + info->red = jpegxl_f16(jxlr); + info->green = jpegxl_f16(jxlr); + info->blue = jpegxl_f16(jxlr); + info->solidity = jpegxl_f16(jxlr); + } + + if (info->type == FF_JPEGXL_CT_CFA) + info->cfa_channel = jpegxl_u32(jxlr, (uint32_t[]){1, 0, 3, 19}, (uint32_t[]){0, 2, 4, 8}); + else + info->cfa_channel = 1; + + if (info->type == FF_JPEGXL_CT_BLACK && level < 10) + return 1; + + return 0; +} + +static void jpegxl_free_header(JpegXLHeader *header) +{ + if (header) { + if (header->extra_channel_info) { + for (uint32_t i = 0; i < header->num_extra_channels; i++) { + if (header->extra_channel_info + i) + av_freep(&header->extra_channel_info[i].name); + } + av_freep(&header->extra_channel_info); + } + if (header->extension_bits) + av_freep(&header->extension_bits); + if (header->opsin_inverse_matrix) + av_freep(&header->opsin_inverse_matrix); + if (header->up2_weight) + av_freep(&header->up2_weight); + if (header->up4_weight) + av_freep(&header->up4_weight); + if (header->up8_weight) + av_freep(&header->up8_weight); + av_freep(&header); + } +} + +/** + * Parse a JpegXL Codestream Header and read it into the argument Header + * @param level Codestream level provided by the container, 5 if raw codestream + * @return 0 upon success, negative upon error, and positive if the buffer overran + */ +static int jpegxl_parse_codestream_header(void *avctx, JpegXLParseContext *jxlr, JpegXLHeader **headerp, int level) +{ + JpegXLHeader *header = NULL; + int all_default, extra_fields = 0, status; + + header = av_mallocz(sizeof(JpegXLHeader)); + if (!header) { + av_log(avctx, AV_LOG_ERROR, "Could not allocate JpegXLHeader"); + status = AVERROR(ENOMEM); + goto fail; + } + + /* signature check */ + if (jxl_bits(16) != FF_JPEGXL_CODESTREAM_SIGNATURE_LE) { + av_log(avctx, AV_LOG_DEBUG, "Failed JPEG XL Signature Check"); + goto fail; + } + + status = jpegxl_parse_size_header(jxlr, + &header->width, &header->height); + if (status) { + jxl_parse_err("size header"); + goto fail; + } + + /* level 5 codestream */ + if (level < 10) { + if (header->width > (1 << 18) || header->height > (1 << 18) + || (header->width >> 4) * (header->height >> 4) > (1 << 20)) { + jxl_parse_err("width or height or both"); + goto fail; + } + header->level = 5; + } else { + if (header->width > (1 << 30) || header->height > (1 << 30) + || (header->width >> 14) * (header->height >> 14) > (1 << 12)) { + jxl_parse_err("width or height or both"); + goto fail; + } + header->level = 10; + } + + all_default = jxl_bits(1); + + if (!all_default) + extra_fields = jxl_bits(1); + + if (extra_fields) { + header->orientation = jxl_bits(3); + /* intrinstic size */ + if (jxl_bits(1)) { + status = jpegxl_parse_size_header(jxlr, + &header->intrinsic_width, &header->intrinsic_height); + if (status) { + jxl_parse_err("intrinstic size header"); + goto fail; + } + } + + /* preview header */ + if (jxl_bits(1)) { + status = jpegxl_parse_preview_header(jxlr, + &header->preview_width, &header->preview_height); + if (status) { + jxl_parse_err("preview header"); + goto fail; + } + if (header->preview_width > 4096 || header->preview_height > 4096) { + jxl_parse_errvv("preview header size %" PRIu32 ", %" PRIu32, + header->preview_width, header->preview_height); + goto fail; + } + } + + /* animation header */ + if (jxl_bits(1)) { + status = jpegxl_parse_animation_header(jxlr, + &header->anim_tb_num, &header->anim_tb_denom, + &header->anim_loop_count, &header->anim_have_pts); + if (status) { + jxl_parse_err("animation header"); + goto fail; + } + } + + } + + if (!all_default) { + status = jpegxl_parse_bit_depth(jxlr, + &header->bits_per_sample, &header->exp_bits_per_sample); + if (status) { + jxl_parse_err("bit depth header"); + goto fail; + } + + header->modular_16bit_buffers = jxl_bits(1); + + if (!header->modular_16bit_buffers && level < 10) { + jxl_parse_err("modular 16bit buffers"); + goto fail; + } + + header->num_extra_channels = jpegxl_u32(jxlr, + (uint32_t[]){0, 1, 2, 1}, (uint32_t[]){0, 0, 4, 12}); + if (header->num_extra_channels > 256 || + level < 10 && header->num_extra_channels > 4) { + jxl_parse_err("too many extra channels"); + goto fail; + } + if (header->num_extra_channels) { + header->extra_channel_info = + av_calloc(header->num_extra_channels + 1, sizeof(JpegXLExtraChannelInfo)); + for (uint32_t i = 0; i < header->num_extra_channels; i++) { + status = jpegxl_parse_extra_channel_info(jxlr, header->extra_channel_info + i, level); + if (status) { + jxl_parse_errv("extra channel number %" PRIu32, i); + goto fail; + } + } + } + + header->xyb_encoded = jxl_bits(1); + + if (jxl_bits(1)) { + /* all_default for color encoding */ + header->have_icc_profile = 0; + header->color_space = FF_JPEGXL_CS_RGB; + header->white_point = FF_JPEGXL_WP_D65; + header->primaries = FF_JPEGXL_PR_SRGB; + header->transfer_function = (1 << 24) + FF_JPEGXL_TF_SRGB; + header->rendering_intent = FF_JPEGXL_RI_RELATIVE; + } else { + header->have_icc_profile = jxl_bits(1); + header->color_space = jxl_enum(); + if (header->color_space > 63) { + jxl_parse_errv("color space enum %" PRIu32, header->white_point); + goto fail; + } + if (header->color_space != FF_JPEGXL_CS_XYB + && !header->have_icc_profile) { + header->white_point = jxl_enum(); + if (header->white_point > 63) { + jxl_parse_errv("white point enum %" PRIu32, header->white_point); + goto fail; + } + } else { + header->white_point = FF_JPEGXL_WP_D65; + } + if (header->white_point == FF_JPEGXL_WP_CUSTOM) { + header->white_ux = jpegxl_u32(jxlr, (uint32_t[]){0, 524288, 1048576, 2097152}, (uint32_t[]){19, 19, 20, 21}); + header->white_uy = jpegxl_u32(jxlr, (uint32_t[]){0, 524288, 1048576, 2097152}, (uint32_t[]){19, 19, 20, 21}); + } + if (header->color_space != FF_JPEGXL_CS_XYB + && header->color_space != FF_JPEGXL_CS_GRAY + && !header->have_icc_profile) { + header->primaries = jxl_enum(); + if (header->primaries > 63) { + jxl_parse_errv("primaries enum %" PRIu32, header->primaries); + goto fail; + } + } else { + header->primaries = FF_JPEGXL_PR_SRGB; + } + if (header->primaries == FF_JPEGXL_PR_CUSTOM) { + header->red_ux = jpegxl_u32(jxlr, (uint32_t[]){0, 524288, 1048576, 2097152}, (uint32_t[]){19, 19, 20, 21}); + header->red_uy = jpegxl_u32(jxlr, (uint32_t[]){0, 524288, 1048576, 2097152}, (uint32_t[]){19, 19, 20, 21}); + header->green_ux = jpegxl_u32(jxlr, (uint32_t[]){0, 524288, 1048576, 2097152}, (uint32_t[]){19, 19, 20, 21}); + header->green_uy = jpegxl_u32(jxlr, (uint32_t[]){0, 524288, 1048576, 2097152}, (uint32_t[]){19, 19, 20, 21}); + header->blue_ux = jpegxl_u32(jxlr, (uint32_t[]){0, 524288, 1048576, 2097152}, (uint32_t[]){19, 19, 20, 21}); + header->blue_uy = jpegxl_u32(jxlr, (uint32_t[]){0, 524288, 1048576, 2097152}, (uint32_t[]){19, 19, 20, 21}); + } + if (!header->have_icc_profile) { + if (jxl_bits(1)) { + /* this is gamma */ + header->transfer_function = jxl_bits(24); + } else { + header->transfer_function = jxl_enum(); + if (header->transfer_function > 63) { + jxl_parse_errv("transfer function enum %" PRIu32, header->transfer_function); + goto fail; + } + /* + * higher than the highest possible gamma value + * marks it as an enum isntead of gamma + */ + header->transfer_function += 1 << 24; + } + header->rendering_intent = jxl_enum(); + if (header->rendering_intent > 63) { + jxl_parse_errv("rendering intent enum %" PRIu32, header->rendering_intent); + goto fail; + } + } else { + header->transfer_function = (1 << 24) + FF_JPEGXL_TF_SRGB; + header->rendering_intent = FF_JPEGXL_RI_RELATIVE; + } + } + + /* lazy && works with this macro */ + if (extra_fields && !jxl_bits(1)) { + header->intensity_target = jpegxl_f16(jxlr); + header->min_nits = jpegxl_f16(jxlr); + header->relative_to_max_display = jxl_bits(1); + header->linear_below = jpegxl_f16(jxlr); + } else { + header->intensity_target = 255; + } + + header->extensions = jpegxl_u64(jxlr); + if (header->extensions) { + header->extension_bits = av_calloc(64, sizeof(uint64_t)); + if (!header->extension_bits) { + av_log(avctx, AV_LOG_ERROR, "Could not allocate extension bit array"); + status = AVERROR(ENOMEM); + goto fail; + } + for (int i = 0; i < 64; i++) { + if (header->extensions & (UINT64_C(1) << i)) + header->extension_bits[i] = jpegxl_u64(jxlr); + } + } + + } else { + header->modular_16bit_buffers = 1; + header->xyb_encoded = 1; + } + + header->default_transform = jxl_bits(1); + + /* lazy && works with this macro */ + if (!header->default_transform && header->xyb_encoded && !jxl_bits(1)) { + header->opsin_inverse_matrix = av_malloc_array(16, sizeof(float)); + if (!header->opsin_inverse_matrix) { + av_log(avctx, AV_LOG_ERROR, "Could not allocate Opsin Inverse Matrix"); + status = AVERROR(ENOMEM); + goto fail; + } + for (int i = 0; i < 16; i++) { + header->opsin_inverse_matrix[i] = jpegxl_f16(jxlr); + } + } + + if (!header->default_transform) { + header->cw_mask = jxl_bits(3); + } + + if (header->cw_mask & 1) { + header->up2_weight = av_malloc_array(15, sizeof(float)); + if (!header->up2_weight) { + av_log(avctx, AV_LOG_ERROR, "Could not allocate up2_weight"); + status = AVERROR(ENOMEM); + goto fail; + } + for (int i = 0; i < 15; i++) { + header->up2_weight[i] = jpegxl_f16(jxlr); + } + } + if (header->cw_mask & 2) { + header->up4_weight = av_malloc_array(55, sizeof(float)); + if (!header->up4_weight) { + av_log(avctx, AV_LOG_ERROR, "Could not allocate up4_weight"); + status = AVERROR(ENOMEM); + goto fail; + } + for (int i = 0; i < 55; i++) { + header->up4_weight[i] = jpegxl_f16(jxlr); + } + } + if (header->cw_mask & 4) { + header->up8_weight = av_malloc_array(210, sizeof(float)); + if (!header->up8_weight) { + av_log(avctx, AV_LOG_ERROR, "Could not allocate up8_weight"); + status = AVERROR(ENOMEM); + goto fail; + } + for (int i = 0; i < 210; i++) { + header->up8_weight[i] = jpegxl_f16(jxlr); + } + } + + /* zero pad to byte */ + if (jxl_bits(7 - ((jxlr->bits_read - 1) % 8))) { + jxl_parse_err("zero padding to byte"); + goto fail; + } + + /* bytes consumed > buflen */ + if ((jxlr->bits_read + 1) / 8 - 1 > jxlr->buflen) { + jxl_parse_err("unexpected end of file"); + goto fail; + } + + *headerp = header; + return 0; + +fail: + jpegxl_free_header(header); + if (status >= 0 && (jxlr->bits_read + 1) / 8 - 1 > jxlr->buflen) + return FFMIN(jxlr->bits_read, INT_MAX); + if (status > 0) + return -status; + return status || -1; +} + +static int jpegxl_parse_header(void *avctx, JpegXLParseContext *jxlr, JpegXLHeader **headerp, int level) +{ + uint64_t sig = jxl_bits(64); + jpegxl_reset_pc(jxlr); + if (sig == FF_JPEGXL_CONTAINER_SIGNATURE_LE) { + for (;;) { + uint64_t size = 0; + uint32_t tag = 0; + for (int k = 0; k < 4; k++) + size = (size << 8) | jxl_bits(8); + for (int k = 0; k < 4; k++) + tag = (tag << 8) | jxl_bits(8); + if (tag == MKBETAG('j','x','l','p')) { + jxl_bits(32); + break; + } + if (tag == MKBETAG('j','x','l','c')) + break; + if (size == 1) { + size = 0; + for (int k = 0; k < 8; k++) + size = (size << 8) | jxl_bits(8); + if (size > INT_MAX) + break; + size -= 8; + } + if (jxlr->bits_read / 8 > jxlr->buflen) + break; + } + } + return jpegxl_parse_codestream_header(avctx, jxlr, headerp, level); +} + +int avpriv_jpegxl_verify_codestream_header(void *avctx, uint8_t *buf, size_t buflen, int level) +{ + JpegXLParseContext jxlri; + JpegXLHeader *header = NULL; + int status; + jpegxl_init_pc(&jxlri, buf, buflen); + status = jpegxl_parse_codestream_header(avctx, &jxlri, &header, level); + if (header) + jpegxl_free_header(header); + return status; +} + +static enum AVPixelFormat jpegxl_header_get_pixfmt(JpegXLHeader *header) { + int alpha = 0; + for (int i = 0; i < header->num_extra_channels; i++) { + if ((header->extra_channel_info + i)->type == FF_JPEGXL_CT_ALPHA) { + alpha = 1; + break; + } + } + if (header->color_space == FF_JPEGXL_CS_GRAY) { + if (header->bits_per_sample <= 8) + return alpha ? AV_PIX_FMT_YA8 : AV_PIX_FMT_GRAY8; + if (header->bits_per_sample > 16 || header->exp_bits_per_sample) + return alpha ? AV_PIX_FMT_NONE : AV_PIX_FMT_GRAYF32LE; + return alpha ? AV_PIX_FMT_YA16LE : AV_PIX_FMT_GRAY16LE; + } else if (header->color_space == FF_JPEGXL_CS_RGB + || header->color_space == FF_JPEGXL_CS_XYB) { + if (header->bits_per_sample <= 8) + return alpha ? AV_PIX_FMT_RGBA : AV_PIX_FMT_RGB24; + if (header->bits_per_sample > 16 || header->exp_bits_per_sample) + return alpha ? AV_PIX_FMT_GBRAPF32LE : AV_PIX_FMT_GBRPF32LE; + return alpha ? AV_PIX_FMT_RGBA64LE : AV_PIX_FMT_RGB48LE; + } + return AV_PIX_FMT_NONE; +} + +static av_cold int jpegxl_parse_init(AVCodecParserContext *s1) +{ + s1->pict_type = AV_PICTURE_TYPE_NONE; + return 0; +} + +static int jpegxl_parse(AVCodecParserContext *s1, + AVCodecContext *avctx, + const uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size) +{ + JpegXLParseContext *jxlr = s1->priv_data; + JpegXLHeader *header = NULL; + int next = END_NOT_FOUND, status = 0; + size_t i = 0; + + *poutbuf_size = 0; + *poutbuf = NULL; + + if (buf_size == 0 || s1->flags & PARSER_FLAG_COMPLETE_FRAMES) { + /* eof is a frame boundary */ + next = buf_size; + } else if (!jxlr->pc.frame_start_found) { + /* look for stream signature */ + uint64_t state64 = jxlr->pc.state64; + for (; i < buf_size; i++) { + state64 = (state64 << 8) | buf[i]; + if ((state64 & 0xFFFF) == FF_JPEGXL_CODESTREAM_SIGNATURE_BE + || state64 == FF_JPEGXL_CONTAINER_SIGNATURE_BE) { + jxlr->pc.frame_start_found = 1; + break; + } + } + jxlr->pc.state64 = state64; + } + + if (jxlr->pc.frame_start_found && s1->pict_type == AV_PICTURE_TYPE_NONE) { + jpegxl_init_pc(jxlr, buf, buf_size); + status = jpegxl_parse_header(NULL, jxlr, &header, 5); + if (status == 0) { + /* parsed successfully */ + s1->pict_type = AV_PICTURE_TYPE_I; + s1->key_frame = 1; + s1->width = avctx->width = avctx->coded_width = header->width; + s1->height = avctx->height = avctx->coded_height = header->height; + s1->format = avctx->pix_fmt = jpegxl_header_get_pixfmt(header); + jxlr->pc.frame_start_found = 1; + } + if (header) + jpegxl_free_header(header); + header = NULL; + } + + if (ff_combine_frame(&jxlr->pc, next, &buf, &buf_size) < 0) { + *poutbuf = NULL; + *poutbuf_size = 0; + return buf_size; + } + + jxlr->pc.frame_start_found = 0; + + *poutbuf = buf; + *poutbuf_size = buf_size; + + return next; +} + +const AVCodecParser ff_jpegxl_parser = { + .codec_ids = { AV_CODEC_ID_JPEGXL }, + .priv_data_size = sizeof(JpegXLParseContext), + .parser_init = jpegxl_parse_init, + .parser_parse = jpegxl_parse, + .parser_close = ff_parse_close, +}; + +#else /* CONFIG_JPEGXL_PARSER */ + +int avpriv_jpegxl_verify_codestream_header(void *avctx, uint8_t *buf, size_t buflen, int level) +{ + /* parser disabled at compile-time */ + return AVERROR(ENOSYS); +} + +#endif /* CONFIG_JPEGXL_PARSER */ diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c index 6b40c18d80..18a40eceea 100644 --- a/libavcodec/parsers.c +++ b/libavcodec/parsers.c @@ -52,6 +52,7 @@ extern const AVCodecParser ff_h264_parser; extern const AVCodecParser ff_hevc_parser; extern const AVCodecParser ff_ipu_parser; extern const AVCodecParser ff_jpeg2000_parser; +extern const AVCodecParser ff_jpegxl_parser; extern const AVCodecParser ff_mjpeg_parser; extern const AVCodecParser ff_mlp_parser; extern const AVCodecParser ff_mpeg4video_parser; diff --git a/libavcodec/version.h b/libavcodec/version.h index a46fb05f1a..b5867ad041 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -28,7 +28,7 @@ #include "libavutil/version.h" #define LIBAVCODEC_VERSION_MAJOR 59 -#define LIBAVCODEC_VERSION_MINOR 20 +#define LIBAVCODEC_VERSION_MINOR 21 #define LIBAVCODEC_VERSION_MICRO 100 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \