[FFmpeg-devel,v2,7/7] swscale/input: add rgbaf16 input support

Message ID	20220814164833.19965-7-timo@rothenpieler.org
State	New
Headers	show Delivered-To: ffmpegpatchwork2@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; From: Timo Rothenpieler <timo@rothenpieler.org> To: ffmpeg-devel@ffmpeg.org Date: Sun, 14 Aug 2022 18:48:33 +0200 Message-Id: <20220814164833.19965-7-timo@rothenpieler.org> In-Reply-To: <20220814164833.19965-1-timo@rothenpieler.org> References: <20220814164833.19965-1-timo@rothenpieler.org> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH v2 7/7] swscale/input: add rgbaf16 input support Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Cc: Timo Rothenpieler <timo@rothenpieler.org> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Series	[FFmpeg-devel,v2,1/7] avutil: move half-precision float helper to avutil \| expand [FFmpeg-devel,v2,1/7] avutil: move half-precision float helper to avutil [FFmpeg-devel,v2,2/7] avutil/half2float: adjust conversion of NaN [FFmpeg-devel,v2,3/7] avutil/half2float: move tables to header-internal structs [FFmpeg-devel,v2,4/7] avutil/half2float: move non-inline init code out of header [FFmpeg-devel,v2,5/7] avutil/half2float: use native _Float16 if available [FFmpeg-devel,v2,6/7] swscale: add opaque parameter to input functions [FFmpeg-devel,v2,7/7] swscale/input: add rgbaf16 input support

Context	Check	Description
yinshiyou/make_loongarch64	success	Make finished
yinshiyou/make_fate_loongarch64	success	Make fate finished
andriy/make_x86	success	Make finished
andriy/make_fate_x86	success	Make fate finished

diff --git a/libavutil/Makefile b/libavutil/Makefile index 3d9c07aea8..1aac1a4cc0 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -131,6 +131,7 @@ OBJS = adler32.o \ float_dsp.o \ fixed_dsp.o \ frame.o \ + half2float.o \ hash.o \ hdr_dynamic_metadata.o \ hdr_dynamic_vivid_metadata.o \ diff --git a/libswscale/half2float.c b/libswscale/half2float.c new file mode 100644 index 0000000000..1b023f96a5 --- /dev/null +++ b/libswscale/half2float.c @@ -0,0 +1,19 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/half2float.c" diff --git a/libswscale/input.c b/libswscale/input.c index 36ef1e43ac..1077d01e91 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -1124,6 +1124,112 @@ static void grayf32##endian_name##ToY16_c(uint8_t *dst, const uint8_t *src, rgbf32_planar_funcs_endian(le, 0) rgbf32_planar_funcs_endian(be, 1) +#define rdpx(src) av_int2float(half2float(is_be ? AV_RB16(&src) : AV_RL16(&src), h2f_tbl)) + +static av_always_inline void rgbaf16ToUV_half_endian(uint16_t *dstU, uint16_t *dstV, int is_be, + const uint16_t *src, int width, + int32_t *rgb2yuv, Half2FloatTables *h2f_tbl) +{ + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; + int i; + for (i = 0; i < width; i++) { + int r = (lrintf(av_clipf(65535.0f * rdpx(src[i*8+0]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*8+4]), 0.0f, 65535.0f))) >> 1; + int g = (lrintf(av_clipf(65535.0f * rdpx(src[i*8+1]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*8+5]), 0.0f, 65535.0f))) >> 1; + int b = (lrintf(av_clipf(65535.0f * rdpx(src[i*8+2]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*8+6]), 0.0f, 65535.0f))) >> 1; + + dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + } +} + +static av_always_inline void rgbaf16ToUV_endian(uint16_t *dstU, uint16_t *dstV, int is_be, + const uint16_t *src, int width, + int32_t *rgb2yuv, Half2FloatTables *h2f_tbl) +{ + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; + int i; + for (i = 0; i < width; i++) { + int r = lrintf(av_clipf(65535.0f * rdpx(src[i*4+0]), 0.0f, 65535.0f)); + int g = lrintf(av_clipf(65535.0f * rdpx(src[i*4+1]), 0.0f, 65535.0f)); + int b = lrintf(av_clipf(65535.0f * rdpx(src[i*4+2]), 0.0f, 65535.0f)); + + dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + } +} + +static av_always_inline void rgbaf16ToY_endian(uint16_t *dst, const uint16_t *src, int is_be, + int width, int32_t *rgb2yuv, Half2FloatTables *h2f_tbl) +{ + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; + int i; + for (i = 0; i < width; i++) { + int r = lrintf(av_clipf(65535.0f * rdpx(src[i*4+0]), 0.0f, 65535.0f)); + int g = lrintf(av_clipf(65535.0f * rdpx(src[i*4+1]), 0.0f, 65535.0f)); + int b = lrintf(av_clipf(65535.0f * rdpx(src[i*4+2]), 0.0f, 65535.0f)); + + dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + } +} + +static av_always_inline void rgbaf16ToA_endian(uint16_t *dst, const uint16_t *src, int is_be, + int width, Half2FloatTables *h2f_tbl) +{ + int i; + for (i=0; i<width; i++) { + dst[i] = lrintf(av_clipf(65535.0f * rdpx(src[i*4+3]), 0.0f, 65535.0f)); + } +} + +#undef rdpx + +#define rgbaf16_funcs_endian(endian_name, endian) \ +static void rgbaf16##endian_name##ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused, \ + const uint8_t *src1, const uint8_t *src2, \ + int width, uint32_t *_rgb2yuv, void *opq) \ +{ \ + const uint16_t *src = (const uint16_t*)src1; \ + uint16_t *dstU = (uint16_t*)_dstU; \ + uint16_t *dstV = (uint16_t*)_dstV; \ + int32_t *rgb2yuv = (int32_t*)_rgb2yuv; \ + av_assert1(src1==src2); \ + rgbaf16ToUV_half_endian(dstU, dstV, endian, src, width, rgb2yuv, opq); \ +} \ +static void rgbaf16##endian_name##ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused, \ + const uint8_t *src1, const uint8_t *src2, \ + int width, uint32_t *_rgb2yuv, void *opq) \ +{ \ + const uint16_t *src = (const uint16_t*)src1; \ + uint16_t *dstU = (uint16_t*)_dstU; \ + uint16_t *dstV = (uint16_t*)_dstV; \ + int32_t *rgb2yuv = (int32_t*)_rgb2yuv; \ + av_assert1(src1==src2); \ + rgbaf16ToUV_endian(dstU, dstV, endian, src, width, rgb2yuv, opq); \ +} \ +static void rgbaf16##endian_name##ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, \ + const uint8_t *unused1, int width, uint32_t *_rgb2yuv, void *opq) \ +{ \ + const uint16_t *src = (const uint16_t*)_src; \ + uint16_t *dst = (uint16_t*)_dst; \ + int32_t *rgb2yuv = (int32_t*)_rgb2yuv; \ + rgbaf16ToY_endian(dst, src, endian, width, rgb2yuv, opq); \ +} \ +static void rgbaf16##endian_name##ToA_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, \ + const uint8_t *unused1, int width, uint32_t *unused2, void *opq) \ +{ \ + const uint16_t *src = (const uint16_t*)_src; \ + uint16_t *dst = (uint16_t*)_dst; \ + rgbaf16ToA_endian(dst, src, endian, width, opq); \ +} + +rgbaf16_funcs_endian(le, 0) +rgbaf16_funcs_endian(be, 1) + av_cold void ff_sws_init_input_funcs(SwsContext *c) { enum AVPixelFormat srcFormat = c->srcFormat; @@ -1388,6 +1494,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_X2BGR10LE: c->chrToYV12 = bgr30leToUV_half_c; break; + case AV_PIX_FMT_RGBAF16BE: + c->chrToYV12 = rgbaf16beToUV_half_c; + break; + case AV_PIX_FMT_RGBAF16LE: + c->chrToYV12 = rgbaf16leToUV_half_c; + break; } } else { switch (srcFormat) { @@ -1475,6 +1587,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_X2BGR10LE: c->chrToYV12 = bgr30leToUV_c; break; + case AV_PIX_FMT_RGBAF16BE: + c->chrToYV12 = rgbaf16beToUV_c; + break; + case AV_PIX_FMT_RGBAF16LE: + c->chrToYV12 = rgbaf16leToUV_c; + break; } } @@ -1763,6 +1881,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_X2BGR10LE: c->lumToYV12 = bgr30leToY_c; break; + case AV_PIX_FMT_RGBAF16BE: + c->lumToYV12 = rgbaf16beToY_c; + break; + case AV_PIX_FMT_RGBAF16LE: + c->lumToYV12 = rgbaf16leToY_c; + break; } if (c->needAlpha) { if (is16BPS(srcFormat) || isNBPS(srcFormat)) { @@ -1782,6 +1906,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break; + case AV_PIX_FMT_RGBAF16BE: + c->alpToYV12 = rgbaf16beToA_c; + break; + case AV_PIX_FMT_RGBAF16LE: + c->alpToYV12 = rgbaf16leToA_c; + break; case AV_PIX_FMT_YA8: c->alpToYV12 = uyvyToY_c; break; diff --git a/libswscale/slice.c b/libswscale/slice.c index b3ee06d632..db1c696727 100644 --- a/libswscale/slice.c +++ b/libswscale/slice.c @@ -282,7 +282,13 @@ int ff_init_filters(SwsContext * c) c->descIndex[0] = num_ydesc + (need_gamma ? 1 : 0); c->descIndex[1] = num_ydesc + num_cdesc + (need_gamma ? 1 : 0); - + if (isFloat16(c->srcFormat)) { + c->h2f_tables = av_malloc(sizeof(*c->h2f_tables)); + if (!c->h2f_tables) + return AVERROR(ENOMEM); + ff_init_half2float_tables(c->h2f_tables); + c->input_opaque = c->h2f_tables; + } c->desc = av_calloc(c->numDesc, sizeof(*c->desc)); if (!c->desc) @@ -393,5 +399,6 @@ int ff_free_filters(SwsContext *c) free_slice(&c->slice[i]); av_freep(&c->slice); } + av_freep(&c->h2f_tables); return 0; } diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 9ab542933f..6c14ce8536 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -35,6 +35,7 @@ #include "libavutil/pixdesc.h" #include "libavutil/slicethread.h" #include "libavutil/ppc/util_altivec.h" +#include "libavutil/half2float.h" #define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long @@ -679,6 +680,8 @@ typedef struct SwsContext { unsigned int dst_slice_align; atomic_int stride_unaligned_warned; atomic_int data_unaligned_warned; + + Half2FloatTables *h2f_tables; } SwsContext; //FIXME check init (where 0) @@ -840,6 +843,13 @@ static av_always_inline int isFloat(enum AVPixelFormat pix_fmt) return desc->flags & AV_PIX_FMT_FLAG_FLOAT; } +static av_always_inline int isFloat16(enum AVPixelFormat pix_fmt) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); + av_assert0(desc); + return (desc->flags & AV_PIX_FMT_FLAG_FLOAT) && desc->comp[0].depth == 16; +} + static av_always_inline int isALPHA(enum AVPixelFormat pix_fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); diff --git a/libswscale/utils.c b/libswscale/utils.c index 34503e57f4..81646c0d73 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -259,6 +259,8 @@ static const FormatEntry format_entries[] = { [AV_PIX_FMT_P416LE] = { 1, 1 }, [AV_PIX_FMT_NV16] = { 1, 1 }, [AV_PIX_FMT_VUYA] = { 1, 1 }, + [AV_PIX_FMT_RGBAF16BE] = { 1, 0 }, + [AV_PIX_FMT_RGBAF16LE] = { 1, 0 }, }; int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, diff --git a/libswscale/version.h b/libswscale/version.h index 3193562d18..d8694bb5c0 100644 --- a/libswscale/version.h +++ b/libswscale/version.h @@ -29,7 +29,7 @@ #include "version_major.h" #define LIBSWSCALE_VERSION_MINOR 8 -#define LIBSWSCALE_VERSION_MICRO 102 +#define LIBSWSCALE_VERSION_MICRO 103 #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ LIBSWSCALE_VERSION_MINOR, \

[FFmpeg-devel,v2,7/7] swscale/input: add rgbaf16 input support

Checks

Commit Message

Comments

Patch