[FFmpeg-devel,v2,3/7] avutil/half2float: move tables to header-internal structs

Message ID	20220814164833.19965-3-timo@rothenpieler.org
State	Accepted
Commit	f3fb528cd5bfecec6835a3951c75903a194ae1ad
Headers	show Delivered-To: ffmpegpatchwork2@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; From: Timo Rothenpieler <timo@rothenpieler.org> To: ffmpeg-devel@ffmpeg.org Date: Sun, 14 Aug 2022 18:48:29 +0200 Message-Id: <20220814164833.19965-3-timo@rothenpieler.org> In-Reply-To: <20220814164833.19965-1-timo@rothenpieler.org> References: <20220814164833.19965-1-timo@rothenpieler.org> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH v2 3/7] avutil/half2float: move tables to header-internal structs Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Cc: Timo Rothenpieler <timo@rothenpieler.org> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Series	[FFmpeg-devel,v2,1/7] avutil: move half-precision float helper to avutil \| expand [FFmpeg-devel,v2,1/7] avutil: move half-precision float helper to avutil [FFmpeg-devel,v2,2/7] avutil/half2float: adjust conversion of NaN [FFmpeg-devel,v2,3/7] avutil/half2float: move tables to header-internal structs [FFmpeg-devel,v2,4/7] avutil/half2float: move non-inline init code out of header [FFmpeg-devel,v2,5/7] avutil/half2float: use native _Float16 if available [FFmpeg-devel,v2,6/7] swscale: add opaque parameter to input functions [FFmpeg-devel,v2,7/7] swscale/input: add rgbaf16 input support

Context	Check	Description
yinshiyou/make_fate_loongarch64	success	Make fate finished
yinshiyou/make_loongarch64	warning	New warnings during build
andriy/make_fate_x86	success	Make fate finished
andriy/make_x86	warning	New warnings during build

diff --git a/libavcodec/exr.c b/libavcodec/exr.c index 47f4786491..b0e2e85024 100644 --- a/libavcodec/exr.c +++ b/libavcodec/exr.c @@ -191,9 +191,7 @@ typedef struct EXRContext { float gamma; union av_intfloat32 gamma_table[65536]; - uint32_t mantissatable[3072]; - uint32_t exponenttable[64]; - uint16_t offsettable[64]; + Half2FloatTables h2f_tables; } EXRContext; static int zip_uncompress(const EXRContext *s, const uint8_t *src, int compressed_size, @@ -899,10 +897,7 @@ static int ac_uncompress(const EXRContext *s, GetByteContext *gb, float *block) n += val & 0xff; } else { ret = n; - block[ff_zigzag_direct[n]] = av_int2float(half2float(val, - s->mantissatable, - s->exponenttable, - s->offsettable)); + block[ff_zigzag_direct[n]] = av_int2float(half2float(val, &s->h2f_tables)); n++; } } @@ -1120,8 +1115,7 @@ static int dwa_uncompress(const EXRContext *s, const uint8_t *src, int compresse uint16_t *dc = (uint16_t *)td->dc_data; union av_intfloat32 dc_val; - dc_val.i = half2float(dc[idx], s->mantissatable, - s->exponenttable, s->offsettable); + dc_val.i = half2float(dc[idx], &s->h2f_tables); block[0] = dc_val.f; ac_uncompress(s, &agb, block); @@ -1171,7 +1165,7 @@ static int dwa_uncompress(const EXRContext *s, const uint8_t *src, int compresse for (int x = 0; x < td->xsize; x++) { uint16_t ha = ai0[x] | (ai1[x] << 8); - ao[x] = half2float(ha, s->mantissatable, s->exponenttable, s->offsettable); + ao[x] = half2float(ha, &s->h2f_tables); } } @@ -1427,10 +1421,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata, } } else { for (x = 0; x < xsize; x++) { - ptr_x[0].i = half2float(bytestream_get_le16(&src), - s->mantissatable, - s->exponenttable, - s->offsettable); + ptr_x[0].i = half2float(bytestream_get_le16(&src), &s->h2f_tables); ptr_x++; } } @@ -2217,7 +2208,7 @@ static av_cold int decode_init(AVCodecContext *avctx) float one_gamma = 1.0f / s->gamma; avpriv_trc_function trc_func = NULL; - half2float_table(s->mantissatable, s->exponenttable, s->offsettable); + init_half2float_tables(&s->h2f_tables); s->avctx = avctx; @@ -2230,18 +2221,18 @@ static av_cold int decode_init(AVCodecContext *avctx) trc_func = avpriv_get_trc_function_from_trc(s->apply_trc_type); if (trc_func) { for (i = 0; i < 65536; ++i) { - t.i = half2float(i, s->mantissatable, s->exponenttable, s->offsettable); + t.i = half2float(i, &s->h2f_tables); t.f = trc_func(t.f); s->gamma_table[i] = t; } } else { if (one_gamma > 0.9999f && one_gamma < 1.0001f) { for (i = 0; i < 65536; ++i) { - s->gamma_table[i].i = half2float(i, s->mantissatable, s->exponenttable, s->offsettable); + s->gamma_table[i].i = half2float(i, &s->h2f_tables); } } else { for (i = 0; i < 65536; ++i) { - t.i = half2float(i, s->mantissatable, s->exponenttable, s->offsettable); + t.i = half2float(i, &s->h2f_tables); /* If negative value we reuse half value */ if (t.f <= 0.0f) { s->gamma_table[i] = t; diff --git a/libavcodec/exrenc.c b/libavcodec/exrenc.c index 56c084d483..356bd11543 100644 --- a/libavcodec/exrenc.c +++ b/libavcodec/exrenc.c @@ -87,15 +87,14 @@ typedef struct EXRContext { EXRScanlineData *scanline; - uint16_t basetable[512]; - uint8_t shifttable[512]; + Float2HalfTables f2h_tables; } EXRContext; static av_cold int encode_init(AVCodecContext *avctx) { EXRContext *s = avctx->priv_data; - float2half_tables(s->basetable, s->shifttable); + init_float2half_tables(&s->f2h_tables); switch (avctx->pix_fmt) { case AV_PIX_FMT_GBRPF32: @@ -256,7 +255,7 @@ static int encode_scanline_rle(EXRContext *s, const AVFrame *frame) const uint32_t *src = (const uint32_t *)(frame->data[ch] + y * frame->linesize[ch]); for (int x = 0; x < frame->width; x++) - dst[x] = float2half(src[x], s->basetable, s->shifttable); + dst[x] = float2half(src[x], &s->f2h_tables); } break; } @@ -324,7 +323,7 @@ static int encode_scanline_zip(EXRContext *s, const AVFrame *frame) const uint32_t *src = (const uint32_t *)(frame->data[ch] + (y * s->scanline_height + l) * frame->linesize[ch]); for (int x = 0; x < frame->width; x++) - dst[x] = float2half(src[x], s->basetable, s->shifttable); + dst[x] = float2half(src[x], &s->f2h_tables); } } break; @@ -482,7 +481,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, const uint32_t *src = (const uint32_t *)(frame->data[ch] + y * frame->linesize[ch]); for (int x = 0; x < frame->width; x++) - bytestream2_put_le16(pb, float2half(src[x], s->basetable, s->shifttable)); + bytestream2_put_le16(pb, float2half(src[x], &s->f2h_tables)); } } } diff --git a/libavcodec/pnm.h b/libavcodec/pnm.h index 7e5445f529..4906eeb2b9 100644 --- a/libavcodec/pnm.h +++ b/libavcodec/pnm.h @@ -22,6 +22,7 @@ #ifndef AVCODEC_PNM_H #define AVCODEC_PNM_H +#include "libavutil/half2float.h" #include "avcodec.h" typedef struct PNMContext { @@ -34,9 +35,7 @@ typedef struct PNMContext { int half; float scale; - uint32_t mantissatable[3072]; - uint32_t exponenttable[64]; - uint16_t offsettable[64]; + Half2FloatTables h2f_tables; } PNMContext; int ff_pnm_decode_header(AVCodecContext *avctx, PNMContext * const s); diff --git a/libavcodec/pnmdec.c b/libavcodec/pnmdec.c index 9383dc8e60..6adc348ec8 100644 --- a/libavcodec/pnmdec.c +++ b/libavcodec/pnmdec.c @@ -313,18 +313,9 @@ static int pnm_decode_frame(AVCodecContext *avctx, AVFrame *p, b = (float *)p->data[1]; for (int i = 0; i < avctx->height; i++) { for (int j = 0; j < avctx->width; j++) { - r[j] = av_int2float(half2float(AV_RL16(s->bytestream+0), - s->mantissatable, - s->exponenttable, - s->offsettable)) * scale; - g[j] = av_int2float(half2float(AV_RL16(s->bytestream+2), - s->mantissatable, - s->exponenttable, - s->offsettable)) * scale; - b[j] = av_int2float(half2float(AV_RL16(s->bytestream+4), - s->mantissatable, - s->exponenttable, - s->offsettable)) * scale; + r[j] = av_int2float(half2float(AV_RL16(s->bytestream+0), &s->h2f_tables)) * scale; + g[j] = av_int2float(half2float(AV_RL16(s->bytestream+2), &s->h2f_tables)) * scale; + b[j] = av_int2float(half2float(AV_RL16(s->bytestream+4), &s->h2f_tables)) * scale; s->bytestream += 6; } @@ -340,18 +331,9 @@ static int pnm_decode_frame(AVCodecContext *avctx, AVFrame *p, b = (float *)p->data[1]; for (int i = 0; i < avctx->height; i++) { for (int j = 0; j < avctx->width; j++) { - r[j] = av_int2float(half2float(AV_RB16(s->bytestream+0), - s->mantissatable, - s->exponenttable, - s->offsettable)) * scale; - g[j] = av_int2float(half2float(AV_RB16(s->bytestream+2), - s->mantissatable, - s->exponenttable, - s->offsettable)) * scale; - b[j] = av_int2float(half2float(AV_RB16(s->bytestream+4), - s->mantissatable, - s->exponenttable, - s->offsettable)) * scale; + r[j] = av_int2float(half2float(AV_RB16(s->bytestream+0), &s->h2f_tables)) * scale; + g[j] = av_int2float(half2float(AV_RB16(s->bytestream+2), &s->h2f_tables)) * scale; + b[j] = av_int2float(half2float(AV_RB16(s->bytestream+4), &s->h2f_tables)) * scale; s->bytestream += 6; } @@ -394,10 +376,7 @@ static int pnm_decode_frame(AVCodecContext *avctx, AVFrame *p, float *g = (float *)p->data[0]; for (int i = 0; i < avctx->height; i++) { for (int j = 0; j < avctx->width; j++) { - g[j] = av_int2float(half2float(AV_RL16(s->bytestream), - s->mantissatable, - s->exponenttable, - s->offsettable)) * scale; + g[j] = av_int2float(half2float(AV_RL16(s->bytestream), &s->h2f_tables)) * scale; s->bytestream += 2; } g += p->linesize[0] / 4; @@ -406,10 +385,7 @@ static int pnm_decode_frame(AVCodecContext *avctx, AVFrame *p, float *g = (float *)p->data[0]; for (int i = 0; i < avctx->height; i++) { for (int j = 0; j < avctx->width; j++) { - g[j] = av_int2float(half2float(AV_RB16(s->bytestream), - s->mantissatable, - s->exponenttable, - s->offsettable)) * scale; + g[j] = av_int2float(half2float(AV_RB16(s->bytestream), &s->h2f_tables)) * scale; s->bytestream += 2; } g += p->linesize[0] / 4; @@ -501,7 +477,7 @@ static av_cold int phm_dec_init(AVCodecContext *avctx) { PNMContext *s = avctx->priv_data; - half2float_table(s->mantissatable, s->exponenttable, s->offsettable); + init_half2float_tables(&s->h2f_tables); return 0; } diff --git a/libavcodec/pnmenc.c b/libavcodec/pnmenc.c index 7ce534d06e..38a5d8172d 100644 --- a/libavcodec/pnmenc.c +++ b/libavcodec/pnmenc.c @@ -30,8 +30,7 @@ #include "encode.h" typedef struct PHMEncContext { - uint16_t basetable[512]; - uint8_t shifttable[512]; + Float2HalfTables f2h_tables; } PHMEncContext; static int pnm_encode_frame(AVCodecContext *avctx, AVPacket *pkt, @@ -169,9 +168,9 @@ static int pnm_encode_frame(AVCodecContext *avctx, AVPacket *pkt, for (int i = 0; i < avctx->height; i++) { for (int j = 0; j < avctx->width; j++) { - AV_WN16(bytestream + 0, float2half(av_float2int(r[j]), s->basetable, s->shifttable)); - AV_WN16(bytestream + 2, float2half(av_float2int(g[j]), s->basetable, s->shifttable)); - AV_WN16(bytestream + 4, float2half(av_float2int(b[j]), s->basetable, s->shifttable)); + AV_WN16(bytestream + 0, float2half(av_float2int(r[j]), &s->f2h_tables)); + AV_WN16(bytestream + 2, float2half(av_float2int(g[j]), &s->f2h_tables)); + AV_WN16(bytestream + 4, float2half(av_float2int(b[j]), &s->f2h_tables)); bytestream += 6; } @@ -184,7 +183,7 @@ static int pnm_encode_frame(AVCodecContext *avctx, AVPacket *pkt, for (int i = 0; i < avctx->height; i++) { for (int j = 0; j < avctx->width; j++) { - AV_WN16(bytestream, float2half(av_float2int(g[j]), s->basetable, s->shifttable)); + AV_WN16(bytestream, float2half(av_float2int(g[j]), &s->f2h_tables)); bytestream += 2; } @@ -295,7 +294,7 @@ static av_cold int phm_enc_init(AVCodecContext *avctx) { PHMEncContext *s = avctx->priv_data; - float2half_tables(s->basetable, s->shifttable); + init_float2half_tables(&s->f2h_tables); return 0; } diff --git a/libavutil/float2half.h b/libavutil/float2half.h index d6aaab8278..3548aa1d45 100644 --- a/libavutil/float2half.h +++ b/libavutil/float2half.h @@ -21,45 +21,50 @@ #include <stdint.h> -static void float2half_tables(uint16_t *basetable, uint8_t *shifttable) +typedef struct Float2HalfTables { + uint16_t basetable[512]; + uint8_t shifttable[512]; +} Float2HalfTables; + +static void init_float2half_tables(Float2HalfTables *t) { for (int i = 0; i < 256; i++) { int e = i - 127; if (e < -24) { // Very small numbers map to zero - basetable[i|0x000] = 0x0000; - basetable[i|0x100] = 0x8000; - shifttable[i|0x000] = 24; - shifttable[i|0x100] = 24; + t->basetable[i|0x000] = 0x0000; + t->basetable[i|0x100] = 0x8000; + t->shifttable[i|0x000] = 24; + t->shifttable[i|0x100] = 24; } else if (e < -14) { // Small numbers map to denorms - basetable[i|0x000] = (0x0400>>(-e-14)); - basetable[i|0x100] = (0x0400>>(-e-14)) | 0x8000; - shifttable[i|0x000] = -e-1; - shifttable[i|0x100] = -e-1; + t->basetable[i|0x000] = (0x0400>>(-e-14)); + t->basetable[i|0x100] = (0x0400>>(-e-14)) | 0x8000; + t->shifttable[i|0x000] = -e-1; + t->shifttable[i|0x100] = -e-1; } else if (e <= 15) { // Normal numbers just lose precision - basetable[i|0x000] = ((e + 15) << 10); - basetable[i|0x100] = ((e + 15) << 10) | 0x8000; - shifttable[i|0x000] = 13; - shifttable[i|0x100] = 13; + t->basetable[i|0x000] = ((e + 15) << 10); + t->basetable[i|0x100] = ((e + 15) << 10) | 0x8000; + t->shifttable[i|0x000] = 13; + t->shifttable[i|0x100] = 13; } else if (e < 128) { // Large numbers map to Infinity - basetable[i|0x000] = 0x7C00; - basetable[i|0x100] = 0xFC00; - shifttable[i|0x000] = 24; - shifttable[i|0x100] = 24; + t->basetable[i|0x000] = 0x7C00; + t->basetable[i|0x100] = 0xFC00; + t->shifttable[i|0x000] = 24; + t->shifttable[i|0x100] = 24; } else { // Infinity and NaN's stay Infinity and NaN's - basetable[i|0x000] = 0x7C00; - basetable[i|0x100] = 0xFC00; - shifttable[i|0x000] = 13; - shifttable[i|0x100] = 13; + t->basetable[i|0x000] = 0x7C00; + t->basetable[i|0x100] = 0xFC00; + t->shifttable[i|0x000] = 13; + t->shifttable[i|0x100] = 13; } } } -static uint16_t float2half(uint32_t f, uint16_t *basetable, uint8_t *shifttable) +static uint16_t float2half(uint32_t f, const Float2HalfTables *t) { uint16_t h; - h = basetable[(f >> 23) & 0x1ff] + ((f & 0x007fffff) >> shifttable[(f >> 23) & 0x1ff]); + h = t->basetable[(f >> 23) & 0x1ff] + ((f & 0x007fffff) >> t->shifttable[(f >> 23) & 0x1ff]); return h; } diff --git a/libavutil/half2float.h b/libavutil/half2float.h index 5af4690cfe..5696567a8c 100644 --- a/libavutil/half2float.h +++ b/libavutil/half2float.h @@ -21,6 +21,12 @@ #include <stdint.h> +typedef struct Half2FloatTables { + uint32_t mantissatable[3072]; + uint32_t exponenttable[64]; + uint16_t offsettable[64]; +} Half2FloatTables; + static uint32_t convertmantissa(uint32_t i) { int32_t m = i << 13; // Zero pad mantissa bits @@ -37,41 +43,39 @@ static uint32_t convertmantissa(uint32_t i) return m | e; // Return combined number } -static void half2float_table(uint32_t *mantissatable, uint32_t *exponenttable, - uint16_t *offsettable) +static void init_half2float_tables(Half2FloatTables *t) { - mantissatable[0] = 0; + t->mantissatable[0] = 0; for (int i = 1; i < 1024; i++) - mantissatable[i] = convertmantissa(i); + t->mantissatable[i] = convertmantissa(i); for (int i = 1024; i < 2048; i++) - mantissatable[i] = 0x38000000UL + ((i - 1024) << 13UL); + t->mantissatable[i] = 0x38000000UL + ((i - 1024) << 13UL); for (int i = 2048; i < 3072; i++) - mantissatable[i] = mantissatable[i - 1024] | 0x400000UL; - mantissatable[2048] = mantissatable[1024]; + t->mantissatable[i] = t->mantissatable[i - 1024] | 0x400000UL; + t->mantissatable[2048] = t->mantissatable[1024]; - exponenttable[0] = 0; + t->exponenttable[0] = 0; for (int i = 1; i < 31; i++) - exponenttable[i] = i << 23; + t->exponenttable[i] = i << 23; for (int i = 33; i < 63; i++) - exponenttable[i] = 0x80000000UL + ((i - 32) << 23UL); - exponenttable[31]= 0x47800000UL; - exponenttable[32]= 0x80000000UL; - exponenttable[63]= 0xC7800000UL; + t->exponenttable[i] = 0x80000000UL + ((i - 32) << 23UL); + t->exponenttable[31]= 0x47800000UL; + t->exponenttable[32]= 0x80000000UL; + t->exponenttable[63]= 0xC7800000UL; - offsettable[0] = 0; + t->offsettable[0] = 0; for (int i = 1; i < 64; i++) - offsettable[i] = 1024; - offsettable[31] = 2048; - offsettable[32] = 0; - offsettable[63] = 2048; + t->offsettable[i] = 1024; + t->offsettable[31] = 2048; + t->offsettable[32] = 0; + t->offsettable[63] = 2048; } -static uint32_t half2float(uint16_t h, const uint32_t *mantissatable, const uint32_t *exponenttable, - const uint16_t *offsettable) +static uint32_t half2float(uint16_t h, const Half2FloatTables *t) { uint32_t f; - f = mantissatable[offsettable[h >> 10] + (h & 0x3ff)] + exponenttable[h >> 10]; + f = t->mantissatable[t->offsettable[h >> 10] + (h & 0x3ff)] + t->exponenttable[h >> 10]; return f; }

[FFmpeg-devel,v2,3/7] avutil/half2float: move tables to header-internal structs

Checks

Commit Message

Patch