Message ID | 20221103040010.1134-2-mindmark@gmail.com |
---|---|
State | New |
Headers | show |
Series | swscale rgbaf32 input/output support | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
On Wed, Nov 02, 2022 at 09:00:07PM -0700, mindmark@gmail.com wrote: > From: Mark Reid <mindmark@gmail.com> > > --- > libswscale/input.c | 172 +++++++++++++++++++++++++++++++++++++++++++++ > libswscale/utils.c | 4 ++ > 2 files changed, 176 insertions(+) > > diff --git a/libswscale/input.c b/libswscale/input.c > index 7ff7bfaa01..4683284b0b 100644 > --- a/libswscale/input.c > +++ b/libswscale/input.c > @@ -1284,6 +1284,136 @@ static void rgbaf16##endian_name##ToA_c(uint8_t *_dst, const uint8_t *_src, cons > rgbaf16_funcs_endian(le, 0) > rgbaf16_funcs_endian(be, 1) > > +#define rdpx(src) (is_be ? av_int2float(AV_RB32(&src)): av_int2float(AV_RL32(&src))) > + > +static av_always_inline void rgbaf32ToUV_half_endian(uint16_t *dstU, uint16_t *dstV, int is_be, > + const float *src, int width, > + int32_t *rgb2yuv, int comp) > +{ > + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; > + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; > + int i; > + for (i = 0; i < width; i++) { > + int r = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+0]), 0.0f, 65535.0f)) + > + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+4]), 0.0f, 65535.0f))) >> 1; > + int g = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+1]), 0.0f, 65535.0f)) + > + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+5]), 0.0f, 65535.0f))) >> 1; > + int b = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+2]), 0.0f, 65535.0f)) + > + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+6]), 0.0f, 65535.0f))) >> 1; > + > + dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; > + dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; I would expect this sort of code to use 2 lrintf() and 2 av_clipf() not 6 > + } > +} > + > +static av_always_inline void rgbaf32ToUV_endian(uint16_t *dstU, uint16_t *dstV, int is_be, > + const float *src, int width, > + int32_t *rgb2yuv, int comp) > +{ > + int32_t ru = rgb2yuv[RU_IDX], gu = 
rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; > + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; > + int i; > + for (i = 0; i < width; i++) { > + int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 65535.0f)); > + int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 65535.0f)); > + int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 65535.0f)); > + > + dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; > + dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; > + } > +} > + > +static av_always_inline void rgbaf32ToY_endian(uint16_t *dst, const float *src, int is_be, > + int width, int32_t *rgb2yuv, int comp) > +{ > + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; > + int i; > + for (i = 0; i < width; i++) { > + int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 65535.0f)); > + int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 65535.0f)); > + int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 65535.0f)); > + > + dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; there is one output so there should be only need for one clip and one float->int thx [...]
On Sun, Nov 13, 2022 at 1:25 PM Michael Niedermayer <michael@niedermayer.cc> wrote: > On Wed, Nov 02, 2022 at 09:00:07PM -0700, mindmark@gmail.com wrote: > > From: Mark Reid <mindmark@gmail.com> > > > > --- > > libswscale/input.c | 172 +++++++++++++++++++++++++++++++++++++++++++++ > > libswscale/utils.c | 4 ++ > > 2 files changed, 176 insertions(+) > > > > diff --git a/libswscale/input.c b/libswscale/input.c > > index 7ff7bfaa01..4683284b0b 100644 > > --- a/libswscale/input.c > > +++ b/libswscale/input.c > > @@ -1284,6 +1284,136 @@ static void rgbaf16##endian_name##ToA_c(uint8_t > *_dst, const uint8_t *_src, cons > > rgbaf16_funcs_endian(le, 0) > > rgbaf16_funcs_endian(be, 1) > > > > +#define rdpx(src) (is_be ? av_int2float(AV_RB32(&src)): > av_int2float(AV_RL32(&src))) > > + > > +static av_always_inline void rgbaf32ToUV_half_endian(uint16_t *dstU, > uint16_t *dstV, int is_be, > > + const float *src, > int width, > > + int32_t *rgb2yuv, > int comp) > > +{ > > + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = > rgb2yuv[BU_IDX]; > > + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = > rgb2yuv[BV_IDX]; > > + int i; > > + for (i = 0; i < width; i++) { > > > + int r = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+0]), > 0.0f, 65535.0f)) + > > + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+4]), > 0.0f, 65535.0f))) >> 1; > > + int g = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+1]), > 0.0f, 65535.0f)) + > > + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+5]), > 0.0f, 65535.0f))) >> 1; > > + int b = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+2]), > 0.0f, 65535.0f)) + > > + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+6]), > 0.0f, 65535.0f))) >> 1; > > + > > + dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) > >> RGB2YUV_SHIFT; > > + dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) > >> RGB2YUV_SHIFT; > > I would expect this sort of code to use 2 lrintf() and 2 av_clipf() not 6 > > ya it is a bit excessive, I'll 
just remove the _half conversions for now, they aren't strictly necessary as far as I can tell. > > > + } > > +} > > + > > +static av_always_inline void rgbaf32ToUV_endian(uint16_t *dstU, > uint16_t *dstV, int is_be, > > + const float *src, int > width, > > + int32_t *rgb2yuv, int > comp) > > +{ > > + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = > rgb2yuv[BU_IDX]; > > + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = > rgb2yuv[BV_IDX]; > > + int i; > > + for (i = 0; i < width; i++) { > > + int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, > 65535.0f)); > > + int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, > 65535.0f)); > > + int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, > 65535.0f)); > > + > > + dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) > >> RGB2YUV_SHIFT; > > + dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) > >> RGB2YUV_SHIFT; > > + } > > +} > > + > > > +static av_always_inline void rgbaf32ToY_endian(uint16_t *dst, const > float *src, int is_be, > > + int width, int32_t > *rgb2yuv, int comp) > > +{ > > + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = > rgb2yuv[BY_IDX]; > > + int i; > > + for (i = 0; i < width; i++) { > > + int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, > 65535.0f)); > > + int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, > 65535.0f)); > > + int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, > 65535.0f)); > > + > > > + dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> > RGB2YUV_SHIFT; > > there is one output so there should be only need for one clip and one > float->int > This is matching the f32 planar version. I think I was paranoid about things being bitexact for tests and that's why it's currently being done this way. I'll see what happens if I introduce more float operations, could I perhaps do this in a later patch? some asm might have to change too. > thx > > [...] 
> -- > Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB > > Any man who breaks a law that conscience tells him is unjust and willingly > accepts the penalty by staying in jail in order to arouse the conscience > of > the community on the injustice of the law is at that moment expressing the > very highest respect for law. - Martin Luther King Jr > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
On Sun, Nov 13, 2022 at 05:50:37PM -0800, Mark Reid wrote: > On Sun, Nov 13, 2022 at 1:25 PM Michael Niedermayer <michael@niedermayer.cc> > wrote: > > > On Wed, Nov 02, 2022 at 09:00:07PM -0700, mindmark@gmail.com wrote: > > > From: Mark Reid <mindmark@gmail.com> > > > > > > --- > > > libswscale/input.c | 172 +++++++++++++++++++++++++++++++++++++++++++++ > > > libswscale/utils.c | 4 ++ > > > 2 files changed, 176 insertions(+) > > > > > > diff --git a/libswscale/input.c b/libswscale/input.c > > > index 7ff7bfaa01..4683284b0b 100644 > > > --- a/libswscale/input.c > > > +++ b/libswscale/input.c > > > @@ -1284,6 +1284,136 @@ static void rgbaf16##endian_name##ToA_c(uint8_t > > *_dst, const uint8_t *_src, cons > > > rgbaf16_funcs_endian(le, 0) > > > rgbaf16_funcs_endian(be, 1) > > > > > > +#define rdpx(src) (is_be ? av_int2float(AV_RB32(&src)): > > av_int2float(AV_RL32(&src))) > > > + > > > +static av_always_inline void rgbaf32ToUV_half_endian(uint16_t *dstU, > > uint16_t *dstV, int is_be, > > > + const float *src, > > int width, > > > + int32_t *rgb2yuv, > > int comp) > > > +{ > > > + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = > > rgb2yuv[BU_IDX]; > > > + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = > > rgb2yuv[BV_IDX]; > > > + int i; > > > + for (i = 0; i < width; i++) { > > > > > + int r = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+0]), > > 0.0f, 65535.0f)) + > > > + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+4]), > > 0.0f, 65535.0f))) >> 1; > > > + int g = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+1]), > > 0.0f, 65535.0f)) + > > > + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+5]), > > 0.0f, 65535.0f))) >> 1; > > > + int b = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+2]), > > 0.0f, 65535.0f)) + > > > + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+6]), > > 0.0f, 65535.0f))) >> 1; > > > + > > > + dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) > > >> RGB2YUV_SHIFT; > > > + dstV[i] = (rv*r + gv*g + bv*b + 
(0x10001<<(RGB2YUV_SHIFT-1))) > > >> RGB2YUV_SHIFT; > > > > I would expect this sort of code to use 2 lrintf() and 2 av_clipf() not 6 > > > > > ya it is a bit excessive, I'll just remove the _half conversions for now, > they aren't strictly necessary as far as I can tell. do you see a problem with just factorizing them out ? it shouldnt be hard to reorder the operations > > > > > > > + } > > > +} > > > + > > > +static av_always_inline void rgbaf32ToUV_endian(uint16_t *dstU, > > uint16_t *dstV, int is_be, > > > + const float *src, int > > width, > > > + int32_t *rgb2yuv, int > > comp) > > > +{ > > > + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = > > rgb2yuv[BU_IDX]; > > > + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = > > rgb2yuv[BV_IDX]; > > > + int i; > > > + for (i = 0; i < width; i++) { > > > + int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, > > 65535.0f)); > > > + int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, > > 65535.0f)); > > > + int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, > > 65535.0f)); > > > + > > > + dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) > > >> RGB2YUV_SHIFT; > > > + dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) > > >> RGB2YUV_SHIFT; > > > + } > > > +} > > > + > > > > > +static av_always_inline void rgbaf32ToY_endian(uint16_t *dst, const > > float *src, int is_be, > > > + int width, int32_t > > *rgb2yuv, int comp) > > > +{ > > > + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = > > rgb2yuv[BY_IDX]; > > > + int i; > > > + for (i = 0; i < width; i++) { > > > + int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, > > 65535.0f)); > > > + int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, > > 65535.0f)); > > > + int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, > > 65535.0f)); > > > + > > > > > + dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> > > RGB2YUV_SHIFT; > > > > there is one output so 
there should be only need for one clip and one > > float->int > > > > This is matching the f32 planar version. I think I was paranoid about > things being bitexact for tests and that's why it's currently being done > this way. > I'll see what happens if I introduce more float operations, could I perhaps > do this in a later patch? some asm might have to change too. of course can be a separate patch in a set. Maybe f32 planar can be changed at the same time thx [...]
On Mon, Nov 14, 2022 at 1:08 PM Michael Niedermayer <michael@niedermayer.cc> wrote: > On Sun, Nov 13, 2022 at 05:50:37PM -0800, Mark Reid wrote: > > On Sun, Nov 13, 2022 at 1:25 PM Michael Niedermayer < > michael@niedermayer.cc> > > wrote: > > > > > On Wed, Nov 02, 2022 at 09:00:07PM -0700, mindmark@gmail.com wrote: > > > > From: Mark Reid <mindmark@gmail.com> > > > > > > > > --- > > > > libswscale/input.c | 172 > +++++++++++++++++++++++++++++++++++++++++++++ > > > > libswscale/utils.c | 4 ++ > > > > 2 files changed, 176 insertions(+) > > > > > > > > diff --git a/libswscale/input.c b/libswscale/input.c > > > > index 7ff7bfaa01..4683284b0b 100644 > > > > --- a/libswscale/input.c > > > > +++ b/libswscale/input.c > > > > @@ -1284,6 +1284,136 @@ static void > rgbaf16##endian_name##ToA_c(uint8_t > > > *_dst, const uint8_t *_src, cons > > > > rgbaf16_funcs_endian(le, 0) > > > > rgbaf16_funcs_endian(be, 1) > > > > > > > > +#define rdpx(src) (is_be ? av_int2float(AV_RB32(&src)): > > > av_int2float(AV_RL32(&src))) > > > > + > > > > +static av_always_inline void rgbaf32ToUV_half_endian(uint16_t *dstU, > > > uint16_t *dstV, int is_be, > > > > + const float > *src, > > > int width, > > > > + int32_t > *rgb2yuv, > > > int comp) > > > > +{ > > > > + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = > > > rgb2yuv[BU_IDX]; > > > > + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = > > > rgb2yuv[BV_IDX]; > > > > + int i; > > > > + for (i = 0; i < width; i++) { > > > > > > > + int r = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+0]), > > > 0.0f, 65535.0f)) + > > > > + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+4]), > > > 0.0f, 65535.0f))) >> 1; > > > > + int g = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+1]), > > > 0.0f, 65535.0f)) + > > > > + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+5]), > > > 0.0f, 65535.0f))) >> 1; > > > > + int b = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+2]), > > > 0.0f, 65535.0f)) + > > > > + lrintf(av_clipf(65535.0f * 
rdpx(src[i*(comp*2)+6]), > > > 0.0f, 65535.0f))) >> 1; > > > > + > > > > + dstU[i] = (ru*r + gu*g + bu*b + > (0x10001<<(RGB2YUV_SHIFT-1))) > > > >> RGB2YUV_SHIFT; > > > > + dstV[i] = (rv*r + gv*g + bv*b + > (0x10001<<(RGB2YUV_SHIFT-1))) > > > >> RGB2YUV_SHIFT; > > > > > > I would expect this sort of code to use 2 lrintf() and 2 av_clipf() > not 6 > > > > > > > > ya it is a bit excessive, I'll just remove the _half conversions for now, > > they aren't strictly necessary as far as I can tell. > > do you see a problem with just factorizing them out ? > it shouldnt be hard to reorder the operations > It's just fate checksums and float math that make me apprehensive :p. hmm this code path doesn't actually seem to get tested by fate. Now that I relook at it, the indexing looks wrong for the 3 channel formats too. > > > > > > > > > > > > + } > > > > +} > > > > + > > > > +static av_always_inline void rgbaf32ToUV_endian(uint16_t *dstU, > > > uint16_t *dstV, int is_be, > > > > + const float *src, > int > > > width, > > > > + int32_t *rgb2yuv, > int > > > comp) > > > > +{ > > > > + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = > > > rgb2yuv[BU_IDX]; > > > > + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = > > > rgb2yuv[BV_IDX]; > > > > + int i; > > > > + for (i = 0; i < width; i++) { > > > > + int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), > 0.0f, > > > 65535.0f)); > > > > + int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), > 0.0f, > > > 65535.0f)); > > > > + int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), > 0.0f, > > > 65535.0f)); > > > > + > > > > + dstU[i] = (ru*r + gu*g + bu*b + > (0x10001<<(RGB2YUV_SHIFT-1))) > > > >> RGB2YUV_SHIFT; > > > > + dstV[i] = (rv*r + gv*g + bv*b + > (0x10001<<(RGB2YUV_SHIFT-1))) > > > >> RGB2YUV_SHIFT; > > > > + } > > > > +} > > > > + > > > > > > > +static av_always_inline void rgbaf32ToY_endian(uint16_t *dst, const > > > float *src, int is_be, > > > > + int width, int32_t > > > *rgb2yuv, int comp) > 
> > > +{ > > > > + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = > > > rgb2yuv[BY_IDX]; > > > > + int i; > > > > + for (i = 0; i < width; i++) { > > > > + int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), > 0.0f, > > > 65535.0f)); > > > > + int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), > 0.0f, > > > 65535.0f)); > > > > + int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), > 0.0f, > > > 65535.0f)); > > > > + > > > > > > > + dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) > >> > > > RGB2YUV_SHIFT; > > > > > > there is one output so there should be only need for one clip and one > > > float->int > > > > > > > This is matching the f32 planar version. I think I was paranoid about > > things being bitexact for tests and that's why it's currently being done > > this way. > > I'll see what happens if I introduce more float operations, could I > perhaps > > do this in a later patch? some asm might have to change too. > > of course can be a separate patch in a set. Maybe f32 planar can be changed > at the same time > great, I'll do that change together in a later patch. > > thx > > [...] > -- > Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB > > In a rich man's house there is no place to spit but his face. > -- Diogenes of Sinope > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
diff --git a/libswscale/input.c b/libswscale/input.c index 7ff7bfaa01..4683284b0b 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -1284,6 +1284,136 @@ static void rgbaf16##endian_name##ToA_c(uint8_t *_dst, const uint8_t *_src, cons rgbaf16_funcs_endian(le, 0) rgbaf16_funcs_endian(be, 1) +#define rdpx(src) (is_be ? av_int2float(AV_RB32(&src)): av_int2float(AV_RL32(&src))) + +static av_always_inline void rgbaf32ToUV_half_endian(uint16_t *dstU, uint16_t *dstV, int is_be, + const float *src, int width, + int32_t *rgb2yuv, int comp) +{ + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; + int i; + for (i = 0; i < width; i++) { + int r = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+0]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+4]), 0.0f, 65535.0f))) >> 1; + int g = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+1]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+5]), 0.0f, 65535.0f))) >> 1; + int b = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+2]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+6]), 0.0f, 65535.0f))) >> 1; + + dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + } +} + +static av_always_inline void rgbaf32ToUV_endian(uint16_t *dstU, uint16_t *dstV, int is_be, + const float *src, int width, + int32_t *rgb2yuv, int comp) +{ + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; + int i; + for (i = 0; i < width; i++) { + int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 65535.0f)); + int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 65535.0f)); + int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 65535.0f)); + + dstU[i] = (ru*r + 
gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + } +} + +static av_always_inline void rgbaf32ToY_endian(uint16_t *dst, const float *src, int is_be, + int width, int32_t *rgb2yuv, int comp) +{ + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; + int i; + for (i = 0; i < width; i++) { + int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f, 65535.0f)); + int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f, 65535.0f)); + int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f, 65535.0f)); + + dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + } +} + +static av_always_inline void rgbaf32ToA_endian(uint16_t *dst, const float *src, int is_be, + int width, void *opq) +{ + int i; + for (i=0; i<width; i++) { + dst[i] = lrintf(av_clipf(65535.0f * rdpx(src[i*4+3]), 0.0f, 65535.0f)); + } +} + +#undef rdpx + +#define rgbaf32_funcs_endian(endian_name, endian) \ +static void rgbf32##endian_name##ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused, \ + const uint8_t *src1, const uint8_t *src2, \ + int width, uint32_t *rgb2yuv, void *opq) \ +{ \ + const float *src = (const float*)src1; \ + uint16_t *dstU = (uint16_t*)_dstU; \ + uint16_t *dstV = (uint16_t*)_dstV; \ + av_assert1(src1==src2); \ + rgbaf32ToUV_half_endian(dstU, dstV, endian, src, width, rgb2yuv, 3); \ +} \ +static void rgbf32##endian_name##ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused, \ + const uint8_t *src1, const uint8_t *src2, \ + int width, uint32_t *rgb2yuv, void *opq) \ +{ \ + const float *src = (const float*)src1; \ + uint16_t *dstU = (uint16_t*)_dstU; \ + uint16_t *dstV = (uint16_t*)_dstV; \ + av_assert1(src1==src2); \ + rgbaf32ToUV_endian(dstU, dstV, endian, src, width, rgb2yuv, 3); \ +} \ +static void rgbf32##endian_name##ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, \ + const uint8_t 
*unused1, int width, uint32_t *rgb2yuv, void *opq) \ +{ \ + const float *src = (const float*)_src; \ + uint16_t *dst = (uint16_t*)_dst; \ + rgbaf32ToY_endian(dst, src, endian, width, rgb2yuv, 3); \ +} \ +static void rgbaf32##endian_name##ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused, \ + const uint8_t *src1, const uint8_t *src2, \ + int width, uint32_t *rgb2yuv, void *opq) \ +{ \ + const float *src = (const float*)src1; \ + uint16_t *dstU = (uint16_t*)_dstU; \ + uint16_t *dstV = (uint16_t*)_dstV; \ + av_assert1(src1==src2); \ + rgbaf32ToUV_half_endian(dstU, dstV, endian, src, width, rgb2yuv, 4); \ +} \ +static void rgbaf32##endian_name##ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused, \ + const uint8_t *src1, const uint8_t *src2, \ + int width, uint32_t *rgb2yuv, void *opq) \ +{ \ + const float *src = (const float*)src1; \ + uint16_t *dstU = (uint16_t*)_dstU; \ + uint16_t *dstV = (uint16_t*)_dstV; \ + av_assert1(src1==src2); \ + rgbaf32ToUV_endian(dstU, dstV, endian, src, width, rgb2yuv, 4); \ +} \ +static void rgbaf32##endian_name##ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, \ + const uint8_t *unused1, int width, uint32_t *rgb2yuv, void *opq) \ +{ \ + const float *src = (const float*)_src; \ + uint16_t *dst = (uint16_t*)_dst; \ + rgbaf32ToY_endian(dst, src, endian, width, rgb2yuv, 4); \ +} \ +static void rgbaf32##endian_name##ToA_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, \ + const uint8_t *unused1, int width, uint32_t *unused2, void *opq) \ +{ \ + const float *src = (const float*)_src; \ + uint16_t *dst = (uint16_t*)_dst; \ + rgbaf32ToA_endian(dst, src, endian, width, opq); \ +} + +rgbaf32_funcs_endian(le, 0) +rgbaf32_funcs_endian(be, 1) + av_cold void ff_sws_init_input_funcs(SwsContext *c) { enum AVPixelFormat srcFormat = c->srcFormat; @@ -1570,6 +1700,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->chrToYV12 = rgbaf16leToUV_half_c; break; + case 
AV_PIX_FMT_RGBF32BE: + c->chrToYV12 = rgbf32beToUV_half_c; + break; + case AV_PIX_FMT_RGBAF32BE: + c->chrToYV12 = rgbaf32beToUV_half_c; + break; + case AV_PIX_FMT_RGBF32LE: + c->chrToYV12 = rgbf32leToUV_half_c; + break; + case AV_PIX_FMT_RGBAF32LE: + c->chrToYV12 = rgbaf32leToUV_half_c; + break; } } else { switch (srcFormat) { @@ -1663,6 +1805,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->chrToYV12 = rgbaf16leToUV_c; break; + case AV_PIX_FMT_RGBF32BE: + c->chrToYV12 = rgbf32beToUV_c; + break; + case AV_PIX_FMT_RGBAF32BE: + c->chrToYV12 = rgbaf32beToUV_c; + break; + case AV_PIX_FMT_RGBF32LE: + c->chrToYV12 = rgbf32leToUV_c; + break; + case AV_PIX_FMT_RGBAF32LE: + c->chrToYV12 = rgbaf32leToUV_c; + break; } } @@ -1973,6 +2127,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->lumToYV12 = rgbaf16leToY_c; break; + case AV_PIX_FMT_RGBF32BE: + c->lumToYV12 = rgbf32beToY_c; + break; + case AV_PIX_FMT_RGBAF32BE: + c->lumToYV12 = rgbaf32beToY_c; + break; + case AV_PIX_FMT_RGBF32LE: + c->lumToYV12 = rgbf32leToY_c; + break; + case AV_PIX_FMT_RGBAF32LE: + c->lumToYV12 = rgbaf32leToY_c; + break; } if (c->needAlpha) { if (is16BPS(srcFormat) || isNBPS(srcFormat)) { @@ -1998,6 +2164,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_RGBAF16LE: c->alpToYV12 = rgbaf16leToA_c; break; + case AV_PIX_FMT_RGBAF32BE: + c->alpToYV12 = rgbaf32beToA_c; + break; + case AV_PIX_FMT_RGBAF32LE: + c->alpToYV12 = rgbaf32leToA_c; + break; case AV_PIX_FMT_YA8: c->alpToYV12 = uyvyToY_c; break; diff --git a/libswscale/utils.c b/libswscale/utils.c index 45baa22b23..6da1f21e25 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -266,6 +266,10 @@ static const FormatEntry format_entries[] = { [AV_PIX_FMT_VUYX] = { 1, 1 }, [AV_PIX_FMT_RGBAF16BE] = { 1, 0 }, [AV_PIX_FMT_RGBAF16LE] = { 1, 0 }, + [AV_PIX_FMT_RGBF32BE] = { 1, 0 }, + [AV_PIX_FMT_RGBF32LE] = { 1, 0 }, + [AV_PIX_FMT_RGBAF32BE] = { 1, 0 }, + 
[AV_PIX_FMT_RGBAF32LE] = { 1, 0 }, [AV_PIX_FMT_XV30LE] = { 1, 1 }, [AV_PIX_FMT_XV36LE] = { 1, 1 }, };
From: Mark Reid <mindmark@gmail.com> --- libswscale/input.c | 172 +++++++++++++++++++++++++++++++++++++++++++++ libswscale/utils.c | 4 ++ 2 files changed, 176 insertions(+)