Message ID | GV1P250MB0737A541ACB7F42A5C6922CD8F4D9@GV1P250MB0737.EURP250.PROD.OUTLOOK.COM |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,1/4] avcodec/snow: Move ff_snow_inner_add_yblock() to snow_dwt.c | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
On Mon, Sep 19, 2022 at 11:27:49PM +0200, Andreas Rheinhardt wrote: > Only used there and by x86 snow asm code as fallback. > > Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> > --- > libavcodec/snow.c | 33 --------------------------------- > libavcodec/snow.h | 3 --- > libavcodec/snow_dwt.c | 32 ++++++++++++++++++++++++++++++++ > libavcodec/snow_dwt.h | 3 +++ > 4 files changed, 35 insertions(+), 36 deletions(-) > > diff --git a/libavcodec/snow.c b/libavcodec/snow.c > index aa15fccc42..85ad6d10a1 100644 > --- a/libavcodec/snow.c > +++ b/libavcodec/snow.c > @@ -29,39 +29,6 @@ > #include "snowdata.h" > > > -void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, > - int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ > - int y, x; > - IDWTELEM * dst; > - for(y=0; y<b_h; y++){ > - //FIXME ugly misuse of obmc_stride > - const uint8_t *obmc1= obmc + y*obmc_stride; > - const uint8_t *obmc2= obmc1+ (obmc_stride>>1); > - const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); > - const uint8_t *obmc4= obmc3+ (obmc_stride>>1); > - dst = slice_buffer_get_line(sb, src_y + y); > - for(x=0; x<b_w; x++){ > - int v= obmc1[x] * block[3][x + y*src_stride] > - +obmc2[x] * block[2][x + y*src_stride] > - +obmc3[x] * block[1][x + y*src_stride] > - +obmc4[x] * block[0][x + y*src_stride]; > - > - v <<= 8 - LOG2_OBMC_MAX; > - if(FRAC_BITS != 8){ > - v >>= 8 - FRAC_BITS; > - } > - if(add){ > - v += dst[x + src_x]; > - v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; > - if(v&(~255)) v= ~(v>>31); > - dst8[x + y*src_stride] = v; > - }else{ > - dst[x + src_x] -= v; > - } > - } > - } > -} > - > int ff_snow_get_buffer(SnowContext *s, AVFrame *frame) > { > int ret, i; > diff --git a/libavcodec/snow.h b/libavcodec/snow.h > index ed0f9abb42..1c976b9ba7 100644 > --- a/libavcodec/snow.h > +++ b/libavcodec/snow.h > @@ -45,11 +45,8 @@ > #define QSHIFT 5 > #define QROOT (1<<QSHIFT) > #define 
LOSSLESS_QLOG -128 > -#define FRAC_BITS 4 > #define MAX_REF_FRAMES 8 > > -#define LOG2_OBMC_MAX 8 > -#define OBMC_MAX (1<<(LOG2_OBMC_MAX)) > typedef struct BlockNode{ > int16_t mx; ///< Motion vector component X, see mv_scale > int16_t my; ///< Motion vector component Y, see mv_scale > diff --git a/libavcodec/snow_dwt.c b/libavcodec/snow_dwt.c > index 18b315ef66..9401d119d0 100644 > --- a/libavcodec/snow_dwt.c > +++ b/libavcodec/snow_dwt.c > @@ -25,6 +25,38 @@ > #include "me_cmp.h" > #include "snow_dwt.h" > > +void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, > + uint8_t **block, int b_w, int b_h, > + int src_x, int src_y, int src_stride, > + slice_buffer * sb, int add, uint8_t * dst8) > +{ > + for (int y = 0; y < b_h; y++) { > + //FIXME ugly misuse of obmc_stride > + const uint8_t *obmc1 = obmc + y * obmc_stride; > + const uint8_t *obmc2 = obmc1 + (obmc_stride >> 1); > + const uint8_t *obmc3 = obmc1 + obmc_stride * (obmc_stride >> 1); > + const uint8_t *obmc4 = obmc3 + (obmc_stride >> 1); > + IDWTELEM *dst = slice_buffer_get_line(sb, src_y + y); > + for (int x = 0; x < b_w; x++) { > + int v = obmc1[x] * block[3][x + y*src_stride] > + + obmc2[x] * block[2][x + y*src_stride] > + + obmc3[x] * block[1][x + y*src_stride] > + + obmc4[x] * block[0][x + y*src_stride]; > + > + v <<= 8 - LOG2_OBMC_MAX; > + if (FRAC_BITS != 8) > + v >>= 8 - FRAC_BITS; > + if (add) { > + v += dst[x + src_x]; > + v = (v + (1 << (FRAC_BITS - 1))) >> FRAC_BITS; > + if (v & (~255)) v= ~(v>>31); > + dst8[x + y*src_stride] = v; > + } else > + dst[x + src_x] -= v; > + } > + } > +} > + putting this in snow_dwt may be convenient but it is not part of the dwt so this feels semantically wrong thx [...]
Michael Niedermayer: > On Mon, Sep 19, 2022 at 11:27:49PM +0200, Andreas Rheinhardt wrote: >> Only used there and by x86 snow asm code as fallback. >> >> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> >> --- >> libavcodec/snow.c | 33 --------------------------------- >> libavcodec/snow.h | 3 --- >> libavcodec/snow_dwt.c | 32 ++++++++++++++++++++++++++++++++ >> libavcodec/snow_dwt.h | 3 +++ >> 4 files changed, 35 insertions(+), 36 deletions(-) >> >> diff --git a/libavcodec/snow.c b/libavcodec/snow.c >> index aa15fccc42..85ad6d10a1 100644 >> --- a/libavcodec/snow.c >> +++ b/libavcodec/snow.c >> @@ -29,39 +29,6 @@ >> #include "snowdata.h" >> >> >> -void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, >> - int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ >> - int y, x; >> - IDWTELEM * dst; >> - for(y=0; y<b_h; y++){ >> - //FIXME ugly misuse of obmc_stride >> - const uint8_t *obmc1= obmc + y*obmc_stride; >> - const uint8_t *obmc2= obmc1+ (obmc_stride>>1); >> - const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); >> - const uint8_t *obmc4= obmc3+ (obmc_stride>>1); >> - dst = slice_buffer_get_line(sb, src_y + y); >> - for(x=0; x<b_w; x++){ >> - int v= obmc1[x] * block[3][x + y*src_stride] >> - +obmc2[x] * block[2][x + y*src_stride] >> - +obmc3[x] * block[1][x + y*src_stride] >> - +obmc4[x] * block[0][x + y*src_stride]; >> - >> - v <<= 8 - LOG2_OBMC_MAX; >> - if(FRAC_BITS != 8){ >> - v >>= 8 - FRAC_BITS; >> - } >> - if(add){ >> - v += dst[x + src_x]; >> - v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; >> - if(v&(~255)) v= ~(v>>31); >> - dst8[x + y*src_stride] = v; >> - }else{ >> - dst[x + src_x] -= v; >> - } >> - } >> - } >> -} >> - >> int ff_snow_get_buffer(SnowContext *s, AVFrame *frame) >> { >> int ret, i; >> diff --git a/libavcodec/snow.h b/libavcodec/snow.h >> index ed0f9abb42..1c976b9ba7 100644 >> --- a/libavcodec/snow.h >> +++ b/libavcodec/snow.h >> @@ 
-45,11 +45,8 @@ >> #define QSHIFT 5 >> #define QROOT (1<<QSHIFT) >> #define LOSSLESS_QLOG -128 >> -#define FRAC_BITS 4 >> #define MAX_REF_FRAMES 8 >> >> -#define LOG2_OBMC_MAX 8 >> -#define OBMC_MAX (1<<(LOG2_OBMC_MAX)) >> typedef struct BlockNode{ >> int16_t mx; ///< Motion vector component X, see mv_scale >> int16_t my; ///< Motion vector component Y, see mv_scale >> diff --git a/libavcodec/snow_dwt.c b/libavcodec/snow_dwt.c >> index 18b315ef66..9401d119d0 100644 >> --- a/libavcodec/snow_dwt.c >> +++ b/libavcodec/snow_dwt.c >> @@ -25,6 +25,38 @@ >> #include "me_cmp.h" >> #include "snow_dwt.h" >> >> +void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, >> + uint8_t **block, int b_w, int b_h, >> + int src_x, int src_y, int src_stride, >> + slice_buffer * sb, int add, uint8_t * dst8) >> +{ >> + for (int y = 0; y < b_h; y++) { >> + //FIXME ugly misuse of obmc_stride >> + const uint8_t *obmc1 = obmc + y * obmc_stride; >> + const uint8_t *obmc2 = obmc1 + (obmc_stride >> 1); >> + const uint8_t *obmc3 = obmc1 + obmc_stride * (obmc_stride >> 1); >> + const uint8_t *obmc4 = obmc3 + (obmc_stride >> 1); >> + IDWTELEM *dst = slice_buffer_get_line(sb, src_y + y); >> + for (int x = 0; x < b_w; x++) { >> + int v = obmc1[x] * block[3][x + y*src_stride] >> + + obmc2[x] * block[2][x + y*src_stride] >> + + obmc3[x] * block[1][x + y*src_stride] >> + + obmc4[x] * block[0][x + y*src_stride]; >> + >> + v <<= 8 - LOG2_OBMC_MAX; >> + if (FRAC_BITS != 8) >> + v >>= 8 - FRAC_BITS; >> + if (add) { >> + v += dst[x + src_x]; >> + v = (v + (1 << (FRAC_BITS - 1))) >> FRAC_BITS; >> + if (v & (~255)) v= ~(v>>31); >> + dst8[x + y*src_stride] = v; >> + } else >> + dst[x + src_x] -= v; >> + } >> + } >> +} >> + > > putting this in snow_dwt may be convenient but it is not part of the dwt so > this feels semantically wrong > If it is not part of the dwt, then why does SnowDWTContext have an inner_add_yblock function pointer whose C version is ff_snow_inner_add_yblock? - Andreas
On Wed, Sep 21, 2022 at 01:00:07PM +0200, Andreas Rheinhardt wrote: > Michael Niedermayer: > > On Mon, Sep 19, 2022 at 11:27:49PM +0200, Andreas Rheinhardt wrote: > >> Only used there and by x86 snow asm code as fallback. > >> > >> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> > >> --- > >> libavcodec/snow.c | 33 --------------------------------- > >> libavcodec/snow.h | 3 --- > >> libavcodec/snow_dwt.c | 32 ++++++++++++++++++++++++++++++++ > >> libavcodec/snow_dwt.h | 3 +++ > >> 4 files changed, 35 insertions(+), 36 deletions(-) > >> > >> diff --git a/libavcodec/snow.c b/libavcodec/snow.c > >> index aa15fccc42..85ad6d10a1 100644 > >> --- a/libavcodec/snow.c > >> +++ b/libavcodec/snow.c > >> @@ -29,39 +29,6 @@ > >> #include "snowdata.h" > >> > >> > >> -void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, > >> - int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ > >> - int y, x; > >> - IDWTELEM * dst; > >> - for(y=0; y<b_h; y++){ > >> - //FIXME ugly misuse of obmc_stride > >> - const uint8_t *obmc1= obmc + y*obmc_stride; > >> - const uint8_t *obmc2= obmc1+ (obmc_stride>>1); > >> - const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); > >> - const uint8_t *obmc4= obmc3+ (obmc_stride>>1); > >> - dst = slice_buffer_get_line(sb, src_y + y); > >> - for(x=0; x<b_w; x++){ > >> - int v= obmc1[x] * block[3][x + y*src_stride] > >> - +obmc2[x] * block[2][x + y*src_stride] > >> - +obmc3[x] * block[1][x + y*src_stride] > >> - +obmc4[x] * block[0][x + y*src_stride]; > >> - > >> - v <<= 8 - LOG2_OBMC_MAX; > >> - if(FRAC_BITS != 8){ > >> - v >>= 8 - FRAC_BITS; > >> - } > >> - if(add){ > >> - v += dst[x + src_x]; > >> - v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; > >> - if(v&(~255)) v= ~(v>>31); > >> - dst8[x + y*src_stride] = v; > >> - }else{ > >> - dst[x + src_x] -= v; > >> - } > >> - } > >> - } > >> -} > >> - > >> int ff_snow_get_buffer(SnowContext *s, AVFrame 
*frame) > >> { > >> int ret, i; > >> diff --git a/libavcodec/snow.h b/libavcodec/snow.h > >> index ed0f9abb42..1c976b9ba7 100644 > >> --- a/libavcodec/snow.h > >> +++ b/libavcodec/snow.h > >> @@ -45,11 +45,8 @@ > >> #define QSHIFT 5 > >> #define QROOT (1<<QSHIFT) > >> #define LOSSLESS_QLOG -128 > >> -#define FRAC_BITS 4 > >> #define MAX_REF_FRAMES 8 > >> > >> -#define LOG2_OBMC_MAX 8 > >> -#define OBMC_MAX (1<<(LOG2_OBMC_MAX)) > >> typedef struct BlockNode{ > >> int16_t mx; ///< Motion vector component X, see mv_scale > >> int16_t my; ///< Motion vector component Y, see mv_scale > >> diff --git a/libavcodec/snow_dwt.c b/libavcodec/snow_dwt.c > >> index 18b315ef66..9401d119d0 100644 > >> --- a/libavcodec/snow_dwt.c > >> +++ b/libavcodec/snow_dwt.c > >> @@ -25,6 +25,38 @@ > >> #include "me_cmp.h" > >> #include "snow_dwt.h" > >> > >> +void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, > >> + uint8_t **block, int b_w, int b_h, > >> + int src_x, int src_y, int src_stride, > >> + slice_buffer * sb, int add, uint8_t * dst8) > >> +{ > >> + for (int y = 0; y < b_h; y++) { > >> + //FIXME ugly misuse of obmc_stride > >> + const uint8_t *obmc1 = obmc + y * obmc_stride; > >> + const uint8_t *obmc2 = obmc1 + (obmc_stride >> 1); > >> + const uint8_t *obmc3 = obmc1 + obmc_stride * (obmc_stride >> 1); > >> + const uint8_t *obmc4 = obmc3 + (obmc_stride >> 1); > >> + IDWTELEM *dst = slice_buffer_get_line(sb, src_y + y); > >> + for (int x = 0; x < b_w; x++) { > >> + int v = obmc1[x] * block[3][x + y*src_stride] > >> + + obmc2[x] * block[2][x + y*src_stride] > >> + + obmc3[x] * block[1][x + y*src_stride] > >> + + obmc4[x] * block[0][x + y*src_stride]; > >> + > >> + v <<= 8 - LOG2_OBMC_MAX; > >> + if (FRAC_BITS != 8) > >> + v >>= 8 - FRAC_BITS; > >> + if (add) { > >> + v += dst[x + src_x]; > >> + v = (v + (1 << (FRAC_BITS - 1))) >> FRAC_BITS; > >> + if (v & (~255)) v= ~(v>>31); > >> + dst8[x + y*src_stride] = v; > >> + } else > >> + dst[x + src_x] -= v; > >> + } > 
>> + } > >> +} > >> + > > > > putting this in snow_dwt may be convenient but it is not part of the dwt so > > this feels semantically wrong > > > > If it is not part of the dwt, then why does SnowDWTContext have an > inner_add_yblock function pointer whose C version is > ff_snow_inner_add_yblock? I am not sure how this relates to what is part of a DWT. You can check the definition of a DWT and then look at this function: it's not a DWT, and it's not part of some optimized DWT implementation either. What it is, is part of snow's OBMC+DWT related code. The callback is an optimization of that; for the purpose of optimization this is mixed together like that. A git grep inner_add_yblock origin also shows mostly snowdsp matches, not snowdwt. Is it really a problem to leave dsp code in files with dsp in the name? If so, I am not against making an exception here. thx [...]
diff --git a/libavcodec/snow.c b/libavcodec/snow.c index aa15fccc42..85ad6d10a1 100644 --- a/libavcodec/snow.c +++ b/libavcodec/snow.c @@ -29,39 +29,6 @@ #include "snowdata.h" -void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, - int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ - int y, x; - IDWTELEM * dst; - for(y=0; y<b_h; y++){ - //FIXME ugly misuse of obmc_stride - const uint8_t *obmc1= obmc + y*obmc_stride; - const uint8_t *obmc2= obmc1+ (obmc_stride>>1); - const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); - const uint8_t *obmc4= obmc3+ (obmc_stride>>1); - dst = slice_buffer_get_line(sb, src_y + y); - for(x=0; x<b_w; x++){ - int v= obmc1[x] * block[3][x + y*src_stride] - +obmc2[x] * block[2][x + y*src_stride] - +obmc3[x] * block[1][x + y*src_stride] - +obmc4[x] * block[0][x + y*src_stride]; - - v <<= 8 - LOG2_OBMC_MAX; - if(FRAC_BITS != 8){ - v >>= 8 - FRAC_BITS; - } - if(add){ - v += dst[x + src_x]; - v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; - if(v&(~255)) v= ~(v>>31); - dst8[x + y*src_stride] = v; - }else{ - dst[x + src_x] -= v; - } - } - } -} - int ff_snow_get_buffer(SnowContext *s, AVFrame *frame) { int ret, i; diff --git a/libavcodec/snow.h b/libavcodec/snow.h index ed0f9abb42..1c976b9ba7 100644 --- a/libavcodec/snow.h +++ b/libavcodec/snow.h @@ -45,11 +45,8 @@ #define QSHIFT 5 #define QROOT (1<<QSHIFT) #define LOSSLESS_QLOG -128 -#define FRAC_BITS 4 #define MAX_REF_FRAMES 8 -#define LOG2_OBMC_MAX 8 -#define OBMC_MAX (1<<(LOG2_OBMC_MAX)) typedef struct BlockNode{ int16_t mx; ///< Motion vector component X, see mv_scale int16_t my; ///< Motion vector component Y, see mv_scale diff --git a/libavcodec/snow_dwt.c b/libavcodec/snow_dwt.c index 18b315ef66..9401d119d0 100644 --- a/libavcodec/snow_dwt.c +++ b/libavcodec/snow_dwt.c @@ -25,6 +25,38 @@ #include "me_cmp.h" #include "snow_dwt.h" +void ff_snow_inner_add_yblock(const uint8_t *obmc, const int 
obmc_stride, + uint8_t **block, int b_w, int b_h, + int src_x, int src_y, int src_stride, + slice_buffer * sb, int add, uint8_t * dst8) +{ + for (int y = 0; y < b_h; y++) { + //FIXME ugly misuse of obmc_stride + const uint8_t *obmc1 = obmc + y * obmc_stride; + const uint8_t *obmc2 = obmc1 + (obmc_stride >> 1); + const uint8_t *obmc3 = obmc1 + obmc_stride * (obmc_stride >> 1); + const uint8_t *obmc4 = obmc3 + (obmc_stride >> 1); + IDWTELEM *dst = slice_buffer_get_line(sb, src_y + y); + for (int x = 0; x < b_w; x++) { + int v = obmc1[x] * block[3][x + y*src_stride] + + obmc2[x] * block[2][x + y*src_stride] + + obmc3[x] * block[1][x + y*src_stride] + + obmc4[x] * block[0][x + y*src_stride]; + + v <<= 8 - LOG2_OBMC_MAX; + if (FRAC_BITS != 8) + v >>= 8 - FRAC_BITS; + if (add) { + v += dst[x + src_x]; + v = (v + (1 << (FRAC_BITS - 1))) >> FRAC_BITS; + if (v & (~255)) v= ~(v>>31); + dst8[x + y*src_stride] = v; + } else + dst[x + src_x] -= v; + } + } +} + int ff_slice_buffer_init(slice_buffer *buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM *base_buffer) diff --git a/libavcodec/snow_dwt.h b/libavcodec/snow_dwt.h index 15b8a3007b..132e01f9ce 100644 --- a/libavcodec/snow_dwt.h +++ b/libavcodec/snow_dwt.h @@ -29,6 +29,9 @@ struct MpegEncContext; typedef int DWTELEM; typedef short IDWTELEM; +#define FRAC_BITS 4 +#define LOG2_OBMC_MAX 8 + #define MAX_DECOMPOSITIONS 8 typedef struct DWTCompose {
Only used there and by x86 snow asm code as fallback. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libavcodec/snow.c | 33 --------------------------------- libavcodec/snow.h | 3 --- libavcodec/snow_dwt.c | 32 ++++++++++++++++++++++++++++++++ libavcodec/snow_dwt.h | 3 +++ 4 files changed, 35 insertions(+), 36 deletions(-)