Message ID | 20240610202322.786800-1-remi@remlab.net |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,PATCHv4,1/4] lavc/h263dsp: add DCT dequantisation functions | expand |
Context | Check | Description |
---|---|---|
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
Rémi Denis-Courmont: > Note that optimised implementations of these functions will be taken > into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra} > are *not* overloaded by existing optimisations. > --- > libavcodec/h263dsp.c | 25 +++++++++++++++++++++++++ > libavcodec/h263dsp.h | 4 ++++ > 2 files changed, 29 insertions(+) > > diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c > index 6a13353499..755d7077bf 100644 > --- a/libavcodec/h263dsp.c > +++ b/libavcodec/h263dsp.c > @@ -23,6 +23,29 @@ > #include "config.h" > #include "h263dsp.h" > > +static void h263_dct_unquantize_inter_c(int16_t *block, size_t nCoeffs, > + int qmul, int qadd) > +{ > + for (size_t i = 0; i <= nCoeffs; i++) { > + int level = block[i]; > + > + if (level) { > + if (level < 0) > + level = level * qmul - qadd; > + else > + level = level * qmul + qadd; > + block[i] = level; > + } > + } > +} > + > +static void h263_dct_unquantize_intra_c(int16_t *block, size_t nCoeffs, > + int qmul, int qadd) > +{ > + if (nCoeffs > 0) Great, a branch. Why don't you just use a signed type? > + h263_dct_unquantize_inter_c(block + 1, nCoeffs - 1, qmul, qadd); > +} > + > const uint8_t ff_h263_loop_filter_strength[32] = { > 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 7, > 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12 > @@ -116,6 +139,8 @@ static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale) > > av_cold void ff_h263dsp_init(H263DSPContext *ctx) > { > + ctx->h263_dct_unquantize_intra = h263_dct_unquantize_intra_c; > + ctx->h263_dct_unquantize_inter = h263_dct_unquantize_inter_c; > ctx->h263_h_loop_filter = h263_h_loop_filter_c; > ctx->h263_v_loop_filter = h263_v_loop_filter_c; > > diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h > index 2dccd23392..0ecbe83314 100644 > --- a/libavcodec/h263dsp.h > +++ b/libavcodec/h263dsp.h > @@ -24,6 +24,10 @@ > extern const uint8_t ff_h263_loop_filter_strength[32]; > > typedef struct H263DSPContext { > + void (*h263_dct_unquantize_intra)(int16_t *block /* align 16 */, > + size_t len, int mul, int add); > + void (*h263_dct_unquantize_inter)(int16_t *block /* align 16 */, > + size_t len, int mul, int add); > void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); > void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); > } H263DSPContext;
Le tiistaina 11. kesäkuuta 2024, 13.30.28 EEST Andreas Rheinhardt a écrit : > > +static void h263_dct_unquantize_intra_c(int16_t *block, size_t nCoeffs, > > + int qmul, int qadd) > > +{ > > + if (nCoeffs > 0) > > Great, a branch. Okay so you want sarcasms, have at it. If you had ever looked at optimised compilations, you would have noticed that optimising compiler emits that branch implicitly in scenarii like the current code to deal with that case, thus rewriting the code as if (nCoeffs > 0) { do { ...; i++ } while (i < nCoeffs); } This patch is simply making the branch explicit so we can share the otherwise identical C code. > Why don't you just use a signed type? Just so you know, the correct type for positive sizes is size_t. You know, a size, like *not* a stride.
Rémi Denis-Courmont: > Le tiistaina 11. kesäkuuta 2024, 13.30.28 EEST Andreas Rheinhardt a écrit : >>> +static void h263_dct_unquantize_intra_c(int16_t *block, size_t nCoeffs, >>> + int qmul, int qadd) >>> +{ >>> + if (nCoeffs > 0) >> >> Great, a branch. > > Okay so you want sarcasms, have at it. > > If you had ever looked at optimised compilations, you would have noticed that > optimising compiler emits that branch implicitly in scenarii like the current > code to deal with that case, thus rewriting the code as > > if (nCoeffs > 0) { > do { ...; i++ } while (i < nCoeffs); > } > > This patch is simply making the branch explicit so we can share the otherwise > identical C code. With the current code the initial branch can be avoided in the ac_pred case. > >> Why don't you just use a signed type? > > Just so you know, the correct type for positive sizes is size_t. > You know, a size, like *not* a stride. >
On 6/10/2024 5:23 PM, Rémi Denis-Courmont wrote: > Note that optimised implementations of these functions will be taken > into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra} > are *not* overloaded by existing optimisations. > --- > libavcodec/h263dsp.c | 25 +++++++++++++++++++++++++ > libavcodec/h263dsp.h | 4 ++++ > 2 files changed, 29 insertions(+) > > diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c > index 6a13353499..755d7077bf 100644 > --- a/libavcodec/h263dsp.c > +++ b/libavcodec/h263dsp.c > @@ -23,6 +23,29 @@ > #include "config.h" > #include "h263dsp.h" > > +static void h263_dct_unquantize_inter_c(int16_t *block, size_t nCoeffs, > + int qmul, int qadd) > +{ > + for (size_t i = 0; i <= nCoeffs; i++) { > + int level = block[i]; > + > + if (level) { > + if (level < 0) > + level = level * qmul - qadd; > + else > + level = level * qmul + qadd; > + block[i] = level; > + } > + } > +} > + > +static void h263_dct_unquantize_intra_c(int16_t *block, size_t nCoeffs, > + int qmul, int qadd) > +{ > + if (nCoeffs > 0) > + h263_dct_unquantize_inter_c(block + 1, nCoeffs - 1, qmul, qadd); I think you can avoid this branch by doing: for (size_t i = 1; i <= nCoeffs; i++) { int level = block[i]; if (level) { if (level < 0) level = level * qmul - qadd; else level = level * qmul + qadd; block[i] = level; } } > +} > + > const uint8_t ff_h263_loop_filter_strength[32] = { > 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 7, > 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12 > @@ -116,6 +139,8 @@ static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale) > > av_cold void ff_h263dsp_init(H263DSPContext *ctx) > { > + ctx->h263_dct_unquantize_intra = h263_dct_unquantize_intra_c; > + ctx->h263_dct_unquantize_inter = h263_dct_unquantize_inter_c; > ctx->h263_h_loop_filter = h263_h_loop_filter_c; > ctx->h263_v_loop_filter = h263_v_loop_filter_c; > > diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h > index 2dccd23392..0ecbe83314 100644 > --- a/libavcodec/h263dsp.h > +++ b/libavcodec/h263dsp.h > @@ -24,6 +24,10 @@ > extern const uint8_t ff_h263_loop_filter_strength[32]; > > typedef struct H263DSPContext { > + void (*h263_dct_unquantize_intra)(int16_t *block /* align 16 */, > + size_t len, int mul, int add); > + void (*h263_dct_unquantize_inter)(int16_t *block /* align 16 */, > + size_t len, int mul, int add); > void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); > void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); > } H263DSPContext;
diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c index 6a13353499..755d7077bf 100644 --- a/libavcodec/h263dsp.c +++ b/libavcodec/h263dsp.c @@ -23,6 +23,29 @@ #include "config.h" #include "h263dsp.h" +static void h263_dct_unquantize_inter_c(int16_t *block, size_t nCoeffs, + int qmul, int qadd) +{ + for (size_t i = 0; i <= nCoeffs; i++) { + int level = block[i]; + + if (level) { + if (level < 0) + level = level * qmul - qadd; + else + level = level * qmul + qadd; + block[i] = level; + } + } +} + +static void h263_dct_unquantize_intra_c(int16_t *block, size_t nCoeffs, + int qmul, int qadd) +{ + if (nCoeffs > 0) + h263_dct_unquantize_inter_c(block + 1, nCoeffs - 1, qmul, qadd); +} + const uint8_t ff_h263_loop_filter_strength[32] = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12 @@ -116,6 +139,8 @@ static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale) av_cold void ff_h263dsp_init(H263DSPContext *ctx) { + ctx->h263_dct_unquantize_intra = h263_dct_unquantize_intra_c; + ctx->h263_dct_unquantize_inter = h263_dct_unquantize_inter_c; ctx->h263_h_loop_filter = h263_h_loop_filter_c; ctx->h263_v_loop_filter = h263_v_loop_filter_c; diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h index 2dccd23392..0ecbe83314 100644 --- a/libavcodec/h263dsp.h +++ b/libavcodec/h263dsp.h @@ -24,6 +24,10 @@ extern const uint8_t ff_h263_loop_filter_strength[32]; typedef struct H263DSPContext { + void (*h263_dct_unquantize_intra)(int16_t *block /* align 16 */, + size_t len, int mul, int add); + void (*h263_dct_unquantize_inter)(int16_t *block /* align 16 */, + size_t len, int mul, int add); void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); } H263DSPContext;