Message ID | TYSPR06MB64330000902A81F27C1F7537AA652@TYSPR06MB6433.apcprd06.prod.outlook.com |
---|---|
State | Accepted |
Commit | 8d0dda8260e67996efbc96d3148ce7238e42df60 |
Headers | show |
Series | [FFmpeg-devel,v2,1/4] hevcdec: move sao template to h26x/h2656_sao_template.c | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
On Sat, Jan 6, 2024 at 6:52 PM Nuo Mi <nuomi2021@gmail.com> wrote: > --- > libavcodec/vvc/vvc_filter_template.c | 82 +--------------------------- > 1 file changed, 3 insertions(+), 79 deletions(-) > > diff --git a/libavcodec/vvc/vvc_filter_template.c > b/libavcodec/vvc/vvc_filter_template.c > index 9418980c33..671ed7de4e 100644 > --- a/libavcodec/vvc/vvc_filter_template.c > +++ b/libavcodec/vvc/vvc_filter_template.c > @@ -461,6 +461,8 @@ static void FUNC(alf_recon_coeff_and_clip)(int16_t > *coeff, int16_t *clip, > #define FQ2 pix[2 * xstride + 1 * ystride] > #define FQ3 pix[3 * xstride + 1 * ystride] > > +#include "libavcodec/h26x/h2656_deblock_template.c" > Will merge this in two days if there are no objections to the file and directory names. Thank you
Hi, On Mon, Jan 8, 2024 at 8:03 PM Nuo Mi <nuomi2021@gmail.com> wrote: > On Sat, Jan 6, 2024 at 6:52 PM Nuo Mi <nuomi2021@gmail.com> wrote: > > > --- > > libavcodec/vvc/vvc_filter_template.c | 82 +--------------------------- > > 1 file changed, 3 insertions(+), 79 deletions(-) > > > > diff --git a/libavcodec/vvc/vvc_filter_template.c > > b/libavcodec/vvc/vvc_filter_template.c > > index 9418980c33..671ed7de4e 100644 > > --- a/libavcodec/vvc/vvc_filter_template.c > > +++ b/libavcodec/vvc/vvc_filter_template.c > > @@ -461,6 +461,8 @@ static void FUNC(alf_recon_coeff_and_clip)(int16_t > > *coeff, int16_t *clip, > > #define FQ2 pix[2 * xstride + 1 * ystride] > > #define FQ3 pix[3 * xstride + 1 * ystride] > > > > +#include "libavcodec/h26x/h2656_deblock_template.c" > > > Will merge this in two days if there are no objections to the file and > directory names. > Are there options to share the actual generated binary code? The C code admittedly is not so important, but it would be great if there were some way to ensure that optimizations written for HEVC in some instruction set also work for VVC - or vice versa. Ronald
On Tue, Jan 9, 2024 at 9:23 AM Ronald S. Bultje <rsbultje@gmail.com> wrote: > Hi, > > On Mon, Jan 8, 2024 at 8:03 PM Nuo Mi <nuomi2021@gmail.com> wrote: > > > On Sat, Jan 6, 2024 at 6:52 PM Nuo Mi <nuomi2021@gmail.com> wrote: > > > > > --- > > > libavcodec/vvc/vvc_filter_template.c | 82 +--------------------------- > > > 1 file changed, 3 insertions(+), 79 deletions(-) > > > > > > diff --git a/libavcodec/vvc/vvc_filter_template.c > > > b/libavcodec/vvc/vvc_filter_template.c > > > index 9418980c33..671ed7de4e 100644 > > > --- a/libavcodec/vvc/vvc_filter_template.c > > > +++ b/libavcodec/vvc/vvc_filter_template.c > > > @@ -461,6 +461,8 @@ static void FUNC(alf_recon_coeff_and_clip)(int16_t > > > *coeff, int16_t *clip, > > > #define FQ2 pix[2 * xstride + 1 * ystride] > > > #define FQ3 pix[3 * xstride + 1 * ystride] > > > > > > +#include "libavcodec/h26x/h2656_deblock_template.c" > > > > > Will merge this in two days if there are no objections to the file and > > directory names. > > > > Are there options to share the actual generated binary code? The C code > admittedly is not so important, but it would be great if there was some way > to ensure that optimizations written for HEVC in some instruction set, work > for VVC also - or vice versa. > Yes. After we merge this, we will send out the mc x86 asm code for review. It will share the same binary with HEVC. For SAO/Deblock, we will follow a similar approach, but that will come a little later than mc. For C code, we can share the binary as well, but that involves some interface changes, so it is better to do it after all the asm is ready. > > Ronald > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
Hi, On Mon, Jan 8, 2024 at 10:05 PM Nuo Mi <nuomi2021@gmail.com> wrote: > On Tue, Jan 9, 2024 at 9:23 AM Ronald S. Bultje <rsbultje@gmail.com> > wrote: > > > Hi, > > > > On Mon, Jan 8, 2024 at 8:03 PM Nuo Mi <nuomi2021@gmail.com> wrote: > > > > > On Sat, Jan 6, 2024 at 6:52 PM Nuo Mi <nuomi2021@gmail.com> wrote: > > > > > > > --- > > > > libavcodec/vvc/vvc_filter_template.c | 82 > +--------------------------- > > > > 1 file changed, 3 insertions(+), 79 deletions(-) > > > > > > > > diff --git a/libavcodec/vvc/vvc_filter_template.c > > > > b/libavcodec/vvc/vvc_filter_template.c > > > > index 9418980c33..671ed7de4e 100644 > > > > --- a/libavcodec/vvc/vvc_filter_template.c > > > > +++ b/libavcodec/vvc/vvc_filter_template.c > > > > @@ -461,6 +461,8 @@ static void > FUNC(alf_recon_coeff_and_clip)(int16_t > > > > *coeff, int16_t *clip, > > > > #define FQ2 pix[2 * xstride + 1 * ystride] > > > > #define FQ3 pix[3 * xstride + 1 * ystride] > > > > > > > > +#include "libavcodec/h26x/h2656_deblock_template.c" > > > > > > > Will merge this in two days if there are no objections to the file and > > > directory names. > > > > > > > Are there options to share the actual generated binary code? The C code > > admittedly is not so important, but it would be great if there was some > way > > to ensure that optimizations written for HEVC in some instruction set, > work > > for VVC also - or vice versa. > > > Yes. After we merge this, we will send out the mc x86 asm code for review. > It will share the same binary with HEVC. > For SAO/Deblock, we will follow a similar approach, but it needs to be a > little later than mc. > For C code, we can share the binary as well, but it involves some interface > changes, better to do it after all asm is ready. > OK, that sounds reasonable to me. Ronald
> > > > > Yes. After we merge this, we will send out the mc x86 asm code for > review. > > It will share the same binary with HEVC. > > For SAO/Deblock, we will follow a similar approach, but it needs to be a > > little later than mc. > > For C code, we can share the binary as well, but it involves some > interface > > changes, better to do it after all asm is ready. > > > > OK, that sounds reasonable to me. > Thank you, Ronald. Pushed. > > Ronald > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
diff --git a/libavcodec/vvc/vvc_filter_template.c b/libavcodec/vvc/vvc_filter_template.c index 9418980c33..671ed7de4e 100644 --- a/libavcodec/vvc/vvc_filter_template.c +++ b/libavcodec/vvc/vvc_filter_template.c @@ -461,6 +461,8 @@ static void FUNC(alf_recon_coeff_and_clip)(int16_t *coeff, int16_t *clip, #define FQ2 pix[2 * xstride + 1 * ystride] #define FQ3 pix[3 * xstride + 1 * ystride] +#include "libavcodec/h26x/h2656_deblock_template.c" + static void FUNC(loop_filter_luma_large)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride, const int32_t tc, const uint8_t no_p, const uint8_t no_q, const uint8_t max_len_p, const uint8_t max_len_q) { @@ -541,66 +543,6 @@ static void FUNC(loop_filter_luma_large)(pixel *pix, const ptrdiff_t xstride, co } } -static void FUNC(loop_filter_luma_strong)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride, const int32_t tc, - const uint8_t no_p, const uint8_t no_q) -{ - const int tc2 = tc << 1; - const int tc3 = tc * 3; - for (int d = 0; d < 4; d++) { - const int p3 = P3; - const int p2 = P2; - const int p1 = P1; - const int p0 = P0; - const int q0 = Q0; - const int q1 = Q1; - const int q2 = Q2; - const int q3 = Q3; - if (!no_p) { - P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc3, tc3); - P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2); - P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc, tc); - } - if (!no_q) { - Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc3, tc3); - Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2); - Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc, tc); - } - pix += ystride; - } -} - -static void FUNC(loop_filter_luma_weak)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride, - const int32_t tc, const int32_t beta, const uint8_t no_p, const uint8_t no_q, const int nd_p, const int nd_q) -{ - const int tc_2 = tc >> 1; - for (int d = 0; d < 4; d++) { 
- const int p2 = P2; - const int p1 = P1; - const int p0 = P0; - const int q0 = Q0; - const int q1 = Q1; - const int q2 = Q2; - int delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4; - if (abs(delta0) < 10 * tc) { - delta0 = av_clip(delta0, -tc, tc); - if (!no_p) - P0 = av_clip_pixel(p0 + delta0); - if (!no_q) - Q0 = av_clip_pixel(q0 - delta0); - if (!no_p && nd_p > 1) { - const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2); - P1 = av_clip_pixel(p1 + deltap1); - } - if (!no_q && nd_q > 1) { - const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2); - Q1 = av_clip_pixel(q1 + deltaq1); - } - } - pix += ystride; - } - -} - static void FUNC(vvc_loop_filter_luma)(uint8_t* _pix, ptrdiff_t _xstride, ptrdiff_t _ystride, const int32_t *_beta, const int32_t *_tc, const uint8_t *_no_p, const uint8_t *_no_q, const uint8_t *_max_len_p, const uint8_t *_max_len_q, int hor_ctu_edge) @@ -673,7 +615,7 @@ static void FUNC(vvc_loop_filter_luma)(uint8_t* _pix, ptrdiff_t _xstride, ptrdif abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 && abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 && (d0 << 1) < beta_2 && (d3 << 1) < beta_2) { - FUNC(loop_filter_luma_strong)(pix, xstride, ystride, tc, no_p, no_q); + FUNC(loop_filter_luma_strong)(pix, xstride, ystride, tc, tc << 1, tc * 3, no_p, no_q); } else { // weak filtering int nd_p = 1; int nd_q = 1; @@ -737,24 +679,6 @@ static void FUNC(loop_filter_chroma_strong_one_side)(pixel *pix, const ptrdiff_t } } -static void FUNC(loop_filter_chroma_weak)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride, - const int size, const int32_t tc, const uint8_t no_p, const uint8_t no_q) -{ - for (int d = 0; d < size; d++) { - int delta0; - const int p1 = P1; - const int p0 = P0; - const int q0 = Q0; - const int q1 = Q1; - delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc); - if (!no_p) - P0 = av_clip_pixel(p0 + delta0); - if (!no_q) - Q0 = 
av_clip_pixel(q0 - delta0); - pix += ystride; - } -} - static void FUNC(vvc_loop_filter_chroma)(uint8_t *_pix, const ptrdiff_t _xstride, const ptrdiff_t _ystride, const int32_t *_beta, const int32_t *_tc, const uint8_t *_no_p, const uint8_t *_no_q, const uint8_t *_max_len_p, const uint8_t *_max_len_q, const int shift)