Message ID | 20240501224031.109294-2-chen.stonechen@gmail.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,1/3,GSoC,2024] libavcodec/vvc: convert (*sad) to (*sad[6]) to prepare for AVX2 funcs | expand |
Stone Chen: > To prepare for adding AVX2 functions for different block widths, change VVCInterDSPContext to contain (*sad[6]) instead of (*sad). This also default initializes the pointer array with the scalar function and the calling sites to jump to the correct function based on block width. There's no change in functionality. > --- > libavcodec/vvc/dsp.h | 2 +- > libavcodec/vvc/inter.c | 4 ++-- > libavcodec/vvc/inter_template.c | 5 ++++- > 3 files changed, 7 insertions(+), 4 deletions(-) > > diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h > index 9810ac314c..b06a3ef10e 100644 > --- a/libavcodec/vvc/dsp.h > +++ b/libavcodec/vvc/dsp.h > @@ -86,7 +86,7 @@ typedef struct VVCInterDSPContext { > > void (*apply_bdof)(uint8_t *dst, ptrdiff_t dst_stride, int16_t *src0, int16_t *src1, int block_w, int block_h); > > - int (*sad)(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h); > + int (*sad[6])(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h); > void (*dmvr[2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int height, > intptr_t mx, intptr_t my, int width); > } VVCInterDSPContext; > diff --git a/libavcodec/vvc/inter.c b/libavcodec/vvc/inter.c > index 4a8d1d866a..a68f4f9452 100644 > --- a/libavcodec/vvc/inter.c > +++ b/libavcodec/vvc/inter.c > @@ -742,7 +742,7 @@ static void dmvr_mv_refine(VVCLocalContext *lc, MvField *mvf, MvField *orig_mv, > fc->vvcdsp.inter.dmvr[!!my][!!mx](tmp[i], src, src_stride, pred_h, mx, my, pred_w); > } > > - min_sad = fc->vvcdsp.inter.sad(tmp[L0], tmp[L1], dx, dy, block_w, block_h); > + min_sad = fc->vvcdsp.inter.sad[av_log2(block_w) - 2](tmp[L0], tmp[L1], dx, dy, block_w, block_h); > min_sad -= min_sad >> 2; > sad[dy][dx] = min_sad; > > @@ -752,7 +752,7 @@ static void dmvr_mv_refine(VVCLocalContext *lc, MvField *mvf, MvField *orig_mv, > for (dy = 0; dy < SAD_ARRAY_SIZE; dy++) { > for (dx = 0; dx < SAD_ARRAY_SIZE; dx++) { > if (dx != sr_range || dy != 
sr_range) { > - sad[dy][dx] = fc->vvcdsp.inter.sad(lc->tmp, lc->tmp1, dx, dy, block_w, block_h); > + sad[dy][dx] = fc->vvcdsp.inter.sad[av_log2(block_w) - 2](lc->tmp, lc->tmp1, dx, dy, block_w, block_h); > if (sad[dy][dx] < min_sad) { > min_sad = sad[dy][dx]; > min_dx = dx; > diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c > index e2fbfd4fc0..545e8dd184 100644 > --- a/libavcodec/vvc/inter_template.c > +++ b/libavcodec/vvc/inter_template.c > @@ -458,7 +458,10 @@ static void FUNC(ff_vvc_inter_dsp_init)(VVCInterDSPContext *const inter) > inter->apply_prof_uni_w = FUNC(apply_prof_uni_w); > inter->apply_bdof = FUNC(apply_bdof); > inter->prof_grad_filter = FUNC(prof_grad_filter); > - inter->sad = vvc_sad; > + > + for (int i = 0; i < FF_ARRAY_ELEMS(inter->sad); i++) { > + inter->sad[i] = vvc_sad; > + } > } > > #undef FUNCS Why is the jump depending upon block width not performed inside your avx2 implementation? - Andreas
On Wed, May 1, 2024 at 6:59 PM Andreas Rheinhardt < andreas.rheinhardt@outlook.com> wrote: > Stone Chen: > > To prepare for adding AVX2 functions for different block widths, change > VVCInterDSPContext to contain (*sad[6]) instead of (*sad). This also > default initializes the pointer array with the scalar function and the > calling sites to jump to the correct function based on block width. There's > no change in functionality. > > --- > > libavcodec/vvc/dsp.h | 2 +- > > libavcodec/vvc/inter.c | 4 ++-- > > libavcodec/vvc/inter_template.c | 5 ++++- > > 3 files changed, 7 insertions(+), 4 deletions(-) > > > > diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h > > index 9810ac314c..b06a3ef10e 100644 > > --- a/libavcodec/vvc/dsp.h > > +++ b/libavcodec/vvc/dsp.h > > @@ -86,7 +86,7 @@ typedef struct VVCInterDSPContext { > > > > void (*apply_bdof)(uint8_t *dst, ptrdiff_t dst_stride, int16_t > *src0, int16_t *src1, int block_w, int block_h); > > > > - int (*sad)(const int16_t *src0, const int16_t *src1, int dx, int > dy, int block_w, int block_h); > > + int (*sad[6])(const int16_t *src0, const int16_t *src1, int dx, int > dy, int block_w, int block_h); > > void (*dmvr[2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t > src_stride, int height, > > intptr_t mx, intptr_t my, int width); > > } VVCInterDSPContext; > > diff --git a/libavcodec/vvc/inter.c b/libavcodec/vvc/inter.c > > index 4a8d1d866a..a68f4f9452 100644 > > --- a/libavcodec/vvc/inter.c > > +++ b/libavcodec/vvc/inter.c > > @@ -742,7 +742,7 @@ static void dmvr_mv_refine(VVCLocalContext *lc, > MvField *mvf, MvField *orig_mv, > > fc->vvcdsp.inter.dmvr[!!my][!!mx](tmp[i], src, src_stride, > pred_h, mx, my, pred_w); > > } > > > > - min_sad = fc->vvcdsp.inter.sad(tmp[L0], tmp[L1], dx, dy, block_w, > block_h); > > + min_sad = fc->vvcdsp.inter.sad[av_log2(block_w) - 2](tmp[L0], > tmp[L1], dx, dy, block_w, block_h); > > min_sad -= min_sad >> 2; > > sad[dy][dx] = min_sad; > > > > @@ -752,7 +752,7 @@ static void 
dmvr_mv_refine(VVCLocalContext *lc, > MvField *mvf, MvField *orig_mv, > > for (dy = 0; dy < SAD_ARRAY_SIZE; dy++) { > > for (dx = 0; dx < SAD_ARRAY_SIZE; dx++) { > > if (dx != sr_range || dy != sr_range) { > > - sad[dy][dx] = fc->vvcdsp.inter.sad(lc->tmp, > lc->tmp1, dx, dy, block_w, block_h); > > + sad[dy][dx] = fc->vvcdsp.inter.sad[av_log2(block_w) > - 2](lc->tmp, lc->tmp1, dx, dy, block_w, block_h); > > if (sad[dy][dx] < min_sad) { > > min_sad = sad[dy][dx]; > > min_dx = dx; > > diff --git a/libavcodec/vvc/inter_template.c > b/libavcodec/vvc/inter_template.c > > index e2fbfd4fc0..545e8dd184 100644 > > --- a/libavcodec/vvc/inter_template.c > > +++ b/libavcodec/vvc/inter_template.c > > @@ -458,7 +458,10 @@ static void > FUNC(ff_vvc_inter_dsp_init)(VVCInterDSPContext *const inter) > > inter->apply_prof_uni_w = FUNC(apply_prof_uni_w); > > inter->apply_bdof = FUNC(apply_bdof); > > inter->prof_grad_filter = FUNC(prof_grad_filter); > > - inter->sad = vvc_sad; > > + > > + for (int i = 0; i < FF_ARRAY_ELEMS(inter->sad); i++) { > > + inter->sad[i] = vvc_sad; > > + } > > } > > > > #undef FUNCS > > Why is the jump depending upon block width not performed inside your > avx2 implementation? > > - Andreas > Hi Andreas, Sorry, I missed your email. In hindsight, there's no particular reason, besides that it was the easiest way (for me) to get jumps to different functions. I guess I could just use comparisons against the block width and jumps? Or, alternatively, figure out how to write a jump table in asm. Would those methods be better, or did you have something different in mind? Thanks for the feedback! Stone > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h index 9810ac314c..b06a3ef10e 100644 --- a/libavcodec/vvc/dsp.h +++ b/libavcodec/vvc/dsp.h @@ -86,7 +86,7 @@ typedef struct VVCInterDSPContext { void (*apply_bdof)(uint8_t *dst, ptrdiff_t dst_stride, int16_t *src0, int16_t *src1, int block_w, int block_h); - int (*sad)(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h); + int (*sad[6])(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h); void (*dmvr[2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int height, intptr_t mx, intptr_t my, int width); } VVCInterDSPContext; diff --git a/libavcodec/vvc/inter.c b/libavcodec/vvc/inter.c index 4a8d1d866a..a68f4f9452 100644 --- a/libavcodec/vvc/inter.c +++ b/libavcodec/vvc/inter.c @@ -742,7 +742,7 @@ static void dmvr_mv_refine(VVCLocalContext *lc, MvField *mvf, MvField *orig_mv, fc->vvcdsp.inter.dmvr[!!my][!!mx](tmp[i], src, src_stride, pred_h, mx, my, pred_w); } - min_sad = fc->vvcdsp.inter.sad(tmp[L0], tmp[L1], dx, dy, block_w, block_h); + min_sad = fc->vvcdsp.inter.sad[av_log2(block_w) - 2](tmp[L0], tmp[L1], dx, dy, block_w, block_h); min_sad -= min_sad >> 2; sad[dy][dx] = min_sad; @@ -752,7 +752,7 @@ static void dmvr_mv_refine(VVCLocalContext *lc, MvField *mvf, MvField *orig_mv, for (dy = 0; dy < SAD_ARRAY_SIZE; dy++) { for (dx = 0; dx < SAD_ARRAY_SIZE; dx++) { if (dx != sr_range || dy != sr_range) { - sad[dy][dx] = fc->vvcdsp.inter.sad(lc->tmp, lc->tmp1, dx, dy, block_w, block_h); + sad[dy][dx] = fc->vvcdsp.inter.sad[av_log2(block_w) - 2](lc->tmp, lc->tmp1, dx, dy, block_w, block_h); if (sad[dy][dx] < min_sad) { min_sad = sad[dy][dx]; min_dx = dx; diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c index e2fbfd4fc0..545e8dd184 100644 --- a/libavcodec/vvc/inter_template.c +++ b/libavcodec/vvc/inter_template.c @@ -458,7 +458,10 @@ static void FUNC(ff_vvc_inter_dsp_init)(VVCInterDSPContext *const inter) 
inter->apply_prof_uni_w = FUNC(apply_prof_uni_w); inter->apply_bdof = FUNC(apply_bdof); inter->prof_grad_filter = FUNC(prof_grad_filter); - inter->sad = vvc_sad; + + for (int i = 0; i < FF_ARRAY_ELEMS(inter->sad); i++) { + inter->sad[i] = vvc_sad; + } } #undef FUNCS