diff mbox series

[FFmpeg-devel,1/3,GSoC,2024] libavcodec/vvc: convert (*sad) to (*sad[6]) to prepare for AVX2 funcs

Message ID 20240501224031.109294-2-chen.stonechen@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/3,GSoC,2024] libavcodec/vvc: convert (*sad) to (*sad[6]) to prepare for AVX2 funcs | expand

Commit Message

Stone Chen May 1, 2024, 10:39 p.m. UTC
To prepare for adding AVX2 functions for different block widths, change VVCInterDSPContext to contain (*sad[6]) instead of (*sad). This also initializes every entry of the pointer array to the scalar function by default and updates the call sites to select the correct function based on block width. There's no change in functionality.
---
 libavcodec/vvc/dsp.h            | 2 +-
 libavcodec/vvc/inter.c          | 4 ++--
 libavcodec/vvc/inter_template.c | 5 ++++-
 3 files changed, 7 insertions(+), 4 deletions(-)

Comments

Andreas Rheinhardt May 1, 2024, 10:59 p.m. UTC | #1
Stone Chen:
> To prepare for adding AVX2 functions for different block widths, change VVCInterDSPContext to contain (*sad[6]) instead of (*sad). This also default initializes the pointer array with the scalar function and the calling sites to jump to the correct function based on block width. There's no change in functionality.
> ---
>  libavcodec/vvc/dsp.h            | 2 +-
>  libavcodec/vvc/inter.c          | 4 ++--
>  libavcodec/vvc/inter_template.c | 5 ++++-
>  3 files changed, 7 insertions(+), 4 deletions(-)
> 
> diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h
> index 9810ac314c..b06a3ef10e 100644
> --- a/libavcodec/vvc/dsp.h
> +++ b/libavcodec/vvc/dsp.h
> @@ -86,7 +86,7 @@ typedef struct VVCInterDSPContext {
>  
>      void (*apply_bdof)(uint8_t *dst, ptrdiff_t dst_stride, int16_t *src0, int16_t *src1, int block_w, int block_h);
>  
> -    int (*sad)(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h);
> +    int (*sad[6])(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h);
>      void (*dmvr[2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int height,
>          intptr_t mx, intptr_t my, int width);
>  } VVCInterDSPContext;
> diff --git a/libavcodec/vvc/inter.c b/libavcodec/vvc/inter.c
> index 4a8d1d866a..a68f4f9452 100644
> --- a/libavcodec/vvc/inter.c
> +++ b/libavcodec/vvc/inter.c
> @@ -742,7 +742,7 @@ static void dmvr_mv_refine(VVCLocalContext *lc, MvField *mvf, MvField *orig_mv,
>          fc->vvcdsp.inter.dmvr[!!my][!!mx](tmp[i], src, src_stride, pred_h, mx, my, pred_w);
>      }
>  
> -    min_sad = fc->vvcdsp.inter.sad(tmp[L0], tmp[L1], dx, dy, block_w, block_h);
> +    min_sad = fc->vvcdsp.inter.sad[av_log2(block_w) - 2](tmp[L0], tmp[L1], dx, dy, block_w, block_h);
>      min_sad -= min_sad >> 2;
>      sad[dy][dx] = min_sad;
>  
> @@ -752,7 +752,7 @@ static void dmvr_mv_refine(VVCLocalContext *lc, MvField *mvf, MvField *orig_mv,
>          for (dy = 0; dy < SAD_ARRAY_SIZE; dy++) {
>              for (dx = 0; dx < SAD_ARRAY_SIZE; dx++) {
>                  if (dx != sr_range || dy != sr_range) {
> -                    sad[dy][dx] = fc->vvcdsp.inter.sad(lc->tmp, lc->tmp1, dx, dy, block_w, block_h);
> +                    sad[dy][dx] = fc->vvcdsp.inter.sad[av_log2(block_w) - 2](lc->tmp, lc->tmp1, dx, dy, block_w, block_h);
>                      if (sad[dy][dx] < min_sad) {
>                          min_sad = sad[dy][dx];
>                          min_dx = dx;
> diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
> index e2fbfd4fc0..545e8dd184 100644
> --- a/libavcodec/vvc/inter_template.c
> +++ b/libavcodec/vvc/inter_template.c
> @@ -458,7 +458,10 @@ static void FUNC(ff_vvc_inter_dsp_init)(VVCInterDSPContext *const inter)
>      inter->apply_prof_uni_w     = FUNC(apply_prof_uni_w);
>      inter->apply_bdof           = FUNC(apply_bdof);
>      inter->prof_grad_filter     = FUNC(prof_grad_filter);
> -    inter->sad                  = vvc_sad;
> +    
> +    for (int i = 0; i < FF_ARRAY_ELEMS(inter->sad); i++) {
> +        inter->sad[i]           = vvc_sad;
> +    }
>  }
>  
>  #undef FUNCS

Why is the jump depending upon block width not performed inside your
avx2 implementation?

- Andreas
Stone Chen May 6, 2024, 5:02 p.m. UTC | #2
On Wed, May 1, 2024 at 6:59 PM Andreas Rheinhardt <
andreas.rheinhardt@outlook.com> wrote:

> Stone Chen:
> > To prepare for adding AVX2 functions for different block widths, change
> VVCInterDSPContext to contain (*sad[6]) instead of (*sad). This also
> default initializes the pointer array with the scalar function and the
> calling sites to jump to the correct function based on block width. There's
> no change in functionality.
> > ---
> >  libavcodec/vvc/dsp.h            | 2 +-
> >  libavcodec/vvc/inter.c          | 4 ++--
> >  libavcodec/vvc/inter_template.c | 5 ++++-
> >  3 files changed, 7 insertions(+), 4 deletions(-)
> >
> > diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h
> > index 9810ac314c..b06a3ef10e 100644
> > --- a/libavcodec/vvc/dsp.h
> > +++ b/libavcodec/vvc/dsp.h
> > @@ -86,7 +86,7 @@ typedef struct VVCInterDSPContext {
> >
> >      void (*apply_bdof)(uint8_t *dst, ptrdiff_t dst_stride, int16_t
> *src0, int16_t *src1, int block_w, int block_h);
> >
> > -    int (*sad)(const int16_t *src0, const int16_t *src1, int dx, int
> dy, int block_w, int block_h);
> > +    int (*sad[6])(const int16_t *src0, const int16_t *src1, int dx, int
> dy, int block_w, int block_h);
> >      void (*dmvr[2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t
> src_stride, int height,
> >          intptr_t mx, intptr_t my, int width);
> >  } VVCInterDSPContext;
> > diff --git a/libavcodec/vvc/inter.c b/libavcodec/vvc/inter.c
> > index 4a8d1d866a..a68f4f9452 100644
> > --- a/libavcodec/vvc/inter.c
> > +++ b/libavcodec/vvc/inter.c
> > @@ -742,7 +742,7 @@ static void dmvr_mv_refine(VVCLocalContext *lc,
> MvField *mvf, MvField *orig_mv,
> >          fc->vvcdsp.inter.dmvr[!!my][!!mx](tmp[i], src, src_stride,
> pred_h, mx, my, pred_w);
> >      }
> >
> > -    min_sad = fc->vvcdsp.inter.sad(tmp[L0], tmp[L1], dx, dy, block_w,
> block_h);
> > +    min_sad = fc->vvcdsp.inter.sad[av_log2(block_w) - 2](tmp[L0],
> tmp[L1], dx, dy, block_w, block_h);
> >      min_sad -= min_sad >> 2;
> >      sad[dy][dx] = min_sad;
> >
> > @@ -752,7 +752,7 @@ static void dmvr_mv_refine(VVCLocalContext *lc,
> MvField *mvf, MvField *orig_mv,
> >          for (dy = 0; dy < SAD_ARRAY_SIZE; dy++) {
> >              for (dx = 0; dx < SAD_ARRAY_SIZE; dx++) {
> >                  if (dx != sr_range || dy != sr_range) {
> > -                    sad[dy][dx] = fc->vvcdsp.inter.sad(lc->tmp,
> lc->tmp1, dx, dy, block_w, block_h);
> > +                    sad[dy][dx] = fc->vvcdsp.inter.sad[av_log2(block_w)
> - 2](lc->tmp, lc->tmp1, dx, dy, block_w, block_h);
> >                      if (sad[dy][dx] < min_sad) {
> >                          min_sad = sad[dy][dx];
> >                          min_dx = dx;
> > diff --git a/libavcodec/vvc/inter_template.c
> b/libavcodec/vvc/inter_template.c
> > index e2fbfd4fc0..545e8dd184 100644
> > --- a/libavcodec/vvc/inter_template.c
> > +++ b/libavcodec/vvc/inter_template.c
> > @@ -458,7 +458,10 @@ static void
> FUNC(ff_vvc_inter_dsp_init)(VVCInterDSPContext *const inter)
> >      inter->apply_prof_uni_w     = FUNC(apply_prof_uni_w);
> >      inter->apply_bdof           = FUNC(apply_bdof);
> >      inter->prof_grad_filter     = FUNC(prof_grad_filter);
> > -    inter->sad                  = vvc_sad;
> > +
> > +    for (int i = 0; i < FF_ARRAY_ELEMS(inter->sad); i++) {
> > +        inter->sad[i]           = vvc_sad;
> > +    }
> >  }
> >
> >  #undef FUNCS
>
> Why is the jump depending upon block width not performed inside your
> avx2 implementation?
>
> - Andreas
>

Hi Andreas,

Sorry, I missed your email.

In hindsight, there's no particular reason, other than that it was the easiest
way (for me) to get jumps to different functions.
I guess I could just use comparisons against the block width followed by
jumps? Or, alternatively, I could figure out how to write a jump table in asm.

Would those methods be better or did you have something different in mind?

Thanks for the feedback!
Stone


>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
diff mbox series

Patch

diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h
index 9810ac314c..b06a3ef10e 100644
--- a/libavcodec/vvc/dsp.h
+++ b/libavcodec/vvc/dsp.h
@@ -86,7 +86,7 @@  typedef struct VVCInterDSPContext {
 
     void (*apply_bdof)(uint8_t *dst, ptrdiff_t dst_stride, int16_t *src0, int16_t *src1, int block_w, int block_h);
 
-    int (*sad)(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h);
+    int (*sad[6])(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h);
     void (*dmvr[2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int height,
         intptr_t mx, intptr_t my, int width);
 } VVCInterDSPContext;
diff --git a/libavcodec/vvc/inter.c b/libavcodec/vvc/inter.c
index 4a8d1d866a..a68f4f9452 100644
--- a/libavcodec/vvc/inter.c
+++ b/libavcodec/vvc/inter.c
@@ -742,7 +742,7 @@  static void dmvr_mv_refine(VVCLocalContext *lc, MvField *mvf, MvField *orig_mv,
         fc->vvcdsp.inter.dmvr[!!my][!!mx](tmp[i], src, src_stride, pred_h, mx, my, pred_w);
     }
 
-    min_sad = fc->vvcdsp.inter.sad(tmp[L0], tmp[L1], dx, dy, block_w, block_h);
+    min_sad = fc->vvcdsp.inter.sad[av_log2(block_w) - 2](tmp[L0], tmp[L1], dx, dy, block_w, block_h);
     min_sad -= min_sad >> 2;
     sad[dy][dx] = min_sad;
 
@@ -752,7 +752,7 @@  static void dmvr_mv_refine(VVCLocalContext *lc, MvField *mvf, MvField *orig_mv,
         for (dy = 0; dy < SAD_ARRAY_SIZE; dy++) {
             for (dx = 0; dx < SAD_ARRAY_SIZE; dx++) {
                 if (dx != sr_range || dy != sr_range) {
-                    sad[dy][dx] = fc->vvcdsp.inter.sad(lc->tmp, lc->tmp1, dx, dy, block_w, block_h);
+                    sad[dy][dx] = fc->vvcdsp.inter.sad[av_log2(block_w) - 2](lc->tmp, lc->tmp1, dx, dy, block_w, block_h);
                     if (sad[dy][dx] < min_sad) {
                         min_sad = sad[dy][dx];
                         min_dx = dx;
diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
index e2fbfd4fc0..545e8dd184 100644
--- a/libavcodec/vvc/inter_template.c
+++ b/libavcodec/vvc/inter_template.c
@@ -458,7 +458,10 @@  static void FUNC(ff_vvc_inter_dsp_init)(VVCInterDSPContext *const inter)
     inter->apply_prof_uni_w     = FUNC(apply_prof_uni_w);
     inter->apply_bdof           = FUNC(apply_bdof);
     inter->prof_grad_filter     = FUNC(prof_grad_filter);
-    inter->sad                  = vvc_sad;
+    
+    for (int i = 0; i < FF_ARRAY_ELEMS(inter->sad); i++) {
+        inter->sad[i]           = vvc_sad;
+    }
 }
 
 #undef FUNCS