diff mbox series

[FFmpeg-devel,2/5] avfilter/vf_yadif: Allow alignment to be configurable

Message ID 20220720044117.1282961-2-cphlipot0@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/5] avfilter/vf_yadif: Fix edge size when MAX_ALIGN is < 4 | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Chris Phlipot July 20, 2022, 4:41 a.m. UTC
Allow the alignment to be determined based on which yadif_filter_line
implementation is used. Currently this is either 1 or 8, depending on
whether the C code or the x86 SSE code is used, but this change also
allows for future implementations that use a larger alignment.

Raising MAX_ALIGN to 32 to accommodate an AVX2 implementation could
potentially hurt the performance of the SSE implementation, because
the edge region processed by filter_edges grows with the alignment.
Instead, we allow yadif to use the smallest alignment needed by the
selected implementation, which maintains existing performance if
implementations with wider vectors are added.

Signed-off-by: Chris Phlipot <cphlipot0@gmail.com>
---
 libavfilter/vf_yadif.c          | 16 +++++++++-------
 libavfilter/x86/vf_yadif_init.c |  1 +
 libavfilter/yadif.h             |  4 +++-
 3 files changed, 13 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/libavfilter/vf_yadif.c b/libavfilter/vf_yadif.c
index 055327d7a4..42f6246330 100644
--- a/libavfilter/vf_yadif.c
+++ b/libavfilter/vf_yadif.c
@@ -108,9 +108,9 @@  static void filter_line_c(void *dst1,
     FILTER(0, w, 1)
 }
 
-#define MAX_ALIGN 8
 static void filter_edges(void *dst1, void *prev1, void *cur1, void *next1,
-                         int w, int prefs, int mrefs, int parity, int mode)
+                         int w, int prefs, int mrefs, int parity, int mode,
+                         int alignment)
 {
     uint8_t *dst  = dst1;
     uint8_t *prev = prev1;
@@ -120,7 +120,7 @@  static void filter_edges(void *dst1, void *prev1, void *cur1, void *next1,
     uint8_t *prev2 = parity ? prev : cur ;
     uint8_t *next2 = parity ? cur  : next;
 
-    const int edge = FFMAX(MAX_ALIGN - 1, 3);
+    const int edge = FFMAX(alignment - 1, 3);
     int offset = FFMAX(w - edge, 3);
 
     /* Only edge pixels need to be processed here.  A constant value of false
@@ -159,7 +159,8 @@  static void filter_line_c_16bit(void *dst1,
 }
 
 static void filter_edges_16bit(void *dst1, void *prev1, void *cur1, void *next1,
-                               int w, int prefs, int mrefs, int parity, int mode)
+                               int w, int prefs, int mrefs, int parity, int mode,
+                               int alignment)
 {
     uint16_t *dst  = dst1;
     uint16_t *prev = prev1;
@@ -169,7 +170,7 @@  static void filter_edges_16bit(void *dst1, void *prev1, void *cur1, void *next1,
     uint16_t *prev2 = parity ? prev : cur ;
     uint16_t *next2 = parity ? cur  : next;
 
-    const int edge = FFMAX(MAX_ALIGN / 2 - 1, 3);
+    const int edge = FFMAX(alignment / 2 - 1, 3);
     int offset = FFMAX(w - edge, 3);
 
     mrefs /= 2;
@@ -199,7 +200,7 @@  static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
     int slice_start = (td->h *  jobnr   ) / nb_jobs;
     int slice_end   = (td->h * (jobnr+1)) / nb_jobs;
     int y;
-    int edge = 3 + MAX_ALIGN / df - 1;
+    int edge = 3 + s->req_align / df - 1;
 
     /* filtering reads 3 pixels to the left/right; to avoid invalid reads,
      * we need to call the c variant which avoids this for border pixels
@@ -219,7 +220,7 @@  static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
             s->filter_edges(dst, prev, cur, next, td->w,
                             y + 1 < td->h ? refs : -refs,
                             y ? -refs : refs,
-                            td->parity ^ td->tff, mode);
+                            td->parity ^ td->tff, mode, s->req_align);
         } else {
             memcpy(&td->frame->data[td->plane][y * td->frame->linesize[td->plane]],
                    &s->cur->data[td->plane][y * refs], td->w * df);
@@ -303,6 +304,7 @@  static int config_output(AVFilterLink *outlink)
 
     s->csp = av_pix_fmt_desc_get(outlink->format);
     s->filter = filter;
+    s->req_align = 1;
     if (s->csp->comp[0].depth > 8) {
         s->filter_line  = filter_line_c_16bit;
         s->filter_edges = filter_edges_16bit;
diff --git a/libavfilter/x86/vf_yadif_init.c b/libavfilter/x86/vf_yadif_init.c
index 257c3f9199..9dd73f8e44 100644
--- a/libavfilter/x86/vf_yadif_init.c
+++ b/libavfilter/x86/vf_yadif_init.c
@@ -53,6 +53,7 @@  av_cold void ff_yadif_init_x86(YADIFContext *yadif)
     int bit_depth = (!yadif->csp) ? 8
                                   : yadif->csp->comp[0].depth;
 
+    yadif->req_align = 8;
     if (bit_depth >= 15) {
         if (EXTERNAL_SSE2(cpu_flags))
             yadif->filter_line = ff_yadif_filter_line_16bit_sse2;
diff --git a/libavfilter/yadif.h b/libavfilter/yadif.h
index c928911b35..b81f2fc1d9 100644
--- a/libavfilter/yadif.h
+++ b/libavfilter/yadif.h
@@ -66,11 +66,13 @@  typedef struct YADIFContext {
     /**
      * Required alignment for filter_line
      */
+    int req_align;
     void (*filter_line)(void *dst,
                         void *prev, void *cur, void *next,
                         int w, int prefs, int mrefs, int parity, int mode);
     void (*filter_edges)(void *dst, void *prev, void *cur, void *next,
-                         int w, int prefs, int mrefs, int parity, int mode);
+                         int w, int prefs, int mrefs, int parity, int mode,
+                         int alignment);
 
     const AVPixFmtDescriptor *csp;
     int eof;