@@ -108,9 +108,9 @@ static void filter_line_c(void *dst1,
FILTER(0, w, 1)
}
-#define MAX_ALIGN 8
static void filter_edges(void *dst1, void *prev1, void *cur1, void *next1,
- int w, int prefs, int mrefs, int parity, int mode)
+ int w, int prefs, int mrefs, int parity, int mode,
+ int alignment)
{
uint8_t *dst = dst1;
uint8_t *prev = prev1;
@@ -120,7 +120,7 @@ static void filter_edges(void *dst1, void *prev1, void *cur1, void *next1,
uint8_t *prev2 = parity ? prev : cur ;
uint8_t *next2 = parity ? cur : next;
- const int edge = FFMAX(MAX_ALIGN - 1, 3);
+ const int edge = FFMAX(alignment - 1, 3);
int offset = FFMAX(w - edge, 3);
/* Only edge pixels need to be processed here. A constant value of false
@@ -159,7 +159,8 @@ static void filter_line_c_16bit(void *dst1,
}
static void filter_edges_16bit(void *dst1, void *prev1, void *cur1, void *next1,
- int w, int prefs, int mrefs, int parity, int mode)
+ int w, int prefs, int mrefs, int parity, int mode,
+ int alignment)
{
uint16_t *dst = dst1;
uint16_t *prev = prev1;
@@ -169,7 +170,7 @@ static void filter_edges_16bit(void *dst1, void *prev1, void *cur1, void *next1,
uint16_t *prev2 = parity ? prev : cur ;
uint16_t *next2 = parity ? cur : next;
- const int edge = FFMAX(MAX_ALIGN / 2 - 1, 3);
+ const int edge = FFMAX(alignment / 2 - 1, 3);
int offset = FFMAX(w - edge, 3);
mrefs /= 2;
@@ -199,7 +200,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
int slice_start = (td->h * jobnr ) / nb_jobs;
int slice_end = (td->h * (jobnr+1)) / nb_jobs;
int y;
- int edge = 3 + MAX_ALIGN / df - 1;
+ int edge = 3 + s->req_align / df - 1;
/* filtering reads 3 pixels to the left/right; to avoid invalid reads,
* we need to call the c variant which avoids this for border pixels
@@ -219,7 +220,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
s->filter_edges(dst, prev, cur, next, td->w,
y + 1 < td->h ? refs : -refs,
y ? -refs : refs,
- td->parity ^ td->tff, mode);
+ td->parity ^ td->tff, mode, s->req_align);
} else {
memcpy(&td->frame->data[td->plane][y * td->frame->linesize[td->plane]],
&s->cur->data[td->plane][y * refs], td->w * df);
@@ -303,6 +304,7 @@ static int config_output(AVFilterLink *outlink)
s->csp = av_pix_fmt_desc_get(outlink->format);
s->filter = filter;
+ s->req_align = 1;
if (s->csp->comp[0].depth > 8) {
s->filter_line = filter_line_c_16bit;
s->filter_edges = filter_edges_16bit;
@@ -53,6 +53,7 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif)
int bit_depth = (!yadif->csp) ? 8
: yadif->csp->comp[0].depth;
+ yadif->req_align = 8;
if (bit_depth >= 15) {
if (EXTERNAL_SSE2(cpu_flags))
yadif->filter_line = ff_yadif_filter_line_16bit_sse2;
@@ -66,11 +66,13 @@ typedef struct YADIFContext {
/**
* Required alignment for filter_line
*/
+ int req_align;
void (*filter_line)(void *dst,
void *prev, void *cur, void *next,
int w, int prefs, int mrefs, int parity, int mode);
void (*filter_edges)(void *dst, void *prev, void *cur, void *next,
- int w, int prefs, int mrefs, int parity, int mode);
+ int w, int prefs, int mrefs, int parity, int mode,
+ int alignment);
const AVPixFmtDescriptor *csp;
int eof;
Allow the alignment to be determined by which yadif_filter_line implementation is used. Currently this is either 1 or 8, depending on whether the C code or the x86 SSE code is used, but the change also allows for future implementations that require a larger alignment. Raising MAX_ALIGN to 32 for an AVX2 implementation could hurt the performance of the SSE implementation, so yadif now uses the smallest alignment the selected implementation needs; this maintains existing performance if implementations with wider vectors are added. Signed-off-by: Chris Phlipot <cphlipot0@gmail.com> --- libavfilter/vf_yadif.c | 16 +++++++++------- libavfilter/x86/vf_yadif_init.c | 1 + libavfilter/yadif.h | 4 +++- 3 files changed, 13 insertions(+), 8 deletions(-)