Message ID | 20190909201220.13070-1-ffmpeg@tmm1.net |
---|---|
State | New |
Headers | show |
On 9/9/19, Aman Gupta <ffmpeg@tmm1.net> wrote: > From: Aman Gupta <aman@tmm1.net> > > These are simple algorithms which can be run efficiently > on low powered devices to produce deinteraced images. > > Signed-off-by: Aman Gupta <aman@tmm1.net> > --- > doc/filters.texi | 27 ++ > libavfilter/Makefile | 1 + > libavfilter/aarch64/Makefile | 1 + > libavfilter/aarch64/merge_neon.S | 98 ++++++ > libavfilter/allfilters.c | 1 + > libavfilter/arm/Makefile | 3 + > libavfilter/arm/merge_armv6.S | 70 ++++ > libavfilter/arm/merge_neon.S | 109 ++++++ > libavfilter/vf_fastdeint.c | 588 +++++++++++++++++++++++++++++++ > 9 files changed, 898 insertions(+) > create mode 100644 libavfilter/aarch64/merge_neon.S > create mode 100644 libavfilter/arm/Makefile > create mode 100644 libavfilter/arm/merge_armv6.S > create mode 100644 libavfilter/arm/merge_neon.S > create mode 100644 libavfilter/vf_fastdeint.c > > diff --git a/doc/filters.texi b/doc/filters.texi > index 6c81e1da40..55d9adeb81 100644 > --- a/doc/filters.texi > +++ b/doc/filters.texi > @@ -9796,6 +9796,33 @@ fade=t=in:st=5.5:d=0.5 > > @end itemize > > +@section fastdeint > +Fast deinterlacing algorithms. > + > +@table @option > +@item mode > +Deinterlacing algorithm to use. > + > +It accepts the following values: > +@table @samp > +@item discard > +Discard bottom frame. > + > +@item mean > +Half resolution blender. > + > +@item blend > +Full resolution blender. > + > +@item bob > +Bob doubler. > + > +@item linear > +Bob doubler with linear interpolation. > +@end table > + > +@end table > + > @section fftdnoiz > Denoise frames using 3D FFT (frequency domain filtering). 
> > diff --git a/libavfilter/Makefile b/libavfilter/Makefile > index 3ef4191d9a..a2b3566ec0 100644 > --- a/libavfilter/Makefile > +++ b/libavfilter/Makefile > @@ -234,6 +234,7 @@ OBJS-$(CONFIG_EROSION_OPENCL_FILTER) += > vf_neighbor_opencl.o opencl.o \ > opencl/neighbor.o > OBJS-$(CONFIG_EXTRACTPLANES_FILTER) += vf_extractplanes.o > OBJS-$(CONFIG_FADE_FILTER) += vf_fade.o > +OBJS-$(CONFIG_FASTDEINT_FILTER) += vf_fastdeint.o > OBJS-$(CONFIG_FFTDNOIZ_FILTER) += vf_fftdnoiz.o > OBJS-$(CONFIG_FFTFILT_FILTER) += vf_fftfilt.o > OBJS-$(CONFIG_FIELD_FILTER) += vf_field.o > diff --git a/libavfilter/aarch64/Makefile b/libavfilter/aarch64/Makefile > index b58daa3a3f..2b0ad92893 100644 > --- a/libavfilter/aarch64/Makefile > +++ b/libavfilter/aarch64/Makefile > @@ -1,3 +1,4 @@ > OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_init.o > > +NEON-OBJS-$(CONFIG_FASTDEINT_FILTER) += aarch64/merge_neon.o > NEON-OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_neon.o > diff --git a/libavfilter/aarch64/merge_neon.S > b/libavfilter/aarch64/merge_neon.S > new file mode 100644 > index 0000000000..62377331a4 > --- /dev/null > +++ b/libavfilter/aarch64/merge_neon.S > @@ -0,0 +1,98 @@ > +/* > + * Copyright (c) 2009-2016 Rémi Denis-Courmont, Janne Grunau, VLC authors > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include "libavutil/aarch64/asm.S" > + > +#define dest x0 > +#define src1 x1 > +#define src2 x2 > +#define size x3 > + > + .align 2 > + // NOTE: Offset and pitch must be multiple of 16-bytes. > +function ff_merge8_neon, export=1 > + ands x5, size, #~63 > + b.eq 2f > + mov x10, #64 > + add x11, src1, #32 > + add x12, src2, #32 > +1: > + ld1 {v0.16b,v1.16b}, [src1], x10 > + ld1 {v4.16b,v5.16b}, [src2], x10 > + ld1 {v2.16b,v3.16b}, [x11], x10 > + uhadd v0.16b, v0.16b, v4.16b > + ld1 {v6.16b,v7.16b}, [x12], x10 > + subs x5, x5, #64 > + uhadd v1.16b, v1.16b, v5.16b > + uhadd v2.16b, v2.16b, v6.16b > + uhadd v3.16b, v3.16b, v7.16b > + st1 {v0.16b,v1.16b}, [dest], #32 > + st1 {v2.16b,v3.16b}, [dest], #32 > + b.gt 1b > +2: > + tbz size, #5, 3f > + ld1 {v0.16b,v1.16b}, [src1], #32 > + ld1 {v4.16b,v5.16b}, [src2], #32 > + uhadd v0.16b, v0.16b, v4.16b > + uhadd v1.16b, v1.16b, v5.16b > + st1 {v0.16b,v1.16b}, [dest], #32 > +3: > + tbz size, #4, 4f > + ld1 {v0.16b}, [src1] > + ld1 {v4.16b}, [src2] > + uhadd v0.16b, v0.16b, v4.16b > + st1 {v0.16b}, [dest] > +4: > + ret > +endfunc > + > + .align 2 > +function ff_merge16_neon, export=1 > + ands x5, size, #~63 > + b.eq 2f > +1: > + ld1 {v0.8h,v1.8h}, [src1], #32 > + ld1 {v4.8h,v5.8h}, [src2], #32 > + ld1 {v2.8h,v3.8h}, [src1], #32 > + uhadd v0.8h, v0.8h, v4.8h > + ld1 {v6.8h,v7.8h}, [src2], #32 > + uhadd v1.8h, v1.8h, v5.8h > + uhadd v2.8h, v2.8h, v6.8h > + uhadd v3.8h, v3.8h, v7.8h > + st1 {v0.8h,v1.8h}, [dest], #32 > + st1 {v2.8h,v3.8h}, [dest], #32 > + subs x5, x5, #64 > + b.gt 1b > +2: > + tbz size, #5, 3f > + ld1 {v0.8h,v1.8h}, [src1], #32 > + ld1 {v4.8h,v5.8h}, [src2], #32 > + uhadd v0.8h, v0.8h, v4.8h > + uhadd v1.8h, v1.8h, v5.8h > + st1 {v0.8h,v1.8h}, [dest], #32 > +3: > + tbz size, 
#4, 4f > + ld1 {v0.8h}, [src1] > + ld1 {v4.8h}, [src2] > + uhadd v0.8h, v0.8h,v4.8h > + st1 {v0.8h}, [dest] > +4: > + ret > +endfunc > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c > index b675c688ee..6631af2ffe 100644 > --- a/libavfilter/allfilters.c > +++ b/libavfilter/allfilters.c > @@ -219,6 +219,7 @@ extern AVFilter ff_vf_erosion; > extern AVFilter ff_vf_erosion_opencl; > extern AVFilter ff_vf_extractplanes; > extern AVFilter ff_vf_fade; > +extern AVFilter ff_vf_fastdeint; > extern AVFilter ff_vf_fftdnoiz; > extern AVFilter ff_vf_fftfilt; > extern AVFilter ff_vf_field; > diff --git a/libavfilter/arm/Makefile b/libavfilter/arm/Makefile > new file mode 100644 > index 0000000000..c92d62fac9 > --- /dev/null > +++ b/libavfilter/arm/Makefile > @@ -0,0 +1,3 @@ > +ARMV6-OBJS-$(CONFIG_FASTDEINT_FILTER) += arm/merge_armv6.o > + > +NEON-OBJS-$(CONFIG_FASTDEINT_FILTER) += arm/merge_neon.o > diff --git a/libavfilter/arm/merge_armv6.S b/libavfilter/arm/merge_armv6.S > new file mode 100644 > index 0000000000..9b551c2c6c > --- /dev/null > +++ b/libavfilter/arm/merge_armv6.S > @@ -0,0 +1,70 @@ > +/* > + * Copyright (c) 2009-2012 Rémi Denis-Courmont, VLC authors > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include "libavutil/arm/asm.S" > + > +#define dest r0 > +#define src1 r1 > +#define src2 r2 > +#define size r3 > + > + .align 2 > +function ff_merge8_armv6, export=1 > + push {r4-r9,lr} > +1: > + pld [src1, #64] > + ldm src1!, {r4-r5} > + pld [src2, #64] > + ldm src2!, {r8-r9} > + subs size, size, #16 > + uhadd8 r4, r4, r8 > + ldm src1!, {r6-r7} > + uhadd8 r5, r5, r9 > + ldm src2!, {ip,lr} > + uhadd8 r6, r6, ip > + stm dest!, {r4-r5} > + uhadd8 r7, r7, lr > + stm dest!, {r6-r7} > + it eq > + popeq {r4-r9,pc} > + b 1b > +endfunc > + > + .align 2 > +function ff_merge16_armv6, export=1 > + push {r4-r9,lr} > +1: > + pld [src1, #64] > + ldm src1!, {r4-r5} > + pld [src2, #64] > + ldm src2!, {r8-r9} > + subs size, size, #16 > + uhadd16 r4, r4, r8 > + ldm src1!, {r6-r7} > + uhadd16 r5, r5, r9 > + ldm src2!, {ip,lr} > + uhadd16 r6, r6, ip > + stm dest!, {r4-r5} > + uhadd16 r7, r7, lr > + stm dest!, {r6-r7} > + it eq > + popeq {r4-r9,pc} > + b 1b > +endfunc > \ No newline at end of file > diff --git a/libavfilter/arm/merge_neon.S b/libavfilter/arm/merge_neon.S > new file mode 100644 > index 0000000000..ae36cf3ca9 > --- /dev/null > +++ b/libavfilter/arm/merge_neon.S > @@ -0,0 +1,109 @@ > +/* > + * Copyright (c) 2009-2012 Rémi Denis-Courmont, VLC authors > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. 
> + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include "libavutil/arm/asm.S" > + > +#define dest r0 > +#define src1 r1 > +#define src2 r2 > +#define size r3 > + > + .align 2 > + @ NOTE: Offset and pitch must be multiple of 16-bytes. > +function ff_merge8_neon, export=1 > + cmp size, #64 > + blo 2f > +1: > + pld [src1, #64] > + vld1.u8 {q0-q1}, [src1,:128]! > + pld [src2, #64] > + vld1.u8 {q8-q9}, [src2,:128]! > + vhadd.u8 q0, q0, q8 > + sub size, size, #64 > + vld1.u8 {q2-q3}, [src1,:128]! > + vhadd.u8 q1, q1, q9 > + vld1.u8 {q10-q11}, [src2,:128]! > + vhadd.u8 q2, q2, q10 > + cmp size, #64 > + vhadd.u8 q3, q3, q11 > + vst1.u8 {q0-q1}, [dest,:128]! > + vst1.u8 {q2-q3}, [dest,:128]! > + bhs 1b > +2: > + cmp size, #32 > + blo 3f > + vld1.u8 {q0-q1}, [src1,:128]! > + sub size, size, #32 > + vld1.u8 {q8-q9}, [src2,:128]! > + vhadd.u8 q0, q0, q8 > + vhadd.u8 q1, q1, q9 > + vst1.u8 {q0-q1}, [dest,:128]! > +3: > + cmp size, #16 > + it lo > + bxlo lr > + vld1.u8 {q0}, [src1,:128]! > + sub size, size, #16 > + vld1.u8 {q8}, [src2,:128]! > + vhadd.u8 q0, q0, q8 > + vst1.u8 {q0}, [dest,:128]! > + bx lr > +endfunc > + > + .align 2 > +function ff_merge16_neon, export=1 > + cmp size, #64 > + blo 2f > +1: > + pld [src1, #64] > + vld1.u16 {q0-q1}, [src1,:128]! > + pld [src2, #64] > + vld1.u16 {q8-q9}, [src2,:128]! > + vhadd.u16 q0, q0, q8 > + sub size, size, #64 > + vld1.u16 {q2-q3}, [src1,:128]! > + vhadd.u16 q1, q1, q9 > + vld1.u16 {q10-q11}, [src2,:128]! 
> + vhadd.u16 q2, q2, q10 > + cmp size, #64 > + vhadd.u16 q3, q3, q11 > + vst1.u16 {q0-q1}, [dest,:128]! > + vst1.u16 {q2-q3}, [dest,:128]! > + bhs 1b > +2: > + cmp size, #32 > + blo 3f > + vld1.u16 {q0-q1}, [src1,:128]! > + sub size, size, #32 > + vld1.u16 {q8-q9}, [src2,:128]! > + vhadd.u16 q0, q0, q8 > + vhadd.u16 q1, q1, q9 > + vst1.u16 {q0-q1}, [dest,:128]! > +3: > + cmp size, #16 > + it lo > + bxlo lr > + vld1.u16 {q0}, [src1,:128]! > + sub size, size, #16 > + vld1.u16 {q8}, [src2,:128]! > + vhadd.u16 q0, q0, q8 > + vst1.u16 {q0}, [dest,:128]! > + bx lr > +endfunc > \ No newline at end of file > diff --git a/libavfilter/vf_fastdeint.c b/libavfilter/vf_fastdeint.c > new file mode 100644 > index 0000000000..5ddd8be392 > --- /dev/null > +++ b/libavfilter/vf_fastdeint.c > @@ -0,0 +1,588 @@ > +/* > + * Copyright (C) 2015 Aman Gupta <aman@tmm1.net> > + * 2000-2011 VLC authors and VideoLAN > + * > + * Author: Sam Hocevar <sam@zoy.org> > + * Damien Lucas <nitrox@videolan.org> > + * Laurent Aimar <fenrir@videolan.org> > + * Sigmund Augdal Helberg <sigmunau@videolan.org> > + * > + * These algorithms are derived from the VLC project's > + * modules/video_filter/deinterlace/algo_basic.c > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include "libavutil/avassert.h" > +#include "libavutil/cpu.h" > +#include "libavutil/common.h" > +#include "libavutil/opt.h" > +#include "libavutil/pixdesc.h" > +#include "libavutil/imgutils.h" > +#include "libavutil/timestamp.h" > +#include "avfilter.h" > +#include "formats.h" > +#include "internal.h" > +#include "video.h" > + > +enum Mode { > + MODE_DISCARD, > + MODE_MEAN, > + MODE_BLEND, > + MODE_BOB, > + MODE_LINEAR, > + MODE_MAX, > +}; > + > +typedef void (*merge_fn)(void *dst, const void *src1, const void *src2, > size_t len); > + > +typedef struct FastDeintContext { > + const AVClass *class; > + merge_fn merge; > + int merge_size; > + int merge_aligned; > + AVFrame *cur, *next; > + enum Mode mode; > + int eof; > +} FastDeintContext; > + > +static void merge8_c(uint8_t *dst, const uint8_t *src1, const uint8_t > *src2, size_t bytes) > +{ > + for (; bytes > 0; bytes--) > + *dst++ = ( *src1++ + *src2++ ) >> 1; > +} > + > +static void merge16_c(uint16_t *dst, const uint16_t *src1, const uint16_t > *src2, size_t bytes) > +{ > + for (size_t words = bytes / 2; words > 0; words--) > + *dst++ = ( *src1++ + *src2++ ) >> 1; > +} > + > +static void merge8_unaligned(FastDeintContext *s, uint8_t *dst, const > uint8_t *src1, const uint8_t *src2, size_t bytes) > +{ > + if (s->merge_aligned) { > + size_t remainder = bytes % 16; > + if (remainder > 0) { > + merge8_c(dst, src1, src2, remainder); > + bytes -= remainder; > + dst += remainder; > + src1 += remainder; > + src2 += remainder; > + } > + } > + s->merge(dst, src1, src2, bytes); > +} > + > +static void merge16_unaligned(FastDeintContext *s, uint16_t *dst, const > uint16_t *src1, const uint16_t *src2, size_t bytes) > +{ > + if (s->merge_aligned) { > + size_t words = bytes / 2; > + 
size_t remainder = words % 8; > + if (remainder > 0) { > + merge16_c(dst, src1, src2, remainder); > + words -= remainder; > + dst += remainder; > + src1 += remainder; > + src2 += remainder; > + } > + } > + s->merge(dst, src1, src2, bytes); > +} > + > +static void merge_unaligned(FastDeintContext *s, void *dst, const void > *src1, const void *src2, size_t bytes) > +{ > + if (s->merge_size == 16) > + merge16_unaligned(s, dst, src1, src2, bytes); > + else > + merge8_unaligned(s, dst, src1, src2, bytes); > +} > + > +#if HAVE_SSE2_INLINE && defined(__x86_64__) > +static void merge8_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t > *src2, size_t bytes) > +{ > + for(; bytes > 0 && ((uintptr_t)src1 & 15); bytes--) > + *dst++ = ( *src1++ + *src2++ ) >> 1; > + > + for (; bytes >= 16; bytes -= 16) { > + __asm__ __volatile__( "movdqu %2,%%xmm1;" > + "pavgb %1, %%xmm1;" > + "movdqu %%xmm1, %0" :"=m" (*dst): > + "m" (*src1), > + "m" (*src2) : "xmm1" ); > + dst += 16; > + src1 += 16; > + src2 += 16; > + } > + > + if (bytes > 0) { > + merge8_c(dst, src1, src2, bytes); > + } > +} > +static void merge16_sse2(uint16_t *dst, const uint16_t *src1, const > uint16_t *src2, size_t bytes) > +{ > + size_t words = bytes / 2; > + > + for(; words > 0 && ((uintptr_t)src1 & 15); words--) > + *dst++ = ( *src1++ + *src2++ ) >> 1; > + > + for (; words >= 8; words -= 8) { > + __asm__ __volatile__( "movdqu %2,%%xmm1;" > + "pavgw %1, %%xmm1;" > + "movdqu %%xmm1, %0" :"=m" (*dst): > + "m" (*src1), > + "m" (*src2) : "xmm1" ); > + dst += 8; > + src1 += 8; > + src2 += 8; > + } > + Unacceptable code. Inline assembly is forbidden. 
> + if (words > 0) { > + merge16_c(dst, src1, src2, words * 2); > + } > +} > +#define merge8 merge8_sse2 > +#define merge16 merge16_sse2 > +#else > +#define merge8 merge8_c > +#define merge16 merge16_c > +#endif > + > +static void render_image_single(FastDeintContext *s, AVFrame *out, AVFrame > *frame) > +{ > + int i, planes_nb = 0; > + enum Mode mode = s->mode; > + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(out->format); > + > + for (i = 0; i < desc->nb_components; i++) > + planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1); > + > + for (i = 0; i < planes_nb; i++) { > + int height, bwidth; > + int dst_linesize, src_linesize; > + const uint8_t *src; > + uint8_t *dst; > + > + bwidth = av_image_get_linesize(out->format, out->width, i); > + if (bwidth < 0) { > + av_log(s, AV_LOG_ERROR, "av_image_get_linesize failed\n"); > + return; > + } > + > + height = out->height; > + if (i == 1 || i == 2) { > + height = FF_CEIL_RSHIFT(out->height, desc->log2_chroma_h); > + } > + > + src = frame->data[i]; > + dst = out->data[i]; > + dst_linesize = out->linesize[i]; > + src_linesize = frame->linesize[i]; > + > + if (mode == MODE_BLEND) { > + // Copy first line > + memcpy(dst, src, bwidth); > + dst += dst_linesize; > + height--; > + } > + > + // Merge remaining lines > + for (; height > 0; height--) { > + if (mode == MODE_DISCARD) > + memcpy(dst, src, bwidth); > + else > + merge_unaligned(s, dst, src, src + src_linesize, bwidth); > + dst += dst_linesize; > + src += src_linesize; > + if (mode == MODE_MEAN || mode == MODE_DISCARD) { > + src += src_linesize; > + height--; > + } > + } > + } > + if (mode != MODE_DISCARD) > + emms_c(); > +} > + > +static void render_image_doubler(FastDeintContext *s, AVFrame *out, AVFrame > *frame, int field) > +{ > + int i, planes_nb = 0; > + enum Mode mode = s->mode; > + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(out->format); > + > + for (i = 0; i < desc->nb_components; i++) > + planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 
1); > + > + for (i = 0; i < planes_nb; i++) { > + int height, bwidth; > + int dst_linesize, src_linesize; > + const uint8_t *src; > + uint8_t *dst; > + > + bwidth = av_image_get_linesize(out->format, out->width, i); > + if (bwidth < 0) { > + av_log(s, AV_LOG_ERROR, "av_image_get_linesize failed\n"); > + return; > + } > + height = out->height; > + if (i == 1 || i == 2) { > + height = FF_CEIL_RSHIFT(out->height, desc->log2_chroma_h); > + } > + > + src = frame->data[i]; > + dst = out->data[i]; > + src_linesize = frame->linesize[i]; > + dst_linesize = out->linesize[i]; > + > + // For BOTTOM field we need to add the first line > + if (field == 1) { > + memcpy(dst, src, bwidth); > + dst += dst_linesize; > + src += src_linesize; > + height--; > + } > + > + height -= 2; > + > + for (; height > 0; height-=2) { > + memcpy(dst, src, bwidth); > + dst += dst_linesize; > + > + if (mode == MODE_LINEAR) > + merge_unaligned(s, dst, src, src + 2 * src_linesize, > bwidth); > + else > + memcpy(dst, src, bwidth); > + dst += dst_linesize; > + > + src += src_linesize * 2; > + } > + > + memcpy(dst, src, bwidth); > + > + // For TOP field we need to add the last line > + if (field == 0) > + { > + dst += dst_linesize; > + src += src_linesize; > + memcpy(dst, src, bwidth); > + } > + } > + if (mode == MODE_LINEAR) > + emms_c(); > +} > + > +static int filter_frame_single(AVFilterLink *link, AVFrame *frame) > +{ > + AVFilterContext *ctx = link->dst; > + AVFrame *out; > + FastDeintContext *s = ctx->priv; > + > + if (!frame->interlaced_frame) { > + return ff_filter_frame(ctx->outputs[0], frame); > + } > + > + out = ff_get_video_buffer(ctx->outputs[0], link->w, link->h); > + if (!out) { > + av_frame_free(&frame); > + return AVERROR(ENOMEM); > + } > + > + av_frame_copy_props(out, frame); > + out->interlaced_frame = 0; > + render_image_single(s, out, frame); > + > + av_frame_free(&frame); > + return ff_filter_frame(ctx->outputs[0], out); > +} > + > +static AVFrame *copy_frame(AVFilterLink *link, 
AVFrame *frame) > +{ > + AVFilterContext *ctx = link->dst; > + AVFrame *out; > + > + if (frame->format == AV_PIX_FMT_VIDEOTOOLBOX) > + out = av_frame_alloc(); > + else > + out = ff_get_video_buffer(ctx->outputs[0], link->w, link->h); > + > + if (!out) > + return NULL; > + > + av_frame_copy_props(out, frame); > + return out; > +} > + > +static int filter_frame_double(AVFilterLink *link, AVFrame *in) > +{ > + AVFilterContext *ctx = link->dst; > + FastDeintContext *s = ctx->priv; > + AVFrame *frame, *out, *out2; > + int tff, ret; > + > + s->cur = s->next; > + s->next = in; > + > + if (!s->cur) { > + return 0; > + } > + > + frame = s->cur; > + > + if (!frame->interlaced_frame) { > + if (frame->pts != AV_NOPTS_VALUE) > + frame->pts *= 2; > + s->cur = NULL; > + return ff_filter_frame(ctx->outputs[0], frame); > + } > + > + tff = frame->top_field_first; > + out = copy_frame(link, frame); > + if (!out) { > + av_frame_free(&frame); > + s->cur = NULL; > + return AVERROR(ENOMEM); > + } > + > + out->interlaced_frame = 0; > + if (out->pts != AV_NOPTS_VALUE) > + out->pts = out->pts * 2; > + render_image_doubler(s, out, frame, !tff); > + > + ret = ff_filter_frame(ctx->outputs[0], out); > + if (ret < 0) { > + av_frame_free(&frame); > + s->cur = NULL; > + return ret; > + } > + > + out2 = copy_frame(link, frame); > + if (!out2) { > + av_frame_free(&frame); > + s->cur = NULL; > + return AVERROR(ENOMEM); > + } > + > + out2->interlaced_frame = 0; > + av_frame_remove_side_data(out2, AV_FRAME_DATA_A53_CC); > + if (out2->pts != AV_NOPTS_VALUE) { > + out2->pts = frame->pts + s->next->pts; > + } > + render_image_doubler(s, out2, frame, tff); > + > + av_frame_free(&frame); > + s->cur = NULL; > + > + return ff_filter_frame(ctx->outputs[0], out2); > +} > + > +static int filter_frame(AVFilterLink *link, AVFrame *frame) > +{ > + AVFilterContext *ctx = link->dst; > + FastDeintContext *s = ctx->priv; > + > + av_assert0(frame); > + > + if (s->mode == MODE_LINEAR || s->mode == MODE_BOB) { > + return 
filter_frame_double(link, frame); > + } else { > + return filter_frame_single(link, frame); > + } > +} > + > +static av_cold void uninit(AVFilterContext *ctx) > +{ > + FastDeintContext *s = ctx->priv; > + av_frame_free(&s->cur); > + av_frame_free(&s->next); > +} > + > +static int query_formats(AVFilterContext *ctx) > +{ > + static const enum AVPixelFormat pix_fmts[] = { > + AV_PIX_FMT_YUV420P, > + AV_PIX_FMT_YUV422P, > + AV_PIX_FMT_YUV444P, > + AV_PIX_FMT_YUV410P, > + AV_PIX_FMT_YUV411P, > + AV_PIX_FMT_GRAY8, > + AV_PIX_FMT_YUVJ420P, > + AV_PIX_FMT_YUVJ422P, > + AV_PIX_FMT_YUVJ444P, > + AV_PIX_FMT_GRAY16, > + AV_PIX_FMT_YUV440P, > + AV_PIX_FMT_YUVJ440P, > + AV_PIX_FMT_YUV420P9, > + AV_PIX_FMT_YUV422P9, > + AV_PIX_FMT_YUV444P9, > + AV_PIX_FMT_YUV420P10, > + AV_PIX_FMT_YUV422P10, > + AV_PIX_FMT_YUV444P10, > + AV_PIX_FMT_YUV420P12, > + AV_PIX_FMT_YUV422P12, > + AV_PIX_FMT_YUV444P12, > + AV_PIX_FMT_YUV420P14, > + AV_PIX_FMT_YUV422P14, > + AV_PIX_FMT_YUV444P14, > + AV_PIX_FMT_YUV420P16, > + AV_PIX_FMT_YUV422P16, > + AV_PIX_FMT_YUV444P16, > + AV_PIX_FMT_YUVA420P, > + AV_PIX_FMT_YUVA422P, > + AV_PIX_FMT_YUVA444P, > + AV_PIX_FMT_GBRP, > + AV_PIX_FMT_GBRP9, > + AV_PIX_FMT_GBRP10, > + AV_PIX_FMT_GBRP12, > + AV_PIX_FMT_GBRP14, > + AV_PIX_FMT_GBRP16, > + AV_PIX_FMT_GBRAP, > + AV_PIX_FMT_NONE > + }; Group these on fewer lines somehow. 
> + > + AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts); > + if (!fmts_list) > + return AVERROR(ENOMEM); > + return ff_set_common_formats(ctx, fmts_list); > +} > + > +#if ARCH_ARM > +#include "libavutil/arm/cpu.h" > +#endif > +#if ARCH_AARCH64 > +#include "libavutil/aarch64/cpu.h" > +#endif > +#if ARCH_AARCH64 || ARCH_ARM > +void ff_merge8_neon(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, > size_t bytes); > +void ff_merge16_neon(uint16_t *dst, const uint16_t *src1, const uint16_t > *src2, size_t bytes); > +void ff_merge8_armv6(uint8_t *dst, const uint8_t *src1, const uint8_t > *src2, size_t bytes); > +void ff_merge16_armv6(uint16_t *dst, const uint16_t *src1, const uint16_t > *src2, size_t bytes); > +#endif > + I do not like this style. Look at what other filters do, such as one that adds x86 SIMD. > +static int config_props(AVFilterLink *link) > +{ > + AVFilterContext *ctx = link->src; > + FastDeintContext *s = ctx->priv; > + const AVPixFmtDescriptor *pix; > +#if ARCH_AARCH64 || ARCH_ARM > + int cpu_flags = av_get_cpu_flags(); > +#endif This belongs in a separate directory and file. See the aarch64 directory. > + > + link->w = link->src->inputs[0]->w; > + link->h = link->src->inputs[0]->h; > + link->time_base = link->src->inputs[0]->time_base; > + link->frame_rate = link->src->inputs[0]->frame_rate; > + link->sample_aspect_ratio = link->src->inputs[0]->sample_aspect_ratio; > + > + if (s->mode == MODE_MEAN || s->mode == MODE_DISCARD) { > + link->h /= 2; > + link->sample_aspect_ratio = av_mul_q(link->sample_aspect_ratio, > av_make_q(1, 2)); > + } > + if (s->mode == MODE_LINEAR || s->mode == MODE_BOB) { > + link->time_base = av_mul_q(link->time_base, av_make_q(1, 2)); > + link->frame_rate = av_mul_q(link->frame_rate, av_make_q(2, 1)); > + } > + > + pix = av_pix_fmt_desc_get(link->format); > + s->merge_size = (pix->comp[0].depth > 8) ? 16 : 8; > + s->merge = s->merge_size == 16 ? 
(merge_fn)merge16 : (merge_fn)merge8; > + > +#if ARCH_ARM > + if (have_armv6(cpu_flags)) { > + s->merge = s->merge_size == 16 ? (merge_fn)ff_merge16_armv6 : > (merge_fn)ff_merge8_armv6; > + s->merge_aligned = 1; > + } > +#endif > +#if ARCH_AARCH64 || ARCH_ARM > + if (have_neon(cpu_flags)) { > + s->merge = s->merge_size == 16 ? (merge_fn)ff_merge16_neon : > (merge_fn)ff_merge8_neon; > + s->merge_aligned = 1; > + } > +#endif > + > + return 0; > +} > + > +static int request_frame(AVFilterLink *link) > +{ > + AVFilterContext *ctx = link->src; > + FastDeintContext *s = ctx->priv; > + int ret; > + > + if (s->eof) > + return AVERROR_EOF; > + > + ret = ff_request_frame(ctx->inputs[0]); > + > + if (ret == AVERROR_EOF && s->cur) { > + AVFrame *next = av_frame_clone(s->next); > + if (!next) > + return AVERROR(ENOMEM); > + > + next->pts = s->next->pts * 2 - s->cur->pts; > + filter_frame(ctx->inputs[0], next); > + s->eof = 1; > + } else if (ret < 0) { > + return ret; > + } > + > + return 0; > +} > + > +#define OFFSET(x) offsetof(FastDeintContext, x) > +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM > + > +#define CONST(name, help, val, unit) { name, help, 0, AV_OPT_TYPE_CONST, > {.i64=val}, INT_MIN, INT_MAX, FLAGS, unit } > + > +static const AVOption fastdeint_options[] = { > + { "mode", "specify the deinterlacing mode", OFFSET(mode), > AV_OPT_TYPE_INT, {.i64=MODE_BLEND}, 0, MODE_MAX-1, FLAGS, "mode" }, > + CONST("discard", "discard bottom frame", MODE_DISCARD, "mode"), > + CONST("mean", "half resolution blender", MODE_MEAN, "mode"), > + CONST("blend", "full resolution blender", MODE_BLEND, "mode"), > + CONST("bob", "bob doubler", MODE_BOB, "mode"), > + CONST("linear", "bob doubler with linear interpolation", MODE_LINEAR, > "mode"), > + > + { NULL } > +}; > + > +AVFILTER_DEFINE_CLASS(fastdeint); > + > +static const AVFilterPad fastdeint_inputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + .filter_frame = filter_frame, > + }, > + { 
NULL } > +}; > + > +static const AVFilterPad fastdeint_outputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + .config_props = config_props, > + .request_frame = request_frame > + }, > + { NULL } > +}; > + > +AVFilter ff_vf_fastdeint = { > + .name = "fastdeint", > + .description = NULL_IF_CONFIG_SMALL("fast deinterlacing algorithms"), First letter should be capitalized. > + .priv_size = sizeof(FastDeintContext), > + .priv_class = &fastdeint_class, > + .uninit = uninit, > + .query_formats = query_formats, > + .inputs = fastdeint_inputs, > + .outputs = fastdeint_outputs, > +}; > -- > 2.20.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
On 9/9/2019 5:12 PM, Aman Gupta wrote: > From: Aman Gupta <aman@tmm1.net> > > These are simple algorithms which can be run efficiently > on low powered devices to produce deinteraced images. > > Signed-off-by: Aman Gupta <aman@tmm1.net> > --- > doc/filters.texi | 27 ++ > libavfilter/Makefile | 1 + > libavfilter/aarch64/Makefile | 1 + > libavfilter/aarch64/merge_neon.S | 98 ++++++ > libavfilter/allfilters.c | 1 + > libavfilter/arm/Makefile | 3 + > libavfilter/arm/merge_armv6.S | 70 ++++ > libavfilter/arm/merge_neon.S | 109 ++++++ > libavfilter/vf_fastdeint.c | 588 +++++++++++++++++++++++++++++++ > 9 files changed, 898 insertions(+) > create mode 100644 libavfilter/aarch64/merge_neon.S > create mode 100644 libavfilter/arm/Makefile > create mode 100644 libavfilter/arm/merge_armv6.S > create mode 100644 libavfilter/arm/merge_neon.S > create mode 100644 libavfilter/vf_fastdeint.c Asm stuff should be in a separate entry. > > diff --git a/doc/filters.texi b/doc/filters.texi > index 6c81e1da40..55d9adeb81 100644 > --- a/doc/filters.texi > +++ b/doc/filters.texi > @@ -9796,6 +9796,33 @@ fade=t=in:st=5.5:d=0.5 > > @end itemize > > +@section fastdeint > +Fast deinterlacing algorithms. > + > +@table @option > +@item mode > +Deinterlacing algorithm to use. > + > +It accepts the following values: > +@table @samp > +@item discard > +Discard bottom frame. > + > +@item mean > +Half resolution blender. > + > +@item blend > +Full resolution blender. > + > +@item bob > +Bob doubler. > + > +@item linear > +Bob doubler with linear interpolation. > +@end table > + > +@end table > + > @section fftdnoiz > Denoise frames using 3D FFT (frequency domain filtering). 
> > diff --git a/libavfilter/Makefile b/libavfilter/Makefile > index 3ef4191d9a..a2b3566ec0 100644 > --- a/libavfilter/Makefile > +++ b/libavfilter/Makefile > @@ -234,6 +234,7 @@ OBJS-$(CONFIG_EROSION_OPENCL_FILTER) += vf_neighbor_opencl.o opencl.o \ > opencl/neighbor.o > OBJS-$(CONFIG_EXTRACTPLANES_FILTER) += vf_extractplanes.o > OBJS-$(CONFIG_FADE_FILTER) += vf_fade.o > +OBJS-$(CONFIG_FASTDEINT_FILTER) += vf_fastdeint.o > OBJS-$(CONFIG_FFTDNOIZ_FILTER) += vf_fftdnoiz.o > OBJS-$(CONFIG_FFTFILT_FILTER) += vf_fftfilt.o > OBJS-$(CONFIG_FIELD_FILTER) += vf_field.o > diff --git a/libavfilter/aarch64/Makefile b/libavfilter/aarch64/Makefile > index b58daa3a3f..2b0ad92893 100644 > --- a/libavfilter/aarch64/Makefile > +++ b/libavfilter/aarch64/Makefile > @@ -1,3 +1,4 @@ > OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_init.o > > +NEON-OBJS-$(CONFIG_FASTDEINT_FILTER) += aarch64/merge_neon.o > NEON-OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_neon.o > diff --git a/libavfilter/aarch64/merge_neon.S b/libavfilter/aarch64/merge_neon.S > new file mode 100644 > index 0000000000..62377331a4 > --- /dev/null > +++ b/libavfilter/aarch64/merge_neon.S > @@ -0,0 +1,98 @@ > +/* > + * Copyright (c) 2009-2016 Rémi Denis-Courmont, Janne Grunau, VLC authors > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#include "libavutil/aarch64/asm.S" > + > +#define dest x0 > +#define src1 x1 > +#define src2 x2 > +#define size x3 > + > + .align 2 > + // NOTE: Offset and pitch must be multiple of 16-bytes. > +function ff_merge8_neon, export=1 > + ands x5, size, #~63 > + b.eq 2f > + mov x10, #64 > + add x11, src1, #32 > + add x12, src2, #32 > +1: > + ld1 {v0.16b,v1.16b}, [src1], x10 > + ld1 {v4.16b,v5.16b}, [src2], x10 > + ld1 {v2.16b,v3.16b}, [x11], x10 > + uhadd v0.16b, v0.16b, v4.16b > + ld1 {v6.16b,v7.16b}, [x12], x10 > + subs x5, x5, #64 > + uhadd v1.16b, v1.16b, v5.16b > + uhadd v2.16b, v2.16b, v6.16b > + uhadd v3.16b, v3.16b, v7.16b > + st1 {v0.16b,v1.16b}, [dest], #32 > + st1 {v2.16b,v3.16b}, [dest], #32 > + b.gt 1b > +2: > + tbz size, #5, 3f > + ld1 {v0.16b,v1.16b}, [src1], #32 > + ld1 {v4.16b,v5.16b}, [src2], #32 > + uhadd v0.16b, v0.16b, v4.16b > + uhadd v1.16b, v1.16b, v5.16b > + st1 {v0.16b,v1.16b}, [dest], #32 > +3: > + tbz size, #4, 4f > + ld1 {v0.16b}, [src1] > + ld1 {v4.16b}, [src2] > + uhadd v0.16b, v0.16b, v4.16b > + st1 {v0.16b}, [dest] > +4: > + ret > +endfunc > + > + .align 2 > +function ff_merge16_neon, export=1 > + ands x5, size, #~63 > + b.eq 2f > +1: > + ld1 {v0.8h,v1.8h}, [src1], #32 > + ld1 {v4.8h,v5.8h}, [src2], #32 > + ld1 {v2.8h,v3.8h}, [src1], #32 > + uhadd v0.8h, v0.8h, v4.8h > + ld1 {v6.8h,v7.8h}, [src2], #32 > + uhadd v1.8h, v1.8h, v5.8h > + uhadd v2.8h, v2.8h, v6.8h > + uhadd v3.8h, v3.8h, v7.8h > + st1 {v0.8h,v1.8h}, [dest], #32 > + st1 {v2.8h,v3.8h}, [dest], #32 > + subs x5, x5, #64 > + b.gt 1b > +2: > + tbz size, #5, 3f > + ld1 {v0.8h,v1.8h}, [src1], #32 > + ld1 {v4.8h,v5.8h}, [src2], #32 > + uhadd v0.8h, v0.8h, v4.8h > + uhadd v1.8h, v1.8h, v5.8h > + st1 {v0.8h,v1.8h}, [dest], #32 > +3: > + tbz size, 
#4, 4f > + ld1 {v0.8h}, [src1] > + ld1 {v4.8h}, [src2] > + uhadd v0.8h, v0.8h,v4.8h > + st1 {v0.8h}, [dest] > +4: > + ret > +endfunc > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c > index b675c688ee..6631af2ffe 100644 > --- a/libavfilter/allfilters.c > +++ b/libavfilter/allfilters.c > @@ -219,6 +219,7 @@ extern AVFilter ff_vf_erosion; > extern AVFilter ff_vf_erosion_opencl; > extern AVFilter ff_vf_extractplanes; > extern AVFilter ff_vf_fade; > +extern AVFilter ff_vf_fastdeint; > extern AVFilter ff_vf_fftdnoiz; > extern AVFilter ff_vf_fftfilt; > extern AVFilter ff_vf_field; > diff --git a/libavfilter/arm/Makefile b/libavfilter/arm/Makefile > new file mode 100644 > index 0000000000..c92d62fac9 > --- /dev/null > +++ b/libavfilter/arm/Makefile > @@ -0,0 +1,3 @@ > +ARMV6-OBJS-$(CONFIG_FASTDEINT_FILTER) += arm/merge_armv6.o > + > +NEON-OBJS-$(CONFIG_FASTDEINT_FILTER) += arm/merge_neon.o > diff --git a/libavfilter/arm/merge_armv6.S b/libavfilter/arm/merge_armv6.S > new file mode 100644 > index 0000000000..9b551c2c6c > --- /dev/null > +++ b/libavfilter/arm/merge_armv6.S > @@ -0,0 +1,70 @@ > +/* > + * Copyright (c) 2009-2012 Rémi Denis-Courmont, VLC authors > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#include "libavutil/arm/asm.S" > + > +#define dest r0 > +#define src1 r1 > +#define src2 r2 > +#define size r3 > + > + .align 2 > +function ff_merge8_armv6, export=1 > + push {r4-r9,lr} > +1: > + pld [src1, #64] > + ldm src1!, {r4-r5} > + pld [src2, #64] > + ldm src2!, {r8-r9} > + subs size, size, #16 > + uhadd8 r4, r4, r8 > + ldm src1!, {r6-r7} > + uhadd8 r5, r5, r9 > + ldm src2!, {ip,lr} > + uhadd8 r6, r6, ip > + stm dest!, {r4-r5} > + uhadd8 r7, r7, lr > + stm dest!, {r6-r7} > + it eq > + popeq {r4-r9,pc} > + b 1b > +endfunc > + > + .align 2 > +function ff_merge16_armv6, export=1 > + push {r4-r9,lr} > +1: > + pld [src1, #64] > + ldm src1!, {r4-r5} > + pld [src2, #64] > + ldm src2!, {r8-r9} > + subs size, size, #16 > + uhadd16 r4, r4, r8 > + ldm src1!, {r6-r7} > + uhadd16 r5, r5, r9 > + ldm src2!, {ip,lr} > + uhadd16 r6, r6, ip > + stm dest!, {r4-r5} > + uhadd16 r7, r7, lr > + stm dest!, {r6-r7} > + it eq > + popeq {r4-r9,pc} > + b 1b > +endfunc > \ No newline at end of file This shouldn't happen. > diff --git a/libavfilter/arm/merge_neon.S b/libavfilter/arm/merge_neon.S > new file mode 100644 > index 0000000000..ae36cf3ca9 > --- /dev/null > +++ b/libavfilter/arm/merge_neon.S > @@ -0,0 +1,109 @@ > +/* > + * Copyright (c) 2009-2012 Rémi Denis-Courmont, VLC authors > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. 
> + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#include "libavutil/arm/asm.S" > + > +#define dest r0 > +#define src1 r1 > +#define src2 r2 > +#define size r3 > + > + .align 2 > + @ NOTE: Offset and pitch must be multiple of 16-bytes. > +function ff_merge8_neon, export=1 > + cmp size, #64 > + blo 2f > +1: > + pld [src1, #64] > + vld1.u8 {q0-q1}, [src1,:128]! > + pld [src2, #64] > + vld1.u8 {q8-q9}, [src2,:128]! > + vhadd.u8 q0, q0, q8 > + sub size, size, #64 > + vld1.u8 {q2-q3}, [src1,:128]! > + vhadd.u8 q1, q1, q9 > + vld1.u8 {q10-q11}, [src2,:128]! > + vhadd.u8 q2, q2, q10 > + cmp size, #64 > + vhadd.u8 q3, q3, q11 > + vst1.u8 {q0-q1}, [dest,:128]! > + vst1.u8 {q2-q3}, [dest,:128]! > + bhs 1b > +2: > + cmp size, #32 > + blo 3f > + vld1.u8 {q0-q1}, [src1,:128]! > + sub size, size, #32 > + vld1.u8 {q8-q9}, [src2,:128]! > + vhadd.u8 q0, q0, q8 > + vhadd.u8 q1, q1, q9 > + vst1.u8 {q0-q1}, [dest,:128]! > +3: > + cmp size, #16 > + it lo > + bxlo lr > + vld1.u8 {q0}, [src1,:128]! > + sub size, size, #16 > + vld1.u8 {q8}, [src2,:128]! > + vhadd.u8 q0, q0, q8 > + vst1.u8 {q0}, [dest,:128]! > + bx lr > +endfunc > + > + .align 2 > +function ff_merge16_neon, export=1 > + cmp size, #64 > + blo 2f > +1: > + pld [src1, #64] > + vld1.u16 {q0-q1}, [src1,:128]! > + pld [src2, #64] > + vld1.u16 {q8-q9}, [src2,:128]! > + vhadd.u16 q0, q0, q8 > + sub size, size, #64 > + vld1.u16 {q2-q3}, [src1,:128]! > + vhadd.u16 q1, q1, q9 > + vld1.u16 {q10-q11}, [src2,:128]! 
> + vhadd.u16 q2, q2, q10 > + cmp size, #64 > + vhadd.u16 q3, q3, q11 > + vst1.u16 {q0-q1}, [dest,:128]! > + vst1.u16 {q2-q3}, [dest,:128]! > + bhs 1b > +2: > + cmp size, #32 > + blo 3f > + vld1.u16 {q0-q1}, [src1,:128]! > + sub size, size, #32 > + vld1.u16 {q8-q9}, [src2,:128]! > + vhadd.u16 q0, q0, q8 > + vhadd.u16 q1, q1, q9 > + vst1.u16 {q0-q1}, [dest,:128]! > +3: > + cmp size, #16 > + it lo > + bxlo lr > + vld1.u16 {q0}, [src1,:128]! > + sub size, size, #16 > + vld1.u16 {q8}, [src2,:128]! > + vhadd.u16 q0, q0, q8 > + vst1.u16 {q0}, [dest,:128]! > + bx lr > +endfunc > \ No newline at end of file > diff --git a/libavfilter/vf_fastdeint.c b/libavfilter/vf_fastdeint.c > new file mode 100644 > index 0000000000..5ddd8be392 > --- /dev/null > +++ b/libavfilter/vf_fastdeint.c > @@ -0,0 +1,588 @@ > +/* > + * Copyright (C) 2015 Aman Gupta <aman@tmm1.net> > + * 2000-2011 VLC authors and VideoLAN > + * > + * Author: Sam Hocevar <sam@zoy.org> > + * Damien Lucas <nitrox@videolan.org> > + * Laurent Aimar <fenrir@videolan.org> > + * Sigmund Augdal Helberg <sigmunau@videolan.org> > + * > + * These algorithms are derived from the VLC project's > + * modules/video_filter/deinterlace/algo_basic.c > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#include "libavutil/avassert.h" > +#include "libavutil/cpu.h" > +#include "libavutil/common.h" > +#include "libavutil/opt.h" > +#include "libavutil/pixdesc.h" > +#include "libavutil/imgutils.h" > +#include "libavutil/timestamp.h" > +#include "avfilter.h" > +#include "formats.h" > +#include "internal.h" > +#include "video.h" > + > +enum Mode { > + MODE_DISCARD, > + MODE_MEAN, > + MODE_BLEND, > + MODE_BOB, > + MODE_LINEAR, > + MODE_MAX, > +}; > + > +typedef void (*merge_fn)(void *dst, const void *src1, const void *src2, size_t len); > + > +typedef struct FastDeintContext { > + const AVClass *class; > + merge_fn merge; > + int merge_size; > + int merge_aligned; > + AVFrame *cur, *next; > + enum Mode mode; > + int eof; > +} FastDeintContext; > + > +static void merge8_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes) > +{ > + for (; bytes > 0; bytes--) > + *dst++ = ( *src1++ + *src2++ ) >> 1; > +} > + > +static void merge16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes) > +{ > + for (size_t words = bytes / 2; words > 0; words--) > + *dst++ = ( *src1++ + *src2++ ) >> 1; > +} > + > +static void merge8_unaligned(FastDeintContext *s, uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes) > +{ > + if (s->merge_aligned) { > + size_t remainder = bytes % 16; > + if (remainder > 0) { > + merge8_c(dst, src1, src2, remainder); > + bytes -= remainder; > + dst += remainder; > + src1 += remainder; > + src2 += remainder; > + } > + } > + s->merge(dst, src1, src2, bytes); > +} > + > +static void merge16_unaligned(FastDeintContext *s, uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes) > +{ > + if (s->merge_aligned) { > + size_t words = bytes / 2; > + size_t 
remainder = words % 8; > + if (remainder > 0) { > + merge16_c(dst, src1, src2, remainder); > + words -= remainder; > + dst += remainder; > + src1 += remainder; > + src2 += remainder; > + } > + } > + s->merge(dst, src1, src2, bytes); > +} > + > +static void merge_unaligned(FastDeintContext *s, void *dst, const void *src1, const void *src2, size_t bytes) > +{ > + if (s->merge_size == 16) > + merge16_unaligned(s, dst, src1, src2, bytes); > + else > + merge8_unaligned(s, dst, src1, src2, bytes); > +} > + > +#if HAVE_SSE2_INLINE && defined(__x86_64__) No inline asm. This code needs to be ported to nasm syntax. Also, no arch specific code should be present in arch agnostic source files, beyond calls to init() functions. > +static void merge8_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes) > +{ > + for(; bytes > 0 && ((uintptr_t)src1 & 15); bytes--) > + *dst++ = ( *src1++ + *src2++ ) >> 1; > + > + for (; bytes >= 16; bytes -= 16) { > + __asm__ __volatile__( "movdqu %2,%%xmm1;" > + "pavgb %1, %%xmm1;" > + "movdqu %%xmm1, %0" :"=m" (*dst): > + "m" (*src1), > + "m" (*src2) : "xmm1" ); > + dst += 16; > + src1 += 16; > + src2 += 16; > + } > + > + if (bytes > 0) { > + merge8_c(dst, src1, src2, bytes); > + } > +} > +static void merge16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes) > +{ > + size_t words = bytes / 2; > + > + for(; words > 0 && ((uintptr_t)src1 & 15); words--) > + *dst++ = ( *src1++ + *src2++ ) >> 1; > + > + for (; words >= 8; words -= 8) { > + __asm__ __volatile__( "movdqu %2,%%xmm1;" > + "pavgw %1, %%xmm1;" > + "movdqu %%xmm1, %0" :"=m" (*dst): > + "m" (*src1), > + "m" (*src2) : "xmm1" ); > + dst += 8; > + src1 += 8; > + src2 += 8; > + } > + > + if (words > 0) { > + merge16_c(dst, src1, src2, words * 2); > + } > +} > +#define merge8 merge8_sse2 > +#define merge16 merge16_sse2 > +#else > +#define merge8 merge8_c > +#define merge16 merge16_c > +#endif > + > +static void render_image_single(FastDeintContext 
*s, AVFrame *out, AVFrame *frame) > +{ > + int i, planes_nb = 0; > + enum Mode mode = s->mode; > + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(out->format); > + > + for (i = 0; i < desc->nb_components; i++) > + planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1); > + > + for (i = 0; i < planes_nb; i++) { > + int height, bwidth; > + int dst_linesize, src_linesize; > + const uint8_t *src; > + uint8_t *dst; > + > + bwidth = av_image_get_linesize(out->format, out->width, i); > + if (bwidth < 0) { > + av_log(s, AV_LOG_ERROR, "av_image_get_linesize failed\n"); > + return; > + } > + > + height = out->height; > + if (i == 1 || i == 2) { > + height = FF_CEIL_RSHIFT(out->height, desc->log2_chroma_h); > + } > + > + src = frame->data[i]; > + dst = out->data[i]; > + dst_linesize = out->linesize[i]; > + src_linesize = frame->linesize[i]; > + > + if (mode == MODE_BLEND) { > + // Copy first line > + memcpy(dst, src, bwidth); > + dst += dst_linesize; > + height--; > + } > + > + // Merge remaining lines > + for (; height > 0; height--) { > + if (mode == MODE_DISCARD) > + memcpy(dst, src, bwidth); > + else > + merge_unaligned(s, dst, src, src + src_linesize, bwidth); > + dst += dst_linesize; > + src += src_linesize; > + if (mode == MODE_MEAN || mode == MODE_DISCARD) { > + src += src_linesize; > + height--; > + } > + } > + } > + if (mode != MODE_DISCARD) > + emms_c(); > +} > + > +static void render_image_doubler(FastDeintContext *s, AVFrame *out, AVFrame *frame, int field) > +{ > + int i, planes_nb = 0; > + enum Mode mode = s->mode; > + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(out->format); > + > + for (i = 0; i < desc->nb_components; i++) > + planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1); > + > + for (i = 0; i < planes_nb; i++) { > + int height, bwidth; > + int dst_linesize, src_linesize; > + const uint8_t *src; > + uint8_t *dst; > + > + bwidth = av_image_get_linesize(out->format, out->width, i); > + if (bwidth < 0) { > + av_log(s, AV_LOG_ERROR, 
"av_image_get_linesize failed\n"); > + return; > + } > + height = out->height; > + if (i == 1 || i == 2) { > + height = FF_CEIL_RSHIFT(out->height, desc->log2_chroma_h); > + } > + > + src = frame->data[i]; > + dst = out->data[i]; > + src_linesize = frame->linesize[i]; > + dst_linesize = out->linesize[i]; > + > + // For BOTTOM field we need to add the first line > + if (field == 1) { > + memcpy(dst, src, bwidth); > + dst += dst_linesize; > + src += src_linesize; > + height--; > + } > + > + height -= 2; > + > + for (; height > 0; height-=2) { > + memcpy(dst, src, bwidth); > + dst += dst_linesize; > + > + if (mode == MODE_LINEAR) > + merge_unaligned(s, dst, src, src + 2 * src_linesize, bwidth); > + else > + memcpy(dst, src, bwidth); > + dst += dst_linesize; > + > + src += src_linesize * 2; > + } > + > + memcpy(dst, src, bwidth); > + > + // For TOP field we need to add the last line > + if (field == 0) > + { > + dst += dst_linesize; > + src += src_linesize; > + memcpy(dst, src, bwidth); > + } > + } > + if (mode == MODE_LINEAR) > + emms_c(); > +} > + > +static int filter_frame_single(AVFilterLink *link, AVFrame *frame) > +{ > + AVFilterContext *ctx = link->dst; > + AVFrame *out; > + FastDeintContext *s = ctx->priv; > + > + if (!frame->interlaced_frame) { > + return ff_filter_frame(ctx->outputs[0], frame); > + } > + > + out = ff_get_video_buffer(ctx->outputs[0], link->w, link->h); > + if (!out) { > + av_frame_free(&frame); > + return AVERROR(ENOMEM); > + } > + > + av_frame_copy_props(out, frame); > + out->interlaced_frame = 0; > + render_image_single(s, out, frame); > + > + av_frame_free(&frame); > + return ff_filter_frame(ctx->outputs[0], out); > +} > + > +static AVFrame *copy_frame(AVFilterLink *link, AVFrame *frame) > +{ > + AVFilterContext *ctx = link->dst; > + AVFrame *out; > + > + if (frame->format == AV_PIX_FMT_VIDEOTOOLBOX) > + out = av_frame_alloc(); > + else > + out = ff_get_video_buffer(ctx->outputs[0], link->w, link->h); > + > + if (!out) > + return NULL; > + 
> + av_frame_copy_props(out, frame); > + return out; > +} > + > +static int filter_frame_double(AVFilterLink *link, AVFrame *in) > +{ > + AVFilterContext *ctx = link->dst; > + FastDeintContext *s = ctx->priv; > + AVFrame *frame, *out, *out2; > + int tff, ret; > + > + s->cur = s->next; > + s->next = in; > + > + if (!s->cur) { > + return 0; > + } > + > + frame = s->cur; > + > + if (!frame->interlaced_frame) { > + if (frame->pts != AV_NOPTS_VALUE) > + frame->pts *= 2; > + s->cur = NULL; > + return ff_filter_frame(ctx->outputs[0], frame); > + } > + > + tff = frame->top_field_first; > + out = copy_frame(link, frame); > + if (!out) { > + av_frame_free(&frame); > + s->cur = NULL; > + return AVERROR(ENOMEM); > + } > + > + out->interlaced_frame = 0; > + if (out->pts != AV_NOPTS_VALUE) > + out->pts = out->pts * 2; > + render_image_doubler(s, out, frame, !tff); > + > + ret = ff_filter_frame(ctx->outputs[0], out); > + if (ret < 0) { > + av_frame_free(&frame); > + s->cur = NULL; > + return ret; > + } > + > + out2 = copy_frame(link, frame); > + if (!out2) { > + av_frame_free(&frame); > + s->cur = NULL; > + return AVERROR(ENOMEM); > + } > + > + out2->interlaced_frame = 0; > + av_frame_remove_side_data(out2, AV_FRAME_DATA_A53_CC); > + if (out2->pts != AV_NOPTS_VALUE) { > + out2->pts = frame->pts + s->next->pts; > + } > + render_image_doubler(s, out2, frame, tff); > + > + av_frame_free(&frame); > + s->cur = NULL; > + > + return ff_filter_frame(ctx->outputs[0], out2); > +} > + > +static int filter_frame(AVFilterLink *link, AVFrame *frame) > +{ > + AVFilterContext *ctx = link->dst; > + FastDeintContext *s = ctx->priv; > + > + av_assert0(frame); > + > + if (s->mode == MODE_LINEAR || s->mode == MODE_BOB) { > + return filter_frame_double(link, frame); > + } else { > + return filter_frame_single(link, frame); > + } > +} > + > +static av_cold void uninit(AVFilterContext *ctx) > +{ > + FastDeintContext *s = ctx->priv; > + av_frame_free(&s->cur); > + av_frame_free(&s->next); > +} > + > 
+static int query_formats(AVFilterContext *ctx) > +{ > + static const enum AVPixelFormat pix_fmts[] = { > + AV_PIX_FMT_YUV420P, > + AV_PIX_FMT_YUV422P, > + AV_PIX_FMT_YUV444P, > + AV_PIX_FMT_YUV410P, > + AV_PIX_FMT_YUV411P, > + AV_PIX_FMT_GRAY8, > + AV_PIX_FMT_YUVJ420P, > + AV_PIX_FMT_YUVJ422P, > + AV_PIX_FMT_YUVJ444P, > + AV_PIX_FMT_GRAY16, > + AV_PIX_FMT_YUV440P, > + AV_PIX_FMT_YUVJ440P, > + AV_PIX_FMT_YUV420P9, > + AV_PIX_FMT_YUV422P9, > + AV_PIX_FMT_YUV444P9, > + AV_PIX_FMT_YUV420P10, > + AV_PIX_FMT_YUV422P10, > + AV_PIX_FMT_YUV444P10, > + AV_PIX_FMT_YUV420P12, > + AV_PIX_FMT_YUV422P12, > + AV_PIX_FMT_YUV444P12, > + AV_PIX_FMT_YUV420P14, > + AV_PIX_FMT_YUV422P14, > + AV_PIX_FMT_YUV444P14, > + AV_PIX_FMT_YUV420P16, > + AV_PIX_FMT_YUV422P16, > + AV_PIX_FMT_YUV444P16, > + AV_PIX_FMT_YUVA420P, > + AV_PIX_FMT_YUVA422P, > + AV_PIX_FMT_YUVA444P, > + AV_PIX_FMT_GBRP, > + AV_PIX_FMT_GBRP9, > + AV_PIX_FMT_GBRP10, > + AV_PIX_FMT_GBRP12, > + AV_PIX_FMT_GBRP14, > + AV_PIX_FMT_GBRP16, > + AV_PIX_FMT_GBRAP, > + AV_PIX_FMT_NONE > + }; > + > + AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts); > + if (!fmts_list) > + return AVERROR(ENOMEM); > + return ff_set_common_formats(ctx, fmts_list); > +} > + > +#if ARCH_ARM > +#include "libavutil/arm/cpu.h" > +#endif > +#if ARCH_AARCH64 > +#include "libavutil/aarch64/cpu.h" > +#endif > +#if ARCH_AARCH64 || ARCH_ARM > +void ff_merge8_neon(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes); > +void ff_merge16_neon(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes); > +void ff_merge8_armv6(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes); > +void ff_merge16_armv6(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes); > +#endif > + > +static int config_props(AVFilterLink *link) > +{ > + AVFilterContext *ctx = link->src; > + FastDeintContext *s = ctx->priv; > + const AVPixFmtDescriptor *pix; > +#if ARCH_AARCH64 || ARCH_ARM > + int cpu_flags = 
av_get_cpu_flags(); > +#endif > + > + link->w = link->src->inputs[0]->w; > + link->h = link->src->inputs[0]->h; > + link->time_base = link->src->inputs[0]->time_base; > + link->frame_rate = link->src->inputs[0]->frame_rate; > + link->sample_aspect_ratio = link->src->inputs[0]->sample_aspect_ratio; > + > + if (s->mode == MODE_MEAN || s->mode == MODE_DISCARD) { > + link->h /= 2; > + link->sample_aspect_ratio = av_mul_q(link->sample_aspect_ratio, av_make_q(1, 2)); > + } > + if (s->mode == MODE_LINEAR || s->mode == MODE_BOB) { > + link->time_base = av_mul_q(link->time_base, av_make_q(1, 2)); > + link->frame_rate = av_mul_q(link->frame_rate, av_make_q(2, 1)); > + } > + > + pix = av_pix_fmt_desc_get(link->format); > + s->merge_size = (pix->comp[0].depth > 8) ? 16 : 8; > + s->merge = s->merge_size == 16 ? (merge_fn)merge16 : (merge_fn)merge8; > + > +#if ARCH_ARM > + if (have_armv6(cpu_flags)) { > + s->merge = s->merge_size == 16 ? (merge_fn)ff_merge16_armv6 : (merge_fn)ff_merge8_armv6; > + s->merge_aligned = 1; > + } > +#endif > +#if ARCH_AARCH64 || ARCH_ARM > + if (have_neon(cpu_flags)) { > + s->merge = s->merge_size == 16 ? (merge_fn)ff_merge16_neon : (merge_fn)ff_merge8_neon; > + s->merge_aligned = 1; > + } > +#endif As i mentioned above, this kind of initialization and any function prototypes should be added to init files in the respective folders. In here you should only call init() functions which will set the above. See how other filters do it, like tinterlace. 
> + > + return 0; > +} > + > +static int request_frame(AVFilterLink *link) > +{ > + AVFilterContext *ctx = link->src; > + FastDeintContext *s = ctx->priv; > + int ret; > + > + if (s->eof) > + return AVERROR_EOF; > + > + ret = ff_request_frame(ctx->inputs[0]); > + > + if (ret == AVERROR_EOF && s->cur) { > + AVFrame *next = av_frame_clone(s->next); > + if (!next) > + return AVERROR(ENOMEM); > + > + next->pts = s->next->pts * 2 - s->cur->pts; > + filter_frame(ctx->inputs[0], next); > + s->eof = 1; > + } else if (ret < 0) { > + return ret; > + } > + > + return 0; > +} > + > +#define OFFSET(x) offsetof(FastDeintContext, x) > +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM > + > +#define CONST(name, help, val, unit) { name, help, 0, AV_OPT_TYPE_CONST, {.i64=val}, INT_MIN, INT_MAX, FLAGS, unit } > + > +static const AVOption fastdeint_options[] = { > + { "mode", "specify the deinterlacing mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=MODE_BLEND}, 0, MODE_MAX-1, FLAGS, "mode" }, > + CONST("discard", "discard bottom frame", MODE_DISCARD, "mode"), > + CONST("mean", "half resolution blender", MODE_MEAN, "mode"), > + CONST("blend", "full resolution blender", MODE_BLEND, "mode"), > + CONST("bob", "bob doubler", MODE_BOB, "mode"), > + CONST("linear", "bob doubler with linear interpolation", MODE_LINEAR, "mode"), > + > + { NULL } > +}; > + > +AVFILTER_DEFINE_CLASS(fastdeint); > + > +static const AVFilterPad fastdeint_inputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + .filter_frame = filter_frame, > + }, > + { NULL } > +}; > + > +static const AVFilterPad fastdeint_outputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + .config_props = config_props, > + .request_frame = request_frame > + }, > + { NULL } > +}; > + > +AVFilter ff_vf_fastdeint = { > + .name = "fastdeint", > + .description = NULL_IF_CONFIG_SMALL("fast deinterlacing algorithms"), > + .priv_size = sizeof(FastDeintContext), > + .priv_class = 
&fastdeint_class, > + .uninit = uninit, > + .query_formats = query_formats, > + .inputs = fastdeint_inputs, > + .outputs = fastdeint_outputs, > +}; >
Am Mo., 9. Sept. 2019 um 22:19 Uhr schrieb Aman Gupta <ffmpeg@tmm1.net>: > > From: Aman Gupta <aman@tmm1.net> > > These are simple algorithms which can be run efficiently > on low powered devices to produce deinteraced images. Please provide some numbers about the performance (and subjective visual quality) of the new C code in comparison to existing deinterlacers in FFmpeg. Carl Eugen
On Mon, Sep 9, 2019 at 2:47 PM Carl Eugen Hoyos <ceffmpeg@gmail.com> wrote: > Am Mo., 9. Sept. 2019 um 22:19 Uhr schrieb Aman Gupta <ffmpeg@tmm1.net>: > > > > From: Aman Gupta <aman@tmm1.net> > > > > These are simple algorithms which can be run efficiently > > on low powered devices to produce deinteraced images. > > Please provide some numbers about the performance > (and subjective visual quality) of the new C code in > comparison to existing deinterlacers in FFmpeg. > Comparison of visual quality can be seen on VLC's website: https://wiki.videolan.org/Deinterlacing Regarding performance: none of the filters currently available in ffmpeg are fast enough to deinterlace video in real time on ARM chips used by popular Android or iOS devices. They're all very computationally expensive, and do not have any ARM SIMD implementations. The deinterlacers from VLC use simple mathematical averages optimized by SIMD, and have been used by VLC on such devices for many years. I don't have any hard numbers to share, but in my experience I can decode+deinterlace video for real-time playback in VLC on any cheap Android phone, whereas other ffmpeg-based players cannot. Aman > Carl Eugen > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
On Tue, Sep 10, 2019 at 12:00 AM Aman Gupta <ffmpeg@tmm1.net> wrote: > > On Mon, Sep 9, 2019 at 2:47 PM Carl Eugen Hoyos <ceffmpeg@gmail.com> wrote: > > > Am Mo., 9. Sept. 2019 um 22:19 Uhr schrieb Aman Gupta <ffmpeg@tmm1.net>: > > > > > > From: Aman Gupta <aman@tmm1.net> > > > > > > These are simple algorithms which can be run efficiently > > > on low powered devices to produce deinteraced images. > > > > Please provide some numbers about the performance > > (and subjective visual quality) of the new C code in > > comparison to existing deinterlacers in FFmpeg. > > > > Comparison of visual quality can be seen on VLC's website: > https://wiki.videolan.org/Deinterlacing > > Regarding performance- none of the filters currently available in ffmpeg > are fast enough to deinterlace video in real time on ARM chips used by > popular Android or iOS devices. They're all very computationally expensive, > and do not have any ARM SIMD implementations. The deinterlacers from VLC > use simple mathematical averages optimized by SIMD, and have been used by > VLC on such devices for many years. I don't have any hard numbers to share, > but in my experience I can decode+deinterlace video for real time playback > in VLC on any cheap Android phone, whereas other ffmpeg-based players > cannot. > None of those algorithms are really worth using; none are actual "deinterlacers". Blend and Mean are just plain terrible, and the other options are just dumb bobbers, which you can do with avfilter as-is today with a combination of the separatefields filter (which is zero-copy based on frame metadata only) and optional scaling afterwards. - Hendrik
On Mon, Sep 9, 2019 at 3:19 PM Hendrik Leppkes <h.leppkes@gmail.com> wrote: > On Tue, Sep 10, 2019 at 12:00 AM Aman Gupta <ffmpeg@tmm1.net> wrote: > > > > On Mon, Sep 9, 2019 at 2:47 PM Carl Eugen Hoyos <ceffmpeg@gmail.com> > wrote: > > > > > Am Mo., 9. Sept. 2019 um 22:19 Uhr schrieb Aman Gupta <ffmpeg@tmm1.net > >: > > > > > > > > From: Aman Gupta <aman@tmm1.net> > > > > > > > > These are simple algorithms which can be run efficiently > > > > on low powered devices to produce deinteraced images. > > > > > > Please provide some numbers about the performance > > > (and subjective visual quality) of the new C code in > > > comparison to existing deinterlacers in FFmpeg. > > > > > > > Comparison of visual quality can be seen on VLC's website: > > https://wiki.videolan.org/Deinterlacing > > > > Regarding performance- none of the filters currently available in ffmpeg > > are fast enough to deinterlace video in real time on ARM chips used by > > popular Android or iOS devices. They're all very computationally > expensive, > > and do not have any ARM SIMD implementations. The deinterlacers from VLC > > use simple mathematical averages optimized by SIMD, and have been used by > > VLC on such devices for many years. I don't have any hard numbers to > share, > > but in my experience I can decode+deinterlace video for real time > playback > > in VLC on any cheap Android phone, whereas other ffmpeg-based players > > cannot. > > > > None of those algorithms are really worth using, none are actual > "deinterlacers". Blend and Mean are just plain out terrible, and the > other options are just dumb bob'ers which you can do with avfilter > as-is today with a combination of the separatefields filter (which is > zero-copy based on frame metadata only) and optional scaling > afterwards. > I don't disagree that many of them are overly simplistic. I only copied them all for completeness sake. 
However, as terrible as they may be, they're not as bad as displaying interlaced frames directly. Blend and Linear produce acceptable image quality imho. Linear averages lines from both fields to generate a new image. Is something like this possible with any existing filter combined with separatefields? Aman > > - Hendrik > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Am Di., 10. Sept. 2019 um 00:00 Uhr schrieb Aman Gupta <ffmpeg@tmm1.net>: > > On Mon, Sep 9, 2019 at 2:47 PM Carl Eugen Hoyos <ceffmpeg@gmail.com> wrote: > > > Am Mo., 9. Sept. 2019 um 22:19 Uhr schrieb Aman Gupta <ffmpeg@tmm1.net>: > > > > > > From: Aman Gupta <aman@tmm1.net> > > > > > > These are simple algorithms which can be run efficiently > > > on low powered devices to produce deinteraced images. > > > > Please provide some numbers about the performance > > (and subjective visual quality) of the new C code in > > comparison to existing deinterlacers in FFmpeg. > > > > Comparison of visual quality can be seen on VLC's website: > https://wiki.videolan.org/Deinterlacing > > Regarding performance- none of the filters currently available in ffmpeg > are fast enough to deinterlace video in real time on ARM chips used by > popular Android or iOS devices. That was not my question and I believe the commit message absolutely needs some hints about the performance and the quality. Carl Eugen
On Tue, Sep 10, 2019 at 1:03 AM Aman Gupta <ffmpeg@tmm1.net> wrote: > On Mon, Sep 9, 2019 at 3:19 PM Hendrik Leppkes <h.leppkes@gmail.com> > wrote: > > > On Tue, Sep 10, 2019 at 12:00 AM Aman Gupta <ffmpeg@tmm1.net> wrote: > > > > > > On Mon, Sep 9, 2019 at 2:47 PM Carl Eugen Hoyos <ceffmpeg@gmail.com> > > wrote: > > > > > > > Am Mo., 9. Sept. 2019 um 22:19 Uhr schrieb Aman Gupta < > ffmpeg@tmm1.net > > >: > > > > > > > > > > From: Aman Gupta <aman@tmm1.net> > > > > > > > > > > These are simple algorithms which can be run efficiently > > > > > on low powered devices to produce deinteraced images. > > > > > > > > Please provide some numbers about the performance > > > > (and subjective visual quality) of the new C code in > > > > comparison to existing deinterlacers in FFmpeg. > > > > > > > > > > Comparison of visual quality can be seen on VLC's website: > > > https://wiki.videolan.org/Deinterlacing > > > > > > Regarding performance- none of the filters currently available in > ffmpeg > > > are fast enough to deinterlace video in real time on ARM chips used by > > > popular Android or iOS devices. They're all very computationally > > expensive, > > > and do not have any ARM SIMD implementations. The deinterlacers from > VLC > > > use simple mathematical averages optimized by SIMD, and have been used > by > > > VLC on such devices for many years. I don't have any hard numbers to > > share, > > > but in my experience I can decode+deinterlace video for real time > > playback > > > in VLC on any cheap Android phone, whereas other ffmpeg-based players > > > cannot. > > > > > > > None of those algorithms are really worth using, none are actual > > "deinterlacers". Blend and Mean are just plain out terrible, and the > > other options are just dumb bob'ers which you can do with avfilter > > as-is today with a combination of the separatefields filter (which is > > zero-copy based on frame metadata only) and optional scaling > > afterwards. 
> > > > I don't disagree that many of them are overly simplistic. I only copied > them all for completeness sake. > > However, as terrible as they may be they're not as bad as displaying > interlaced frames directly. Blend and Linear produce acceptable image > quality imho. > > Linear averages lines from both fields to generate a new image. Is > something like this possible with any existing filter combined with > separatefields? > I do not think its currently possible, anyway just add only linear variant and be done. > > Aman > > > > > > - Hendrik > > _______________________________________________ > > ffmpeg-devel mailing list > > ffmpeg-devel@ffmpeg.org > > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > > > To unsubscribe, visit link above, or email > > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff --git a/doc/filters.texi b/doc/filters.texi index 6c81e1da40..55d9adeb81 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -9796,6 +9796,33 @@ fade=t=in:st=5.5:d=0.5 @end itemize +@section fastdeint +Fast deinterlacing algorithms. + +@table @option +@item mode +Deinterlacing algorithm to use. + +It accepts the following values: +@table @samp +@item discard +Discard bottom frame. + +@item mean +Half resolution blender. + +@item blend +Full resolution blender. + +@item bob +Bob doubler. + +@item linear +Bob doubler with linear interpolation. +@end table + +@end table + @section fftdnoiz Denoise frames using 3D FFT (frequency domain filtering). diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 3ef4191d9a..a2b3566ec0 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -234,6 +234,7 @@ OBJS-$(CONFIG_EROSION_OPENCL_FILTER) += vf_neighbor_opencl.o opencl.o \ opencl/neighbor.o OBJS-$(CONFIG_EXTRACTPLANES_FILTER) += vf_extractplanes.o OBJS-$(CONFIG_FADE_FILTER) += vf_fade.o +OBJS-$(CONFIG_FASTDEINT_FILTER) += vf_fastdeint.o OBJS-$(CONFIG_FFTDNOIZ_FILTER) += vf_fftdnoiz.o OBJS-$(CONFIG_FFTFILT_FILTER) += vf_fftfilt.o OBJS-$(CONFIG_FIELD_FILTER) += vf_field.o diff --git a/libavfilter/aarch64/Makefile b/libavfilter/aarch64/Makefile index b58daa3a3f..2b0ad92893 100644 --- a/libavfilter/aarch64/Makefile +++ b/libavfilter/aarch64/Makefile @@ -1,3 +1,4 @@ OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_init.o +NEON-OBJS-$(CONFIG_FASTDEINT_FILTER) += aarch64/merge_neon.o NEON-OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_neon.o diff --git a/libavfilter/aarch64/merge_neon.S b/libavfilter/aarch64/merge_neon.S new file mode 100644 index 0000000000..62377331a4 --- /dev/null +++ b/libavfilter/aarch64/merge_neon.S @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2009-2016 Rémi Denis-Courmont, Janne Grunau, VLC authors + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/aarch64/asm.S" + +#define dest x0 +#define src1 x1 +#define src2 x2 +#define size x3 + + .align 2 + // NOTE: Offset and pitch must be multiple of 16-bytes. +function ff_merge8_neon, export=1 + ands x5, size, #~63 + b.eq 2f + mov x10, #64 + add x11, src1, #32 + add x12, src2, #32 +1: + ld1 {v0.16b,v1.16b}, [src1], x10 + ld1 {v4.16b,v5.16b}, [src2], x10 + ld1 {v2.16b,v3.16b}, [x11], x10 + uhadd v0.16b, v0.16b, v4.16b + ld1 {v6.16b,v7.16b}, [x12], x10 + subs x5, x5, #64 + uhadd v1.16b, v1.16b, v5.16b + uhadd v2.16b, v2.16b, v6.16b + uhadd v3.16b, v3.16b, v7.16b + st1 {v0.16b,v1.16b}, [dest], #32 + st1 {v2.16b,v3.16b}, [dest], #32 + b.gt 1b +2: + tbz size, #5, 3f + ld1 {v0.16b,v1.16b}, [src1], #32 + ld1 {v4.16b,v5.16b}, [src2], #32 + uhadd v0.16b, v0.16b, v4.16b + uhadd v1.16b, v1.16b, v5.16b + st1 {v0.16b,v1.16b}, [dest], #32 +3: + tbz size, #4, 4f + ld1 {v0.16b}, [src1] + ld1 {v4.16b}, [src2] + uhadd v0.16b, v0.16b, v4.16b + st1 {v0.16b}, [dest] +4: + ret +endfunc + + .align 2 +function ff_merge16_neon, export=1 + ands x5, size, #~63 + b.eq 2f +1: + ld1 {v0.8h,v1.8h}, [src1], #32 + ld1 {v4.8h,v5.8h}, [src2], #32 + ld1 {v2.8h,v3.8h}, [src1], #32 + uhadd v0.8h, v0.8h, v4.8h + ld1 {v6.8h,v7.8h}, [src2], #32 + uhadd 
v1.8h, v1.8h, v5.8h + uhadd v2.8h, v2.8h, v6.8h + uhadd v3.8h, v3.8h, v7.8h + st1 {v0.8h,v1.8h}, [dest], #32 + st1 {v2.8h,v3.8h}, [dest], #32 + subs x5, x5, #64 + b.gt 1b +2: + tbz size, #5, 3f + ld1 {v0.8h,v1.8h}, [src1], #32 + ld1 {v4.8h,v5.8h}, [src2], #32 + uhadd v0.8h, v0.8h, v4.8h + uhadd v1.8h, v1.8h, v5.8h + st1 {v0.8h,v1.8h}, [dest], #32 +3: + tbz size, #4, 4f + ld1 {v0.8h}, [src1] + ld1 {v4.8h}, [src2] + uhadd v0.8h, v0.8h,v4.8h + st1 {v0.8h}, [dest] +4: + ret +endfunc diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index b675c688ee..6631af2ffe 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -219,6 +219,7 @@ extern AVFilter ff_vf_erosion; extern AVFilter ff_vf_erosion_opencl; extern AVFilter ff_vf_extractplanes; extern AVFilter ff_vf_fade; +extern AVFilter ff_vf_fastdeint; extern AVFilter ff_vf_fftdnoiz; extern AVFilter ff_vf_fftfilt; extern AVFilter ff_vf_field; diff --git a/libavfilter/arm/Makefile b/libavfilter/arm/Makefile new file mode 100644 index 0000000000..c92d62fac9 --- /dev/null +++ b/libavfilter/arm/Makefile @@ -0,0 +1,3 @@ +ARMV6-OBJS-$(CONFIG_FASTDEINT_FILTER) += arm/merge_armv6.o + +NEON-OBJS-$(CONFIG_FASTDEINT_FILTER) += arm/merge_neon.o diff --git a/libavfilter/arm/merge_armv6.S b/libavfilter/arm/merge_armv6.S new file mode 100644 index 0000000000..9b551c2c6c --- /dev/null +++ b/libavfilter/arm/merge_armv6.S @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2009-2012 Rémi Denis-Courmont, VLC authors + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/arm/asm.S" + +#define dest r0 +#define src1 r1 +#define src2 r2 +#define size r3 + + .align 2 +function ff_merge8_armv6, export=1 + push {r4-r9,lr} +1: + pld [src1, #64] + ldm src1!, {r4-r5} + pld [src2, #64] + ldm src2!, {r8-r9} + subs size, size, #16 + uhadd8 r4, r4, r8 + ldm src1!, {r6-r7} + uhadd8 r5, r5, r9 + ldm src2!, {ip,lr} + uhadd8 r6, r6, ip + stm dest!, {r4-r5} + uhadd8 r7, r7, lr + stm dest!, {r6-r7} + it eq + popeq {r4-r9,pc} + b 1b +endfunc + + .align 2 +function ff_merge16_armv6, export=1 + push {r4-r9,lr} +1: + pld [src1, #64] + ldm src1!, {r4-r5} + pld [src2, #64] + ldm src2!, {r8-r9} + subs size, size, #16 + uhadd16 r4, r4, r8 + ldm src1!, {r6-r7} + uhadd16 r5, r5, r9 + ldm src2!, {ip,lr} + uhadd16 r6, r6, ip + stm dest!, {r4-r5} + uhadd16 r7, r7, lr + stm dest!, {r6-r7} + it eq + popeq {r4-r9,pc} + b 1b +endfunc \ No newline at end of file diff --git a/libavfilter/arm/merge_neon.S b/libavfilter/arm/merge_neon.S new file mode 100644 index 0000000000..ae36cf3ca9 --- /dev/null +++ b/libavfilter/arm/merge_neon.S @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2009-2012 Rémi Denis-Courmont, VLC authors + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/arm/asm.S" + +#define dest r0 +#define src1 r1 +#define src2 r2 +#define size r3 + + .align 2 + @ NOTE: Offset and pitch must be multiple of 16-bytes. +function ff_merge8_neon, export=1 + cmp size, #64 + blo 2f +1: + pld [src1, #64] + vld1.u8 {q0-q1}, [src1,:128]! + pld [src2, #64] + vld1.u8 {q8-q9}, [src2,:128]! + vhadd.u8 q0, q0, q8 + sub size, size, #64 + vld1.u8 {q2-q3}, [src1,:128]! + vhadd.u8 q1, q1, q9 + vld1.u8 {q10-q11}, [src2,:128]! + vhadd.u8 q2, q2, q10 + cmp size, #64 + vhadd.u8 q3, q3, q11 + vst1.u8 {q0-q1}, [dest,:128]! + vst1.u8 {q2-q3}, [dest,:128]! + bhs 1b +2: + cmp size, #32 + blo 3f + vld1.u8 {q0-q1}, [src1,:128]! + sub size, size, #32 + vld1.u8 {q8-q9}, [src2,:128]! + vhadd.u8 q0, q0, q8 + vhadd.u8 q1, q1, q9 + vst1.u8 {q0-q1}, [dest,:128]! +3: + cmp size, #16 + it lo + bxlo lr + vld1.u8 {q0}, [src1,:128]! + sub size, size, #16 + vld1.u8 {q8}, [src2,:128]! + vhadd.u8 q0, q0, q8 + vst1.u8 {q0}, [dest,:128]! + bx lr +endfunc + + .align 2 +function ff_merge16_neon, export=1 + cmp size, #64 + blo 2f +1: + pld [src1, #64] + vld1.u16 {q0-q1}, [src1,:128]! + pld [src2, #64] + vld1.u16 {q8-q9}, [src2,:128]! + vhadd.u16 q0, q0, q8 + sub size, size, #64 + vld1.u16 {q2-q3}, [src1,:128]! + vhadd.u16 q1, q1, q9 + vld1.u16 {q10-q11}, [src2,:128]! + vhadd.u16 q2, q2, q10 + cmp size, #64 + vhadd.u16 q3, q3, q11 + vst1.u16 {q0-q1}, [dest,:128]! + vst1.u16 {q2-q3}, [dest,:128]! + bhs 1b +2: + cmp size, #32 + blo 3f + vld1.u16 {q0-q1}, [src1,:128]! + sub size, size, #32 + vld1.u16 {q8-q9}, [src2,:128]! + vhadd.u16 q0, q0, q8 + vhadd.u16 q1, q1, q9 + vst1.u16 {q0-q1}, [dest,:128]! +3: + cmp size, #16 + it lo + bxlo lr + vld1.u16 {q0}, [src1,:128]! + sub size, size, #16 + vld1.u16 {q8}, [src2,:128]! 
+ vhadd.u16 q0, q0, q8 + vst1.u16 {q0}, [dest,:128]! + bx lr +endfunc \ No newline at end of file diff --git a/libavfilter/vf_fastdeint.c b/libavfilter/vf_fastdeint.c new file mode 100644 index 0000000000..5ddd8be392 --- /dev/null +++ b/libavfilter/vf_fastdeint.c @@ -0,0 +1,588 @@ +/* + * Copyright (C) 2015 Aman Gupta <aman@tmm1.net> + * 2000-2011 VLC authors and VideoLAN + * + * Author: Sam Hocevar <sam@zoy.org> + * Damien Lucas <nitrox@videolan.org> + * Laurent Aimar <fenrir@videolan.org> + * Sigmund Augdal Helberg <sigmunau@videolan.org> + * + * These algorithms are derived from the VLC project's + * modules/video_filter/deinterlace/algo_basic.c + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/cpu.h"
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/timestamp.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+/* Deinterlacing algorithms selectable through the "mode" option. */
+enum Mode {
+    MODE_DISCARD,
+    MODE_MEAN,
+    MODE_BLEND,
+    MODE_BOB,
+    MODE_LINEAR,
+    MODE_MAX,
+};
+
+/* Averages two source rows into dst; len is always a byte count. */
+typedef void (*merge_fn)(void *dst, const void *src1, const void *src2, size_t len);
+
+typedef struct FastDeintContext {
+    const AVClass *class;
+    merge_fn merge;         /* row-averaging routine chosen in config_props() */
+    int merge_size;         /* sample width handled by merge: 8 or 16 bits */
+    int merge_aligned;      /* non-zero when merge only accepts multiples of 16 bytes */
+    AVFrame *cur, *next;
+    enum Mode mode;
+    int eof;
+} FastDeintContext;
+
+/* Portable 8-bit average (truncating) of src1 and src2 over `bytes` bytes. */
+static void merge8_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes)
+{
+    for (; bytes > 0; bytes--)
+        *dst++ = ( *src1++ + *src2++ ) >> 1;
+}
+
+/* Portable 16-bit average (truncating); `bytes` is a byte count, i.e. bytes / 2 samples. */
+static void merge16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes)
+{
+    for (size_t words = bytes / 2; words > 0; words--)
+        *dst++ = ( *src1++ + *src2++ ) >> 1;
+}
+
+/*
+ * Average one row of 8-bit samples whose length need not be a multiple of 16.
+ *
+ * When the selected routine is flagged merge_aligned, it is given only the
+ * leading multiple-of-16 part of the row and the tail is finished in C.
+ * The tail is handled last so the SIMD routine sees the row-start pointers
+ * unchanged (the ARM NEON code uses :128 alignment hints; this assumes row
+ * starts are 16-byte aligned - TODO confirm against the frame allocator).
+ * The SIMD routine is skipped when no full 16-byte group exists, because
+ * the ARMv6 loop only terminates when its size reaches exactly zero.
+ */
+static void merge8_unaligned(FastDeintContext *s, uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes)
+{
+    size_t bulk = bytes;
+
+    if (s->merge_aligned)
+        bulk -= bytes % 16;
+
+    if (bulk > 0)
+        s->merge(dst, src1, src2, bulk);
+    if (bytes > bulk)
+        merge8_c(dst + bulk, src1 + bulk, src2 + bulk, bytes - bulk);
+}
+
+/*
+ * 16-bit counterpart of merge8_unaligned(). `bytes` is a byte count; the
+ * pointers are sample pointers, hence the division by two when skipping
+ * the part already handled by the SIMD routine.
+ */
+static void merge16_unaligned(FastDeintContext *s, uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes)
+{
+    size_t bulk = bytes;
+
+    if (s->merge_aligned)
+        bulk -= bytes % 16;
+
+    if (bulk > 0)
+        s->merge(dst, src1, src2, bulk);
+    if (bytes > bulk)
+        merge16_c(dst + bulk / 2, src1 + bulk / 2, src2 + bulk / 2, bytes - bulk);
+}
+
+/* Dispatch on the sample width recorded in the context. */
+static void merge_unaligned(FastDeintContext *s, void *dst, const void *src1, const void *src2, size_t bytes)
+{
+    if (s->merge_size == 16)
+        merge16_unaligned(s, dst, src1, src2, bytes);
+    else
+        merge8_unaligned(s, dst, src1, src2, bytes);
+}
+
+#if HAVE_SSE2_INLINE && defined(__x86_64__)
+/*
+ * SSE2 8-bit average: scalar head until src1 is 16-byte aligned, 16 bytes
+ * per iteration with pavgb, scalar tail via merge8_c().
+ * NOTE(review): pavgb rounds up while the scalar head/tail truncate, so
+ * results may differ by one LSB within a row - confirm this is acceptable.
+ */
+static void merge8_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes)
+{
+    for(; bytes > 0 && ((uintptr_t)src1 & 15); bytes--)
+        *dst++ = ( *src1++ + *src2++ ) >> 1;
+
+    for (; bytes >= 16; bytes -= 16) {
+        __asm__ __volatile__( "movdqu %2,%%xmm1;"
+                              "pavgb %1, %%xmm1;"
+                              "movdqu %%xmm1, %0" :"=m" (*dst):
+                                                  "m" (*src1),
+                                                  "m" (*src2) : "xmm1" );
+        dst += 16;
+        src1 += 16;
+        src2 += 16;
+    }
+
+    if (bytes > 0) {
+        merge8_c(dst, src1, src2, bytes);
+    }
+}
+/* SSE2 16-bit average; same structure as merge8_sse2(), 8 samples per iteration. */
+static void merge16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes)
+{
+    size_t words = bytes / 2;
+
+    for(; words > 0 && ((uintptr_t)src1 & 15); words--)
+        *dst++ = ( *src1++ + *src2++ ) >> 1;
+
+    for (; words >= 8; words -= 8) {
+        __asm__ __volatile__( "movdqu %2,%%xmm1;"
+                              "pavgw %1, %%xmm1;"
+                              "movdqu %%xmm1, %0" :"=m" (*dst):
+                                                  "m" (*src1),
+                                                  "m" (*src2) : "xmm1" );
+        dst += 8;
+        src1 += 8;
+        src2 += 8;
+    }
+
+    if (words > 0) {
+        merge16_c(dst, src1, src2, words * 2);
+    }
+}
+#define merge8 merge8_sse2
+#define merge16 merge16_sse2
+#else
+#define merge8 merge8_c
+#define merge16 merge16_c
+#endif
+
+static void render_image_single(FastDeintContext *s, AVFrame *out, AVFrame *frame)
+{
+    int i, planes_nb = 0;
+    enum Mode mode = s->mode;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(out->format);
+
+    for (i = 0; i < desc->nb_components; i++)
+        planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1);
+
+    for (i = 0; i < planes_nb; i++) {
+        int height, bwidth;
+        int dst_linesize, src_linesize;
+        const uint8_t *src;
+        uint8_t *dst;
+
+        bwidth = av_image_get_linesize(out->format, out->width, i);
+        if
(bwidth < 0) { + av_log(s, AV_LOG_ERROR, "av_image_get_linesize failed\n"); + return; + } + + height = out->height; + if (i == 1 || i == 2) { + height = FF_CEIL_RSHIFT(out->height, desc->log2_chroma_h); + } + + src = frame->data[i]; + dst = out->data[i]; + dst_linesize = out->linesize[i]; + src_linesize = frame->linesize[i]; + + if (mode == MODE_BLEND) { + // Copy first line + memcpy(dst, src, bwidth); + dst += dst_linesize; + height--; + } + + // Merge remaining lines + for (; height > 0; height--) { + if (mode == MODE_DISCARD) + memcpy(dst, src, bwidth); + else + merge_unaligned(s, dst, src, src + src_linesize, bwidth); + dst += dst_linesize; + src += src_linesize; + if (mode == MODE_MEAN || mode == MODE_DISCARD) { + src += src_linesize; + height--; + } + } + } + if (mode != MODE_DISCARD) + emms_c(); +} + +static void render_image_doubler(FastDeintContext *s, AVFrame *out, AVFrame *frame, int field) +{ + int i, planes_nb = 0; + enum Mode mode = s->mode; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(out->format); + + for (i = 0; i < desc->nb_components; i++) + planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1); + + for (i = 0; i < planes_nb; i++) { + int height, bwidth; + int dst_linesize, src_linesize; + const uint8_t *src; + uint8_t *dst; + + bwidth = av_image_get_linesize(out->format, out->width, i); + if (bwidth < 0) { + av_log(s, AV_LOG_ERROR, "av_image_get_linesize failed\n"); + return; + } + height = out->height; + if (i == 1 || i == 2) { + height = FF_CEIL_RSHIFT(out->height, desc->log2_chroma_h); + } + + src = frame->data[i]; + dst = out->data[i]; + src_linesize = frame->linesize[i]; + dst_linesize = out->linesize[i]; + + // For BOTTOM field we need to add the first line + if (field == 1) { + memcpy(dst, src, bwidth); + dst += dst_linesize; + src += src_linesize; + height--; + } + + height -= 2; + + for (; height > 0; height-=2) { + memcpy(dst, src, bwidth); + dst += dst_linesize; + + if (mode == MODE_LINEAR) + merge_unaligned(s, dst, src, src 
+ 2 * src_linesize, bwidth); + else + memcpy(dst, src, bwidth); + dst += dst_linesize; + + src += src_linesize * 2; + } + + memcpy(dst, src, bwidth); + + // For TOP field we need to add the last line + if (field == 0) + { + dst += dst_linesize; + src += src_linesize; + memcpy(dst, src, bwidth); + } + } + if (mode == MODE_LINEAR) + emms_c(); +} + +static int filter_frame_single(AVFilterLink *link, AVFrame *frame) +{ + AVFilterContext *ctx = link->dst; + AVFrame *out; + FastDeintContext *s = ctx->priv; + + if (!frame->interlaced_frame) { + return ff_filter_frame(ctx->outputs[0], frame); + } + + out = ff_get_video_buffer(ctx->outputs[0], link->w, link->h); + if (!out) { + av_frame_free(&frame); + return AVERROR(ENOMEM); + } + + av_frame_copy_props(out, frame); + out->interlaced_frame = 0; + render_image_single(s, out, frame); + + av_frame_free(&frame); + return ff_filter_frame(ctx->outputs[0], out); +} + +static AVFrame *copy_frame(AVFilterLink *link, AVFrame *frame) +{ + AVFilterContext *ctx = link->dst; + AVFrame *out; + + if (frame->format == AV_PIX_FMT_VIDEOTOOLBOX) + out = av_frame_alloc(); + else + out = ff_get_video_buffer(ctx->outputs[0], link->w, link->h); + + if (!out) + return NULL; + + av_frame_copy_props(out, frame); + return out; +} + +static int filter_frame_double(AVFilterLink *link, AVFrame *in) +{ + AVFilterContext *ctx = link->dst; + FastDeintContext *s = ctx->priv; + AVFrame *frame, *out, *out2; + int tff, ret; + + s->cur = s->next; + s->next = in; + + if (!s->cur) { + return 0; + } + + frame = s->cur; + + if (!frame->interlaced_frame) { + if (frame->pts != AV_NOPTS_VALUE) + frame->pts *= 2; + s->cur = NULL; + return ff_filter_frame(ctx->outputs[0], frame); + } + + tff = frame->top_field_first; + out = copy_frame(link, frame); + if (!out) { + av_frame_free(&frame); + s->cur = NULL; + return AVERROR(ENOMEM); + } + + out->interlaced_frame = 0; + if (out->pts != AV_NOPTS_VALUE) + out->pts = out->pts * 2; + render_image_doubler(s, out, frame, !tff); + 
+ ret = ff_filter_frame(ctx->outputs[0], out); + if (ret < 0) { + av_frame_free(&frame); + s->cur = NULL; + return ret; + } + + out2 = copy_frame(link, frame); + if (!out2) { + av_frame_free(&frame); + s->cur = NULL; + return AVERROR(ENOMEM); + } + + out2->interlaced_frame = 0; + av_frame_remove_side_data(out2, AV_FRAME_DATA_A53_CC); + if (out2->pts != AV_NOPTS_VALUE) { + out2->pts = frame->pts + s->next->pts; + } + render_image_doubler(s, out2, frame, tff); + + av_frame_free(&frame); + s->cur = NULL; + + return ff_filter_frame(ctx->outputs[0], out2); +} + +static int filter_frame(AVFilterLink *link, AVFrame *frame) +{ + AVFilterContext *ctx = link->dst; + FastDeintContext *s = ctx->priv; + + av_assert0(frame); + + if (s->mode == MODE_LINEAR || s->mode == MODE_BOB) { + return filter_frame_double(link, frame); + } else { + return filter_frame_single(link, frame); + } +} + +static av_cold void uninit(AVFilterContext *ctx) +{ + FastDeintContext *s = ctx->priv; + av_frame_free(&s->cur); + av_frame_free(&s->next); +} + +static int query_formats(AVFilterContext *ctx) +{ + static const enum AVPixelFormat pix_fmts[] = { + AV_PIX_FMT_YUV420P, + AV_PIX_FMT_YUV422P, + AV_PIX_FMT_YUV444P, + AV_PIX_FMT_YUV410P, + AV_PIX_FMT_YUV411P, + AV_PIX_FMT_GRAY8, + AV_PIX_FMT_YUVJ420P, + AV_PIX_FMT_YUVJ422P, + AV_PIX_FMT_YUVJ444P, + AV_PIX_FMT_GRAY16, + AV_PIX_FMT_YUV440P, + AV_PIX_FMT_YUVJ440P, + AV_PIX_FMT_YUV420P9, + AV_PIX_FMT_YUV422P9, + AV_PIX_FMT_YUV444P9, + AV_PIX_FMT_YUV420P10, + AV_PIX_FMT_YUV422P10, + AV_PIX_FMT_YUV444P10, + AV_PIX_FMT_YUV420P12, + AV_PIX_FMT_YUV422P12, + AV_PIX_FMT_YUV444P12, + AV_PIX_FMT_YUV420P14, + AV_PIX_FMT_YUV422P14, + AV_PIX_FMT_YUV444P14, + AV_PIX_FMT_YUV420P16, + AV_PIX_FMT_YUV422P16, + AV_PIX_FMT_YUV444P16, + AV_PIX_FMT_YUVA420P, + AV_PIX_FMT_YUVA422P, + AV_PIX_FMT_YUVA444P, + AV_PIX_FMT_GBRP, + AV_PIX_FMT_GBRP9, + AV_PIX_FMT_GBRP10, + AV_PIX_FMT_GBRP12, + AV_PIX_FMT_GBRP14, + AV_PIX_FMT_GBRP16, + AV_PIX_FMT_GBRAP, + AV_PIX_FMT_NONE + }; + + 
AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts); + if (!fmts_list) + return AVERROR(ENOMEM); + return ff_set_common_formats(ctx, fmts_list); +} + +#if ARCH_ARM +#include "libavutil/arm/cpu.h" +#endif +#if ARCH_AARCH64 +#include "libavutil/aarch64/cpu.h" +#endif +#if ARCH_AARCH64 || ARCH_ARM +void ff_merge8_neon(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes); +void ff_merge16_neon(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes); +void ff_merge8_armv6(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes); +void ff_merge16_armv6(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes); +#endif + +static int config_props(AVFilterLink *link) +{ + AVFilterContext *ctx = link->src; + FastDeintContext *s = ctx->priv; + const AVPixFmtDescriptor *pix; +#if ARCH_AARCH64 || ARCH_ARM + int cpu_flags = av_get_cpu_flags(); +#endif + + link->w = link->src->inputs[0]->w; + link->h = link->src->inputs[0]->h; + link->time_base = link->src->inputs[0]->time_base; + link->frame_rate = link->src->inputs[0]->frame_rate; + link->sample_aspect_ratio = link->src->inputs[0]->sample_aspect_ratio; + + if (s->mode == MODE_MEAN || s->mode == MODE_DISCARD) { + link->h /= 2; + link->sample_aspect_ratio = av_mul_q(link->sample_aspect_ratio, av_make_q(1, 2)); + } + if (s->mode == MODE_LINEAR || s->mode == MODE_BOB) { + link->time_base = av_mul_q(link->time_base, av_make_q(1, 2)); + link->frame_rate = av_mul_q(link->frame_rate, av_make_q(2, 1)); + } + + pix = av_pix_fmt_desc_get(link->format); + s->merge_size = (pix->comp[0].depth > 8) ? 16 : 8; + s->merge = s->merge_size == 16 ? (merge_fn)merge16 : (merge_fn)merge8; + +#if ARCH_ARM + if (have_armv6(cpu_flags)) { + s->merge = s->merge_size == 16 ? (merge_fn)ff_merge16_armv6 : (merge_fn)ff_merge8_armv6; + s->merge_aligned = 1; + } +#endif +#if ARCH_AARCH64 || ARCH_ARM + if (have_neon(cpu_flags)) { + s->merge = s->merge_size == 16 ? 
(merge_fn)ff_merge16_neon : (merge_fn)ff_merge8_neon;
+        s->merge_aligned = 1;
+    }
+#endif
+
+    return 0;
+}
+
+/*
+ * Pull one frame from upstream and, at end of stream, flush the frame still
+ * buffered in s->next by the frame-doubling modes.
+ *
+ * filter_frame_double() always leaves s->cur NULL and keeps the most recent
+ * input in s->next, so the pending frame is detected through s->next.
+ * A clone of it is pushed as the "following" frame so that both fields of
+ * the last real frame are emitted. The clone's pts is advanced by one input
+ * frame period (derived from the input frame rate when it is known) so the
+ * second field receives a timestamp after the first.
+ *
+ * Returns 0 on success, AVERROR_EOF once everything has been flushed, or a
+ * negative error code from upstream or from filtering the flushed frame.
+ */
+static int request_frame(AVFilterLink *link)
+{
+    AVFilterContext *ctx = link->src;
+    FastDeintContext *s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFrame *next;
+    int64_t period = 0;
+    int ret;
+
+    if (s->eof)
+        return AVERROR_EOF;
+
+    ret = ff_request_frame(inlink);
+    if (ret != AVERROR_EOF)
+        return ret < 0 ? ret : 0;
+
+    /* Nothing buffered (single-frame modes, or no input at all). */
+    if (!s->next)
+        return ret;
+
+    next = av_frame_clone(s->next);
+    if (!next)
+        return AVERROR(ENOMEM);
+
+    if (inlink->frame_rate.num > 0 && inlink->frame_rate.den > 0)
+        period = av_rescale_q(1, av_inv_q(inlink->frame_rate), inlink->time_base);
+    if (next->pts != AV_NOPTS_VALUE && period > 0)
+        next->pts += period;
+
+    s->eof = 1;
+    /* filter_frame() takes ownership of next (it ends up in s->next). */
+    ret = filter_frame(inlink, next);
+    return ret < 0 ? ret : 0;
+}
+
+#define OFFSET(x) offsetof(FastDeintContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+#define CONST(name, help, val, unit) { name, help, 0, AV_OPT_TYPE_CONST, {.i64=val}, INT_MIN, INT_MAX, FLAGS, unit }
+
+static const AVOption fastdeint_options[] = {
+    { "mode", "specify the deinterlacing mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=MODE_BLEND}, 0, MODE_MAX-1, FLAGS, "mode" },
+    CONST("discard", "discard bottom frame", MODE_DISCARD, "mode"),
+    CONST("mean", "half resolution blender", MODE_MEAN, "mode"),
+    CONST("blend", "full resolution blender", MODE_BLEND, "mode"),
+    CONST("bob", "bob doubler", MODE_BOB, "mode"),
+    CONST("linear", "bob doubler with linear interpolation", MODE_LINEAR, "mode"),
+
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(fastdeint);
+
+static const AVFilterPad fastdeint_inputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad fastdeint_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_props,
+        .request_frame = request_frame
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_fastdeint = {
+    .name = "fastdeint",
+    .description = NULL_IF_CONFIG_SMALL("fast deinterlacing algorithms"),
+    .priv_size = sizeof(FastDeintContext),
+    .priv_class = &fastdeint_class,
+    .uninit = uninit,
+    .query_formats = query_formats,
+    .inputs = fastdeint_inputs,
+    .outputs = fastdeint_outputs,
+};