Message ID | 20170403150042.2554-1-lumosomul@gmail.com |
---|---|
State | New |
2017-04-03 23:00 GMT+08:00 Betty Wu <lumosomul@gmail.com>: > A new filter ANSNR is added. libavfilter/Makefile is changed. > Run 'ffmpeg -i input1 -i input2 -lavfi ansnr -f null -' to get an overall > score while per-frame value is stored but not printed. > This implementation is for constructing the vmaf filter later since ANSNR > is one of individual tools used in vmaf. > > Signed-off-by: Betty Wu <lumosomul@gmail.com> > --- > libavfilter/Makefile | 1 + > libavfilter/allfilters.c | 1 + > libavfilter/vf_ansnr.c | 425 ++++++++++++++++++++++++++++++ > +++++++++++++++++ > 3 files changed, 427 insertions(+) > create mode 100644 libavfilter/vf_ansnr.c > > diff --git a/libavfilter/Makefile b/libavfilter/Makefile > index 9c15ed62d2..5416e0f34f 100644 > --- a/libavfilter/Makefile > +++ b/libavfilter/Makefile > @@ -123,6 +123,7 @@ OBJS-$(CONFIG_ANULLSINK_FILTER) += > asink_anullsink.o > # video filters > OBJS-$(CONFIG_ALPHAEXTRACT_FILTER) += vf_extractplanes.o > OBJS-$(CONFIG_ALPHAMERGE_FILTER) += vf_alphamerge.o > +OBJS-$(CONFIG_ANSNR_FILTER) += vf_ansnr.o dualinput.o > framesync.o > OBJS-$(CONFIG_ASS_FILTER) += vf_subtitles.o > OBJS-$(CONFIG_ATADENOISE_FILTER) += vf_atadenoise.o > OBJS-$(CONFIG_AVGBLUR_FILTER) += vf_avgblur.o > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c > index 64b634e8f3..fbd9ec026e 100644 > --- a/libavfilter/allfilters.c > +++ b/libavfilter/allfilters.c > @@ -134,6 +134,7 @@ static void register_all(void) > > REGISTER_FILTER(ALPHAEXTRACT, alphaextract, vf); > REGISTER_FILTER(ALPHAMERGE, alphamerge, vf); > + REGISTER_FILTER(ANSNR, ansnr, vf); > REGISTER_FILTER(ASS, ass, vf); > REGISTER_FILTER(ATADENOISE, atadenoise, vf); > REGISTER_FILTER(AVGBLUR, avgblur, vf); > diff --git a/libavfilter/vf_ansnr.c b/libavfilter/vf_ansnr.c > new file mode 100644 > index 0000000000..cfea6efbd4 > --- /dev/null > +++ b/libavfilter/vf_ansnr.c > @@ -0,0 +1,425 @@ > +/* > + * Copyright (c) 2017 Betty Wu > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 USA > + */ > + > +/* > + * @file > + * Caculate the ANSNR between two input videos. 
> + * @author Betty Wu > + */ > + > +#include "libavutil/avstring.h" > +#include "libavutil/opt.h" > +#include "libavutil/pixdesc.h" > +#include "avfilter.h" > +#include "dualinput.h" > +#include "drawutils.h" > +#include "formats.h" > +#include "internal.h" > +#include "video.h" > + > +#define OPT_RANGE_PIXEL_OFFSET -128 > + > +typedef double number_t; > + > +typedef struct ANSNRContext { > + const AVClass *class; > + FFDualInputContext dinput; > + uint64_t nb_frames; > + FILE *stats_file; > + char *stats_file_str; > + int stats_version; > + int stats_header_written; > + int stats_add_max; > + int is_rgb; > + uint8_t rgba_map[4]; > + double score; > + double score_total; > + char comps[4]; > + int nb_components; > + int planewidth[4]; > + int planeheight[4]; > + > +} ANSNRContext; > +#define OFFSET(x) offsetof(ANSNRContext, x) > +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM > + > +static const AVOption ansnr_options[] = { > + {"stats_file", "Set file where to store per-frame difference > information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, > 0, FLAGS }, > + {"f", "Set file where to store per-frame difference > information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, > 0, FLAGS }, > + { NULL } > +}; > Document please. > +AVFILTER_DEFINE_CLASS(ansnr); > + > +const int ans_filter3_stride = 3; > +const int ans_filter5_stride = 5; > + > +const double ans_filter3[ans_filter3_stride*ans_filter3_stride] = { > + 1.0 / 16.0, 2.0 / 16.0, 1.0 / 16.0, > + 2.0 / 16.0, 4.0 / 16.0, 2.0 / 16.0, > + 1.0 / 16.0, 2.0 / 16.0, 1.0 / 16.0 > +}; > + > +const double ans_filter5[ans_filter5_stride*ans_filter5_stride] = { > + 2.0 / 571.0, 7.0 / 571.0, 12.0 / 571.0, 7.0 / 571.0, 2.0 / 571.0, > + 7.0 / 571.0, 31.0 / 571.0, 52.0 / 571.0, 31.0 / 571.0, 7.0 / 571.0, > + 12.0 / 571.0, 52.0 / 571.0, 127.0 / 571.0, 52.0 / 571.0, 12.0 / 571.0, > + 7.0 / 571.0, 31.0 / 571.0, 52.0 / 571.0, 31.0 / 571.0, 7.0 / 571.0, > + 2.0 / 571.0, 7.0 / 571.0, 12.0 / 571.0, 7.0 / 571.0, 2.0 / 571.0 > +}; > + > +static inline number_t pow_2(number_t base) > +{ > + return base*base; > +} > + > +static inline double get_ansnr(double score) > +{ > + return 10.0 * log10(score); > +} > + > +static inline double get_pow(double score) > +{ > + return pow(10, score/10.0); > +} > + > +static void ansnr_mse(const number_t *ref, const number_t *dis, number_t > *img_sig, number_t *img_noise, int row, int col) > +{ > + int ref_stride = col; > + int dis_stride = col; > + > + *img_sig = 0; > + *img_noise = 0; > + > + for (int i = 0; i < row; ++i) { > + for (int j = 0; j < col; ++j) { > + int ref_index = i * ref_stride + j; > + int dis_index = i * dis_stride + j; > + > + *img_sig += pow_2(ref[ref_index]); > + *img_noise += pow_2(ref[ref_index] - dis[dis_index]); > + } > + } > +} > + > +static int ansnr_filter(const uint8_t* src_image, number_t* dst_image, > int img_row, int img_col, const double *filter, int stride) > +{ > + int filter_row = stride; > + int filter_col = stride; > + int start_row = filter_row / 2; > + int start_col = filter_col / 2; > + int imme_row = img_row + 2 * start_row; > + int imme_col = img_col + 2 * start_col; > + > + number_t **imme_image; > + if(!(imme_image = (number_t **)av_malloc((size_t)imme_row * > sizeof(number_t *)))) > + return AVERROR(ENOMEM); > + > + for(int i = 0; i < imme_row; i++) { > + if(!(imme_image[i] = (number_t *)av_malloc((size_t)imme_col * > sizeof(number_t)))) > + return AVERROR(ENOMEM); > memleak > + } > + > + for (int i = 0; i < imme_row; i++) { > + 
for (int j = 0; j < imme_col; j++) { > + int src_i = i - start_row; > + int src_j = j - start_col; > + > + src_i = abs(src_i) >= img_row? 2 * img_row - abs(src_i) - 1 : > abs(src_i); > + src_j = abs(src_j) >= img_col? 2 * img_col - abs(src_j) - 1 : > abs(src_j); > + > + imme_image[i][j] = (number_t)src_image[src_i * img_col + > src_j] + OPT_RANGE_PIXEL_OFFSET; > + } > + } > + > + int end_row = imme_row -1 - start_row ; > + int end_col = imme_col -1 - start_col ; > + > + for (int i = start_row; i <= end_row; i++) { > + for (int j = start_col; j <= end_col; j++) { > + int dst_i = i - start_row ; > + int dst_j = j - start_col ; > + int dst_index = dst_i * img_col + dst_j; > + dst_image[dst_index] = 0; > + > + for (int m = 0; m < filter_row; m++) { > + for (int n = 0; n < filter_col; n++) { > + dst_image[dst_index] += (imme_image[dst_i + m][dst_j > + n] * filter[m * filter_col + n]); > + } > + } > + } > + } > + > + for(int i = 0; i < imme_row; i++) > + av_free(imme_image[i]); > + av_free(imme_image); > + > + return 0; > +} > + > +static inline > +int compute_ansnr(const uint8_t *anc, const uint8_t *dis, int col, int > row, double psnr_max, double *score, double *out) > +{ > + *score = 0.0; > + *out = 0.0; > + > + number_t *anc_after; > + if(!(anc_after = (number_t *)av_malloc((size_t)row * col * > sizeof(number_t)))) { > + av_free(anc_after); > + return AVERROR(ENOMEM); > + } > + > + number_t *dis_after; > + if(!(dis_after = (number_t *)av_malloc((size_t)row * col * > sizeof(number_t)))) { > + av_free(dis_after); > memleak; add av_free(anc_after); > + return AVERROR(ENOMEM); > + } > + > + int ret; > + ret = ansnr_filter(anc, anc_after, row, col, ans_filter3, > ans_filter3_stride); > + if(ret < 0) > + return ret; > memleak! > + ret = ansnr_filter(dis, dis_after, row, col, ans_filter5, > ans_filter5_stride); > + if(ret < 0) > + return ret; > memleak! > + > + number_t sig, noise; > + > + ansnr_mse(anc_after, dis_after, &sig, &noise, row, col); > + *score = noise==0 ? psnr_max : get_ansnr((double)sig / noise); > *score = !noise ? 
get_ansnr((double)sig / noise) : psnr_max; > + *out = get_pow(*score); > + > + av_free(anc_after); > + av_free(dis_after); > + > + return 0; > +} > + > +static void set_meta(AVDictionary **metadata, const char *key, char comp, > float d) > +{ > + char value[128]; > + snprintf(value, sizeof(value), "%0.2f", d); > + if (comp) { > + char key2[128]; > + snprintf(key2, sizeof(key2), "%s%c", key, comp); > + av_dict_set(metadata, key2, value, 0); > + } else { > + av_dict_set(metadata, key, value, 0); > + } > +} > + > +static int query_formats(AVFilterContext *ctx) > +{ > + static const enum AVPixelFormat pix_fmts[] = { > + AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY16, > +#define PF_NOALPHA(suf) AV_PIX_FMT_YUV420##suf, AV_PIX_FMT_YUV422##suf, > AV_PIX_FMT_YUV444##suf > +#define PF_ALPHA(suf) AV_PIX_FMT_YUVA420##suf, AV_PIX_FMT_YUVA422##suf, > AV_PIX_FMT_YUVA444##suf > +#define PF(suf) PF_NOALPHA(suf), PF_ALPHA(suf) > + PF(P), PF(P9), PF(P10), PF_NOALPHA(P12), PF_NOALPHA(P14), PF(P16), > + AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P, > + AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, > + AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P, > + AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, > + AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16, > + AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP16, > + AV_PIX_FMT_NONE > + }; > + > + AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts); > + if (!fmts_list) > + return AVERROR(ENOMEM); > + return ff_set_common_formats(ctx, fmts_list); > +} > + > +static AVFrame *do_ansnr(AVFilterContext *ctx, AVFrame *main, > + const AVFrame *ref) > +{ > + ANSNRContext *s = ctx->priv; > + AVDictionary **metadata = avpriv_frame_get_metadatap(main); > + double psnr_max; > + char *frame_format = (char *)av_get_pix_fmt_name(main->format); > + > + if( !strcmp(frame_format,"yuv420p") || !strcmp(frame_format,"yuv422p") > || !strcmp(frame_format,"yuv444p")) > + psnr_max = 60; > + if( !strcmp(frame_format,"yuv420p10le") || !strcmp(frame_format,"yuv422p10le") > || !strcmp(frame_format,"yuv444p10le")) > + psnr_max = 72; > use av_strcasecmp; > + > + double score = 0.0; > + double out = 0.0; > + > + compute_ansnr(ref->data[0], main->data[0], s->planewidth[0], > s->planeheight[0], psnr_max, &score, &out); > + s->nb_frames++; > + s->score = score; > + s->score_total += out; > + > + set_meta(metadata, "lavfi.ansnr.All", 0, score); > + > + if (s->stats_file) { > + fprintf(s->stats_file, "n:%"PRId64" ", s->nb_frames); > + fprintf(s->stats_file, "All:%f\n", score); > + } > + > + return main; > +} > + > +static av_cold int init(AVFilterContext *ctx) > +{ > + ANSNRContext *s = ctx->priv; > + s->score_total = 0; > + s->score = 0; > + > + if (s->stats_file_str) { > + if (s->stats_version < 2 && s->stats_add_max) { > + av_log(ctx, AV_LOG_ERROR, > + "stats_add_max was specified but stats_version < 2.\n" ); > + return AVERROR(EINVAL); > + } > + > + if (!strcmp(s->stats_file_str, "-")) { > use av_strcasecmp > + s->stats_file = stdout; > + } else { > + s->stats_file = fopen(s->stats_file_str, "w"); > + if (!s->stats_file) { > + int err = AVERROR(errno); > + char buf[128]; > + av_strerror(err, buf, sizeof(buf)); > + av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: > %s\n", > + s->stats_file_str, buf); > + return err; > + } > + } > + } > + > + s->dinput.process = do_ansnr; > + return 0; > +} > + > +static int config_input_ref(AVFilterLink *inlink) > +{ > + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); > + AVFilterContext *ctx = inlink->dst; > + 
ANSNRContext *s = ctx->priv; > + > + s->nb_components = desc->nb_components; > + > + if (ctx->inputs[0]->w != ctx->inputs[1]->w || > + ctx->inputs[0]->h != ctx->inputs[1]->h) { > + av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must > be same.\n"); > + return AVERROR(EINVAL); > + } > + if (ctx->inputs[0]->format != ctx->inputs[1]->format) { > + av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel > format.\n"); > + return AVERROR(EINVAL); > + } > + s->is_rgb = ff_fill_rgba_map(s->rgba_map, inlink->format) >= 0; > + s->comps[0] = s->is_rgb ? 'r' : 'y' ; > + s->comps[1] = s->is_rgb ? 'g' : 'u' ; > + s->comps[2] = s->is_rgb ? 'b' : 'v' ; > + s->comps[3] = 'a'; > + > + s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, > desc->log2_chroma_h); > + s->planeheight[0] = s->planeheight[3] = inlink->h; > + s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, > desc->log2_chroma_w); > + s->planewidth[0] = s->planewidth[3] = inlink->w; > + > + return 0; > +} > + > +static int config_output(AVFilterLink *outlink) > +{ > + AVFilterContext *ctx = outlink->src; > + ANSNRContext *s = ctx->priv; > + AVFilterLink *mainlink = ctx->inputs[0]; > + int ret; > + > + outlink->w = mainlink->w; > + outlink->h = mainlink->h; > + outlink->time_base = mainlink->time_base; > + outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio; > + outlink->frame_rate = mainlink->frame_rate; > + > + if ((ret = ff_dualinput_init(ctx, &s->dinput)) < 0) > + return ret; > + > + return 0; > +} > + > +static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref) > +{ > + ANSNRContext *s = inlink->dst->priv; > + return ff_dualinput_filter_frame(&s->dinput, inlink, inpicref); > +} > + > +static int request_frame(AVFilterLink *outlink) > +{ > + ANSNRContext *s = outlink->src->priv; > + return ff_dualinput_request_frame(&s->dinput, outlink); > +} > + > +static av_cold void uninit(AVFilterContext *ctx) > +{ > + ANSNRContext *s = ctx->priv; > + > + if (s->nb_frames > 0) { > + char buf[256]; > + buf[0] = 0; > + av_log(ctx, AV_LOG_INFO, "ANSNR%s All:%f\n", buf, > get_ansnr(s->score_total)); > + } > + > + ff_dualinput_uninit(&s->dinput); > + > + if (s->stats_file && s->stats_file != stdout) > + fclose(s->stats_file); > +} > + > +static const AVFilterPad ansnr_inputs[] = { > + { > + .name = "main", > + .type = AVMEDIA_TYPE_VIDEO, > + .filter_frame = filter_frame, > + },{ > + .name = "reference", > + .type = AVMEDIA_TYPE_VIDEO, > + .filter_frame = filter_frame, > + .config_props = config_input_ref, > + }, > + { NULL } > +}; > + > +static const AVFilterPad ansnr_outputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + .config_props = config_output, > + .request_frame = request_frame, > + }, > + { NULL } > +}; > + > +AVFilter ff_vf_ansnr = { > + .name = "ansnr", > + .description = NULL_IF_CONFIG_SMALL("Calculate the ANSNR between > two video streams."), > + .init = init, > + .uninit = uninit, > + .query_formats = query_formats, > + .priv_size = sizeof(ANSNRContext), > + .priv_class = &ansnr_class, > + .inputs = ansnr_inputs, > + .outputs = ansnr_outputs, > +}; > -- > 2.12.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel >
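The "memleak" remarks above all point at the same pattern: an early return after a failed av_malloc() or a failed ansnr_filter() call leaves buffers that were already allocated unreleased. A minimal sketch of how compute_ansnr() could route every exit through one cleanup path (this is an illustration, not part of the submitted patch; it also keeps the original noise == 0 fallback to psnr_max while using the !noise spelling suggested above):

    static int compute_ansnr(const uint8_t *anc, const uint8_t *dis, int col, int row,
                             double psnr_max, double *score, double *out)
    {
        number_t *anc_after = NULL, *dis_after = NULL;
        number_t sig, noise;
        int ret = 0;

        *score = 0.0;
        *out   = 0.0;

        anc_after = av_malloc((size_t)row * col * sizeof(*anc_after));
        dis_after = av_malloc((size_t)row * col * sizeof(*dis_after));
        if (!anc_after || !dis_after) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }

        if ((ret = ansnr_filter(anc, anc_after, row, col, ans_filter3, ans_filter3_stride)) < 0 ||
            (ret = ansnr_filter(dis, dis_after, row, col, ans_filter5, ans_filter5_stride)) < 0)
            goto fail;

        ansnr_mse(anc_after, dis_after, &sig, &noise, row, col);
        *score = !noise ? psnr_max : get_ansnr((double)sig / noise);
        *out   = get_pow(*score);

    fail:
        /* av_free() on a NULL pointer is a no-op, so this is safe on every path */
        av_free(anc_after);
        av_free(dis_after);
        return ret;
    }

The same single-exit idea applies inside ansnr_filter(), where a failing row allocation currently leaks the rows allocated before it as well as the pointer array itself.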
Hi Betty,

On Mon, Apr 3, 2017 at 11:00 AM, Betty Wu <lumosomul@gmail.com> wrote:

> +typedef double number_t;

Why?

> +static int ansnr_filter(const uint8_t* src_image, number_t* dst_image, int img_row, int img_col, const double *filter, int stride)
> [..]
> +    number_t **imme_image;
> +    if(!(imme_image = (number_t **)av_malloc((size_t)imme_row * sizeof(number_t *))))
> +        return AVERROR(ENOMEM);
> +
> +    for(int i = 0; i < imme_row; i++) {
> +        if(!(imme_image[i] = (number_t *)av_malloc((size_t)imme_col * sizeof(number_t))))
> +            return AVERROR(ENOMEM);
> +    }

In ffmpeg, we typically don't allocate lines and cols in separate arrays. You can just use a 1D array and use y*stride+x for indexing, which is what we do elsewhere. These arrays should also be initialized once in the init function and then reused, instead of being re-allocated for each frame.

> +    number_t *anc_after;
> +    if(!(anc_after = (number_t *)av_malloc((size_t)row * col * sizeof(number_t)))) {
> +        av_free(anc_after);
> +        return AVERROR(ENOMEM);
> +    }

Why free if allocation failed? And this also should be allocated once during init, not re-allocated for each frame.

> +static int query_formats(AVFilterContext *ctx)
> +{
> +    static const enum AVPixelFormat pix_fmts[] = {
> +        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY16,
> +#define PF_NOALPHA(suf) AV_PIX_FMT_YUV420##suf, AV_PIX_FMT_YUV422##suf, AV_PIX_FMT_YUV444##suf
> +#define PF_ALPHA(suf) AV_PIX_FMT_YUVA420##suf, AV_PIX_FMT_YUVA422##suf, AV_PIX_FMT_YUVA444##suf
> +#define PF(suf) PF_NOALPHA(suf), PF_ALPHA(suf)
> +        PF(P), PF(P9), PF(P10), PF_NOALPHA(P12), PF_NOALPHA(P14), PF(P16),
> +        AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
> +        AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
> +        AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
> +        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
> +        AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
> +        AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP16,
> +        AV_PIX_FMT_NONE
> +    };

Have all of these been tested? Since ANSNR is Y-only, it doesn't work on GBR. Likewise, I believe the code is 8-bit only, so it shouldn't work on gray16 or any of the P9/10/12/14/16 formats.

For SIMD purposes, I would probably encourage you to work in fixed-point integer. Maybe for the qualification task we can skip that (I hadn't really thought about it), but I think for the final implementation, fixed-point will be much faster, and speed is a significant goal here.

An interesting idea for speed is to make the filter decomposable (i.e. split the horizontal/vertical pass) if that's possible. At least for the 3-tap filter, that should be trivial (it's just a 121 lowpass in both directions), and should give some speed gains because you go from n^2 to 2n + an extra load/store pair in terms of complexity (whether it's actually faster remains to be proven). The other filter should also be decomposable, but I don't see from the top of my head what it decomposes into and I don't feel like writing a script to figure it out. :-).

Ronald
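To make the decomposition remark concrete: the 3x3 kernel in the patch is exactly the outer product of the 1-D kernel [1 2 1] / 4 with itself, so the 2-D convolution can be split into a horizontal pass followed by a vertical pass. The following is a rough float sketch, not taken from the patch: it assumes the source plane has already been converted to number_t with OPT_RANGE_PIXEL_OFFSET applied (the patch does this while building imme_image), and its border handling is plain reflection, so it is not guaranteed to be bit-exact with the patch's edge logic on the last row/column.

    /* 1-D 1-2-1 lowpass over one row, with reflected borders */
    static void filt121_row(const number_t *src, number_t *dst, int w)
    {
        for (int x = 0; x < w; x++) {
            int xl = x > 0     ? x - 1 : 1;
            int xr = x < w - 1 ? x + 1 : w - 2;
            dst[x] = (src[xl] + 2 * src[x] + src[xr]) / 4.0;
        }
    }

    /* separable 3x3 filter: horizontal pass into tmp, then vertical pass into dst */
    static void ansnr_filter3_sep(const number_t *src, number_t *dst, number_t *tmp,
                                  int w, int h)
    {
        for (int y = 0; y < h; y++)
            filt121_row(src + y * w, tmp + y * w, w);

        for (int y = 0; y < h; y++) {
            int yu = y > 0     ? y - 1 : 1;
            int yd = y < h - 1 ? y + 1 : h - 2;
            for (int x = 0; x < w; x++)
                dst[y * w + x] = (tmp[yu * w + x] + 2 * tmp[y * w + x] + tmp[yd * w + x]) / 4.0;
        }
    }

dst and tmp here would naturally be the per-context scratch buffers the review suggests allocating once (for example in config_input_ref) and reusing for every frame, rather than being re-allocated per call.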
diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 9c15ed62d2..5416e0f34f 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -123,6 +123,7 @@ OBJS-$(CONFIG_ANULLSINK_FILTER) += asink_anullsink.o # video filters OBJS-$(CONFIG_ALPHAEXTRACT_FILTER) += vf_extractplanes.o OBJS-$(CONFIG_ALPHAMERGE_FILTER) += vf_alphamerge.o +OBJS-$(CONFIG_ANSNR_FILTER) += vf_ansnr.o dualinput.o framesync.o OBJS-$(CONFIG_ASS_FILTER) += vf_subtitles.o OBJS-$(CONFIG_ATADENOISE_FILTER) += vf_atadenoise.o OBJS-$(CONFIG_AVGBLUR_FILTER) += vf_avgblur.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 64b634e8f3..fbd9ec026e 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -134,6 +134,7 @@ static void register_all(void) REGISTER_FILTER(ALPHAEXTRACT, alphaextract, vf); REGISTER_FILTER(ALPHAMERGE, alphamerge, vf); + REGISTER_FILTER(ANSNR, ansnr, vf); REGISTER_FILTER(ASS, ass, vf); REGISTER_FILTER(ATADENOISE, atadenoise, vf); REGISTER_FILTER(AVGBLUR, avgblur, vf); diff --git a/libavfilter/vf_ansnr.c b/libavfilter/vf_ansnr.c new file mode 100644 index 0000000000..cfea6efbd4 --- /dev/null +++ b/libavfilter/vf_ansnr.c @@ -0,0 +1,425 @@ +/* + * Copyright (c) 2017 Betty Wu + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * @file + * Caculate the ANSNR between two input videos. 
+ * @author Betty Wu + */ + +#include "libavutil/avstring.h" +#include "libavutil/opt.h" +#include "libavutil/pixdesc.h" +#include "avfilter.h" +#include "dualinput.h" +#include "drawutils.h" +#include "formats.h" +#include "internal.h" +#include "video.h" + +#define OPT_RANGE_PIXEL_OFFSET -128 + +typedef double number_t; + +typedef struct ANSNRContext { + const AVClass *class; + FFDualInputContext dinput; + uint64_t nb_frames; + FILE *stats_file; + char *stats_file_str; + int stats_version; + int stats_header_written; + int stats_add_max; + int is_rgb; + uint8_t rgba_map[4]; + double score; + double score_total; + char comps[4]; + int nb_components; + int planewidth[4]; + int planeheight[4]; + +} ANSNRContext; +#define OFFSET(x) offsetof(ANSNRContext, x) +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM + +static const AVOption ansnr_options[] = { + {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + {"f", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { NULL } +}; +AVFILTER_DEFINE_CLASS(ansnr); + +const int ans_filter3_stride = 3; +const int ans_filter5_stride = 5; + +const double ans_filter3[ans_filter3_stride*ans_filter3_stride] = { + 1.0 / 16.0, 2.0 / 16.0, 1.0 / 16.0, + 2.0 / 16.0, 4.0 / 16.0, 2.0 / 16.0, + 1.0 / 16.0, 2.0 / 16.0, 1.0 / 16.0 +}; + +const double ans_filter5[ans_filter5_stride*ans_filter5_stride] = { + 2.0 / 571.0, 7.0 / 571.0, 12.0 / 571.0, 7.0 / 571.0, 2.0 / 571.0, + 7.0 / 571.0, 31.0 / 571.0, 52.0 / 571.0, 31.0 / 571.0, 7.0 / 571.0, + 12.0 / 571.0, 52.0 / 571.0, 127.0 / 571.0, 52.0 / 571.0, 12.0 / 571.0, + 7.0 / 571.0, 31.0 / 571.0, 52.0 / 571.0, 31.0 / 571.0, 7.0 / 571.0, + 2.0 / 571.0, 7.0 / 571.0, 12.0 / 571.0, 7.0 / 571.0, 2.0 / 571.0 +}; + +static inline number_t pow_2(number_t base) +{ + return base*base; +} + +static inline double get_ansnr(double score) +{ + return 10.0 * log10(score); +} + +static inline double get_pow(double score) +{ + return pow(10, score/10.0); +} + +static void ansnr_mse(const number_t *ref, const number_t *dis, number_t *img_sig, number_t *img_noise, int row, int col) +{ + int ref_stride = col; + int dis_stride = col; + + *img_sig = 0; + *img_noise = 0; + + for (int i = 0; i < row; ++i) { + for (int j = 0; j < col; ++j) { + int ref_index = i * ref_stride + j; + int dis_index = i * dis_stride + j; + + *img_sig += pow_2(ref[ref_index]); + *img_noise += pow_2(ref[ref_index] - dis[dis_index]); + } + } +} + +static int ansnr_filter(const uint8_t* src_image, number_t* dst_image, int img_row, int img_col, const double *filter, int stride) +{ + int filter_row = stride; + int filter_col = stride; + int start_row = filter_row / 2; + int start_col = filter_col / 2; + int imme_row = img_row + 2 * start_row; + int imme_col = img_col + 2 * start_col; + + number_t **imme_image; + if(!(imme_image = (number_t **)av_malloc((size_t)imme_row * sizeof(number_t *)))) + return AVERROR(ENOMEM); + + for(int i = 0; i < imme_row; i++) { + if(!(imme_image[i] = (number_t *)av_malloc((size_t)imme_col * sizeof(number_t)))) + return AVERROR(ENOMEM); + } + + for (int i = 0; i < imme_row; i++) { + for (int j = 0; j < imme_col; j++) { + int src_i = i - start_row; + int src_j = j - start_col; + + src_i = abs(src_i) >= img_row? 2 * img_row - abs(src_i) - 1 : abs(src_i); + src_j = abs(src_j) >= img_col? 
2 * img_col - abs(src_j) - 1 : abs(src_j); + + imme_image[i][j] = (number_t)src_image[src_i * img_col + src_j] + OPT_RANGE_PIXEL_OFFSET; + } + } + + int end_row = imme_row -1 - start_row ; + int end_col = imme_col -1 - start_col ; + + for (int i = start_row; i <= end_row; i++) { + for (int j = start_col; j <= end_col; j++) { + int dst_i = i - start_row ; + int dst_j = j - start_col ; + int dst_index = dst_i * img_col + dst_j; + dst_image[dst_index] = 0; + + for (int m = 0; m < filter_row; m++) { + for (int n = 0; n < filter_col; n++) { + dst_image[dst_index] += (imme_image[dst_i + m][dst_j + n] * filter[m * filter_col + n]); + } + } + } + } + + for(int i = 0; i < imme_row; i++) + av_free(imme_image[i]); + av_free(imme_image); + + return 0; +} + +static inline +int compute_ansnr(const uint8_t *anc, const uint8_t *dis, int col, int row, double psnr_max, double *score, double *out) +{ + *score = 0.0; + *out = 0.0; + + number_t *anc_after; + if(!(anc_after = (number_t *)av_malloc((size_t)row * col * sizeof(number_t)))) { + av_free(anc_after); + return AVERROR(ENOMEM); + } + + number_t *dis_after; + if(!(dis_after = (number_t *)av_malloc((size_t)row * col * sizeof(number_t)))) { + av_free(dis_after); + return AVERROR(ENOMEM); + } + + int ret; + ret = ansnr_filter(anc, anc_after, row, col, ans_filter3, ans_filter3_stride); + if(ret < 0) + return ret; + ret = ansnr_filter(dis, dis_after, row, col, ans_filter5, ans_filter5_stride); + if(ret < 0) + return ret; + + number_t sig, noise; + + ansnr_mse(anc_after, dis_after, &sig, &noise, row, col); + *score = noise==0 ? psnr_max : get_ansnr((double)sig / noise); + *out = get_pow(*score); + + av_free(anc_after); + av_free(dis_after); + + return 0; +} + +static void set_meta(AVDictionary **metadata, const char *key, char comp, float d) +{ + char value[128]; + snprintf(value, sizeof(value), "%0.2f", d); + if (comp) { + char key2[128]; + snprintf(key2, sizeof(key2), "%s%c", key, comp); + av_dict_set(metadata, key2, value, 0); + } else { + av_dict_set(metadata, key, value, 0); + } +} + +static int query_formats(AVFilterContext *ctx) +{ + static const enum AVPixelFormat pix_fmts[] = { + AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY16, +#define PF_NOALPHA(suf) AV_PIX_FMT_YUV420##suf, AV_PIX_FMT_YUV422##suf, AV_PIX_FMT_YUV444##suf +#define PF_ALPHA(suf) AV_PIX_FMT_YUVA420##suf, AV_PIX_FMT_YUVA422##suf, AV_PIX_FMT_YUVA444##suf +#define PF(suf) PF_NOALPHA(suf), PF_ALPHA(suf) + PF(P), PF(P9), PF(P10), PF_NOALPHA(P12), PF_NOALPHA(P14), PF(P16), + AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P, + AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, + AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P, + AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, + AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16, + AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP16, + AV_PIX_FMT_NONE + }; + + AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts); + if (!fmts_list) + return AVERROR(ENOMEM); + return ff_set_common_formats(ctx, fmts_list); +} + +static AVFrame *do_ansnr(AVFilterContext *ctx, AVFrame *main, + const AVFrame *ref) +{ + ANSNRContext *s = ctx->priv; + AVDictionary **metadata = avpriv_frame_get_metadatap(main); + double psnr_max; + char *frame_format = (char *)av_get_pix_fmt_name(main->format); + + if( !strcmp(frame_format,"yuv420p") || !strcmp(frame_format,"yuv422p") || !strcmp(frame_format,"yuv444p")) + psnr_max = 60; + if( !strcmp(frame_format,"yuv420p10le") || !strcmp(frame_format,"yuv422p10le") || !strcmp(frame_format,"yuv444p10le")) + psnr_max = 72; + + 
double score = 0.0; + double out = 0.0; + + compute_ansnr(ref->data[0], main->data[0], s->planewidth[0], s->planeheight[0], psnr_max, &score, &out); + s->nb_frames++; + s->score = score; + s->score_total += out; + + set_meta(metadata, "lavfi.ansnr.All", 0, score); + + if (s->stats_file) { + fprintf(s->stats_file, "n:%"PRId64" ", s->nb_frames); + fprintf(s->stats_file, "All:%f\n", score); + } + + return main; +} + +static av_cold int init(AVFilterContext *ctx) +{ + ANSNRContext *s = ctx->priv; + s->score_total = 0; + s->score = 0; + + if (s->stats_file_str) { + if (s->stats_version < 2 && s->stats_add_max) { + av_log(ctx, AV_LOG_ERROR, + "stats_add_max was specified but stats_version < 2.\n" ); + return AVERROR(EINVAL); + } + + if (!strcmp(s->stats_file_str, "-")) { + s->stats_file = stdout; + } else { + s->stats_file = fopen(s->stats_file_str, "w"); + if (!s->stats_file) { + int err = AVERROR(errno); + char buf[128]; + av_strerror(err, buf, sizeof(buf)); + av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n", + s->stats_file_str, buf); + return err; + } + } + } + + s->dinput.process = do_ansnr; + return 0; +} + +static int config_input_ref(AVFilterLink *inlink) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); + AVFilterContext *ctx = inlink->dst; + ANSNRContext *s = ctx->priv; + + s->nb_components = desc->nb_components; + + if (ctx->inputs[0]->w != ctx->inputs[1]->w || + ctx->inputs[0]->h != ctx->inputs[1]->h) { + av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n"); + return AVERROR(EINVAL); + } + if (ctx->inputs[0]->format != ctx->inputs[1]->format) { + av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel format.\n"); + return AVERROR(EINVAL); + } + s->is_rgb = ff_fill_rgba_map(s->rgba_map, inlink->format) >= 0; + s->comps[0] = s->is_rgb ? 'r' : 'y' ; + s->comps[1] = s->is_rgb ? 'g' : 'u' ; + s->comps[2] = s->is_rgb ? 
'b' : 'v' ; + s->comps[3] = 'a'; + + s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h); + s->planeheight[0] = s->planeheight[3] = inlink->h; + s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w); + s->planewidth[0] = s->planewidth[3] = inlink->w; + + return 0; +} + +static int config_output(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + ANSNRContext *s = ctx->priv; + AVFilterLink *mainlink = ctx->inputs[0]; + int ret; + + outlink->w = mainlink->w; + outlink->h = mainlink->h; + outlink->time_base = mainlink->time_base; + outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio; + outlink->frame_rate = mainlink->frame_rate; + + if ((ret = ff_dualinput_init(ctx, &s->dinput)) < 0) + return ret; + + return 0; +} + +static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref) +{ + ANSNRContext *s = inlink->dst->priv; + return ff_dualinput_filter_frame(&s->dinput, inlink, inpicref); +} + +static int request_frame(AVFilterLink *outlink) +{ + ANSNRContext *s = outlink->src->priv; + return ff_dualinput_request_frame(&s->dinput, outlink); +} + +static av_cold void uninit(AVFilterContext *ctx) +{ + ANSNRContext *s = ctx->priv; + + if (s->nb_frames > 0) { + char buf[256]; + buf[0] = 0; + av_log(ctx, AV_LOG_INFO, "ANSNR%s All:%f\n", buf, get_ansnr(s->score_total)); + } + + ff_dualinput_uninit(&s->dinput); + + if (s->stats_file && s->stats_file != stdout) + fclose(s->stats_file); +} + +static const AVFilterPad ansnr_inputs[] = { + { + .name = "main", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = filter_frame, + },{ + .name = "reference", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = filter_frame, + .config_props = config_input_ref, + }, + { NULL } +}; + +static const AVFilterPad ansnr_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_output, + .request_frame = request_frame, + }, + { NULL } +}; + +AVFilter ff_vf_ansnr = { + .name = "ansnr", + .description = NULL_IF_CONFIG_SMALL("Calculate the ANSNR between two video streams."), + .init = init, + .uninit = uninit, + .query_formats = query_formats, + .priv_size = sizeof(ANSNRContext), + .priv_class = &ansnr_class, + .inputs = ansnr_inputs, + .outputs = ansnr_outputs, +};
A new filter, ANSNR, is added and libavfilter/Makefile is changed accordingly. Run 'ffmpeg -i input1 -i input2 -lavfi ansnr -f null -' to get an overall score; per-frame values are stored as frame metadata but not printed. This implementation is groundwork for a later vmaf filter, since ANSNR is one of the individual tools used in VMAF.

Signed-off-by: Betty Wu <lumosomul@gmail.com>
---
 libavfilter/Makefile     |   1 +
 libavfilter/allfilters.c |   1 +
 libavfilter/vf_ansnr.c   | 425 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 427 insertions(+)
 create mode 100644 libavfilter/vf_ansnr.c
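For reference, a hypothetical invocation that also writes out the per-frame values, using the stats_file (alias f) option defined in the patch; the first input is the main/distorted stream and the second the reference, matching the pad order in vf_ansnr.c, and the file names are only placeholders:

    ffmpeg -i distorted.mp4 -i reference.mp4 -lavfi ansnr=stats_file=ansnr.log -f null -

Passing f=- instead sends the per-frame lines to stdout, since the patch maps a stats file name of "-" to stdout.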