Message ID | 2f81b7a9.a096.16a00f319aa.Coremail.xwmeng@pku.edu.cn |
---|---|
State | New |
Headers | show |
> -----Original Message----- > From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf > Of xwmeng@pku.edu.cn > Sent: Tuesday, April 09, 2019 3:15 PM > To: ffmpeg-devel@ffmpeg.org > Cc: lq@chinaffmpeg.org > Subject: [FFmpeg-devel] [PATCH] libavfilter: Add derain filter init version-- > GSoC Qualification Task. > > This patch is the qualification task of the derain filter project in GSoC. > > > > > > > > From 61463dfe14c0e0de4e233f68c8404d73d5bd9f8f Mon Sep 17 00:00:00 > 2001 > > From: Xuewei Meng <xwmeng@pku.edu.cn> > Date: Tue, 9 Apr 2019 15:09:33 +0800 > Subject: [PATCH] Add derain filter init version-GSoC Qualification Task > > > Signed-off-by: Xuewei Meng <xwmeng@pku.edu.cn> > --- > doc/filters.texi | 41 ++++++++ > libavfilter/Makefile | 1 + > libavfilter/allfilters.c | 1 + > libavfilter/vf_derain.c | 204 > +++++++++++++++++++++++++++++++++++++++ > 4 files changed, 247 insertions(+) > create mode 100644 libavfilter/vf_derain.c > > > diff --git a/doc/filters.texi b/doc/filters.texi > index 867607d870..0117c418b4 100644 > --- a/doc/filters.texi > +++ b/doc/filters.texi > @@ -8036,6 +8036,47 @@ delogo=x=0:y=0:w=100:h=77:band=10 > > @end itemize > > +@section derain > + > +Remove the rain in the input image/video by applying the derain methods > based on > +convolutional neural networks. Supported models: > + > +@itemize > +@item > +Efficient Sub-Pixel Convolutional Neural Network model (ESPCN). > +See @url{https://arxiv.org/abs/1609.05158}. > +@end itemize > + > +Training scripts as well as scripts for model generation are provided in > +the repository at @url{https://github.com/XueweiMeng/derain_filter.git}. > + > +The filter accepts the following options: > + > +@table @option > +@item dnn_backend > +Specify which DNN backend to use for model loading and execution. This > option accepts > +the following values: > + > +@table @samp > +@item native > +Native implementation of DNN loading and execution. > + > +@item tensorflow > +TensorFlow backend. 
To enable this backend you > +need to install the TensorFlow for C library (see > +@url{https://www.tensorflow.org/install/install_c}) and configure FFmpeg > with > +@code{--enable-libtensorflow} > +@end table > + > +Default value is @samp{native}. > + > +@item model > +Set path to model file specifying network architecture and its parameters. > +Note that different backends use different file formats. TensorFlow > backend > +can load files for both formats, while native backend can load files for only > +its format. > +@end table > + > @section deshake > > Attempt to fix small changes in horizontal and/or vertical shift. This > diff --git a/libavfilter/Makefile b/libavfilter/Makefile > index fef6ec5c55..7809bac565 100644 > --- a/libavfilter/Makefile > +++ b/libavfilter/Makefile > @@ -194,6 +194,7 @@ OBJS-$(CONFIG_DATASCOPE_FILTER) += > vf_datascope.o > OBJS-$(CONFIG_DCTDNOIZ_FILTER) += vf_dctdnoiz.o > OBJS-$(CONFIG_DEBAND_FILTER) += vf_deband.o > OBJS-$(CONFIG_DEBLOCK_FILTER) += vf_deblock.o > +OBJS-$(CONFIG_DERAIN_FILTER) += vf_derain.o in alphabet order > OBJS-$(CONFIG_DECIMATE_FILTER) += vf_decimate.o > OBJS-$(CONFIG_DECONVOLVE_FILTER) += vf_convolve.o framesync.o > OBJS-$(CONFIG_DEDOT_FILTER) += vf_dedot.o > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c > index c51ae0f3c7..ee2a5b63e6 100644 > --- a/libavfilter/allfilters.c > +++ b/libavfilter/allfilters.c > @@ -182,6 +182,7 @@ extern AVFilter ff_vf_datascope; > extern AVFilter ff_vf_dctdnoiz; > extern AVFilter ff_vf_deband; > extern AVFilter ff_vf_deblock; > +extern AVFilter ff_vf_derain; > extern AVFilter ff_vf_decimate; > extern AVFilter ff_vf_deconvolve; > extern AVFilter ff_vf_dedot; > diff --git a/libavfilter/vf_derain.c b/libavfilter/vf_derain.c > new file mode 100644 > index 0000000000..f72ae1cd3a > --- /dev/null > +++ b/libavfilter/vf_derain.c > @@ -0,0 +1,204 @@ > +/* > + * Copyright (c) 2019 Xuewei Meng > + * > + * This file is part of FFmpeg. 
> + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +/** > + * @file > + * Filter implementing image derain filter using deep convolutional I made some changes for the tensorflow C API interface to support more features, see the patch set of https://patchwork.ffmpeg.org/patch/12567/, your code might be better to rebase it when these are pushed ... > networks. 
> + * https://arxiv.org/abs/1609.05158 > + * > http://openaccess.thecvf.com/content_ECCV_2018/html/Xia_Li_Recurrent_ > Squeeze-and-Excitation_Context_ECCV_2018_paper.html > + */ > + > +#include "libavutil/opt.h" > +#include "libavformat/avio.h" > +#include "libswscale/swscale.h" > +#include "avfilter.h" > +#include "formats.h" > +#include "internal.h" > +#include "dnn_interface.h" > + > +typedef struct DRContext { > + const AVClass *class; > + > + char *model_filename; > + DNNBackendType backend_type; > + DNNModule *dnn_module; > + DNNModel *model; > + DNNData input; > + DNNData output; > +} DRContext; > + > +#define OFFSET(x) offsetof(DRContext, x) > +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | > AV_OPT_FLAG_VIDEO_PARAM > +static const AVOption derain_options[] = { > + { "dnn_backend", "DNN backend", OFFSET(backend_type), > AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "backend" }, > + { "native", "native backend flag", 0, AV_OPT_TYPE_CONST, > { .i64 = 0 }, 0, 0, FLAGS, "backend" }, > +#if (CONFIG_LIBTENSORFLOW == 1) > + { "tensorflow", "tensorflow backend flag", 0, > AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" }, > +#endif > + { "model", "path to model file", OFFSET(model_filename), > AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, > + { NULL } > +}; > + > +AVFILTER_DEFINE_CLASS(derain); > + > +static int query_formats(AVFilterContext *ctx) > +{ > + AVFilterFormats *formats; > + const enum AVPixelFormat pixel_fmts[] = { > + AV_PIX_FMT_RGB24, > + AV_PIX_FMT_NONE > + }; > + > + formats = ff_make_format_list(pixel_fmts); > + if (!formats) { > + av_log(ctx, AV_LOG_ERROR, "could not create formats list\n"); > + return AVERROR(ENOMEM); > + } > + > + return ff_set_common_formats(ctx, formats); > +} > + > +static int config_inputs(AVFilterLink *inlink) > +{ > + AVFilterContext *ctx = inlink->dst; > + DRContext *dr_context = ctx->priv; > + AVFilterLink *outlink = ctx->outputs[0]; > + DNNReturnType result; > + > + dr_context->input.width = inlink->w; > + 
dr_context->input.height = inlink->h; > + dr_context->input.channels = 3; > + > + result = (dr_context->model->set_input_output)(dr_context->model- > >model, &dr_context->input, &dr_context->output); > + if (result != DNN_SUCCESS) { > + av_log(ctx, AV_LOG_ERROR, "could not set input and output for the > model\n"); > + return AVERROR(EIO); > + } > + > + outlink->h = dr_context->output.height; > + outlink->w = dr_context->output.width; > + > + return 0; > +} > + > +static int filter_frame(AVFilterLink *inlink, AVFrame *in) > +{ > + AVFilterContext *ctx = inlink->dst; > + AVFilterLink *outlink = ctx->outputs[0]; > + DRContext *dr_context = ctx->priv; > + DNNReturnType dnn_result; > + > + AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); > + if (!out) { > + av_log(ctx, AV_LOG_ERROR, "could not allocate memory for output > frame\n"); > + av_frame_free(&in); > + return AVERROR(ENOMEM); > + } > + > + av_frame_copy_props(out, in); > + out->height = dr_context->output.height; > + out->width = dr_context->output.width; > + > + for (int i = 0; i < out->height * out->width * 3; i++) { > + dr_context->input.data[i] = in->data[0][i] / 255.0; > + } > + > + av_frame_free(&in); > + dnn_result = (dr_context->dnn_module->execute_model)(dr_context- > >model); > + if (dnn_result != DNN_SUCCESS){ > + av_log(ctx, AV_LOG_ERROR, "failed to execute model\n"); > + return AVERROR(EIO); > + } > + > + for (int i = 0; i < out->height * out->width * 3; i++) { > + out->data[0][i] = (int)(dr_context->output.data[i] * 255); > + } > + > + return ff_filter_frame(outlink, out); > +} > + > +static av_cold int init(AVFilterContext *ctx) > +{ > + DRContext *dr_context = ctx->priv; > + > + dr_context->dnn_module = ff_get_dnn_module(dr_context- > >backend_type); > + if (!dr_context->dnn_module) { > + av_log(ctx, AV_LOG_ERROR, "could not create DNN module for > requested backend\n"); > + return AVERROR(ENOMEM); > + } > + if (!dr_context->model_filename) { > + av_log(ctx, AV_LOG_ERROR, 
"model file for network is not > specified\n"); > + return AVERROR(EINVAL); > + } > + if (!dr_context->dnn_module->load_model) { > + av_log(ctx, AV_LOG_ERROR, "load_model for network is not > specified\n"); > + return AVERROR(EINVAL); > + } > + > + dr_context->model = (dr_context->dnn_module- > >load_model)(dr_context->model_filename); > + if (!dr_context->model) { > + av_log(ctx, AV_LOG_ERROR, "could not load DNN model\n"); > + return AVERROR(EINVAL); > + } > + > + return 0; > +} > + > +static av_cold void uninit(AVFilterContext *ctx) > +{ > + DRContext *dr_context = ctx->priv; > + > + if (dr_context->dnn_module) { > + (dr_context->dnn_module->free_model)(&dr_context->model); > + av_freep(&dr_context->dnn_module); > + } > +} > + > +static const AVFilterPad derain_inputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + .config_props = config_inputs, > + .filter_frame = filter_frame, > + }, > + { NULL } > +}; > + > +static const AVFilterPad derain_outputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + }, > + { NULL } > +}; > + > +AVFilter ff_vf_derain = { > + .name = "derain", > + .description = NULL_IF_CONFIG_SMALL("Apply derain filter to the > input."), > + .priv_size = sizeof(DRContext), > + .init = init, > + .uninit = uninit, > + .query_formats = query_formats, > + .inputs = derain_inputs, > + .outputs = derain_outputs, > + .priv_class = &derain_class, > + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | > AVFILTER_FLAG_SLICE_THREADS, > +}; > + > -- > 2.17.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> 在 2019年4月9日,下午3:14,xwmeng@pku.edu.cn 写道: > > This patch is the qualification task of the derain filter project in GSoC. > It maybe better if you submit a model file and test example here. > From 61463dfe14c0e0de4e233f68c8404d73d5bd9f8f Mon Sep 17 00:00:00 2001 > > From: Xuewei Meng <xwmeng@pku.edu.cn> > Date: Tue, 9 Apr 2019 15:09:33 +0800 > Subject: [PATCH] Add derain filter init version-GSoC Qualification Task > > > Signed-off-by: Xuewei Meng <xwmeng@pku.edu.cn> > --- > doc/filters.texi | 41 ++++++++ > libavfilter/Makefile | 1 + > libavfilter/allfilters.c | 1 + > libavfilter/vf_derain.c | 204 +++++++++++++++++++++++++++++++++++++++ > 4 files changed, 247 insertions(+) > create mode 100644 libavfilter/vf_derain.c > > > diff --git a/doc/filters.texi b/doc/filters.texi > index 867607d870..0117c418b4 100644 > --- a/doc/filters.texi > +++ b/doc/filters.texi > @@ -8036,6 +8036,47 @@ delogo=x=0:y=0:w=100:h=77:band=10 > > @end itemize > > +@section derain > + > +Remove the rain in the input image/video by applying the derain methods based on > +convolutional neural networks. Supported models: > + > +@itemize > +@item > +Efficient Sub-Pixel Convolutional Neural Network model (ESPCN). > +See @url{https://arxiv.org/abs/1609.05158}. > +@end itemize > + > +Training scripts as well as scripts for model generation are provided in > +the repository at @url{https://github.com/XueweiMeng/derain_filter.git}. > + > +The filter accepts the following options: > + > +@table @option > +@item dnn_backend > +Specify which DNN backend to use for model loading and execution. This option accepts > +the following values: > + > +@table @samp > +@item native > +Native implementation of DNN loading and execution. > + > +@item tensorflow > +TensorFlow backend. 
To enable this backend you > +need to install the TensorFlow for C library (see > +@url{https://www.tensorflow.org/install/install_c}) and configure FFmpeg with > +@code{--enable-libtensorflow} > +@end table > + > +Default value is @samp{native}. > + > +@item model > +Set path to model file specifying network architecture and its parameters. > +Note that different backends use different file formats. TensorFlow backend > +can load files for both formats, while native backend can load files for only > +its format. > +@end table > + > @section deshake > > Attempt to fix small changes in horizontal and/or vertical shift. This > diff --git a/libavfilter/Makefile b/libavfilter/Makefile > index fef6ec5c55..7809bac565 100644 > --- a/libavfilter/Makefile > +++ b/libavfilter/Makefile > @@ -194,6 +194,7 @@ OBJS-$(CONFIG_DATASCOPE_FILTER) += vf_datascope.o > OBJS-$(CONFIG_DCTDNOIZ_FILTER) += vf_dctdnoiz.o > OBJS-$(CONFIG_DEBAND_FILTER) += vf_deband.o > OBJS-$(CONFIG_DEBLOCK_FILTER) += vf_deblock.o > +OBJS-$(CONFIG_DERAIN_FILTER) += vf_derain.o > OBJS-$(CONFIG_DECIMATE_FILTER) += vf_decimate.o > OBJS-$(CONFIG_DECONVOLVE_FILTER) += vf_convolve.o framesync.o > OBJS-$(CONFIG_DEDOT_FILTER) += vf_dedot.o > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c > index c51ae0f3c7..ee2a5b63e6 100644 > --- a/libavfilter/allfilters.c > +++ b/libavfilter/allfilters.c > @@ -182,6 +182,7 @@ extern AVFilter ff_vf_datascope; > extern AVFilter ff_vf_dctdnoiz; > extern AVFilter ff_vf_deband; > extern AVFilter ff_vf_deblock; > +extern AVFilter ff_vf_derain; > extern AVFilter ff_vf_decimate; > extern AVFilter ff_vf_deconvolve; > extern AVFilter ff_vf_dedot; > diff --git a/libavfilter/vf_derain.c b/libavfilter/vf_derain.c > new file mode 100644 > index 0000000000..f72ae1cd3a > --- /dev/null > +++ b/libavfilter/vf_derain.c > @@ -0,0 +1,204 @@ > +/* > + * Copyright (c) 2019 Xuewei Meng > + * > + * This file is part of FFmpeg. 
> + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +/** > + * @file > + * Filter implementing image derain filter using deep convolutional networks. > + * https://arxiv.org/abs/1609.05158 > + * http://openaccess.thecvf.com/content_ECCV_2018/html/Xia_Li_Recurrent_Squeeze-and-Excitation_Context_ECCV_2018_paper.html > + */ > + > +#include "libavutil/opt.h" > +#include "libavformat/avio.h" > +#include "libswscale/swscale.h" > +#include "avfilter.h" > +#include "formats.h" > +#include "internal.h" > +#include "dnn_interface.h" > + > +typedef struct DRContext { > + const AVClass *class; > + > + char *model_filename; > + DNNBackendType backend_type; > + DNNModule *dnn_module; > + DNNModel *model; > + DNNData input; > + DNNData output; > +} DRContext; > + > +#define OFFSET(x) offsetof(DRContext, x) > +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM > +static const AVOption derain_options[] = { > + { "dnn_backend", "DNN backend", OFFSET(backend_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "backend" }, > + { "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" }, > +#if (CONFIG_LIBTENSORFLOW == 1) > + { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, 
"backend" }, > +#endif > + { "model", "path to model file", OFFSET(model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, > + { NULL } > +}; > + > +AVFILTER_DEFINE_CLASS(derain); > + > +static int query_formats(AVFilterContext *ctx) > +{ > + AVFilterFormats *formats; > + const enum AVPixelFormat pixel_fmts[] = { > + AV_PIX_FMT_RGB24, > + AV_PIX_FMT_NONE > + }; > + > + formats = ff_make_format_list(pixel_fmts); > + if (!formats) { > + av_log(ctx, AV_LOG_ERROR, "could not create formats list\n"); > + return AVERROR(ENOMEM); > + } > + > + return ff_set_common_formats(ctx, formats); > +} > + > +static int config_inputs(AVFilterLink *inlink) > +{ > + AVFilterContext *ctx = inlink->dst; > + DRContext *dr_context = ctx->priv; > + AVFilterLink *outlink = ctx->outputs[0]; > + DNNReturnType result; > + > + dr_context->input.width = inlink->w; > + dr_context->input.height = inlink->h; > + dr_context->input.channels = 3; > + > + result = (dr_context->model->set_input_output)(dr_context->model->model, &dr_context->input, &dr_context->output); > + if (result != DNN_SUCCESS) { > + av_log(ctx, AV_LOG_ERROR, "could not set input and output for the model\n"); > + return AVERROR(EIO); > + } > + > + outlink->h = dr_context->output.height; > + outlink->w = dr_context->output.width; > + > + return 0; > +} > + > +static int filter_frame(AVFilterLink *inlink, AVFrame *in) > +{ > + AVFilterContext *ctx = inlink->dst; > + AVFilterLink *outlink = ctx->outputs[0]; > + DRContext *dr_context = ctx->priv; > + DNNReturnType dnn_result; > + > + AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); > + if (!out) { > + av_log(ctx, AV_LOG_ERROR, "could not allocate memory for output frame\n"); > + av_frame_free(&in); > + return AVERROR(ENOMEM); > + } > + > + av_frame_copy_props(out, in); > + out->height = dr_context->output.height; > + out->width = dr_context->output.width; > + > + for (int i = 0; i < out->height * out->width * 3; i++) { > + dr_context->input.data[i] = 
in->data[0][i] / 255.0; > + } > + > + av_frame_free(&in); > + dnn_result = (dr_context->dnn_module->execute_model)(dr_context->model); > + if (dnn_result != DNN_SUCCESS){ > + av_log(ctx, AV_LOG_ERROR, "failed to execute model\n"); > + return AVERROR(EIO); > + } > + > + for (int i = 0; i < out->height * out->width * 3; i++) { > + out->data[0][i] = (int)(dr_context->output.data[i] * 255); > + } > + > + return ff_filter_frame(outlink, out); > +} > + > +static av_cold int init(AVFilterContext *ctx) > +{ > + DRContext *dr_context = ctx->priv; > + > + dr_context->dnn_module = ff_get_dnn_module(dr_context->backend_type); > + if (!dr_context->dnn_module) { > + av_log(ctx, AV_LOG_ERROR, "could not create DNN module for requested backend\n"); > + return AVERROR(ENOMEM); > + } > + if (!dr_context->model_filename) { > + av_log(ctx, AV_LOG_ERROR, "model file for network is not specified\n"); > + return AVERROR(EINVAL); > + } > + if (!dr_context->dnn_module->load_model) { > + av_log(ctx, AV_LOG_ERROR, "load_model for network is not specified\n"); > + return AVERROR(EINVAL); > + } > + > + dr_context->model = (dr_context->dnn_module->load_model)(dr_context->model_filename); > + if (!dr_context->model) { > + av_log(ctx, AV_LOG_ERROR, "could not load DNN model\n"); > + return AVERROR(EINVAL); > + } > + > + return 0; > +} > + > +static av_cold void uninit(AVFilterContext *ctx) > +{ > + DRContext *dr_context = ctx->priv; > + > + if (dr_context->dnn_module) { > + (dr_context->dnn_module->free_model)(&dr_context->model); > + av_freep(&dr_context->dnn_module); > + } > +} > + > +static const AVFilterPad derain_inputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + .config_props = config_inputs, > + .filter_frame = filter_frame, > + }, > + { NULL } > +}; > + > +static const AVFilterPad derain_outputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + }, > + { NULL } > +}; > + > +AVFilter ff_vf_derain = { > + .name = "derain", > + .description = 
NULL_IF_CONFIG_SMALL("Apply derain filter to the input."), > + .priv_size = sizeof(DRContext), > + .init = init, > + .uninit = uninit, > + .query_formats = query_formats, > + .inputs = derain_inputs, > + .outputs = derain_outputs, > + .priv_class = &derain_class, > + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, > +}; > + > -- > 2.17.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Hi, Em ter, 9 de abr de 2019 às 04:15, <xwmeng@pku.edu.cn> escreveu: > +@section derain > + > +Remove the rain in the input image/video by applying the derain methods based on > +convolutional neural networks. Supported models: > + > +@itemize > +@item > +Efficient Sub-Pixel Convolutional Neural Network model (ESPCN). > +See @url{https://arxiv.org/abs/1609.05158}. > +@end itemize As the doc suggests, you're using the espcn model for deraining? if so, it would be more relevant to link to paper which justifies this usage as it currently seems to suggest you're using super-resolution. In case you are the one which is proposing this usage, it worth at least give some justification. is it better the current methods in any way? > + > +Training scripts as well as scripts for model generation are provided in > +the repository at @url{https://github.com/XueweiMeng/derain_filter.git}. > + > +The filter accepts the following options: > + > +@table @option > +@item dnn_backend > +Specify which DNN backend to use for model loading and execution. This option accepts > +the following values: > + > +@table @samp > +@item native > +Native implementation of DNN loading and execution. > + > +@item tensorflow > +TensorFlow backend. To enable this backend you > +need to install the TensorFlow for C library (see > +@url{https://www.tensorflow.org/install/install_c}) and configure FFmpeg with > +@code{--enable-libtensorflow} > +@end table > + > +Default value is @samp{native}. > + > +@item model > +Set path to model file specifying network architecture and its parameters. > +Note that different backends use different file formats. TensorFlow backend > +can load files for both formats, while native backend can load files for only > +its format. > +@end table > + > @section deshake > > Attempt to fix small changes in horizontal and/or vertical shift. 
This > diff --git a/libavfilter/Makefile b/libavfilter/Makefile > index fef6ec5c55..7809bac565 100644 > --- a/libavfilter/Makefile > +++ b/libavfilter/Makefile > @@ -194,6 +194,7 @@ OBJS-$(CONFIG_DATASCOPE_FILTER) += vf_datascope.o > OBJS-$(CONFIG_DCTDNOIZ_FILTER) += vf_dctdnoiz.o > OBJS-$(CONFIG_DEBAND_FILTER) += vf_deband.o > OBJS-$(CONFIG_DEBLOCK_FILTER) += vf_deblock.o > +OBJS-$(CONFIG_DERAIN_FILTER) += vf_derain.o > OBJS-$(CONFIG_DECIMATE_FILTER) += vf_decimate.o > OBJS-$(CONFIG_DECONVOLVE_FILTER) += vf_convolve.o framesync.o > OBJS-$(CONFIG_DEDOT_FILTER) += vf_dedot.o > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c > index c51ae0f3c7..ee2a5b63e6 100644 > --- a/libavfilter/allfilters.c > +++ b/libavfilter/allfilters.c > @@ -182,6 +182,7 @@ extern AVFilter ff_vf_datascope; > extern AVFilter ff_vf_dctdnoiz; > extern AVFilter ff_vf_deband; > extern AVFilter ff_vf_deblock; > +extern AVFilter ff_vf_derain; > extern AVFilter ff_vf_decimate; > extern AVFilter ff_vf_deconvolve; > extern AVFilter ff_vf_dedot; > diff --git a/libavfilter/vf_derain.c b/libavfilter/vf_derain.c > new file mode 100644 > index 0000000000..f72ae1cd3a > --- /dev/null > +++ b/libavfilter/vf_derain.c > @@ -0,0 +1,204 @@ > +/* > + * Copyright (c) 2019 Xuewei Meng > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +/** > + * @file > + * Filter implementing image derain filter using deep convolutional networks. > + * https://arxiv.org/abs/1609.05158 > + * http://openaccess.thecvf.com/content_ECCV_2018/html/Xia_Li_Recurrent_Squeeze-and-Excitation_Context_ECCV_2018_paper.html > + */ > + > +#include "libavutil/opt.h" > +#include "libavformat/avio.h" > +#include "libswscale/swscale.h" > +#include "avfilter.h" > +#include "formats.h" > +#include "internal.h" > +#include "dnn_interface.h" > + > +typedef struct DRContext { > + const AVClass *class; > + > + char *model_filename; > + DNNBackendType backend_type; > + DNNModule *dnn_module; > + DNNModel *model; > + DNNData input; > + DNNData output; > +} DRContext; > + > +#define OFFSET(x) offsetof(DRContext, x) > +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM > +static const AVOption derain_options[] = { > + { "dnn_backend", "DNN backend", OFFSET(backend_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "backend" }, > + { "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" }, > +#if (CONFIG_LIBTENSORFLOW == 1) > + { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" }, > +#endif > + { "model", "path to model file", OFFSET(model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, > + { NULL } > +}; > + > +AVFILTER_DEFINE_CLASS(derain); > + > +static int query_formats(AVFilterContext *ctx) > +{ > + AVFilterFormats *formats; > + const enum AVPixelFormat pixel_fmts[] = { > + AV_PIX_FMT_RGB24, > + AV_PIX_FMT_NONE > + }; > + > + formats = ff_make_format_list(pixel_fmts); > + if (!formats) { > + av_log(ctx, AV_LOG_ERROR, "could not create formats list\n"); > + return AVERROR(ENOMEM); 
> + } > + > + return ff_set_common_formats(ctx, formats); > +} > + > +static int config_inputs(AVFilterLink *inlink) > +{ > + AVFilterContext *ctx = inlink->dst; > + DRContext *dr_context = ctx->priv; > + AVFilterLink *outlink = ctx->outputs[0]; > + DNNReturnType result; > + > + dr_context->input.width = inlink->w; > + dr_context->input.height = inlink->h; > + dr_context->input.channels = 3; > + > + result = (dr_context->model->set_input_output)(dr_context->model->model, &dr_context->input, &dr_context->output); > + if (result != DNN_SUCCESS) { > + av_log(ctx, AV_LOG_ERROR, "could not set input and output for the model\n"); > + return AVERROR(EIO); > + } > + > + outlink->h = dr_context->output.height; > + outlink->w = dr_context->output.width; > + > + return 0; > +} > + > +static int filter_frame(AVFilterLink *inlink, AVFrame *in) > +{ > + AVFilterContext *ctx = inlink->dst; > + AVFilterLink *outlink = ctx->outputs[0]; > + DRContext *dr_context = ctx->priv; > + DNNReturnType dnn_result; > + > + AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); > + if (!out) { > + av_log(ctx, AV_LOG_ERROR, "could not allocate memory for output frame\n"); > + av_frame_free(&in); > + return AVERROR(ENOMEM); > + } > + > + av_frame_copy_props(out, in); > + out->height = dr_context->output.height; > + out->width = dr_context->output.width; > + > + for (int i = 0; i < out->height * out->width * 3; i++) { > + dr_context->input.data[i] = in->data[0][i] / 255.0; > + } > + > + av_frame_free(&in); > + dnn_result = (dr_context->dnn_module->execute_model)(dr_context->model); > + if (dnn_result != DNN_SUCCESS){ > + av_log(ctx, AV_LOG_ERROR, "failed to execute model\n"); > + return AVERROR(EIO); > + } > + > + for (int i = 0; i < out->height * out->width * 3; i++) { > + out->data[0][i] = (int)(dr_context->output.data[i] * 255); > + } > + > + return ff_filter_frame(outlink, out); > +} > + > +static av_cold int init(AVFilterContext *ctx) > +{ > + DRContext *dr_context = ctx->priv; 
> + > + dr_context->dnn_module = ff_get_dnn_module(dr_context->backend_type); > + if (!dr_context->dnn_module) { > + av_log(ctx, AV_LOG_ERROR, "could not create DNN module for requested backend\n"); > + return AVERROR(ENOMEM); > + } > + if (!dr_context->model_filename) { > + av_log(ctx, AV_LOG_ERROR, "model file for network is not specified\n"); > + return AVERROR(EINVAL); > + } > + if (!dr_context->dnn_module->load_model) { > + av_log(ctx, AV_LOG_ERROR, "load_model for network is not specified\n"); > + return AVERROR(EINVAL); > + } > + > + dr_context->model = (dr_context->dnn_module->load_model)(dr_context->model_filename); > + if (!dr_context->model) { > + av_log(ctx, AV_LOG_ERROR, "could not load DNN model\n"); > + return AVERROR(EINVAL); > + } > + > + return 0; > +} > + > +static av_cold void uninit(AVFilterContext *ctx) > +{ > + DRContext *dr_context = ctx->priv; > + > + if (dr_context->dnn_module) { > + (dr_context->dnn_module->free_model)(&dr_context->model); > + av_freep(&dr_context->dnn_module); > + } > +} > + > +static const AVFilterPad derain_inputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + .config_props = config_inputs, > + .filter_frame = filter_frame, > + }, > + { NULL } > +}; > + > +static const AVFilterPad derain_outputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + }, > + { NULL } > +}; > + > +AVFilter ff_vf_derain = { > + .name = "derain", > + .description = NULL_IF_CONFIG_SMALL("Apply derain filter to the input."), > + .priv_size = sizeof(DRContext), > + .init = init, > + .uninit = uninit, > + .query_formats = query_formats, > + .inputs = derain_inputs, > + .outputs = derain_outputs, > + .priv_class = &derain_class, > + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, > +}; > + > -- > 2.17.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To 
unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Em ter, 9 de abr de 2019 às 04:15, <xwmeng@pku.edu.cn> escreveu: > > +Training scripts as well as scripts for model generation are provided in > +the repository at @url{https://github.com/XueweiMeng/derain_filter.git}. This repository is a copy of a previous-year student's repository [1], which is MIT licensed, and therefore you should have included the original author's copyright. IMO the most polite behavior would have been to fork his repository and make the necessary changes you need, keeping the copyrights. [1] - https://github.com/HighVoltageRocknRoll/sr
Hi, Yes, I use the espcn model for deraining as the initial version as it's a easier way to implement the filter, although the paper proposes it for super-resolution. And the model does have some effect on deraining project. While, it is just the first version. I will use more suitable and more powerful model for derain filter according to the latest models proposed in derain task, and I will upload the new model soon. As for the model training source code, I did develop the derain training code initially based on the sr model training code in order to confirm the feasibility of our method quickly. And sorry, I forgot to include the original author copyrights. I have been writing the model training code by myself, and will upload it soon. Thanks for your suggestion! Xuewei > -----原始邮件----- > 发件人: "Pedro Arthur" <bygrandao@gmail.com> > 发送时间: 2019-04-10 01:21:06 (星期三) > 收件人: "FFmpeg development discussions and patches" <ffmpeg-devel@ffmpeg.org> > 抄送: "Steven Liu" <lq@chinaffmpeg.org> > 主题: Re: [FFmpeg-devel] [PATCH] libavfilter: Add derain filter init version--GSoC Qualification Task. > > Hi, > > Em ter, 9 de abr de 2019 às 04:15, <xwmeng@pku.edu.cn> escreveu: > > +@section derain > > + > > +Remove the rain in the input image/video by applying the derain methods based on > > +convolutional neural networks. Supported models: > > + > > +@itemize > > +@item > > +Efficient Sub-Pixel Convolutional Neural Network model (ESPCN). > > +See @url{https://arxiv.org/abs/1609.05158}. > > +@end itemize > > As the doc suggests, you're using the espcn model for deraining? if > so, it would be more relevant to link to paper which justifies this > usage as it currently seems to suggest you're using super-resolution. > > In case you are the one which is proposing this usage, it worth at > least give some justification. is it better the current methods in any > way? 
> > > > + > > +Training scripts as well as scripts for model generation are provided in > > +the repository at @url{https://github.com/XueweiMeng/derain_filter.git}. > > + > > +The filter accepts the following options: > > + > > +@table @option > > +@item dnn_backend > > +Specify which DNN backend to use for model loading and execution. This option accepts > > +the following values: > > + > > +@table @samp > > +@item native > > +Native implementation of DNN loading and execution. > > + > > +@item tensorflow > > +TensorFlow backend. To enable this backend you > > +need to install the TensorFlow for C library (see > > +@url{https://www.tensorflow.org/install/install_c}) and configure FFmpeg with > > +@code{--enable-libtensorflow} > > +@end table > > + > > +Default value is @samp{native}. > > + > > +@item model > > +Set path to model file specifying network architecture and its parameters. > > +Note that different backends use different file formats. TensorFlow backend > > +can load files for both formats, while native backend can load files for only > > +its format. > > +@end table > > + > > @section deshake > > > > Attempt to fix small changes in horizontal and/or vertical shift. 
This > > diff --git a/libavfilter/Makefile b/libavfilter/Makefile > > index fef6ec5c55..7809bac565 100644 > > --- a/libavfilter/Makefile > > +++ b/libavfilter/Makefile > > @@ -194,6 +194,7 @@ OBJS-$(CONFIG_DATASCOPE_FILTER) += vf_datascope.o > > OBJS-$(CONFIG_DCTDNOIZ_FILTER) += vf_dctdnoiz.o > > OBJS-$(CONFIG_DEBAND_FILTER) += vf_deband.o > > OBJS-$(CONFIG_DEBLOCK_FILTER) += vf_deblock.o > > +OBJS-$(CONFIG_DERAIN_FILTER) += vf_derain.o > > OBJS-$(CONFIG_DECIMATE_FILTER) += vf_decimate.o > > OBJS-$(CONFIG_DECONVOLVE_FILTER) += vf_convolve.o framesync.o > > OBJS-$(CONFIG_DEDOT_FILTER) += vf_dedot.o > > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c > > index c51ae0f3c7..ee2a5b63e6 100644 > > --- a/libavfilter/allfilters.c > > +++ b/libavfilter/allfilters.c > > @@ -182,6 +182,7 @@ extern AVFilter ff_vf_datascope; > > extern AVFilter ff_vf_dctdnoiz; > > extern AVFilter ff_vf_deband; > > extern AVFilter ff_vf_deblock; > > +extern AVFilter ff_vf_derain; > > extern AVFilter ff_vf_decimate; > > extern AVFilter ff_vf_deconvolve; > > extern AVFilter ff_vf_dedot; > > diff --git a/libavfilter/vf_derain.c b/libavfilter/vf_derain.c > > new file mode 100644 > > index 0000000000..f72ae1cd3a > > --- /dev/null > > +++ b/libavfilter/vf_derain.c > > @@ -0,0 +1,204 @@ > > +/* > > + * Copyright (c) 2019 Xuewei Meng > > + * > > + * This file is part of FFmpeg. > > + * > > + * FFmpeg is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU Lesser General Public > > + * License as published by the Free Software Foundation; either > > + * version 2.1 of the License, or (at your option) any later version. > > + * > > + * FFmpeg is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + * Lesser General Public License for more details. 
> > + * > > + * You should have received a copy of the GNU Lesser General Public > > + * License along with FFmpeg; if not, write to the Free Software > > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > > + */ > > + > > +/** > > + * @file > > + * Filter implementing image derain filter using deep convolutional networks. > > + * https://arxiv.org/abs/1609.05158 > > + * http://openaccess.thecvf.com/content_ECCV_2018/html/Xia_Li_Recurrent_Squeeze-and-Excitation_Context_ECCV_2018_paper.html > > + */ > > + > > +#include "libavutil/opt.h" > > +#include "libavformat/avio.h" > > +#include "libswscale/swscale.h" > > +#include "avfilter.h" > > +#include "formats.h" > > +#include "internal.h" > > +#include "dnn_interface.h" > > + > > +typedef struct DRContext { > > + const AVClass *class; > > + > > + char *model_filename; > > + DNNBackendType backend_type; > > + DNNModule *dnn_module; > > + DNNModel *model; > > + DNNData input; > > + DNNData output; > > +} DRContext; > > + > > +#define OFFSET(x) offsetof(DRContext, x) > > +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM > > +static const AVOption derain_options[] = { > > + { "dnn_backend", "DNN backend", OFFSET(backend_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "backend" }, > > + { "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" }, > > +#if (CONFIG_LIBTENSORFLOW == 1) > > + { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" }, > > +#endif > > + { "model", "path to model file", OFFSET(model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, > > + { NULL } > > +}; > > + > > +AVFILTER_DEFINE_CLASS(derain); > > + > > +static int query_formats(AVFilterContext *ctx) > > +{ > > + AVFilterFormats *formats; > > + const enum AVPixelFormat pixel_fmts[] = { > > + AV_PIX_FMT_RGB24, > > + AV_PIX_FMT_NONE > > + }; > > + > > + formats = ff_make_format_list(pixel_fmts); > > 
+ if (!formats) { > > + av_log(ctx, AV_LOG_ERROR, "could not create formats list\n"); > > + return AVERROR(ENOMEM); > > + } > > + > > + return ff_set_common_formats(ctx, formats); > > +} > > + > > +static int config_inputs(AVFilterLink *inlink) > > +{ > > + AVFilterContext *ctx = inlink->dst; > > + DRContext *dr_context = ctx->priv; > > + AVFilterLink *outlink = ctx->outputs[0]; > > + DNNReturnType result; > > + > > + dr_context->input.width = inlink->w; > > + dr_context->input.height = inlink->h; > > + dr_context->input.channels = 3; > > + > > + result = (dr_context->model->set_input_output)(dr_context->model->model, &dr_context->input, &dr_context->output); > > + if (result != DNN_SUCCESS) { > > + av_log(ctx, AV_LOG_ERROR, "could not set input and output for the model\n"); > > + return AVERROR(EIO); > > + } > > + > > + outlink->h = dr_context->output.height; > > + outlink->w = dr_context->output.width; > > + > > + return 0; > > +} > > + > > +static int filter_frame(AVFilterLink *inlink, AVFrame *in) > > +{ > > + AVFilterContext *ctx = inlink->dst; > > + AVFilterLink *outlink = ctx->outputs[0]; > > + DRContext *dr_context = ctx->priv; > > + DNNReturnType dnn_result; > > + > > + AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); > > + if (!out) { > > + av_log(ctx, AV_LOG_ERROR, "could not allocate memory for output frame\n"); > > + av_frame_free(&in); > > + return AVERROR(ENOMEM); > > + } > > + > > + av_frame_copy_props(out, in); > > + out->height = dr_context->output.height; > > + out->width = dr_context->output.width; > > + > > + for (int i = 0; i < out->height * out->width * 3; i++) { > > + dr_context->input.data[i] = in->data[0][i] / 255.0; > > + } > > + > > + av_frame_free(&in); > > + dnn_result = (dr_context->dnn_module->execute_model)(dr_context->model); > > + if (dnn_result != DNN_SUCCESS){ > > + av_log(ctx, AV_LOG_ERROR, "failed to execute model\n"); > > + return AVERROR(EIO); > > + } > > + > > + for (int i = 0; i < out->height * 
out->width * 3; i++) { > > + out->data[0][i] = (int)(dr_context->output.data[i] * 255); > > + } > > + > > + return ff_filter_frame(outlink, out); > > +} > > + > > +static av_cold int init(AVFilterContext *ctx) > > +{ > > + DRContext *dr_context = ctx->priv; > > + > > + dr_context->dnn_module = ff_get_dnn_module(dr_context->backend_type); > > + if (!dr_context->dnn_module) { > > + av_log(ctx, AV_LOG_ERROR, "could not create DNN module for requested backend\n"); > > + return AVERROR(ENOMEM); > > + } > > + if (!dr_context->model_filename) { > > + av_log(ctx, AV_LOG_ERROR, "model file for network is not specified\n"); > > + return AVERROR(EINVAL); > > + } > > + if (!dr_context->dnn_module->load_model) { > > + av_log(ctx, AV_LOG_ERROR, "load_model for network is not specified\n"); > > + return AVERROR(EINVAL); > > + } > > + > > + dr_context->model = (dr_context->dnn_module->load_model)(dr_context->model_filename); > > + if (!dr_context->model) { > > + av_log(ctx, AV_LOG_ERROR, "could not load DNN model\n"); > > + return AVERROR(EINVAL); > > + } > > + > > + return 0; > > +} > > + > > +static av_cold void uninit(AVFilterContext *ctx) > > +{ > > + DRContext *dr_context = ctx->priv; > > + > > + if (dr_context->dnn_module) { > > + (dr_context->dnn_module->free_model)(&dr_context->model); > > + av_freep(&dr_context->dnn_module); > > + } > > +} > > + > > +static const AVFilterPad derain_inputs[] = { > > + { > > + .name = "default", > > + .type = AVMEDIA_TYPE_VIDEO, > > + .config_props = config_inputs, > > + .filter_frame = filter_frame, > > + }, > > + { NULL } > > +}; > > + > > +static const AVFilterPad derain_outputs[] = { > > + { > > + .name = "default", > > + .type = AVMEDIA_TYPE_VIDEO, > > + }, > > + { NULL } > > +}; > > + > > +AVFilter ff_vf_derain = { > > + .name = "derain", > > + .description = NULL_IF_CONFIG_SMALL("Apply derain filter to the input."), > > + .priv_size = sizeof(DRContext), > > + .init = init, > > + .uninit = uninit, > > + .query_formats = query_formats, 
> > + .inputs = derain_inputs, > > + .outputs = derain_outputs, > > + .priv_class = &derain_class, > > + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, > > +}; > > + > > -- > > 2.17.1 > > > > _______________________________________________ > > ffmpeg-devel mailing list > > ffmpeg-devel@ffmpeg.org > > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > > > To unsubscribe, visit link above, or email > > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Hi, Em ter, 9 de abr de 2019 às 22:42, <xwmeng@pku.edu.cn> escreveu: > > Yes, I use the espcn model for deraining as the initial version as it's a easier way to implement the filter, although the paper proposes it for super-resolution. And the model does have some effect on deraining project. While, it is just the first version. I will use more suitable and more powerful model for derain filter according to the latest models proposed in derain task, and I will upload the new model soon. > There is no problem in using the espcn infrastructure to start learning, but I still can't see how using the espcn model fits your purpose. The espcn model should output an image bigger than the input, why would a derain filter do that? Also, you did not provide any data regarding the results you might have obtained, nor the model file and an appropriate input/reference so we can test it. What are the results you get so far (PSNR before/after applying the filter)?
> -----原始邮件----- > 发件人: "Liu Steven" <lq@chinaffmpeg.org> > 发送时间: 2019-04-09 16:00:25 (星期二) > 收件人: "FFmpeg development discussions and patches" <ffmpeg-devel@ffmpeg.org> > 抄送: "Liu Steven" <lq@chinaffmpeg.org> > 主题: Re: [FFmpeg-devel] [PATCH] libavfilter: Add derain filter init version--GSoC Qualification Task. > > > > > 在 2019年4月9日,下午3:14,xwmeng@pku.edu.cn 写道: > > > > This patch is the qualification task of the derain filter project in GSoC. > > > It maybe better if you submit a model file and test example here. The model file has been uploaded (https://github.com/XueweiMeng/derain_filter). And you can download the test/train dataset from http://www.icst.pku.edu.cn/struct/Projects/joint_rain_removal.html xuewei > > From 61463dfe14c0e0de4e233f68c8404d73d5bd9f8f Mon Sep 17 00:00:00 2001 > > > > From: Xuewei Meng <xwmeng@pku.edu.cn> > > Date: Tue, 9 Apr 2019 15:09:33 +0800 > > Subject: [PATCH] Add derain filter init version-GSoC Qualification Task > > > > > > Signed-off-by: Xuewei Meng <xwmeng@pku.edu.cn> > > --- > > doc/filters.texi | 41 ++++++++ > > libavfilter/Makefile | 1 + > > libavfilter/allfilters.c | 1 + > > libavfilter/vf_derain.c | 204 +++++++++++++++++++++++++++++++++++++++ > > 4 files changed, 247 insertions(+) > > create mode 100644 libavfilter/vf_derain.c > > > > > > diff --git a/doc/filters.texi b/doc/filters.texi > > index 867607d870..0117c418b4 100644 > > --- a/doc/filters.texi > > +++ b/doc/filters.texi > > @@ -8036,6 +8036,47 @@ delogo=x=0:y=0:w=100:h=77:band=10 > > > > @end itemize > > > > +@section derain > > + > > +Remove the rain in the input image/video by applying the derain methods based on > > +convolutional neural networks. Supported models: > > + > > +@itemize > > +@item > > +Efficient Sub-Pixel Convolutional Neural Network model (ESPCN). > > +See @url{https://arxiv.org/abs/1609.05158}. 
> > +@end itemize > > + > > +Training scripts as well as scripts for model generation are provided in > > +the repository at @url{https://github.com/XueweiMeng/derain_filter.git}. > > + > > +The filter accepts the following options: > > + > > +@table @option > > +@item dnn_backend > > +Specify which DNN backend to use for model loading and execution. This option accepts > > +the following values: > > + > > +@table @samp > > +@item native > > +Native implementation of DNN loading and execution. > > + > > +@item tensorflow > > +TensorFlow backend. To enable this backend you > > +need to install the TensorFlow for C library (see > > +@url{https://www.tensorflow.org/install/install_c}) and configure FFmpeg with > > +@code{--enable-libtensorflow} > > +@end table > > + > > +Default value is @samp{native}. > > + > > +@item model > > +Set path to model file specifying network architecture and its parameters. > > +Note that different backends use different file formats. TensorFlow backend > > +can load files for both formats, while native backend can load files for only > > +its format. > > +@end table > > + > > @section deshake > > > > Attempt to fix small changes in horizontal and/or vertical shift. 
This > > diff --git a/libavfilter/Makefile b/libavfilter/Makefile > > index fef6ec5c55..7809bac565 100644 > > --- a/libavfilter/Makefile > > +++ b/libavfilter/Makefile > > @@ -194,6 +194,7 @@ OBJS-$(CONFIG_DATASCOPE_FILTER) += vf_datascope.o > > OBJS-$(CONFIG_DCTDNOIZ_FILTER) += vf_dctdnoiz.o > > OBJS-$(CONFIG_DEBAND_FILTER) += vf_deband.o > > OBJS-$(CONFIG_DEBLOCK_FILTER) += vf_deblock.o > > +OBJS-$(CONFIG_DERAIN_FILTER) += vf_derain.o > > OBJS-$(CONFIG_DECIMATE_FILTER) += vf_decimate.o > > OBJS-$(CONFIG_DECONVOLVE_FILTER) += vf_convolve.o framesync.o > > OBJS-$(CONFIG_DEDOT_FILTER) += vf_dedot.o > > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c > > index c51ae0f3c7..ee2a5b63e6 100644 > > --- a/libavfilter/allfilters.c > > +++ b/libavfilter/allfilters.c > > @@ -182,6 +182,7 @@ extern AVFilter ff_vf_datascope; > > extern AVFilter ff_vf_dctdnoiz; > > extern AVFilter ff_vf_deband; > > extern AVFilter ff_vf_deblock; > > +extern AVFilter ff_vf_derain; > > extern AVFilter ff_vf_decimate; > > extern AVFilter ff_vf_deconvolve; > > extern AVFilter ff_vf_dedot; > > diff --git a/libavfilter/vf_derain.c b/libavfilter/vf_derain.c > > new file mode 100644 > > index 0000000000..f72ae1cd3a > > --- /dev/null > > +++ b/libavfilter/vf_derain.c > > @@ -0,0 +1,204 @@ > > +/* > > + * Copyright (c) 2019 Xuewei Meng > > + * > > + * This file is part of FFmpeg. > > + * > > + * FFmpeg is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU Lesser General Public > > + * License as published by the Free Software Foundation; either > > + * version 2.1 of the License, or (at your option) any later version. > > + * > > + * FFmpeg is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + * Lesser General Public License for more details. 
> > + * > > + * You should have received a copy of the GNU Lesser General Public > > + * License along with FFmpeg; if not, write to the Free Software > > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > > + */ > > + > > +/** > > + * @file > > + * Filter implementing image derain filter using deep convolutional networks. > > + * https://arxiv.org/abs/1609.05158 > > + * http://openaccess.thecvf.com/content_ECCV_2018/html/Xia_Li_Recurrent_Squeeze-and-Excitation_Context_ECCV_2018_paper.html > > + */ > > + > > +#include "libavutil/opt.h" > > +#include "libavformat/avio.h" > > +#include "libswscale/swscale.h" > > +#include "avfilter.h" > > +#include "formats.h" > > +#include "internal.h" > > +#include "dnn_interface.h" > > + > > +typedef struct DRContext { > > + const AVClass *class; > > + > > + char *model_filename; > > + DNNBackendType backend_type; > > + DNNModule *dnn_module; > > + DNNModel *model; > > + DNNData input; > > + DNNData output; > > +} DRContext; > > + > > +#define OFFSET(x) offsetof(DRContext, x) > > +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM > > +static const AVOption derain_options[] = { > > + { "dnn_backend", "DNN backend", OFFSET(backend_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "backend" }, > > + { "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" }, > > +#if (CONFIG_LIBTENSORFLOW == 1) > > + { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" }, > > +#endif > > + { "model", "path to model file", OFFSET(model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, > > + { NULL } > > +}; > > + > > +AVFILTER_DEFINE_CLASS(derain); > > + > > +static int query_formats(AVFilterContext *ctx) > > +{ > > + AVFilterFormats *formats; > > + const enum AVPixelFormat pixel_fmts[] = { > > + AV_PIX_FMT_RGB24, > > + AV_PIX_FMT_NONE > > + }; > > + > > + formats = ff_make_format_list(pixel_fmts); > > 
+ if (!formats) { > > + av_log(ctx, AV_LOG_ERROR, "could not create formats list\n"); > > + return AVERROR(ENOMEM); > > + } > > + > > + return ff_set_common_formats(ctx, formats); > > +} > > + > > +static int config_inputs(AVFilterLink *inlink) > > +{ > > + AVFilterContext *ctx = inlink->dst; > > + DRContext *dr_context = ctx->priv; > > + AVFilterLink *outlink = ctx->outputs[0]; > > + DNNReturnType result; > > + > > + dr_context->input.width = inlink->w; > > + dr_context->input.height = inlink->h; > > + dr_context->input.channels = 3; > > + > > + result = (dr_context->model->set_input_output)(dr_context->model->model, &dr_context->input, &dr_context->output); > > + if (result != DNN_SUCCESS) { > > + av_log(ctx, AV_LOG_ERROR, "could not set input and output for the model\n"); > > + return AVERROR(EIO); > > + } > > + > > + outlink->h = dr_context->output.height; > > + outlink->w = dr_context->output.width; > > + > > + return 0; > > +} > > + > > +static int filter_frame(AVFilterLink *inlink, AVFrame *in) > > +{ > > + AVFilterContext *ctx = inlink->dst; > > + AVFilterLink *outlink = ctx->outputs[0]; > > + DRContext *dr_context = ctx->priv; > > + DNNReturnType dnn_result; > > + > > + AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); > > + if (!out) { > > + av_log(ctx, AV_LOG_ERROR, "could not allocate memory for output frame\n"); > > + av_frame_free(&in); > > + return AVERROR(ENOMEM); > > + } > > + > > + av_frame_copy_props(out, in); > > + out->height = dr_context->output.height; > > + out->width = dr_context->output.width; > > + > > + for (int i = 0; i < out->height * out->width * 3; i++) { > > + dr_context->input.data[i] = in->data[0][i] / 255.0; > > + } > > + > > + av_frame_free(&in); > > + dnn_result = (dr_context->dnn_module->execute_model)(dr_context->model); > > + if (dnn_result != DNN_SUCCESS){ > > + av_log(ctx, AV_LOG_ERROR, "failed to execute model\n"); > > + return AVERROR(EIO); > > + } > > + > > + for (int i = 0; i < out->height * 
out->width * 3; i++) { > > + out->data[0][i] = (int)(dr_context->output.data[i] * 255); > > + } > > + > > + return ff_filter_frame(outlink, out); > > +} > > + > > +static av_cold int init(AVFilterContext *ctx) > > +{ > > + DRContext *dr_context = ctx->priv; > > + > > + dr_context->dnn_module = ff_get_dnn_module(dr_context->backend_type); > > + if (!dr_context->dnn_module) { > > + av_log(ctx, AV_LOG_ERROR, "could not create DNN module for requested backend\n"); > > + return AVERROR(ENOMEM); > > + } > > + if (!dr_context->model_filename) { > > + av_log(ctx, AV_LOG_ERROR, "model file for network is not specified\n"); > > + return AVERROR(EINVAL); > > + } > > + if (!dr_context->dnn_module->load_model) { > > + av_log(ctx, AV_LOG_ERROR, "load_model for network is not specified\n"); > > + return AVERROR(EINVAL); > > + } > > + > > + dr_context->model = (dr_context->dnn_module->load_model)(dr_context->model_filename); > > + if (!dr_context->model) { > > + av_log(ctx, AV_LOG_ERROR, "could not load DNN model\n"); > > + return AVERROR(EINVAL); > > + } > > + > > + return 0; > > +} > > + > > +static av_cold void uninit(AVFilterContext *ctx) > > +{ > > + DRContext *dr_context = ctx->priv; > > + > > + if (dr_context->dnn_module) { > > + (dr_context->dnn_module->free_model)(&dr_context->model); > > + av_freep(&dr_context->dnn_module); > > + } > > +} > > + > > +static const AVFilterPad derain_inputs[] = { > > + { > > + .name = "default", > > + .type = AVMEDIA_TYPE_VIDEO, > > + .config_props = config_inputs, > > + .filter_frame = filter_frame, > > + }, > > + { NULL } > > +}; > > + > > +static const AVFilterPad derain_outputs[] = { > > + { > > + .name = "default", > > + .type = AVMEDIA_TYPE_VIDEO, > > + }, > > + { NULL } > > +}; > > + > > +AVFilter ff_vf_derain = { > > + .name = "derain", > > + .description = NULL_IF_CONFIG_SMALL("Apply derain filter to the input."), > > + .priv_size = sizeof(DRContext), > > + .init = init, > > + .uninit = uninit, > > + .query_formats = query_formats, 
> > + .inputs = derain_inputs, > > + .outputs = derain_outputs, > > + .priv_class = &derain_class, > > + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, > > +}; > > + > > -- > > 2.17.1 > > > > _______________________________________________ > > ffmpeg-devel mailing list > > ffmpeg-devel@ffmpeg.org > > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > > > To unsubscribe, visit link above, or email > > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> -----原始邮件----- > 发件人: "Pedro Arthur" <bygrandao@gmail.com> > 发送时间: 2019-04-11 00:12:09 (星期四) > 收件人: "FFmpeg development discussions and patches" <ffmpeg-devel@ffmpeg.org> > 抄送: > 主题: Re: [FFmpeg-devel] [PATCH] libavfilter: Add derain filter init version--GSoC Qualification Task. > > Hi, > Em ter, 9 de abr de 2019 às 22:42, <xwmeng@pku.edu.cn> escreveu: > > > > Yes, I use the espcn model for deraining as the initial version as it's a easier way to implement the filter, although the paper proposes it for super-resolution. And the model does have some effect on deraining project. While, it is just the first version. I will use more suitable and more powerful model for derain filter according to the latest models proposed in derain task, and I will upload the new model soon. > > > > There is no problem in using the espcn infrastructure to start > learning, but I still can't see how using the espcn model fits your > purpose. > The espcn model should output an image bigger than the input, why > would a derain filter do that? > > Also you did not provide any data regarding the results you might have > obtained neither the model file and appropriate input/reference so we > can test it. > What are the results you get so far (psnr before/after applying the filter)? > _______________________________________________ We made some modifications to the original ESPCN model, such as changing the input image from one channel (Y) to three channels (RGB) and removing the up-sampling procedure. The model file has been uploaded at https://github.com/XueweiMeng/derain_filter and you can download the training/testing dataset from http://www.icst.pku.edu.cn/struct/Projects/joint_rain_removal.html. I didn't save the PSNR/SSIM scores during the training and evaluation process, so the data will be uploaded later. Xuewei
> 在 2019年4月11日,下午1:46,xwmeng@pku.edu.cn 写道: > > > > >> -----原始邮件----- >> 发件人: "Liu Steven" <lq@chinaffmpeg.org> >> 发送时间: 2019-04-09 16:00:25 (星期二) >> 收件人: "FFmpeg development discussions and patches" <ffmpeg-devel@ffmpeg.org> >> 抄送: "Liu Steven" <lq@chinaffmpeg.org> >> 主题: Re: [FFmpeg-devel] [PATCH] libavfilter: Add derain filter init version--GSoC Qualification Task. >> >> >> >>> 在 2019年4月9日,下午3:14,xwmeng@pku.edu.cn 写道: >>> >>> This patch is the qualification task of the derain filter project in GSoC. >>> >> It maybe better if you submit a model file and test example here. > > The model file has been uploaded (https://github.com/XueweiMeng/derain_filter). And you can download the test/train dataset from http://www.icst.pku.edu.cn/struct/Projects/joint_rain_removal.html How should the people training the data? updoad the source ASAP. > > xuewei > >>> From 61463dfe14c0e0de4e233f68c8404d73d5bd9f8f Mon Sep 17 00:00:00 2001 >>> >>> From: Xuewei Meng <xwmeng@pku.edu.cn> >>> Date: Tue, 9 Apr 2019 15:09:33 +0800 >>> Subject: [PATCH] Add derain filter init version-GSoC Qualification Task >>> >>> >>> Signed-off-by: Xuewei Meng <xwmeng@pku.edu.cn> >>> --- >>> doc/filters.texi | 41 ++++++++ >>> libavfilter/Makefile | 1 + >>> libavfilter/allfilters.c | 1 + >>> libavfilter/vf_derain.c | 204 +++++++++++++++++++++++++++++++++++++++ >>> 4 files changed, 247 insertions(+) >>> create mode 100644 libavfilter/vf_derain.c >>> >>> >>> diff --git a/doc/filters.texi b/doc/filters.texi >>> index 867607d870..0117c418b4 100644 >>> --- a/doc/filters.texi >>> +++ b/doc/filters.texi >>> @@ -8036,6 +8036,47 @@ delogo=x=0:y=0:w=100:h=77:band=10 >>> >>> @end itemize >>> >>> +@section derain >>> + >>> +Remove the rain in the input image/video by applying the derain methods based on >>> +convolutional neural networks. Supported models: >>> + >>> +@itemize >>> +@item >>> +Efficient Sub-Pixel Convolutional Neural Network model (ESPCN). >>> +See @url{https://arxiv.org/abs/1609.05158}. 
>>> +@end itemize >>> + >>> +Training scripts as well as scripts for model generation are provided in >>> +the repository at @url{https://github.com/XueweiMeng/derain_filter.git}. >>> + >>> +The filter accepts the following options: >>> + >>> +@table @option >>> +@item dnn_backend >>> +Specify which DNN backend to use for model loading and execution. This option accepts >>> +the following values: >>> + >>> +@table @samp >>> +@item native >>> +Native implementation of DNN loading and execution. >>> + >>> +@item tensorflow >>> +TensorFlow backend. To enable this backend you >>> +need to install the TensorFlow for C library (see >>> +@url{https://www.tensorflow.org/install/install_c}) and configure FFmpeg with >>> +@code{--enable-libtensorflow} >>> +@end table >>> + >>> +Default value is @samp{native}. >>> + >>> +@item model >>> +Set path to model file specifying network architecture and its parameters. >>> +Note that different backends use different file formats. TensorFlow backend >>> +can load files for both formats, while native backend can load files for only >>> +its format. >>> +@end table >>> + >>> @section deshake >>> >>> Attempt to fix small changes in horizontal and/or vertical shift. 
This >>> diff --git a/libavfilter/Makefile b/libavfilter/Makefile >>> index fef6ec5c55..7809bac565 100644 >>> --- a/libavfilter/Makefile >>> +++ b/libavfilter/Makefile >>> @@ -194,6 +194,7 @@ OBJS-$(CONFIG_DATASCOPE_FILTER) += vf_datascope.o >>> OBJS-$(CONFIG_DCTDNOIZ_FILTER) += vf_dctdnoiz.o >>> OBJS-$(CONFIG_DEBAND_FILTER) += vf_deband.o >>> OBJS-$(CONFIG_DEBLOCK_FILTER) += vf_deblock.o >>> +OBJS-$(CONFIG_DERAIN_FILTER) += vf_derain.o >>> OBJS-$(CONFIG_DECIMATE_FILTER) += vf_decimate.o >>> OBJS-$(CONFIG_DECONVOLVE_FILTER) += vf_convolve.o framesync.o >>> OBJS-$(CONFIG_DEDOT_FILTER) += vf_dedot.o >>> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c >>> index c51ae0f3c7..ee2a5b63e6 100644 >>> --- a/libavfilter/allfilters.c >>> +++ b/libavfilter/allfilters.c >>> @@ -182,6 +182,7 @@ extern AVFilter ff_vf_datascope; >>> extern AVFilter ff_vf_dctdnoiz; >>> extern AVFilter ff_vf_deband; >>> extern AVFilter ff_vf_deblock; >>> +extern AVFilter ff_vf_derain; >>> extern AVFilter ff_vf_decimate; >>> extern AVFilter ff_vf_deconvolve; >>> extern AVFilter ff_vf_dedot; >>> diff --git a/libavfilter/vf_derain.c b/libavfilter/vf_derain.c >>> new file mode 100644 >>> index 0000000000..f72ae1cd3a >>> --- /dev/null >>> +++ b/libavfilter/vf_derain.c >>> @@ -0,0 +1,204 @@ >>> +/* >>> + * Copyright (c) 2019 Xuewei Meng >>> + * >>> + * This file is part of FFmpeg. >>> + * >>> + * FFmpeg is free software; you can redistribute it and/or >>> + * modify it under the terms of the GNU Lesser General Public >>> + * License as published by the Free Software Foundation; either >>> + * version 2.1 of the License, or (at your option) any later version. >>> + * >>> + * FFmpeg is distributed in the hope that it will be useful, >>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >>> + * Lesser General Public License for more details. 
>>> + * >>> + * You should have received a copy of the GNU Lesser General Public >>> + * License along with FFmpeg; if not, write to the Free Software >>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA >>> + */ >>> + >>> +/** >>> + * @file >>> + * Filter implementing image derain filter using deep convolutional networks. >>> + * https://arxiv.org/abs/1609.05158 >>> + * http://openaccess.thecvf.com/content_ECCV_2018/html/Xia_Li_Recurrent_Squeeze-and-Excitation_Context_ECCV_2018_paper.html >>> + */ >>> + >>> +#include "libavutil/opt.h" >>> +#include "libavformat/avio.h" >>> +#include "libswscale/swscale.h" >>> +#include "avfilter.h" >>> +#include "formats.h" >>> +#include "internal.h" >>> +#include "dnn_interface.h" >>> + >>> +typedef struct DRContext { >>> + const AVClass *class; >>> + >>> + char *model_filename; >>> + DNNBackendType backend_type; >>> + DNNModule *dnn_module; >>> + DNNModel *model; >>> + DNNData input; >>> + DNNData output; >>> +} DRContext; >>> + >>> +#define OFFSET(x) offsetof(DRContext, x) >>> +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM >>> +static const AVOption derain_options[] = { >>> + { "dnn_backend", "DNN backend", OFFSET(backend_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "backend" }, >>> + { "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" }, >>> +#if (CONFIG_LIBTENSORFLOW == 1) >>> + { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" }, >>> +#endif >>> + { "model", "path to model file", OFFSET(model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, >>> + { NULL } >>> +}; >>> + >>> +AVFILTER_DEFINE_CLASS(derain); >>> + >>> +static int query_formats(AVFilterContext *ctx) >>> +{ >>> + AVFilterFormats *formats; >>> + const enum AVPixelFormat pixel_fmts[] = { >>> + AV_PIX_FMT_RGB24, >>> + AV_PIX_FMT_NONE >>> + }; >>> + >>> + formats = ff_make_format_list(pixel_fmts); >>> 
+ if (!formats) { >>> + av_log(ctx, AV_LOG_ERROR, "could not create formats list\n"); >>> + return AVERROR(ENOMEM); >>> + } >>> + >>> + return ff_set_common_formats(ctx, formats); >>> +} >>> + >>> +static int config_inputs(AVFilterLink *inlink) >>> +{ >>> + AVFilterContext *ctx = inlink->dst; >>> + DRContext *dr_context = ctx->priv; >>> + AVFilterLink *outlink = ctx->outputs[0]; >>> + DNNReturnType result; >>> + >>> + dr_context->input.width = inlink->w; >>> + dr_context->input.height = inlink->h; >>> + dr_context->input.channels = 3; >>> + >>> + result = (dr_context->model->set_input_output)(dr_context->model->model, &dr_context->input, &dr_context->output); >>> + if (result != DNN_SUCCESS) { >>> + av_log(ctx, AV_LOG_ERROR, "could not set input and output for the model\n"); >>> + return AVERROR(EIO); >>> + } >>> + >>> + outlink->h = dr_context->output.height; >>> + outlink->w = dr_context->output.width; >>> + >>> + return 0; >>> +} >>> + >>> +static int filter_frame(AVFilterLink *inlink, AVFrame *in) >>> +{ >>> + AVFilterContext *ctx = inlink->dst; >>> + AVFilterLink *outlink = ctx->outputs[0]; >>> + DRContext *dr_context = ctx->priv; >>> + DNNReturnType dnn_result; >>> + >>> + AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); >>> + if (!out) { >>> + av_log(ctx, AV_LOG_ERROR, "could not allocate memory for output frame\n"); >>> + av_frame_free(&in); >>> + return AVERROR(ENOMEM); >>> + } >>> + >>> + av_frame_copy_props(out, in); >>> + out->height = dr_context->output.height; >>> + out->width = dr_context->output.width; >>> + >>> + for (int i = 0; i < out->height * out->width * 3; i++) { >>> + dr_context->input.data[i] = in->data[0][i] / 255.0; >>> + } >>> + >>> + av_frame_free(&in); >>> + dnn_result = (dr_context->dnn_module->execute_model)(dr_context->model); >>> + if (dnn_result != DNN_SUCCESS){ >>> + av_log(ctx, AV_LOG_ERROR, "failed to execute model\n"); >>> + return AVERROR(EIO); >>> + } >>> + >>> + for (int i = 0; i < out->height * 
out->width * 3; i++) { >>> + out->data[0][i] = (int)(dr_context->output.data[i] * 255); >>> + } >>> + >>> + return ff_filter_frame(outlink, out); >>> +} >>> + >>> +static av_cold int init(AVFilterContext *ctx) >>> +{ >>> + DRContext *dr_context = ctx->priv; >>> + >>> + dr_context->dnn_module = ff_get_dnn_module(dr_context->backend_type); >>> + if (!dr_context->dnn_module) { >>> + av_log(ctx, AV_LOG_ERROR, "could not create DNN module for requested backend\n"); >>> + return AVERROR(ENOMEM); >>> + } >>> + if (!dr_context->model_filename) { >>> + av_log(ctx, AV_LOG_ERROR, "model file for network is not specified\n"); >>> + return AVERROR(EINVAL); >>> + } >>> + if (!dr_context->dnn_module->load_model) { >>> + av_log(ctx, AV_LOG_ERROR, "load_model for network is not specified\n"); >>> + return AVERROR(EINVAL); >>> + } >>> + >>> + dr_context->model = (dr_context->dnn_module->load_model)(dr_context->model_filename); >>> + if (!dr_context->model) { >>> + av_log(ctx, AV_LOG_ERROR, "could not load DNN model\n"); >>> + return AVERROR(EINVAL); >>> + } >>> + >>> + return 0; >>> +} >>> + >>> +static av_cold void uninit(AVFilterContext *ctx) >>> +{ >>> + DRContext *dr_context = ctx->priv; >>> + >>> + if (dr_context->dnn_module) { >>> + (dr_context->dnn_module->free_model)(&dr_context->model); >>> + av_freep(&dr_context->dnn_module); >>> + } >>> +} >>> + >>> +static const AVFilterPad derain_inputs[] = { >>> + { >>> + .name = "default", >>> + .type = AVMEDIA_TYPE_VIDEO, >>> + .config_props = config_inputs, >>> + .filter_frame = filter_frame, >>> + }, >>> + { NULL } >>> +}; >>> + >>> +static const AVFilterPad derain_outputs[] = { >>> + { >>> + .name = "default", >>> + .type = AVMEDIA_TYPE_VIDEO, >>> + }, >>> + { NULL } >>> +}; >>> + >>> +AVFilter ff_vf_derain = { >>> + .name = "derain", >>> + .description = NULL_IF_CONFIG_SMALL("Apply derain filter to the input."), >>> + .priv_size = sizeof(DRContext), >>> + .init = init, >>> + .uninit = uninit, >>> + .query_formats = query_formats, 
>>> + .inputs = derain_inputs, >>> + .outputs = derain_outputs, >>> + .priv_class = &derain_class, >>> + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, >>> +}; >>> + >>> -- >>> 2.17.1 >>> >>> _______________________________________________ >>> ffmpeg-devel mailing list >>> ffmpeg-devel@ffmpeg.org >>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel >>> >>> To unsubscribe, visit link above, or email >>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >> >> _______________________________________________ >> ffmpeg-devel mailing list >> ffmpeg-devel@ffmpeg.org >> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel >> >> To unsubscribe, visit link above, or email >> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Em qui, 11 de abr de 2019 às 02:55, <xwmeng@pku.edu.cn> escreveu: > > We made some modifications to the original ESPCN model, such as changing the input image from one channel (Y) to three channels (RGB) and removing the up-sampling procedure. The model file has been uploaded to https://github.com/XueweiMeng/derain_filter and you can download the training/testing dataset from http://www.icst.pku.edu.cn/struct/Projects/joint_rain_removal.html. I didn't save the PSNR/SSIM scores during the training and evaluation process, so that data will be uploaded later. > Indeed the model is not ESPCN anymore, as that would imply the use of the up-sampling layer. I think it is better to label this network as a generic convolutional network and just describe its layout (number of layers and layer dimensions). Using the ESPCN name is misleading. Please always include your training results and relevant observations when sending a patch; otherwise it is hard to evaluate your work when we do not even know what to expect from the output.
diff --git a/doc/filters.texi b/doc/filters.texi index 867607d870..0117c418b4 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -8036,6 +8036,47 @@ delogo=x=0:y=0:w=100:h=77:band=10 @end itemize +@section derain + +Remove the rain in the input image/video by applying the derain methods based on +convolutional neural networks. Supported models: + +@itemize +@item +Efficient Sub-Pixel Convolutional Neural Network model (ESPCN). +See @url{https://arxiv.org/abs/1609.05158}. +@end itemize + +Training scripts as well as scripts for model generation are provided in +the repository at @url{https://github.com/XueweiMeng/derain_filter.git}. + +The filter accepts the following options: + +@table @option +@item dnn_backend +Specify which DNN backend to use for model loading and execution. This option accepts +the following values: + +@table @samp +@item native +Native implementation of DNN loading and execution. + +@item tensorflow +TensorFlow backend. To enable this backend you +need to install the TensorFlow for C library (see +@url{https://www.tensorflow.org/install/install_c}) and configure FFmpeg with +@code{--enable-libtensorflow} +@end table + +Default value is @samp{native}. + +@item model +Set path to model file specifying network architecture and its parameters. +Note that different backends use different file formats. TensorFlow backend +can load files for both formats, while native backend can load files for only +its format. +@end table + @section deshake Attempt to fix small changes in horizontal and/or vertical shift. 
This diff --git a/libavfilter/Makefile b/libavfilter/Makefile index fef6ec5c55..7809bac565 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -194,6 +194,7 @@ OBJS-$(CONFIG_DATASCOPE_FILTER) += vf_datascope.o OBJS-$(CONFIG_DCTDNOIZ_FILTER) += vf_dctdnoiz.o OBJS-$(CONFIG_DEBAND_FILTER) += vf_deband.o OBJS-$(CONFIG_DEBLOCK_FILTER) += vf_deblock.o +OBJS-$(CONFIG_DERAIN_FILTER) += vf_derain.o OBJS-$(CONFIG_DECIMATE_FILTER) += vf_decimate.o OBJS-$(CONFIG_DECONVOLVE_FILTER) += vf_convolve.o framesync.o OBJS-$(CONFIG_DEDOT_FILTER) += vf_dedot.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index c51ae0f3c7..ee2a5b63e6 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -182,6 +182,7 @@ extern AVFilter ff_vf_datascope; extern AVFilter ff_vf_dctdnoiz; extern AVFilter ff_vf_deband; extern AVFilter ff_vf_deblock; +extern AVFilter ff_vf_derain; extern AVFilter ff_vf_decimate; extern AVFilter ff_vf_deconvolve; extern AVFilter ff_vf_dedot; diff --git a/libavfilter/vf_derain.c b/libavfilter/vf_derain.c new file mode 100644 index 0000000000..f72ae1cd3a --- /dev/null +++ b/libavfilter/vf_derain.c @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2019 Xuewei Meng + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Filter implementing image derain filter using deep convolutional networks. + * https://arxiv.org/abs/1609.05158 + * http://openaccess.thecvf.com/content_ECCV_2018/html/Xia_Li_Recurrent_Squeeze-and-Excitation_Context_ECCV_2018_paper.html + */ + +#include "libavutil/opt.h" +#include "libavformat/avio.h" +#include "libswscale/swscale.h" +#include "avfilter.h" +#include "formats.h" +#include "internal.h" +#include "dnn_interface.h" + +typedef struct DRContext { + const AVClass *class; + + char *model_filename; + DNNBackendType backend_type; + DNNModule *dnn_module; + DNNModel *model; + DNNData input; + DNNData output; +} DRContext; + +#define OFFSET(x) offsetof(DRContext, x) +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM +static const AVOption derain_options[] = { + { "dnn_backend", "DNN backend", OFFSET(backend_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "backend" }, + { "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" }, +#if (CONFIG_LIBTENSORFLOW == 1) + { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" }, +#endif + { "model", "path to model file", OFFSET(model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(derain); + +static int query_formats(AVFilterContext *ctx) +{ + AVFilterFormats *formats; + const enum AVPixelFormat pixel_fmts[] = { + AV_PIX_FMT_RGB24, + AV_PIX_FMT_NONE + }; + + formats = ff_make_format_list(pixel_fmts); + if (!formats) { + av_log(ctx, AV_LOG_ERROR, "could not create formats list\n"); + return AVERROR(ENOMEM); + } + + return ff_set_common_formats(ctx, formats); +} + +static int config_inputs(AVFilterLink *inlink) +{ + 
AVFilterContext *ctx = inlink->dst; + DRContext *dr_context = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + DNNReturnType result; + + dr_context->input.width = inlink->w; + dr_context->input.height = inlink->h; + dr_context->input.channels = 3; + + result = (dr_context->model->set_input_output)(dr_context->model->model, &dr_context->input, &dr_context->output); + if (result != DNN_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "could not set input and output for the model\n"); + return AVERROR(EIO); + } + + outlink->h = dr_context->output.height; + outlink->w = dr_context->output.width; + + return 0; +} + +static int filter_frame(AVFilterLink *inlink, AVFrame *in) +{ + AVFilterContext *ctx = inlink->dst; + AVFilterLink *outlink = ctx->outputs[0]; + DRContext *dr_context = ctx->priv; + DNNReturnType dnn_result; + + AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + av_log(ctx, AV_LOG_ERROR, "could not allocate memory for output frame\n"); + av_frame_free(&in); + return AVERROR(ENOMEM); + } + + av_frame_copy_props(out, in); + out->height = dr_context->output.height; + out->width = dr_context->output.width; + + for (int i = 0; i < out->height * out->width * 3; i++) { + dr_context->input.data[i] = in->data[0][i] / 255.0; + } + + av_frame_free(&in); + dnn_result = (dr_context->dnn_module->execute_model)(dr_context->model); + if (dnn_result != DNN_SUCCESS){ + av_log(ctx, AV_LOG_ERROR, "failed to execute model\n"); + return AVERROR(EIO); + } + + for (int i = 0; i < out->height * out->width * 3; i++) { + out->data[0][i] = (int)(dr_context->output.data[i] * 255); + } + + return ff_filter_frame(outlink, out); +} + +static av_cold int init(AVFilterContext *ctx) +{ + DRContext *dr_context = ctx->priv; + + dr_context->dnn_module = ff_get_dnn_module(dr_context->backend_type); + if (!dr_context->dnn_module) { + av_log(ctx, AV_LOG_ERROR, "could not create DNN module for requested backend\n"); + return AVERROR(ENOMEM); + } + if 
(!dr_context->model_filename) { + av_log(ctx, AV_LOG_ERROR, "model file for network is not specified\n"); + return AVERROR(EINVAL); + } + if (!dr_context->dnn_module->load_model) { + av_log(ctx, AV_LOG_ERROR, "load_model for network is not specified\n"); + return AVERROR(EINVAL); + } + + dr_context->model = (dr_context->dnn_module->load_model)(dr_context->model_filename); + if (!dr_context->model) { + av_log(ctx, AV_LOG_ERROR, "could not load DNN model\n"); + return AVERROR(EINVAL); + } + + return 0; +} + +static av_cold void uninit(AVFilterContext *ctx) +{ + DRContext *dr_context = ctx->priv; + + if (dr_context->dnn_module) { + (dr_context->dnn_module->free_model)(&dr_context->model); + av_freep(&dr_context->dnn_module); + } +} + +static const AVFilterPad derain_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_inputs, + .filter_frame = filter_frame, + }, + { NULL } +}; + +static const AVFilterPad derain_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + }, + { NULL } +}; + +AVFilter ff_vf_derain = { + .name = "derain", + .description = NULL_IF_CONFIG_SMALL("Apply derain filter to the input."), + .priv_size = sizeof(DRContext), + .init = init, + .uninit = uninit, + .query_formats = query_formats, + .inputs = derain_inputs, + .outputs = derain_outputs, + .priv_class = &derain_class, + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, +}; +