[FFmpeg-devel,v2] lavf/vf_find_rect: add the dual input support function

Message ID	20190627021235.2360-1-lance.lmwang@gmail.com
State	Superseded
Headers	show Return-Path: <ffmpeg-devel-bounces@ffmpeg.org> From: lance.lmwang@gmail.com To: ffmpeg-devel@ffmpeg.org Date: Thu, 27 Jun 2019 10:12:35 +0800 Message-Id: <20190627021235.2360-1-lance.lmwang@gmail.com> In-Reply-To: <20190627013457.466-1-lance.lmwang@gmail.com> References: <20190627013457.466-1-lance.lmwang@gmail.com> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH v2] lavf/vf_find_rect: add the dual input support function Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Cc: Limin Wang <lance.lmwang@gmail.com> Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

diff --git a/doc/filters.texi b/doc/filters.texi index 2d9af46a6b..ceb66aba3d 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -10156,12 +10156,14 @@ Set color for pixels in fixed mode. Default is @var{black}. Find a rectangular object +This filter takes two video inputs: the first input is considered +the "main" source and is passed unchanged to the output. The "second" +input is used as the rectangular object to search for; the "second" +input is automatically converted to gray8 format. + It accepts the following options: @table @option -@item object -Filepath of the object image, needs to be in gray8. - @item threshold Detection threshold, default is 0.5. @@ -10178,7 +10180,7 @@ Specifies the rectangle in which to search. @item Cover a rectangular object by the supplied image of a given video using @command{ffmpeg}: @example -ffmpeg -i file.ts -vf find_rect=newref.pgm,cover_rect=cover.jpg:mode=cover new.mkv +ffmpeg -i file.ts -i newref.pgm -filter_complex find_rect,cover_rect=cover.jpg:mode=cover new.mkv @end example @end itemize @@ -10212,7 +10214,7 @@ Default value is @var{blur}. @item Cover a rectangular object by the supplied image of a given video using @command{ffmpeg}: @example -ffmpeg -i file.ts -vf find_rect=newref.pgm,cover_rect=cover.jpg:mode=cover new.mkv +ffmpeg -i file.ts -i newref.pgm -filter_complex find_rect,cover_rect=cover.jpg:mode=cover new.mkv @end example @end itemize diff --git a/libavfilter/vf_find_rect.c b/libavfilter/vf_find_rect.c index d7e6579af7..055d2d5f4a 100644 --- a/libavfilter/vf_find_rect.c +++ b/libavfilter/vf_find_rect.c @@ -18,13 +18,10 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
*/ -/** - * @todo switch to dualinput - */ - #include "libavutil/avassert.h" #include "libavutil/imgutils.h" #include "libavutil/opt.h" +#include "framesync.h" #include "internal.h" #include "lavfutils.h" @@ -36,9 +33,9 @@ typedef struct FOCContext { float threshold; int mipmaps; int xmin, ymin, xmax, ymax; - char *obj_filename; int last_x, last_y; - AVFrame *obj_frame; + FFFrameSync fs; + AVFrame *needle_frame[MAX_MIPMAPS]; AVFrame *haystack_frame[MAX_MIPMAPS]; } FOCContext; @@ -46,7 +43,6 @@ typedef struct FOCContext { #define OFFSET(x) offsetof(FOCContext, x) #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM static const AVOption find_rect_options[] = { - { "object", "object bitmap filename", OFFSET(obj_filename), AV_OPT_TYPE_STRING, {.str = NULL}, .flags = FLAGS }, { "threshold", "set threshold", OFFSET(threshold), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1.0, FLAGS }, { "mipmaps", "set mipmaps", OFFSET(mipmaps), AV_OPT_TYPE_INT, {.i64 = 3}, 1, MAX_MIPMAPS, FLAGS }, { "xmin", "", OFFSET(xmin), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS }, @@ -56,17 +52,32 @@ static const AVOption find_rect_options[] = { { NULL } }; -AVFILTER_DEFINE_CLASS(find_rect); +FRAMESYNC_DEFINE_CLASS(find_rect, FOCContext, fs); static int query_formats(AVFilterContext *ctx) { - static const enum AVPixelFormat pix_fmts[] = { - AV_PIX_FMT_YUV420P, - AV_PIX_FMT_YUVJ420P, - AV_PIX_FMT_NONE - }; + static const enum AVPixelFormat in_fmts[] = {AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_NONE}; + static const enum AVPixelFormat obj_fmts[] = {AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE}; + static const enum AVPixelFormat out_fmts[] = {AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_NONE}; + int ret; + AVFilterFormats *in = ff_make_format_list(in_fmts); + AVFilterFormats *obj = ff_make_format_list(obj_fmts); + AVFilterFormats *out = ff_make_format_list(out_fmts); + + if (!in || !obj || !out) { + av_freep(&in); + av_freep(&obj); + av_freep(&out); + return AVERROR(ENOMEM); + } + + if 
((ret = ff_formats_ref(in , &ctx->inputs[0]->out_formats)) < 0 || + (ret = ff_formats_ref(obj , &ctx->inputs[1]->out_formats)) < 0 || + (ret = ff_formats_ref(out , &ctx->outputs[0]->in_formats)) < 0) + return ret; + + return 0; - return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts)); } static AVFrame *downscale(AVFrame *in) @@ -140,19 +151,54 @@ static float compare(const AVFrame *haystack, const AVFrame *obj, int offx, int return 1 - fabs(c); } -static int config_input(AVFilterLink *inlink) +static int config_main_input(AVFilterLink *inlink) +{ + AVFilterContext *ctx = inlink->dst; + + av_log(ctx, AV_LOG_DEBUG, "main input width: %d, height: %d\n", inlink->w, inlink->h); + return 0; +} + +static int config_find_rect_input(AVFilterLink *inlink) { AVFilterContext *ctx = inlink->dst; FOCContext *foc = ctx->priv; + AVFilterLink *mainlink = ctx->inputs[0]; + + if (inlink->format != AV_PIX_FMT_GRAY8) { + av_log(ctx, AV_LOG_ERROR, "object input is not a grayscale input: %s\n", + av_get_pix_fmt_name(inlink->format)); + return AVERROR(EINVAL); + } if (foc->xmax <= 0) - foc->xmax = inlink->w - foc->obj_frame->width; + foc->xmax = mainlink->w - inlink->w; if (foc->ymax <= 0) - foc->ymax = inlink->h - foc->obj_frame->height; + foc->ymax = mainlink->h - inlink->h; + av_log(ctx, AV_LOG_DEBUG, "object input width: %d, height: %d\n", inlink->w, inlink->h); return 0; } +static int config_output(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + FOCContext *foc = ctx->priv; + int ret; + AVFilterLink *mainlink = ctx->inputs[0]; + + if ((ret = ff_framesync_init_dualinput(&foc->fs, ctx)) < 0) + return ret; + + outlink->w = mainlink->w; + outlink->h = mainlink->h; + outlink->time_base = mainlink->time_base; + outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio; + outlink->frame_rate = mainlink->frame_rate; + + return ff_framesync_configure(&foc->fs); +} + static float search(FOCContext *foc, int pass, int maxpass, int xmin, int xmax, int ymin, int 
ymax, int *best_x, int *best_y, float best_score) { int x, y; @@ -180,19 +226,33 @@ static float search(FOCContext *foc, int pass, int maxpass, int xmin, int xmax, return best_score; } -static int filter_frame(AVFilterLink *inlink, AVFrame *in) +static int do_find_rect(FFFrameSync *fs) { - AVFilterContext *ctx = inlink->dst; + AVFilterContext *ctx = fs->parent; + AVFrame *mainframe, *second; FOCContext *foc = ctx->priv; float best_score; int best_x, best_y; - int i; + int ret, i; + + ret = ff_framesync_dualinput_get_writable(fs, &mainframe, &second); + if (ret < 0) + return ret; + if (!second) + return ff_filter_frame(ctx->outputs[0], mainframe); - foc->haystack_frame[0] = av_frame_clone(in); + foc->haystack_frame[0] = av_frame_clone(mainframe); for (i=1; i<foc->mipmaps; i++) { foc->haystack_frame[i] = downscale(foc->haystack_frame[i-1]); } + foc->needle_frame[0] = av_frame_clone(second); + for (i = 1; i < foc->mipmaps; i++) { + foc->needle_frame[i] = downscale(foc->needle_frame[i-1]); + if (!foc->needle_frame[i]) + return AVERROR(ENOMEM); + } + best_score = search(foc, 0, 0, FFMAX(foc->xmin, foc->last_x - 8), FFMIN(foc->xmax, foc->last_x + 8), @@ -207,22 +267,25 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) av_frame_free(&foc->haystack_frame[i]); } + for (i = 1; i < foc->mipmaps; i++) { + av_frame_free(&foc->needle_frame[i]); + } + if (best_score > foc->threshold) { - return ff_filter_frame(ctx->outputs[0], in); + return ff_filter_frame(ctx->outputs[0], mainframe); } av_log(ctx, AV_LOG_DEBUG, "Found at %d %d score %f\n", best_x, best_y, best_score); foc->last_x = best_x; foc->last_y = best_y; - av_frame_make_writable(in); + av_frame_make_writable(mainframe); - av_dict_set_int(&in->metadata, "lavfi.rect.w", foc->obj_frame->width, 0); - av_dict_set_int(&in->metadata, "lavfi.rect.h", foc->obj_frame->height, 0); - av_dict_set_int(&in->metadata, "lavfi.rect.x", best_x, 0); - av_dict_set_int(&in->metadata, "lavfi.rect.y", best_y, 0); - - return 
ff_filter_frame(ctx->outputs[0], in); + av_dict_set_int(&mainframe->metadata, "lavfi.rect.w", second->width, 0); + av_dict_set_int(&mainframe->metadata, "lavfi.rect.h", second->height, 0); + av_dict_set_int(&mainframe->metadata, "lavfi.rect.x", best_x, 0); + av_dict_set_int(&mainframe->metadata, "lavfi.rect.y", best_y, 0); + return ff_filter_frame(ctx->outputs[0], mainframe); } static av_cold void uninit(AVFilterContext *ctx) @@ -234,52 +297,32 @@ static av_cold void uninit(AVFilterContext *ctx) av_frame_free(&foc->needle_frame[i]); av_frame_free(&foc->haystack_frame[i]); } - - if (foc->obj_frame) - av_freep(&foc->obj_frame->data[0]); - av_frame_free(&foc->obj_frame); } static av_cold int init(AVFilterContext *ctx) { FOCContext *foc = ctx->priv; - int ret, i; - - if (!foc->obj_filename) { - av_log(ctx, AV_LOG_ERROR, "object filename not set\n"); - return AVERROR(EINVAL); - } - - foc->obj_frame = av_frame_alloc(); - if (!foc->obj_frame) - return AVERROR(ENOMEM); - - if ((ret = ff_load_image(foc->obj_frame->data, foc->obj_frame->linesize, - &foc->obj_frame->width, &foc->obj_frame->height, - &foc->obj_frame->format, foc->obj_filename, ctx)) < 0) - return ret; - - if (foc->obj_frame->format != AV_PIX_FMT_GRAY8) { - av_log(ctx, AV_LOG_ERROR, "object image is not a grayscale image\n"); - return AVERROR(EINVAL); - } - - foc->needle_frame[0] = av_frame_clone(foc->obj_frame); - for (i = 1; i < foc->mipmaps; i++) { - foc->needle_frame[i] = downscale(foc->needle_frame[i-1]); - if (!foc->needle_frame[i]) - return AVERROR(ENOMEM); - } + foc->fs.on_event = do_find_rect; return 0; } +static int activate(AVFilterContext *ctx) +{ + FOCContext *foc = ctx->priv; + return ff_framesync_activate(&foc->fs); +} + static const AVFilterPad foc_inputs[] = { { - .name = "default", + .name = "main", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_main_input, + }, + { + .name = "object", .type = AVMEDIA_TYPE_VIDEO, - .config_props = config_input, - .filter_frame = filter_frame, + 
.config_props = config_find_rect_input, }, { NULL } }; @@ -288,6 +331,7 @@ static const AVFilterPad foc_outputs[] = { { .name = "default", .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_output, }, { NULL } }; @@ -296,7 +340,9 @@ AVFilter ff_vf_find_rect = { .name = "find_rect", .description = NULL_IF_CONFIG_SMALL("Find a user specified object."), .priv_size = sizeof(FOCContext), + .preinit = find_rect_framesync_preinit, .init = init, + .activate = activate, .uninit = uninit, .query_formats = query_formats, .inputs = foc_inputs,

[FFmpeg-devel,v2] lavf/vf_find_rect: add the dual input support function

Commit Message

Comments

Patch