[FFmpeg-devel,V4] lavf/vf_ocr: add subregion support

Message ID	tencent_A2077191B5510CFE819816959B1DBE516A07@qq.com
State	Superseded
Headers	show Delivered-To: andriy.gelman@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Message-ID: <tencent_A2077191B5510CFE819816959B1DBE516A07@qq.com> From: Lingjiang Fang <vacingfang@foxmail.com> To: ffmpeg-devel@ffmpeg.org Date: Mon, 12 Jul 2021 13:11:28 +0800 MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH V4] lavf/vf_ocr: add subregion support Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Cc: Lingjiang Fang <vacingfang@foxmail.com> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org> Content-Length: 4337
Series	[FFmpeg-devel,V4] lavf/vf_ocr: add subregion support \| expand [FFmpeg-devel,V4] lavf/vf_ocr: add subregion support

Message ID

tencent_A2077191B5510CFE819816959B1DBE516A07@qq.com

State

Superseded

Headers

Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org
 designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100;
Message-ID: <tencent_A2077191B5510CFE819816959B1DBE516A07@qq.com>
From: Lingjiang Fang <vacingfang@foxmail.com>
To: ffmpeg-devel@ffmpeg.org
Date: Mon, 12 Jul 2021 13:11:28 +0800
MIME-Version: 1.0
Subject: [FFmpeg-devel] [PATCH V4] lavf/vf_ocr: add subregion support
Precedence: list
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Cc: Lingjiang Fang <vacingfang@foxmail.com>
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Content-Length: 4337

Series

[FFmpeg-devel,V4] lavf/vf_ocr: add subregion support | expand

Checks

Context	Check	Description
andriy/x86_make	success	Make finished
andriy/x86_make_fate	success	Make fate finished
andriy/PPC64_make	success	Make finished
andriy/PPC64_make_fate	success	Make fate finished

Context

Check

Description

andriy/x86_make

success

Make finished

andriy/x86_make_fate

success

Make fate finished

andriy/PPC64_make

success

Make finished

andriy/PPC64_make_fate

success

Make fate finished

Commit Message

Lingjiang Fang July 12, 2021, 5:11 a.m. UTC

Add x, y, w and h options to restrict OCR to a subregion of the frame. This version addresses the review comments from Steven Liu.
---
 doc/filters.texi     |  8 ++++++++
 libavfilter/vf_ocr.c | 45 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index d991c06628..f41ba0ce46 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -15457,6 +15457,14 @@  Set character whitelist.
 
 @item blacklist
 Set character blacklist.
+
+@item x, y
+Set the top-left corner of the subregion, in pixels. Default is (0,0).
+
+@item w, h
+Set the width and height of the subregion, in pixels. The default, 0,
+extends the subregion from the top-left corner to the frame's bottom-right.
+
 @end table
 
 The filter exports recognized text as the frame metadata @code{lavfi.ocr.text}.
diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c
index 6de474025a..55f04b6592 100644
--- a/libavfilter/vf_ocr.c
+++ b/libavfilter/vf_ocr.c
@@ -33,6 +33,8 @@  typedef struct OCRContext {
     char *language;
     char *whitelist;
     char *blacklist;
+    int x, y, x_in, y_in;
+    int w, h, w_in, h_in;
 
     TessBaseAPI *tess;
 } OCRContext;
@@ -45,6 +47,10 @@  static const AVOption ocr_options[] = {
     { "language",  "set language",            OFFSET(language),  AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS },
     { "whitelist", "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ "}, 0, 0, FLAGS },
     { "blacklist", "set character blacklist", OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""},    0, 0, FLAGS },
+    { "x",         "top x of sub region",     OFFSET(x),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
+    { "y",         "top y of sub region",     OFFSET(y),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
+    { "w",         "width of sub region",     OFFSET(w),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
+    { "h",         "height of sub region",    OFFSET(h),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
     { NULL }
 };
 
@@ -93,6 +99,41 @@  static int query_formats(AVFilterContext *ctx)
     return ff_set_common_formats(ctx, fmts_list);
 }
 
+/**
+ * Clamp a requested subregion so that it lies inside the picture.
+ *
+ * An origin coordinate at or beyond the picture edge is reset to 0. A
+ * width/height of 0, or one that reaches past the picture edge, is replaced
+ * by the distance from the origin to that edge.
+ *
+ * @param x,y          subregion origin, updated in place; expected to be >= 0
+ * @param w,h          subregion size, updated in place; expected to be >= 0
+ * @param pic_w,pic_h  picture dimensions
+ */
+static void check_fix(int *x, int *y, int *w, int *h, int pic_w, int pic_h)
+{
+    // 0 <= x < pic_w
+    if (*x >= pic_w)
+        *x = 0;
+    // 0 <= y < pic_h
+    if (*y >= pic_h)
+        *y = 0;
+
+    // Compare against the remaining space rather than computing *w + *x:
+    // the operands may be as large as INT_MAX, so the sum could overflow.
+    if (*w == 0 || *w > pic_w - *x)
+        *w = pic_w - *x;
+    if (*h == 0 || *h > pic_h - *y)
+        *h = pic_h - *y;
+}
+
+/**
+ * Input pad configuration callback: derive the effective OCR subregion.
+ *
+ * Copies the user-supplied x/y/w/h into the *_in fields, fits them to the
+ * input link dimensions with check_fix(), and logs a warning when the result
+ * differs from the request. A width/height left at 0 is not reported.
+ *
+ * @return always 0
+ */
+static int config_input(AVFilterLink *inlink)
+{
+    OCRContext *ocr = inlink->dst->priv;
+    int moved, resized;
+
+    ocr->x_in = ocr->x;
+    ocr->y_in = ocr->y;
+    ocr->w_in = ocr->w;
+    ocr->h_in = ocr->h;
+    check_fix(&ocr->x_in, &ocr->y_in, &ocr->w_in, &ocr->h_in,
+              inlink->w, inlink->h);
+
+    moved   = ocr->x_in != ocr->x || ocr->y_in != ocr->y;
+    resized = (ocr->w != 0 && ocr->w_in != ocr->w) ||
+              (ocr->h != 0 && ocr->h_in != ocr->h);
+    if (!moved && !resized)
+        return 0;
+
+    av_log(ocr, AV_LOG_WARNING, "config error, subregion changed to "
+                                "x=%d, y=%d, w=%d, h=%d\n",
+           ocr->x_in, ocr->y_in, ocr->w_in, ocr->h_in);
+    return 0;
+}
+
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
     AVDictionary **metadata = &in->metadata;
@@ -102,8 +143,9 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     char *result;
     int *confs;
 
+    // TODO(vacing): support expression
     result = TessBaseAPIRect(s->tess, in->data[0], 1,
-                             in->linesize[0], 0, 0, in->width, in->height);
+                             in->linesize[0], s->x_in, s->y_in, s->w_in, s->h_in);
     confs = TessBaseAPIAllWordConfidences(s->tess);
     av_dict_set(metadata, "lavfi.ocr.text", result, 0);
     for (int i = 0; confs[i] != -1; i++) {
@@ -134,6 +176,7 @@  static const AVFilterPad ocr_inputs[] = {
         .name         = "default",
         .type         = AVMEDIA_TYPE_VIDEO,
         .filter_frame = filter_frame,
+        .config_props = config_input,
     },
     { NULL }
 };

[FFmpeg-devel,V4] lavf/vf_ocr: add subregion support

Checks

Commit Message

Patch