diff mbox series

[FFmpeg-devel,V2] lavf/vf_ocr: add subregion support

Message ID tencent_2634C6C2004E8BCDCD265F2EB588B8E45A06@qq.com
State Superseded
Headers show
Series [FFmpeg-devel,V2] lavf/vf_ocr: add subregion support | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished

Commit Message

Lingjiang Fang June 17, 2021, 2:58 p.m. UTC
Fix bugs in the previous patch; ping for review.
---
 doc/filters.texi     |  7 +++++++
 libavfilter/vf_ocr.c | 35 ++++++++++++++++++++++++++++++++++-
 2 files changed, 41 insertions(+), 1 deletion(-)

Comments

Gyan Doshi June 17, 2021, 4:38 p.m. UTC | #1
On 2021-06-17 20:28, Lingjiang Fang wrote:
> fix bugs of previous patch, ping for review
> ---
>   doc/filters.texi     |  7 +++++++
>   libavfilter/vf_ocr.c | 35 ++++++++++++++++++++++++++++++++++-
>   2 files changed, 41 insertions(+), 1 deletion(-)
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index da8f7d7726..a955cf46e0 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -15451,6 +15451,13 @@ Set character whitelist.
>   
>   @item blacklist
>   Set character blacklist.
> +
> +@item x, y
> +Set top point position of subregion, not support expression now

This isn't idiomatic. And the docs should state what the option accepts, 
not what it doesn't.

Change to

     Set position of top-left corner, in pixels.


> +
> +@item w, h
> +Set width and height of subregion
> +
>   @end table
>   
>   The filter exports recognized text as the frame metadata @code{lavfi.ocr.text}.
> diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c
> index 6de474025a..e96dce2d87 100644
> --- a/libavfilter/vf_ocr.c
> +++ b/libavfilter/vf_ocr.c
> @@ -33,6 +33,8 @@ typedef struct OCRContext {
>       char *language;
>       char *whitelist;
>       char *blacklist;
> +    int x, y;
> +    int w, h;
>   
>       TessBaseAPI *tess;
>   } OCRContext;
> @@ -45,6 +47,10 @@ static const AVOption ocr_options[] = {
>       { "language",  "set language",            OFFSET(language),  AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS },
>       { "whitelist", "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ "}, 0, 0, FLAGS },
>       { "blacklist", "set character blacklist", OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""},    0, 0, FLAGS },
> +    { "x",         "top x of sub region",     OFFSET(x),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
> +    { "y",         "top y of sub region",     OFFSET(y),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
> +    { "w",         "width of sub region",     OFFSET(w),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
> +    { "h",         "height of sub region",    OFFSET(h),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
>       { NULL }
>   };
>   
> @@ -93,6 +99,21 @@ static int query_formats(AVFilterContext *ctx)
>       return ff_set_common_formats(ctx, fmts_list);
>   }
>   
> +static void check_fix(int *x, int *y, int *w, int *h, int pic_w, int pic_h)
> +{
> +    // 0 <= x < pic_w
> +    if (*x >= pic_w)
> +        *x = 0;
> +    // 0 <= y < pic_h
> +    if (*y >= pic_h)
> +        *y = 0;
> +
> +    if (*w == 0 || *w + *x > pic_w)
> +        *w = pic_w - *x;
> +    if (*h == 0 || *h + *y > pic_h)
> +        *h = pic_h - *y;
> +}
> +
>   static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>   {
>       AVDictionary **metadata = &in->metadata;
> @@ -102,8 +123,20 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>       char *result;
>       int *confs;
>   
> +    // TODO: support expression
> +    int x = s->x;
> +    int y = s->y;
> +    int w = s->w;
> +    int h = s->h;
> +    check_fix(&x, &y, &w, &h, in->width, in->height);
> +    if ( x != s->x || y != s->y  ||
> +        (s->w != 0 && w != s->w) || (s->h != 0 && h != s->h)) {
> +        av_log(s, AV_LOG_WARNING, "config error, subregion changed to x=%d, y=%d, w=%d, h=%d\n",
> +                                                                    x, y, w, h);
> +    }
> +
>       result = TessBaseAPIRect(s->tess, in->data[0], 1,
> -                             in->linesize[0], 0, 0, in->width, in->height);
> +                             in->linesize[0], x, y, w, h);
>       confs = TessBaseAPIAllWordConfidences(s->tess);
>       av_dict_set(metadata, "lavfi.ocr.text", result, 0);
>       for (int i = 0; confs[i] != -1; i++) {
Lingjiang Fang June 18, 2021, 5:42 a.m. UTC | #2
On Thu, 17 Jun 2021 22:08:52 +0530
Gyan Doshi <ffmpeg@gyani.pro> wrote:

> On 2021-06-17 20:28, Lingjiang Fang wrote:
> > fix bugs of previous patch, ping for review
> > ---
> >   doc/filters.texi     |  7 +++++++
> >   libavfilter/vf_ocr.c | 35 ++++++++++++++++++++++++++++++++++-
> >   2 files changed, 41 insertions(+), 1 deletion(-)
> >
> > diff --git a/doc/filters.texi b/doc/filters.texi
> > index da8f7d7726..a955cf46e0 100644
> > --- a/doc/filters.texi
> > +++ b/doc/filters.texi
> > @@ -15451,6 +15451,13 @@ Set character whitelist.
> >   
> >   @item blacklist
> >   Set character blacklist.
> > +
> > +@item x, y
> > +Set top point position of subregion, not support expression now  
> 
> This isn't idiomatic. And the docs should state what the option
> accepts, not what it doesn't.
> 
> Change to
> 
>      Set position of top-left corner, in pixels.
> 
Thank you for the correction.

If there are no more comments, I will send v3 later.

> 
> > +
> > +@item w, h
> > +Set width and height of subregion
> > +
> >   @end table
> >   
> >   The filter exports recognized text as the frame metadata
> > @code{lavfi.ocr.text}. diff --git a/libavfilter/vf_ocr.c
> > b/libavfilter/vf_ocr.c index 6de474025a..e96dce2d87 100644
> > --- a/libavfilter/vf_ocr.c
> > +++ b/libavfilter/vf_ocr.c
> > @@ -33,6 +33,8 @@ typedef struct OCRContext {
> >       char *language;
> >       char *whitelist;
> >       char *blacklist;
> > +    int x, y;
> > +    int w, h;
> >   
> >       TessBaseAPI *tess;
> >   } OCRContext;
> > @@ -45,6 +47,10 @@ static const AVOption ocr_options[] = {
> >       { "language",  "set language",            OFFSET(language),
> > AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS }, { "whitelist",
> > "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING,
> > {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~
> > "}, 0, 0, FLAGS }, { "blacklist", "set character blacklist",
> > OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""},    0, 0, FLAGS },
> > +    { "x",         "top x of sub region",     OFFSET(x),
> > AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
> > +    { "y",         "top y of sub region",     OFFSET(y),
> > AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
> > +    { "w",         "width of sub region",     OFFSET(w),
> > AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
> > +    { "h",         "height of sub region",    OFFSET(h),
> > AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS }, { NULL }
> >   };
> >   
> > @@ -93,6 +99,21 @@ static int query_formats(AVFilterContext *ctx)
> >       return ff_set_common_formats(ctx, fmts_list);
> >   }
> >   
> > +static void check_fix(int *x, int *y, int *w, int *h, int pic_w,
> > int pic_h) +{
> > +    // 0 <= x < pic_w
> > +    if (*x >= pic_w)
> > +        *x = 0;
> > +    // 0 <= y < pic_h
> > +    if (*y >= pic_h)
> > +        *y = 0;
> > +
> > +    if (*w == 0 || *w + *x > pic_w)
> > +        *w = pic_w - *x;
> > +    if (*h == 0 || *h + *y > pic_h)
> > +        *h = pic_h - *y;
> > +}
> > +
> >   static int filter_frame(AVFilterLink *inlink, AVFrame *in)
> >   {
> >       AVDictionary **metadata = &in->metadata;
> > @@ -102,8 +123,20 @@ static int filter_frame(AVFilterLink *inlink,
> > AVFrame *in) char *result;
> >       int *confs;
> >   
> > +    // TODO: support expression
> > +    int x = s->x;
> > +    int y = s->y;
> > +    int w = s->w;
> > +    int h = s->h;
> > +    check_fix(&x, &y, &w, &h, in->width, in->height);
> > +    if ( x != s->x || y != s->y  ||
> > +        (s->w != 0 && w != s->w) || (s->h != 0 && h != s->h)) {
> > +        av_log(s, AV_LOG_WARNING, "config error, subregion changed
> > to x=%d, y=%d, w=%d, h=%d\n",
> > +
> >  x, y, w, h);
> > +    }
> > +
> >       result = TessBaseAPIRect(s->tess, in->data[0], 1,
> > -                             in->linesize[0], 0, 0, in->width,
> > in->height);
> > +                             in->linesize[0], x, y, w, h);
> >       confs = TessBaseAPIAllWordConfidences(s->tess);
> >       av_dict_set(metadata, "lavfi.ocr.text", result, 0);
> >       for (int i = 0; confs[i] != -1; i++) {  
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".



Regards,
Lingjiang Fang
diff mbox series

Patch

diff --git a/doc/filters.texi b/doc/filters.texi
index da8f7d7726..a955cf46e0 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -15451,6 +15451,13 @@  Set character whitelist.
 
 @item blacklist
 Set character blacklist.
+
+@item x, y
+Set position of the top-left corner of the subregion, in pixels.
+
+@item w, h
+Set width and height of the subregion, in pixels. A value of 0 (the default) extends the subregion to the frame edge.
+
 @end table
 
 The filter exports recognized text as the frame metadata @code{lavfi.ocr.text}.
diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c
index 6de474025a..e96dce2d87 100644
--- a/libavfilter/vf_ocr.c
+++ b/libavfilter/vf_ocr.c
@@ -33,6 +33,8 @@  typedef struct OCRContext {
     char *language;
     char *whitelist;
     char *blacklist;
+    int x, y;
+    int w, h;
 
     TessBaseAPI *tess;
 } OCRContext;
@@ -45,6 +47,10 @@  static const AVOption ocr_options[] = {
     { "language",  "set language",            OFFSET(language),  AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS },
     { "whitelist", "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ "}, 0, 0, FLAGS },
     { "blacklist", "set character blacklist", OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""},    0, 0, FLAGS },
+    { "x",         "top x of sub region",     OFFSET(x),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
+    { "y",         "top y of sub region",     OFFSET(y),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
+    { "w",         "width of sub region",     OFFSET(w),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
+    { "h",         "height of sub region",    OFFSET(h),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
     { NULL }
 };
 
@@ -93,6 +99,21 @@  static int query_formats(AVFilterContext *ctx)
     return ff_set_common_formats(ctx, fmts_list);
 }
 
+/* Fit the subregion into a pic_w x pic_h frame: an origin outside the frame is
+ * reset to 0; a size of 0 or one overhanging the edge becomes "up to the edge".
+ * Sizes are compared as *w > pic_w - *x because *w + *x can overflow int. */
+static void check_fix(int *x, int *y, int *w, int *h, int pic_w, int pic_h)
+{
+    if (*x >= pic_w)
+        *x = 0;
+    if (*y >= pic_h)
+        *y = 0;
+    if (*w == 0 || *w > pic_w - *x)
+        *w = pic_w - *x;
+    if (*h == 0 || *h > pic_h - *y)
+        *h = pic_h - *y;
+}
+
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
     AVDictionary **metadata = &in->metadata;
@@ -102,8 +123,20 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     char *result;
     int *confs;
 
+    // TODO: support expression
+    int x = s->x;
+    int y = s->y;
+    int w = s->w;
+    int h = s->h;
+    check_fix(&x, &y, &w, &h, in->width, in->height);
+    if ( x != s->x || y != s->y  ||
+        (s->w != 0 && w != s->w) || (s->h != 0 && h != s->h)) {
+        av_log(s, AV_LOG_WARNING, "config error, subregion changed to x=%d, y=%d, w=%d, h=%d\n",
+                                                                    x, y, w, h);
+    }
+
     result = TessBaseAPIRect(s->tess, in->data[0], 1,
-                             in->linesize[0], 0, 0, in->width, in->height);
+                             in->linesize[0], x, y, w, h);
     confs = TessBaseAPIAllWordConfidences(s->tess);
     av_dict_set(metadata, "lavfi.ocr.text", result, 0);
     for (int i = 0; confs[i] != -1; i++) {