Message ID | tencent_2634C6C2004E8BCDCD265F2EB588B8E45A06@qq.com |
---|---|
State | Superseded |
Headers | show |
Series | [FFmpeg-devel,V2] lavf/vf_ocr: add subregion support | expand |
Context | Check | Description |
---|---|---|
andriy/x86_make | success | Make finished |
andriy/x86_make_fate | success | Make fate finished |
On 2021-06-17 20:28, Lingjiang Fang wrote: > fix bugs of previous patch, ping for review > --- > doc/filters.texi | 7 +++++++ > libavfilter/vf_ocr.c | 35 ++++++++++++++++++++++++++++++++++- > 2 files changed, 41 insertions(+), 1 deletion(-) > > diff --git a/doc/filters.texi b/doc/filters.texi > index da8f7d7726..a955cf46e0 100644 > --- a/doc/filters.texi > +++ b/doc/filters.texi > @@ -15451,6 +15451,13 @@ Set character whitelist. > > @item blacklist > Set character blacklist. > + > +@item x, y > +Set top point position of subregion, not support expression now This isn't idiomatic. And the docs should state what the option accepts, not what it doesn't. Change to Set position of top-left corner, in pixels. > + > +@item w, h > +Set width and height of subregion > + > @end table > > The filter exports recognized text as the frame metadata @code{lavfi.ocr.text}. > diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c > index 6de474025a..e96dce2d87 100644 > --- a/libavfilter/vf_ocr.c > +++ b/libavfilter/vf_ocr.c > @@ -33,6 +33,8 @@ typedef struct OCRContext { > char *language; > char *whitelist; > char *blacklist; > + int x, y; > + int w, h; > > TessBaseAPI *tess; > } OCRContext; > @@ -45,6 +47,10 @@ static const AVOption ocr_options[] = { > { "language", "set language", OFFSET(language), AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS }, > { "whitelist", "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ "}, 0, 0, FLAGS }, > { "blacklist", "set character blacklist", OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""}, 0, 0, FLAGS }, > + { "x", "top x of sub region", OFFSET(x), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, > + { "y", "top y of sub region", OFFSET(y), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, > + { "w", "width of sub region", OFFSET(w), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, > + { "h", "height of sub region", OFFSET(h), 
AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, > { NULL } > }; > > @@ -93,6 +99,21 @@ static int query_formats(AVFilterContext *ctx) > return ff_set_common_formats(ctx, fmts_list); > } > > +static void check_fix(int *x, int *y, int *w, int *h, int pic_w, int pic_h) > +{ > + // 0 <= x < pic_w > + if (*x >= pic_w) > + *x = 0; > + // 0 <= y < pic_h > + if (*y >= pic_h) > + *y = 0; > + > + if (*w == 0 || *w + *x > pic_w) > + *w = pic_w - *x; > + if (*h == 0 || *h + *y > pic_h) > + *h = pic_h - *y; > +} > + > static int filter_frame(AVFilterLink *inlink, AVFrame *in) > { > AVDictionary **metadata = &in->metadata; > @@ -102,8 +123,20 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) > char *result; > int *confs; > > + // TODO: support expression > + int x = s->x; > + int y = s->y; > + int w = s->w; > + int h = s->h; > + check_fix(&x, &y, &w, &h, in->width, in->height); > + if ( x != s->x || y != s->y || > + (s->w != 0 && w != s->w) || (s->h != 0 && h != s->h)) { > + av_log(s, AV_LOG_WARNING, "config error, subregion changed to x=%d, y=%d, w=%d, h=%d\n", > + x, y, w, h); > + } > + > result = TessBaseAPIRect(s->tess, in->data[0], 1, > - in->linesize[0], 0, 0, in->width, in->height); > + in->linesize[0], x, y, w, h); > confs = TessBaseAPIAllWordConfidences(s->tess); > av_dict_set(metadata, "lavfi.ocr.text", result, 0); > for (int i = 0; confs[i] != -1; i++) {
On Thu, 17 Jun 2021 22:08:52 +0530 Gyan Doshi <ffmpeg@gyani.pro> wrote: > On 2021-06-17 20:28, Lingjiang Fang wrote: > > fix bugs of previous patch, ping for review > > --- > > doc/filters.texi | 7 +++++++ > > libavfilter/vf_ocr.c | 35 ++++++++++++++++++++++++++++++++++- > > 2 files changed, 41 insertions(+), 1 deletion(-) > > > > diff --git a/doc/filters.texi b/doc/filters.texi > > index da8f7d7726..a955cf46e0 100644 > > --- a/doc/filters.texi > > +++ b/doc/filters.texi > > @@ -15451,6 +15451,13 @@ Set character whitelist. > > > > @item blacklist > > Set character blacklist. > > + > > +@item x, y > > +Set top point position of subregion, not support expression now > > This isn't idiomatic. And the docs should state what the option > accepts, not what it doesn't. > > Change to > > Set position of top-left corner, in pixels. > Thank you for the correction. If there are no more comments, I will send v3 later. > > > + > > +@item w, h > > +Set width and height of subregion > > + > > @end table > > > > The filter exports recognized text as the frame metadata > > @code{lavfi.ocr.text}. 
diff --git a/libavfilter/vf_ocr.c > > b/libavfilter/vf_ocr.c index 6de474025a..e96dce2d87 100644 > > --- a/libavfilter/vf_ocr.c > > +++ b/libavfilter/vf_ocr.c > > @@ -33,6 +33,8 @@ typedef struct OCRContext { > > char *language; > > char *whitelist; > > char *blacklist; > > + int x, y; > > + int w, h; > > > > TessBaseAPI *tess; > > } OCRContext; > > @@ -45,6 +47,10 @@ static const AVOption ocr_options[] = { > > { "language", "set language", OFFSET(language), > > AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS }, { "whitelist", > > "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, > > {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ > > "}, 0, 0, FLAGS }, { "blacklist", "set character blacklist", > > OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""}, 0, 0, FLAGS }, > > + { "x", "top x of sub region", OFFSET(x), > > AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, > > + { "y", "top y of sub region", OFFSET(y), > > AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, > > + { "w", "width of sub region", OFFSET(w), > > AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, > > + { "h", "height of sub region", OFFSET(h), > > AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, { NULL } > > }; > > > > @@ -93,6 +99,21 @@ static int query_formats(AVFilterContext *ctx) > > return ff_set_common_formats(ctx, fmts_list); > > } > > > > +static void check_fix(int *x, int *y, int *w, int *h, int pic_w, > > int pic_h) +{ > > + // 0 <= x < pic_w > > + if (*x >= pic_w) > > + *x = 0; > > + // 0 <= y < pic_h > > + if (*y >= pic_h) > > + *y = 0; > > + > > + if (*w == 0 || *w + *x > pic_w) > > + *w = pic_w - *x; > > + if (*h == 0 || *h + *y > pic_h) > > + *h = pic_h - *y; > > +} > > + > > static int filter_frame(AVFilterLink *inlink, AVFrame *in) > > { > > AVDictionary **metadata = &in->metadata; > > @@ -102,8 +123,20 @@ static int filter_frame(AVFilterLink *inlink, > > AVFrame *in) char *result; > > int *confs; > > > > + // TODO: support 
expression > > + int x = s->x; > > + int y = s->y; > > + int w = s->w; > > + int h = s->h; > > + check_fix(&x, &y, &w, &h, in->width, in->height); > > + if ( x != s->x || y != s->y || > > + (s->w != 0 && w != s->w) || (s->h != 0 && h != s->h)) { > > + av_log(s, AV_LOG_WARNING, "config error, subregion changed > > to x=%d, y=%d, w=%d, h=%d\n", > > + > > x, y, w, h); > > + } > > + > > result = TessBaseAPIRect(s->tess, in->data[0], 1, > > - in->linesize[0], 0, 0, in->width, > > in->height); > > + in->linesize[0], x, y, w, h); > > confs = TessBaseAPIAllWordConfidences(s->tess); > > av_dict_set(metadata, "lavfi.ocr.text", result, 0); > > for (int i = 0; confs[i] != -1; i++) { > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". Regards, Lingjiang Fang
diff --git a/doc/filters.texi b/doc/filters.texi index da8f7d7726..a955cf46e0 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -15451,6 +15451,13 @@ Set character whitelist. @item blacklist Set character blacklist. + +@item x, y +Set top point position of subregion, not support expression now + +@item w, h +Set width and height of subregion + @end table The filter exports recognized text as the frame metadata @code{lavfi.ocr.text}. diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c index 6de474025a..e96dce2d87 100644 --- a/libavfilter/vf_ocr.c +++ b/libavfilter/vf_ocr.c @@ -33,6 +33,8 @@ typedef struct OCRContext { char *language; char *whitelist; char *blacklist; + int x, y; + int w, h; TessBaseAPI *tess; } OCRContext; @@ -45,6 +47,10 @@ static const AVOption ocr_options[] = { { "language", "set language", OFFSET(language), AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS }, { "whitelist", "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ "}, 0, 0, FLAGS }, { "blacklist", "set character blacklist", OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""}, 0, 0, FLAGS }, + { "x", "top x of sub region", OFFSET(x), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, + { "y", "top y of sub region", OFFSET(y), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, + { "w", "width of sub region", OFFSET(w), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, + { "h", "height of sub region", OFFSET(h), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, { NULL } }; @@ -93,6 +99,21 @@ static int query_formats(AVFilterContext *ctx) return ff_set_common_formats(ctx, fmts_list); } +static void check_fix(int *x, int *y, int *w, int *h, int pic_w, int pic_h) +{ + // 0 <= x < pic_w + if (*x >= pic_w) + *x = 0; + // 0 <= y < pic_h + if (*y >= pic_h) + *y = 0; + + if (*w == 0 || *w + *x > pic_w) + *w = pic_w - *x; + if (*h == 0 || *h + *y > pic_h) + *h = pic_h - *y; +} + static 
int filter_frame(AVFilterLink *inlink, AVFrame *in) { AVDictionary **metadata = &in->metadata; @@ -102,8 +123,20 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) char *result; int *confs; + // TODO: support expression + int x = s->x; + int y = s->y; + int w = s->w; + int h = s->h; + check_fix(&x, &y, &w, &h, in->width, in->height); + if ( x != s->x || y != s->y || + (s->w != 0 && w != s->w) || (s->h != 0 && h != s->h)) { + av_log(s, AV_LOG_WARNING, "config error, subregion changed to x=%d, y=%d, w=%d, h=%d\n", + x, y, w, h); + } + result = TessBaseAPIRect(s->tess, in->data[0], 1, - in->linesize[0], 0, 0, in->width, in->height); + in->linesize[0], x, y, w, h); confs = TessBaseAPIAllWordConfidences(s->tess); av_dict_set(metadata, "lavfi.ocr.text", result, 0); for (int i = 0; confs[i] != -1; i++) {