Message ID | tencent_71C3C4E3D7723078068E5BC46F20F8BEA809@qq.com |
---|---|
State | Superseded |
Headers | show |
Series | [FFmpeg-devel] lavf/vf_ocr: add subregion support | expand |
Context | Check | Description |
---|---|---|
andriy/x86_make | success | Make finished |
andriy/x86_make_fate | success | Make fate finished |
On 2021-06-17 11:31, Lingjiang Fang wrote: > --- > doc/filters.texi | 7 +++++++ > libavfilter/vf_ocr.c | 30 +++++++++++++++++++++++++++++- > 2 files changed, 36 insertions(+), 1 deletion(-) > > diff --git a/doc/filters.texi b/doc/filters.texi > index da8f7d7726..9c650a2a5a 100644 > --- a/doc/filters.texi > +++ b/doc/filters.texi > @@ -15451,6 +15451,13 @@ Set character whitelist. > > @item blacklist > Set character blacklist. > + > +@item x, y > +Set top point position of subregion, not support expression now --> Set position of top-left corner, in pixels. > + > +@item w, h > +Set Width and height of subregion s/Width/width > + > @end table > > The filter exports recognized text as the frame metadata @code{lavfi.ocr.text}. > diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c > index 6de474025a..7beb101679 100644 > --- a/libavfilter/vf_ocr.c > +++ b/libavfilter/vf_ocr.c > @@ -33,6 +33,8 @@ typedef struct OCRContext { > char *language; > char *whitelist; > char *blacklist; > + int x, y; > + int w, h; > > TessBaseAPI *tess; > } OCRContext; > @@ -45,6 +47,10 @@ static const AVOption ocr_options[] = { > { "language", "set language", OFFSET(language), AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS }, > { "whitelist", "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ "}, 0, 0, FLAGS }, > { "blacklist", "set character blacklist", OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""}, 0, 0, FLAGS }, > + { "x", "top x of sub region", OFFSET(x), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, > + { "y", "top y of sub region", OFFSET(y), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, > + { "w", "width of sub region", OFFSET(w), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, > + { "h", "height of sub region", OFFSET(h), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, > { NULL } > }; > > @@ -73,6 +79,19 @@ static av_cold int init(AVFilterContext *ctx) > return 
0; > } > > +static int config_input(AVFilterLink *inlink) > +{ > + OCRContext *s = inlink->dst->priv; > + > + // may call many times, we don't check w/h here > + if (s->x < 0 || s->y < 0) { > + s->x = 0; > + s->y = 0; These are AV_OPT_TYPE_INT with range set as 0 to INT_MAX, so the opt parser should disallow negative values. Regards, Gyan > + } > + > + return 0; > +} > + > static int query_formats(AVFilterContext *ctx) > { > static const enum AVPixelFormat pix_fmts[] = { > @@ -101,9 +120,17 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) > OCRContext *s = ctx->priv; > char *result; > int *confs; > + int w = s->w; > + int h = s->h; > + > + if (w <= 0 || h <= 0) { > + w = in->width; > + h = in->height; > + } > > + av_log(s, AV_LOG_ERROR, "x=%d, y=%d, w=%d, h=%d\n", s->x, s->y, w, h); > result = TessBaseAPIRect(s->tess, in->data[0], 1, > - in->linesize[0], 0, 0, in->width, in->height); > + in->linesize[0], s->x, s->y, w, h); > confs = TessBaseAPIAllWordConfidences(s->tess); > av_dict_set(metadata, "lavfi.ocr.text", result, 0); > for (int i = 0; confs[i] != -1; i++) { > @@ -134,6 +161,7 @@ static const AVFilterPad ocr_inputs[] = { > .name = "default", > .type = AVMEDIA_TYPE_VIDEO, > .filter_frame = filter_frame, > + .config_props = config_input, > }, > { NULL } > };
On Thu, 17 Jun 2021 11:57:00 +0530 Gyan Doshi <ffmpeg@gyani.pro> wrote: > On 2021-06-17 11:31, Lingjiang Fang wrote: > > --- > > doc/filters.texi | 7 +++++++ > > libavfilter/vf_ocr.c | 30 +++++++++++++++++++++++++++++- > > 2 files changed, 36 insertions(+), 1 deletion(-) > > > > diff --git a/doc/filters.texi b/doc/filters.texi > > index da8f7d7726..9c650a2a5a 100644 > > --- a/doc/filters.texi > > +++ b/doc/filters.texi > > @@ -15451,6 +15451,13 @@ Set character whitelist. > > > > @item blacklist > > Set character blacklist. > > + > > +@item x, y > > +Set top point position of subregion, not support expression now > > --> Set position of top-left corner, in pixels. > > > + > > +@item w, h > > +Set Width and height of subregion > > s/Width/width fixed in version 2, thanks > > > > + > > @end table > > > > The filter exports recognized text as the frame metadata > > @code{lavfi.ocr.text}. diff --git a/libavfilter/vf_ocr.c > > b/libavfilter/vf_ocr.c index 6de474025a..7beb101679 100644 > > --- a/libavfilter/vf_ocr.c > > +++ b/libavfilter/vf_ocr.c > > @@ -33,6 +33,8 @@ typedef struct OCRContext { > > char *language; > > char *whitelist; > > char *blacklist; > > + int x, y; > > + int w, h; > > > > TessBaseAPI *tess; > > } OCRContext; > > @@ -45,6 +47,10 @@ static const AVOption ocr_options[] = { > > { "language", "set language", OFFSET(language), > > AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS }, { "whitelist", > > "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, > > {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ > > "}, 0, 0, FLAGS }, { "blacklist", "set character blacklist", > > OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""}, 0, 0, FLAGS }, > > + { "x", "top x of sub region", OFFSET(x), > > AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, > > + { "y", "top y of sub region", OFFSET(y), > > AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, > > + { "w", "width of sub region", OFFSET(w), > > 
AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, > > + { "h", "height of sub region", OFFSET(h), > > AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, { NULL } > > }; > > > > @@ -73,6 +79,19 @@ static av_cold int init(AVFilterContext *ctx) > > return 0; > > } > > > > +static int config_input(AVFilterLink *inlink) > > +{ > > + OCRContext *s = inlink->dst->priv; > > + > > + // may call many times, we don't check w/h here > > + if (s->x < 0 || s->y < 0) { > > + s->x = 0; > > + s->y = 0; > > These are AV_OPT_TYPE_INT with range set as 0 to INT_MAX, so the opt > parser should disallow negative values. removed redundant check in version 2, thanks > > Regards, > Gyan > > > + } > > + > > + return 0; > > +} > > + > > static int query_formats(AVFilterContext *ctx) > > { > > static const enum AVPixelFormat pix_fmts[] = { > > @@ -101,9 +120,17 @@ static int filter_frame(AVFilterLink *inlink, > > AVFrame *in) OCRContext *s = ctx->priv; > > char *result; > > int *confs; > > + int w = s->w; > > + int h = s->h; > > + > > + if (w <= 0 || h <= 0) { > > + w = in->width; > > + h = in->height; > > + } > > > > + av_log(s, AV_LOG_ERROR, "x=%d, y=%d, w=%d, h=%d\n", s->x, > > s->y, w, h); result = TessBaseAPIRect(s->tess, in->data[0], 1, > > - in->linesize[0], 0, 0, in->width, > > in->height); > > + in->linesize[0], s->x, s->y, w, h); > > confs = TessBaseAPIAllWordConfidences(s->tess); > > av_dict_set(metadata, "lavfi.ocr.text", result, 0); > > for (int i = 0; confs[i] != -1; i++) { > > @@ -134,6 +161,7 @@ static const AVFilterPad ocr_inputs[] = { > > .name = "default", > > .type = AVMEDIA_TYPE_VIDEO, > > .filter_frame = filter_frame, > > + .config_props = config_input, > > }, > > { NULL } > > }; > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". Regards, Lingjiang Fang
diff --git a/doc/filters.texi b/doc/filters.texi index da8f7d7726..9c650a2a5a 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -15451,6 +15451,13 @@ Set character whitelist. @item blacklist Set character blacklist. + +@item x, y +Set top point position of subregion, not support expression now + +@item w, h +Set Width and height of subregion + @end table The filter exports recognized text as the frame metadata @code{lavfi.ocr.text}. diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c index 6de474025a..7beb101679 100644 --- a/libavfilter/vf_ocr.c +++ b/libavfilter/vf_ocr.c @@ -33,6 +33,8 @@ typedef struct OCRContext { char *language; char *whitelist; char *blacklist; + int x, y; + int w, h; TessBaseAPI *tess; } OCRContext; @@ -45,6 +47,10 @@ static const AVOption ocr_options[] = { { "language", "set language", OFFSET(language), AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS }, { "whitelist", "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ "}, 0, 0, FLAGS }, { "blacklist", "set character blacklist", OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""}, 0, 0, FLAGS }, + { "x", "top x of sub region", OFFSET(x), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, + { "y", "top y of sub region", OFFSET(y), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, + { "w", "width of sub region", OFFSET(w), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, + { "h", "height of sub region", OFFSET(h), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, { NULL } }; @@ -73,6 +79,19 @@ static av_cold int init(AVFilterContext *ctx) return 0; } +static int config_input(AVFilterLink *inlink) +{ + OCRContext *s = inlink->dst->priv; + + // may call many times, we don't check w/h here + if (s->x < 0 || s->y < 0) { + s->x = 0; + s->y = 0; + } + + return 0; +} + static int query_formats(AVFilterContext *ctx) { static const enum AVPixelFormat pix_fmts[] = { @@ -101,9 +120,17 @@ 
static int filter_frame(AVFilterLink *inlink, AVFrame *in) OCRContext *s = ctx->priv; char *result; int *confs; + int w = s->w; + int h = s->h; + + if (w <= 0 || h <= 0) { + w = in->width; + h = in->height; + } + av_log(s, AV_LOG_ERROR, "x=%d, y=%d, w=%d, h=%d\n", s->x, s->y, w, h); result = TessBaseAPIRect(s->tess, in->data[0], 1, - in->linesize[0], 0, 0, in->width, in->height); + in->linesize[0], s->x, s->y, w, h); confs = TessBaseAPIAllWordConfidences(s->tess); av_dict_set(metadata, "lavfi.ocr.text", result, 0); for (int i = 0; confs[i] != -1; i++) { @@ -134,6 +161,7 @@ static const AVFilterPad ocr_inputs[] = { .name = "default", .type = AVMEDIA_TYPE_VIDEO, .filter_frame = filter_frame, + .config_props = config_input, }, { NULL } };