[FFmpeg-devel] [PATCH V2] lavfi/vf_ocr: add subregion support
Lingjiang Fang
vacingfang at foxmail.com
Thu Jun 17 17:58:40 EEST 2021
Fix bugs in the previous patch; ping for review.
---
doc/filters.texi | 7 +++++++
libavfilter/vf_ocr.c | 35 ++++++++++++++++++++++++++++++++++-
2 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/doc/filters.texi b/doc/filters.texi
index da8f7d7726..a955cf46e0 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -15451,6 +15451,13 @@ Set character whitelist.
@item blacklist
Set character blacklist.
+
+@item x, y
+Set the position of the top-left corner of the subregion. Expressions are not supported yet.
+
+@item w, h
+Set the width and height of the subregion. The default value 0 extends the subregion to the right or bottom edge of the frame.
+
@end table
The filter exports recognized text as the frame metadata @code{lavfi.ocr.text}.
diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c
index 6de474025a..e96dce2d87 100644
--- a/libavfilter/vf_ocr.c
+++ b/libavfilter/vf_ocr.c
@@ -33,6 +33,8 @@ typedef struct OCRContext {
char *language;
char *whitelist;
char *blacklist;
+ int x, y;
+ int w, h;
TessBaseAPI *tess;
} OCRContext;
@@ -45,6 +47,10 @@ static const AVOption ocr_options[] = {
{ "language", "set language", OFFSET(language), AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS },
{ "whitelist", "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ "}, 0, 0, FLAGS },
{ "blacklist", "set character blacklist", OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""}, 0, 0, FLAGS },
+ { "x", "top x of sub region", OFFSET(x), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS },
+ { "y", "top y of sub region", OFFSET(y), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS },
+ { "w", "width of sub region", OFFSET(w), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS },
+ { "h", "height of sub region", OFFSET(h), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS },
{ NULL }
};
@@ -93,6 +99,21 @@ static int query_formats(AVFilterContext *ctx)
return ff_set_common_formats(ctx, fmts_list);
}
+/**
+ * Clamp the requested subregion so that it lies inside the picture.
+ *
+ * @param x     in/out: left edge of the subregion; reset to 0 when it is
+ *              outside [0, pic_w)
+ * @param y     in/out: top edge of the subregion; reset to 0 when it is
+ *              outside [0, pic_h)
+ * @param w     in/out: width of the subregion; a value <= 0, or one that
+ *              would extend past the right edge, becomes pic_w - *x
+ * @param h     in/out: height of the subregion; a value <= 0, or one that
+ *              would extend past the bottom edge, becomes pic_h - *y
+ * @param pic_w picture width
+ * @param pic_h picture height
+ */
+static void check_fix(int *x, int *y, int *w, int *h, int pic_w, int pic_h)
+{
+    // 0 <= x < pic_w
+    if (*x < 0 || *x >= pic_w)
+        *x = 0;
+    // 0 <= y < pic_h
+    if (*y < 0 || *y >= pic_h)
+        *y = 0;
+
+    // Compare the size against the remaining space rather than computing
+    // *w + *x (or *h + *y): the inputs may be as large as INT_MAX, so the
+    // sum could overflow and let an oversized region slip through.
+    if (*w <= 0 || *w > pic_w - *x)
+        *w = pic_w - *x;
+    if (*h <= 0 || *h > pic_h - *y)
+        *h = pic_h - *y;
+}
+
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
AVDictionary **metadata = &in->metadata;
@@ -102,8 +123,20 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
char *result;
int *confs;
+ // TODO: support expression
+ int x = s->x;
+ int y = s->y;
+ int w = s->w;
+ int h = s->h;
+ check_fix(&x, &y, &w, &h, in->width, in->height);
+ if ( x != s->x || y != s->y ||
+ (s->w != 0 && w != s->w) || (s->h != 0 && h != s->h)) {
+ av_log(s, AV_LOG_WARNING, "config error, subregion changed to x=%d, y=%d, w=%d, h=%d\n",
+ x, y, w, h);
+ }
+
result = TessBaseAPIRect(s->tess, in->data[0], 1,
- in->linesize[0], 0, 0, in->width, in->height);
+ in->linesize[0], x, y, w, h);
confs = TessBaseAPIAllWordConfidences(s->tess);
av_dict_set(metadata, "lavfi.ocr.text", result, 0);
for (int i = 0; confs[i] != -1; i++) {
--
2.29.2
More information about the ffmpeg-devel
mailing list