20#include "config_auto.h"
40static INT_VAR(textord_tabfind_show_strokewidths, 0,
"Show stroke widths");
41static BOOL_VAR(textord_tabfind_only_strokewidths,
false,
"Only run stroke widths");
111 :
BlobGrid(gridsize, bleft, tright), nontext_map_(nullptr), projection_(nullptr),
112 denorm_(nullptr), grid_box_(bleft, tright), rerotation_(1.0f, 0.0f) {
113 leaders_win_ =
nullptr;
114 widths_win_ =
nullptr;
115 initial_widths_win_ =
nullptr;
116 chains_win_ =
nullptr;
117 diacritics_win_ =
nullptr;
118 textlines_win_ =
nullptr;
119 smoothed_win_ =
nullptr;
123 if (widths_win_ !=
nullptr) {
124 #ifndef GRAPHICS_DISABLED
127 if (textord_tabfind_only_strokewidths)
132 delete initial_widths_win_;
134 delete textlines_win_;
135 delete smoothed_win_;
136 delete diacritics_win_;
145 BLOBNBOX_IT blob_it(&block->
blobs);
146 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
147 SetNeighbours(
false,
false, blob_it.data());
160 InsertBlobs(input_block);
162 while (cjk_merge && FixBrokenCJK(input_block));
164 FindTextlineFlowDirection(pageseg_mode,
false);
170static void CollectHorizVertBlobs(BLOBNBOX_LIST* input_blobs,
171 int* num_vertical_blobs,
172 int* num_horizontal_blobs,
173 BLOBNBOX_CLIST* vertical_blobs,
174 BLOBNBOX_CLIST* horizontal_blobs,
175 BLOBNBOX_CLIST* nondescript_blobs) {
176 BLOBNBOX_C_IT v_it(vertical_blobs);
177 BLOBNBOX_C_IT h_it(horizontal_blobs);
178 BLOBNBOX_C_IT n_it(nondescript_blobs);
179 BLOBNBOX_IT blob_it(input_blobs);
180 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
183 float y_x =
static_cast<float>(box.
height()) / box.
width();
184 float x_y = 1.0f / y_x;
186 float ratio = x_y > y_x ? x_y : y_x;
190 ++*num_vertical_blobs;
191 if (ok_blob) v_it.add_after_then_move(blob);
193 ++*num_horizontal_blobs;
194 if (ok_blob) h_it.add_after_then_move(blob);
195 }
else if (ok_blob) {
196 n_it.add_after_then_move(blob);
210 BLOBNBOX_CLIST* osd_blobs) {
211 int vertical_boxes = 0;
212 int horizontal_boxes = 0;
214 BLOBNBOX_CLIST vertical_blobs;
215 BLOBNBOX_CLIST horizontal_blobs;
216 BLOBNBOX_CLIST nondescript_blobs;
217 CollectHorizVertBlobs(&block->
blobs, &vertical_boxes, &horizontal_boxes,
218 &vertical_blobs, &horizontal_blobs, &nondescript_blobs);
219 CollectHorizVertBlobs(&block->
large_blobs, &vertical_boxes, &horizontal_boxes,
220 &vertical_blobs, &horizontal_blobs, &nondescript_blobs);
222 tprintf(
"TextDir hbox=%d vs vbox=%d, %dH, %dV, %dN osd blobs\n",
223 horizontal_boxes, vertical_boxes,
224 horizontal_blobs.length(), vertical_blobs.length(),
225 nondescript_blobs.length());
226 if (osd_blobs !=
nullptr && vertical_boxes == 0 && horizontal_boxes == 0) {
228 BLOBNBOX_C_IT osd_it(osd_blobs);
229 osd_it.add_list_after(&nondescript_blobs);
232 int min_vert_boxes =
static_cast<int>((vertical_boxes + horizontal_boxes) *
233 find_vertical_text_ratio);
234 if (vertical_boxes >= min_vert_boxes) {
235 if (osd_blobs !=
nullptr) {
236 BLOBNBOX_C_IT osd_it(osd_blobs);
237 osd_it.add_list_after(&vertical_blobs);
241 if (osd_blobs !=
nullptr) {
242 BLOBNBOX_C_IT osd_it(osd_blobs);
243 osd_it.add_list_after(&horizontal_blobs);
254 rerotation_.
set_x(rotation.
x());
255 rerotation_.
set_y(-rotation.
y());
263 ColPartition_LIST leader_parts;
264 FindLeadersAndMarkNoise(block, &leader_parts);
268 for (ColPartition_IT it(&leader_parts); !it.empty(); it.forward()) {
271 MarkLeaderNeighbours(part,
LR_LEFT);
272 MarkLeaderNeighbours(part,
LR_RIGHT);
293 TBOX search_box = box;
294 search_box.
pad(padding, padding);
303 if (n == bbox)
continue;
305 if (nbox.
height() > max_height) {
306 max_height = nbox.
height();
310 tprintf(
"Max neighbour size=%d for candidate line box at:", max_height);
314 #ifndef GRAPHICS_DISABLED
315 if (leaders_win_ !=
nullptr) {
345 Pix* nontext_pix,
const DENORM* denorm,
bool cjk_script,
348 nontext_map_ = nontext_pix;
349 projection_ = projection;
360 FindTextlineFlowDirection(pageseg_mode,
false);
362 if (textord_tabfind_show_strokewidths) {
373 FindTextlineFlowDirection(pageseg_mode,
true);
375 FindInitialPartitions(pageseg_mode, rerotation,
true, block,
376 diacritic_blobs, part_grid, big_parts, &skew);
378 tprintf(
"Detected %d diacritics\n", diacritic_blobs->length());
382 FindTextlineFlowDirection(pageseg_mode,
true);
383 r = FindInitialPartitions(pageseg_mode, rerotation,
false, block,
384 diacritic_blobs, part_grid, big_parts, &skew);
386 nontext_map_ =
nullptr;
387 projection_ =
nullptr;
391static void PrintBoxWidths(
BLOBNBOX* neighbour) {
393 tprintf(
"Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%1.f\n",
406 FCOORD click(
static_cast<float>(x),
static_cast<float>(y));
410 PrintBoxWidths(neighbour);
421 tprintf(
"Left gap=%d, right=%d, above=%d, below=%d, horz=%d, vert=%d\n"
422 "Good= %d %d %d %d\n",
445void StrokeWidth::FindLeadersAndMarkNoise(
TO_BLOCK* block,
446 ColPartition_LIST* leader_parts) {
452 gsearch.StartFullSearch();
453 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
454 SetNeighbours(
true,
false, bbox);
456 ColPartition_IT part_it(leader_parts);
457 gsearch.StartFullSearch();
458 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
466 for (blob = bbox; blob !=
nullptr && blob->
flow() ==
BTFT_NONE;
473 if (part->MarkAsLeaderIfMonospaced())
474 part_it.add_after_then_move(part);
479 if (textord_tabfind_show_strokewidths) {
480 leaders_win_ = DisplayGoodBlobs(
"LeaderNeighbours", 0, 0);
484 BLOBNBOX_IT blob_it(&block->
blobs);
486 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
492 blob_it.add_to_end(small_it.extract());
499 for (noise_it.mark_cycle_pt(); !noise_it.cycled_list(); noise_it.forward()) {
502 small_it.add_to_end(noise_it.extract());
514void StrokeWidth::InsertBlobs(
TO_BLOCK* block) {
522void StrokeWidth::MarkLeaderNeighbours(
const ColPartition* part,
524 const TBOX& part_box = part->bounding_box();
529 blobsearch.StartSideSearch(side ==
LR_LEFT ? part_box.
left()
533 while ((blob = blobsearch.NextSideSearch(side ==
LR_LEFT)) !=
nullptr) {
537 int x_gap = blob_box.
x_gap(part_box);
540 }
else if (best_blob ==
nullptr || x_gap < best_gap) {
545 if (best_blob !=
nullptr) {
550 #ifndef GRAPHICS_DISABLED
551 if (leaders_win_ !=
nullptr) {
562static int UpperQuartileCJKSize(
int gridsize, BLOBNBOX_LIST* blobs) {
564 BLOBNBOX_IT it(blobs);
565 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
570 sizes.add(height, 1);
572 return static_cast<int>(sizes.ile(0.75f) + 0.5);
580bool StrokeWidth::FixBrokenCJK(
TO_BLOCK* block) {
581 BLOBNBOX_LIST* blobs = &block->
blobs;
582 int median_height = UpperQuartileCJKSize(
gridsize(), blobs);
586 BLOBNBOX_IT blob_it(blobs);
588 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
596 tprintf(
"Checking for Broken CJK (max size=%d):", max_height);
600 BLOBNBOX_CLIST overlapped_blobs;
601 AccumulateOverlaps(blob, debug, max_height, max_dist,
602 &bbox, &overlapped_blobs);
603 if (!overlapped_blobs.empty()) {
610 tprintf(
"Bad final aspectratio:");
618 tprintf(
"Too many neighbours: %d\n", overlapped_blobs.length());
622 BLOBNBOX_C_IT n_it(&overlapped_blobs);
623 for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
625 neighbour = n_it.data();
630 if (!n_it.cycled_list()) {
633 PrintBoxWidths(blob);
643 for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
649 if (rerotation_.
x() != 1.0f || rerotation_.
y() != 0.0f) {
662 int num_remaining = 0;
663 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
680static bool AcceptableCJKMerge(
const TBOX& bbox,
const TBOX& nbox,
681 bool debug,
int max_size,
int max_dist,
682 int* x_gap,
int* y_gap) {
683 *x_gap = bbox.
x_gap(nbox);
684 *y_gap = bbox.
y_gap(nbox);
688 tprintf(
"gaps = %d, %d, merged_box:", *x_gap, *y_gap);
691 if (*x_gap <= max_dist && *y_gap <= max_dist &&
692 merged.width() <= max_size && merged.height() <= max_size) {
694 double old_ratio =
static_cast<double>(bbox.
width()) / bbox.
height();
695 if (old_ratio < 1.0) old_ratio = 1.0 / old_ratio;
696 double new_ratio =
static_cast<double>(merged.width()) / merged.height();
697 if (new_ratio < 1.0) new_ratio = 1.0 / new_ratio;
708void StrokeWidth::AccumulateOverlaps(
const BLOBNBOX* not_this,
bool debug,
709 int max_size,
int max_dist,
710 TBOX* bbox, BLOBNBOX_CLIST* blobs) {
716 for (
auto & nearest : nearests) {
719 int x = (bbox->
left() + bbox->
right()) / 2;
720 int y = (bbox->
bottom() + bbox->
top()) / 2;
725 while ((neighbour = radsearch.NextRadSearch()) !=
nullptr) {
726 if (neighbour == not_this)
continue;
729 if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, max_dist,
733 blobs->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, neighbour);
739 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
740 if (nearests[dir] ==
nullptr)
continue;
741 nbox = nearests[dir]->bounding_box();
742 if (AcceptableCJKMerge(*bbox, nbox, debug, max_size,
743 max_dist, &x_gap, &y_gap)) {
746 blobs->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, nearests[dir]);
751 nearests[dir] =
nullptr;
755 }
else if (x_gap < 0 && x_gap <= y_gap) {
758 if (nearests[dir] ==
nullptr ||
759 y_gap < bbox->y_gap(nearests[dir]->bounding_box())) {
760 nearests[dir] = neighbour;
762 }
else if (y_gap < 0 && y_gap <= x_gap) {
765 if (nearests[dir] ==
nullptr ||
766 x_gap < bbox->x_gap(nearests[dir]->bounding_box())) {
767 nearests[dir] = neighbour;
776 for (
auto & nearest : nearests) {
777 if (nearest ==
nullptr)
continue;
778 const TBOX& nbox = nearest->bounding_box();
780 tprintf(
"Testing for overlap with:");
784 blobs->shallow_clear();
786 tprintf(
"Final box overlaps nearest\n");
799void StrokeWidth::FindTextlineFlowDirection(
PageSegMode pageseg_mode,
800 bool display_if_debugging) {
804 gsearch.StartFullSearch();
805 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
806 SetNeighbours(
false, display_if_debugging, bbox);
809 gsearch.StartFullSearch();
810 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
811 SimplifyObviousNeighbours(bbox);
814 gsearch.StartFullSearch();
815 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
816 if (FindingVerticalOnly(pageseg_mode)) {
819 }
else if (FindingHorizontalOnly(pageseg_mode)) {
823 SetNeighbourFlows(bbox);
826 if ((textord_tabfind_show_strokewidths && display_if_debugging) ||
827 textord_tabfind_show_strokewidths > 1) {
828 initial_widths_win_ = DisplayGoodBlobs(
"InitialStrokewidths", 400, 0);
831 gsearch.StartFullSearch();
832 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
833 SmoothNeighbourTypes(pageseg_mode,
false, bbox);
836 gsearch.StartFullSearch();
837 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
838 SmoothNeighbourTypes(pageseg_mode,
true, bbox);
841 gsearch.StartFullSearch();
842 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
843 SmoothNeighbourTypes(pageseg_mode,
true, bbox);
845 if ((textord_tabfind_show_strokewidths && display_if_debugging) ||
846 textord_tabfind_show_strokewidths > 1) {
847 widths_win_ = DisplayGoodBlobs(
"ImprovedStrokewidths", 800, 0);
855void StrokeWidth::SetNeighbours(
bool leaders,
bool activate_line_trap,
857 int line_trap_count = 0;
858 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
860 line_trap_count += FindGoodNeighbour(bnd, leaders, blob);
862 if (line_trap_count > 0 && activate_line_trap) {
884 tprintf(
"FGN in dir %d for blob:", dir);
887 int top = blob_box.
top();
888 int bottom = blob_box.
bottom();
889 int left = blob_box.
left();
890 int right = blob_box.
right();
891 int width = right - left;
892 int height = top - bottom;
900 int line_trap_count = 0;
903 ? height / 2 : width / 2;
905 ? height / 3 : width / 3;
907 min_good_overlap = min_decent_overlap = 1;
909 int search_pad =
static_cast<int>(
913 TBOX search_box = blob_box;
926 search_box.
set_top(search_box.
top() + search_pad);
933 rectsearch.StartRectSearch(search_box);
935 double best_goodness = 0.0;
936 bool best_is_good =
false;
938 while ((neighbour = rectsearch.NextRectSearch()) !=
nullptr) {
940 if (neighbour == blob)
942 int mid_x = (nbox.
left() + nbox.
right()) / 2;
943 if (mid_x < blob->left_rule() || mid_x > blob->
right_rule())
952 int n_width = nbox.
width();
953 int n_height = nbox.
height();
954 if (std::min(n_width, n_height) > line_trap_min &&
955 std::max(n_width, n_height) < line_trap_max)
961 std::max(width, height)) &&
966 if (debug)
tprintf(
"Bad size\n");
978 overlap = std::min(
static_cast<int>(nbox.
top()), top) - std::max(
static_cast<int>(nbox.
bottom()), bottom);
980 perp_overlap = nbox.
width();
982 perp_overlap = overlap;
985 if (debug)
tprintf(
"On wrong side\n");
990 overlap = std::min(
static_cast<int>(nbox.
right()), right) - std::max(
static_cast<int>(nbox.
left()), left);
992 perp_overlap = nbox.
height();
994 perp_overlap = overlap;
997 if (debug)
tprintf(
"On wrong side\n");
1002 if (-gap > overlap) {
1003 if (debug)
tprintf(
"Overlaps wrong way\n");
1006 if (perp_overlap < min_decent_overlap) {
1007 if (debug)
tprintf(
"Doesn't overlap enough\n");
1012 bool is_good = overlap >= min_good_overlap && !bad_sizes &&
1019 if (gap < 1) gap = 1;
1020 double goodness = (1.0 + is_good) * overlap / gap;
1022 tprintf(
"goodness = %g vs best of %g, good=%d, overlap=%d, gap=%d\n",
1023 goodness, best_goodness, is_good, overlap, gap);
1025 if (goodness > best_goodness) {
1026 best_neighbour = neighbour;
1027 best_goodness = goodness;
1028 best_is_good = is_good;
1032 return line_trap_count;
1036static void ListNeighbours(
const BLOBNBOX* blob,
1037 BLOBNBOX_CLIST* neighbours) {
1038 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1041 if (neighbour !=
nullptr) {
1042 neighbours->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, neighbour);
1048static void List2ndNeighbours(
const BLOBNBOX* blob,
1049 BLOBNBOX_CLIST* neighbours) {
1050 ListNeighbours(blob, neighbours);
1051 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1054 if (neighbour !=
nullptr) {
1055 ListNeighbours(neighbour, neighbours);
1061static void List3rdNeighbours(
const BLOBNBOX* blob,
1062 BLOBNBOX_CLIST* neighbours) {
1063 List2ndNeighbours(blob, neighbours);
1064 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1067 if (neighbour !=
nullptr) {
1068 List2ndNeighbours(neighbour, neighbours);
1075static void CountNeighbourGaps(
bool debug, BLOBNBOX_CLIST* neighbours,
1076 int* pure_h_count,
int* pure_v_count) {
1079 BLOBNBOX_C_IT it(neighbours);
1080 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1082 int h_min, h_max, v_min, v_max;
1085 tprintf(
"Hgaps [%d,%d], vgaps [%d,%d]:", h_min, h_max, v_min, v_max);
1086 if (h_max < v_min ||
1090 if (debug)
tprintf(
"Horz at:");
1091 }
else if (v_max < h_min) {
1094 if (debug)
tprintf(
"Vert at:");
1096 if (debug)
tprintf(
"Neither at:");
1106void StrokeWidth::SetNeighbourFlows(
BLOBNBOX* blob) {
1112 tprintf(
"SetNeighbourFlows (current flow=%d, type=%d) on:",
1116 BLOBNBOX_CLIST neighbours;
1117 List3rdNeighbours(blob, &neighbours);
1119 int pure_h_count = 0;
1120 int pure_v_count = 0;
1121 CountNeighbourGaps(debug, &neighbours, &pure_h_count, &pure_v_count);
1125 tprintf(
"SetFlows: h_count=%d, v_count=%d\n",
1126 pure_h_count, pure_v_count);
1128 if (!neighbours.empty()) {
1131 if (pure_h_count > 2 * pure_v_count) {
1134 }
else if (pure_v_count > 2 * pure_h_count) {
1147static void CountNeighbourTypes(BLOBNBOX_CLIST* neighbours,
1148 int* pure_h_count,
int* pure_v_count) {
1149 BLOBNBOX_C_IT it(neighbours);
1150 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1162void StrokeWidth::SimplifyObviousNeighbours(
BLOBNBOX* blob) {
1184 int h_min, h_max, v_min, v_max;
1186 if ((h_max + margin < v_min && h_max < margin / 2) ||
1191 }
else if (v_max + margin < h_min && v_max < margin / 2) {
1201void StrokeWidth::SmoothNeighbourTypes(
PageSegMode pageseg_mode,
bool reset_all,
1205 BLOBNBOX_CLIST neighbours;
1206 List2ndNeighbours(blob, &neighbours);
1208 int pure_h_count = 0;
1209 int pure_v_count = 0;
1210 CountNeighbourTypes(&neighbours, &pure_h_count, &pure_v_count);
1215 tprintf(
"pure_h=%d, pure_v=%d\n",
1216 pure_h_count, pure_v_count);
1218 if (pure_h_count > pure_v_count && !FindingVerticalOnly(pageseg_mode)) {
1222 }
else if (pure_v_count > pure_h_count &&
1223 !FindingHorizontalOnly(pageseg_mode)) {
1232 tprintf(
"Clean on pass 3!\n");
1250 TO_BLOCK* block, BLOBNBOX_LIST* diacritic_blobs,
1251 ColPartitionGrid* part_grid, ColPartition_LIST* big_parts,
1253 if (!FindingHorizontalOnly(pageseg_mode)) FindVerticalTextChains(part_grid);
1254 if (!FindingVerticalOnly(pageseg_mode)) FindHorizontalTextChains(part_grid);
1255 if (textord_tabfind_show_strokewidths) {
1256 chains_win_ =
MakeWindow(0, 400,
"Initial text chains");
1257 part_grid->DisplayBoxes(chains_win_);
1260 if (find_problems) {
1264 part_grid->SplitOverlappingPartitions(big_parts);
1265 EasyMerges(part_grid);
1266 RemoveLargeUnusedBlobs(block, part_grid, big_parts);
1268 while (part_grid->GridSmoothNeighbours(
BTFT_CHAIN, nontext_map_, grid_box,
1271 grid_box, rerotation));
1272 int pre_overlap = part_grid->ComputeTotalOverlap(
nullptr);
1273 TestDiacritics(part_grid, block);
1274 MergeDiacritics(block, part_grid);
1275 if (find_problems && diacritic_blobs !=
nullptr &&
1276 DetectAndRemoveNoise(pre_overlap, grid_box, block, part_grid,
1280 if (textord_tabfind_show_strokewidths) {
1281 textlines_win_ =
MakeWindow(400, 400,
"GoodTextline blobs");
1282 part_grid->DisplayBoxes(textlines_win_);
1283 diacritics_win_ = DisplayDiacritics(
"Diacritics", 0, 0, block);
1285 PartitionRemainingBlobs(pageseg_mode, part_grid);
1286 part_grid->SplitOverlappingPartitions(big_parts);
1287 EasyMerges(part_grid);
1288 while (part_grid->GridSmoothNeighbours(
BTFT_CHAIN, nontext_map_, grid_box,
1291 grid_box, rerotation));
1294 grid_box, rerotation));
1295 if (textord_tabfind_show_strokewidths) {
1296 smoothed_win_ =
MakeWindow(800, 400,
"Smoothed blobs");
1297 part_grid->DisplayBoxes(smoothed_win_);
1306bool StrokeWidth::DetectAndRemoveNoise(
int pre_overlap,
const TBOX& grid_box,
1308 ColPartitionGrid* part_grid,
1309 BLOBNBOX_LIST* diacritic_blobs) {
1310 ColPartitionGrid* noise_grid =
nullptr;
1311 int post_overlap = part_grid->ComputeTotalOverlap(&noise_grid);
1312 if (pre_overlap == 0) pre_overlap = 1;
1313 BLOBNBOX_IT diacritic_it(diacritic_blobs);
1314 if (noise_grid !=
nullptr) {
1318 if (textord_tabfind_show_strokewidths) {
1320 noise_grid->DisplayBoxes(noise_win);
1322 part_grid->DeleteNonLeaderParts();
1325 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1333 rsearch.StartRectSearch(search_box);
1334 ColPartition* part = rsearch.NextRectSearch();
1335 if (part !=
nullptr) {
1339 diacritic_it.add_after_then_move(blob_it.extract());
1342 noise_grid->DeleteParts();
1346 noise_grid->DeleteParts();
1359 if (next_blob ==
nullptr || next_blob->
owner() !=
nullptr ||
1368void StrokeWidth::FindVerticalTextChains(ColPartitionGrid* part_grid) {
1374 gsearch.StartFullSearch();
1375 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
1380 (blob = MutualUnusedVNeighbour(bbox,
BND_ABOVE)) !=
nullptr) {
1384 while (blob !=
nullptr) {
1386 blob = MutualUnusedVNeighbour(blob,
BND_ABOVE);
1388 blob = MutualUnusedVNeighbour(bbox,
BND_BELOW);
1389 while (blob !=
nullptr) {
1391 blob = MutualUnusedVNeighbour(blob,
BND_BELOW);
1393 CompletePartition(pageseg_mode, part, part_grid);
1405 if (next_blob ==
nullptr || next_blob->
owner() !=
nullptr ||
1414void StrokeWidth::FindHorizontalTextChains(ColPartitionGrid* part_grid) {
1420 gsearch.StartFullSearch();
1421 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
1424 (blob = MutualUnusedHNeighbour(bbox,
BND_RIGHT)) !=
nullptr) {
1428 while (blob !=
nullptr) {
1430 blob = MutualUnusedHNeighbour(blob,
BND_RIGHT);
1432 blob = MutualUnusedHNeighbour(bbox,
BND_LEFT);
1433 while (blob !=
nullptr) {
1435 blob = MutualUnusedVNeighbour(blob,
BND_LEFT);
1437 CompletePartition(pageseg_mode, part, part_grid);
1449void StrokeWidth::TestDiacritics(ColPartitionGrid* part_grid,
TO_BLOCK* block) {
1452 small_grid.InsertBlobList(&block->
blobs);
1453 int medium_diacritics = 0;
1454 int small_diacritics = 0;
1456 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
1459 DiacriticBlob(&small_grid, blob)) {
1463 BLOBNBOX_IT blob_it(&block->
blobs);
1464 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1467 small_it.add_to_end(blob_it.extract());
1470 ColPartition* part = blob->
owner();
1471 if (part ==
nullptr && DiacriticBlob(&small_grid, blob)) {
1472 ++medium_diacritics;
1474 small_it.add_to_end(blob_it.extract());
1475 }
else if (part !=
nullptr && !part->block_owned() &&
1476 part->boxes_count() < 3) {
1482 BLOBNBOX_C_IT box_it(part->boxes());
1483 for (box_it.mark_cycle_pt(); !box_it.cycled_list() &&
1484 DiacriticBlob(&small_grid, box_it.data());
1486 if (box_it.cycled_list()) {
1488 while (!box_it.empty()) {
1497 ++medium_diacritics;
1504 small_it.add_to_end(blob_it.extract());
1505 part_grid->RemoveBBox(part);
1510 tprintf(
"Blob not available to be a diacritic at:");
1514 if (textord_tabfind_show_strokewidths) {
1515 tprintf(
"Found %d small diacritics, %d medium\n",
1516 small_diacritics, medium_diacritics);
1526bool StrokeWidth::DiacriticBlob(BlobGrid* small_grid,
BLOBNBOX* blob) {
1532 small_box.bottom());
1534 tprintf(
"Testing blob for diacriticness at:");
1537 int x = (small_box.left() + small_box.right()) / 2;
1538 int y = (small_box.bottom() + small_box.top()) / 2;
1541 int height = small_box.height();
1554 BLOBNBOX* best_x_overlap =
nullptr;
1555 BLOBNBOX* best_y_overlap =
nullptr;
1556 int best_total_dist = 0;
1560 TBOX search_box(small_box);
1563 search_box.
pad(x_pad, y_pad);
1565 rsearch.SetUniqueMode(
true);
1567 rsearch.StartRectSearch(search_box);
1569 while ((neighbour = rsearch.NextRectSearch()) !=
nullptr) {
1571 neighbour == blob || neighbour->
owner() == blob->
owner())
1578 tprintf(
"Neighbour not strong enough:");
1583 if (nbox.
height() < min_height) {
1585 tprintf(
"Neighbour not big enough:");
1590 int x_gap = small_box.x_gap(nbox);
1591 int y_gap = small_box.y_gap(nbox);
1595 if (debug)
tprintf(
"xgap=%d, y=%d, total dist=%d\n",
1596 x_gap, y_gap, total_distance);
1597 if (total_distance >
1600 tprintf(
"Neighbour with median size %d too far away:",
1608 tprintf(
"Computing reduced box for :");
1611 int left = small_box.left() - small_box.width();
1612 int right = small_box.right() + small_box.width();
1614 y_gap = small_box.
y_gap(nbox);
1615 if (best_x_overlap ==
nullptr || y_gap < best_y_gap) {
1616 best_x_overlap = neighbour;
1624 tprintf(
"Shrunken box doesn't win:");
1628 if (best_y_overlap ==
nullptr || total_distance < best_total_dist) {
1630 tprintf(
"New best y overlap:");
1633 best_y_overlap = neighbour;
1634 best_total_dist = total_distance;
1636 tprintf(
"New y overlap box doesn't win:");
1640 tprintf(
"Neighbour wrong side of a tab:");
1644 if (best_x_overlap !=
nullptr &&
1645 (best_y_overlap ==
nullptr ||
1650 tprintf(
"DiacriticBlob OK! (x-overlap:");
1656 if (best_y_overlap !=
nullptr &&
1657 DiacriticXGapFilled(small_grid, small_box,
1659 NoNoiseInBetween(small_box, best_y_overlap->
bounding_box())) {
1663 tprintf(
"DiacriticBlob OK! (y-overlap:");
1670 tprintf(
"DiacriticBlob fails:");
1672 tprintf(
"Best x+y gap = %d, y = %d\n", best_total_dist, best_y_gap);
1673 if (best_y_overlap !=
nullptr) {
1674 tprintf(
"XGapFilled=%d, NoiseBetween=%d\n",
1675 DiacriticXGapFilled(small_grid, small_box,
1677 NoNoiseInBetween(small_box, best_y_overlap->
bounding_box()));
1696bool StrokeWidth::DiacriticXGapFilled(BlobGrid* grid,
1697 const TBOX& diacritic_box,
1698 const TBOX& base_box) {
1702 TBOX occupied_box(base_box);
1704 while ((diacritic_gap = diacritic_box.
x_gap(occupied_box)) > max_gap) {
1705 TBOX search_box(occupied_box);
1706 if (diacritic_box.
left() > search_box.
right()) {
1716 rsearch.StartRectSearch(search_box);
1718 while ((neighbour = rsearch.NextRectSearch()) !=
nullptr) {
1720 if (nbox.
x_gap(diacritic_box) < diacritic_gap) {
1721 if (nbox.
left() < occupied_box.left())
1723 if (nbox.
right() > occupied_box.right())
1724 occupied_box.set_right(nbox.
right());
1728 if (neighbour ==
nullptr)
1735void StrokeWidth::MergeDiacritics(
TO_BLOCK* block,
1736 ColPartitionGrid* part_grid) {
1738 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
1744 if (part !=
nullptr && !part->block_owned() && blob->
owner() ==
nullptr &&
1748 part_grid->RemoveBBox(part);
1753 part_grid->InsertBBox(
true,
true, part);
1764void StrokeWidth::RemoveLargeUnusedBlobs(
TO_BLOCK* block,
1765 ColPartitionGrid* part_grid,
1766 ColPartition_LIST* big_parts) {
1768 for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
1770 ColPartition* big_part = blob->
owner();
1771 if (big_part ==
nullptr) {
1781void StrokeWidth::PartitionRemainingBlobs(
PageSegMode pageseg_mode,
1782 ColPartitionGrid* part_grid) {
1785 int prev_grid_x = -1;
1786 int prev_grid_y = -1;
1787 BLOBNBOX_CLIST cell_list;
1788 BLOBNBOX_C_IT cell_it(&cell_list);
1789 bool cell_all_noise =
true;
1790 gsearch.StartFullSearch();
1791 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
1792 int grid_x = gsearch.GridX();
1793 int grid_y = gsearch.GridY();
1794 if (grid_x != prev_grid_x || grid_y != prev_grid_y) {
1796 MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid,
1798 cell_it.set_to_list(&cell_list);
1799 prev_grid_x = grid_x;
1800 prev_grid_y = grid_y;
1801 cell_all_noise =
true;
1803 if (bbox->
owner() ==
nullptr) {
1804 cell_it.add_to_end(bbox);
1806 cell_all_noise =
false;
1808 cell_all_noise =
false;
1811 MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid,
1817void StrokeWidth::MakePartitionsFromCellList(
PageSegMode pageseg_mode,
1819 ColPartitionGrid* part_grid,
1820 BLOBNBOX_CLIST* cell_list) {
1821 if (cell_list->empty())
1823 BLOBNBOX_C_IT cell_it(cell_list);
1825 BLOBNBOX* bbox = cell_it.extract();
1828 part->set_flow(bbox->
flow());
1829 for (cell_it.forward(); !cell_it.empty(); cell_it.forward()) {
1830 part->AddBox(cell_it.extract());
1832 CompletePartition(pageseg_mode, part, part_grid);
1834 for (; !cell_it.empty(); cell_it.forward()) {
1835 BLOBNBOX* bbox = cell_it.extract();
1837 part->set_flow(bbox->
flow());
1839 CompletePartition(pageseg_mode, part, part_grid);
1846void StrokeWidth::CompletePartition(
PageSegMode pageseg_mode,
1848 ColPartitionGrid* part_grid) {
1849 part->ComputeLimits();
1855 if (value > 0 && FindingVerticalOnly(pageseg_mode)) {
1856 value = part->boxes_count() == 1 ? 0 : -2;
1857 }
else if (value < 0 && FindingHorizontalOnly(pageseg_mode)) {
1858 value = part->boxes_count() == 1 ? 0 : 2;
1860 part->SetRegionAndFlowTypesFromProjectionValue(value);
1862 part_grid->InsertBBox(
true,
true, part);
1867void StrokeWidth::EasyMerges(ColPartitionGrid* part_grid) {
1876bool StrokeWidth::OrientationSearchBox(ColPartition* part,
TBOX* box) {
1877 if (part->IsVerticalType()) {
1888bool StrokeWidth::ConfirmEasyMerge(
const ColPartition* p1,
1889 const ColPartition* p2) {
1895 if ((p1->IsVerticalType() || p2->IsVerticalType()) &&
1896 p1->HCoreOverlap(*p2) <= 0 &&
1897 ((!p1->IsSingleton() &&
1898 !p2->IsSingleton()) ||
1899 !p1->bounding_box().major_overlap(p2->bounding_box())))
1901 if ((p1->IsHorizontalType() || p2->IsHorizontalType()) &&
1902 p1->VCoreOverlap(*p2) <= 0 &&
1903 ((!p1->IsSingleton() &&
1904 !p2->IsSingleton()) ||
1905 (!p1->bounding_box().major_overlap(p2->bounding_box()) &&
1906 !p1->OKDiacriticMerge(*p2,
false) &&
1907 !p2->OKDiacriticMerge(*p1,
false))))
1909 if (!p1->ConfirmNoTabViolation(*p2))
1913 return NoNoiseInBetween(p1->bounding_box(), p2->bounding_box());
1917bool StrokeWidth::NoNoiseInBetween(
const TBOX& box1,
const TBOX& box2)
const {
1925ScrollView* StrokeWidth::DisplayGoodBlobs(
const char* window_name,
1928#ifndef GRAPHICS_DISABLED
1935 gsearch.StartFullSearch();
1937 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
1939 int left_x = box.
left();
1940 int right_x = box.
right();
1941 int top_y = box.
top();
1942 int bottom_y = box.
bottom();
1953 else if (goodness == 1)
1959 window->
Rectangle(left_x, bottom_y, right_x, top_y);
1967#ifndef GRAPHICS_DISABLED
1969 int top = std::max(
static_cast<int>(blob_box.
top()), blob->
base_char_top());
1971 int x = (blob_box.
left() + blob_box.
right()) / 2;
1972 window->
Line(x, top, x, bottom);
1977ScrollView* StrokeWidth::DisplayDiacritics(
const char* window_name,
1980#ifndef GRAPHICS_DISABLED
1985 BLOBNBOX_IT it(&block->
blobs);
1986 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1990 DrawDiacriticJoiner(blob, window);
1998 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2002 DrawDiacriticJoiner(blob, window);
BlobNeighbourDir DirOtherWay(BlobNeighbourDir dir)
int IntCastRounded(double x)
#define BOOL_VAR(name, val, comment)
#define INT_VAR(name, val, comment)
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
DLLSYM void tprintf(const char *format,...)
int textord_debug_tabfind
const double kMaxDiacriticDistanceRatio
const int kLineResiduePadRatio
const double kNoiseOverlapAreaFactor
const int kCJKMaxComponents
@ PSM_SINGLE_BLOCK_VERT_TEXT
aligned text.
@ PSM_SINGLE_COLUMN
Assume a single column of text of variable sizes.
const double kMinDiacriticSizeRatio
const double kCJKBrokenDistanceFraction
const int kLineTrapLongest
const double kCJKAspectRatio
const double kStrokeWidthTolerance
const double kNoiseOverlapGrowthFactor
const double kCJKAspectRatioIncrease
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
const double kNeighbourSearchFactor
const double kLineResidueAspectRatio
const double kBrokenCJKIterationFraction
const int kMaxCJKSizeRatio
const double kDiacriticXPadRatio
const double kLineResidueSizeRatio
const double kMaxDiacriticGapToBaseCharHeight
const int kLineTrapShortest
const double kStrokeWidthFractionTolerance
const double kStrokeWidthFractionCJK
const double kStrokeWidthCJK
const double kDiacriticYPadRatio
const float kSizeRatioToReject
const int kMostlyOneDirRatio
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
void MinMaxGapsClipped(int *h_min, int *h_max, int *v_min, int *v_max) const
void set_leader_on_right(bool flag)
bool good_stroke_neighbour(BlobNeighbourDir n) const
BLOBNBOX * base_char_blob() const
bool leader_on_right() const
bool DefiniteIndividualFlow()
bool ConfirmNoTabViolation(const BLOBNBOX &other) const
bool MatchingStrokeWidth(const BLOBNBOX &other, double fractional_tolerance, double constant_tolerance) const
float vert_stroke_width() const
bool leader_on_left() const
float horz_stroke_width() const
void compute_bounding_box()
void really_merge(BLOBNBOX *other)
void set_owns_cblob(bool value)
BlobRegionType region_type() const
void rotate_box(FCOORD rotation)
void set_horz_possible(bool value)
void set_vert_possible(bool value)
bool vert_possible() const
TBOX BoundsWithinLimits(int left, int right)
BLOBNBOX * neighbour(BlobNeighbourDir n) const
float area_stroke_width() const
ScrollView::Color BoxColor() const
void set_leader_on_left(bool flag)
void set_base_char_blob(BLOBNBOX *blob)
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
void set_flow(BlobTextFlowType value)
void set_neighbour(BlobNeighbourDir n, BLOBNBOX *neighbour, bool good)
void set_region_type(BlobRegionType new_type)
int base_char_bottom() const
int base_char_top() const
const TBOX & bounding_box() const
tesseract::ColPartition * owner() const
bool joined_to_prev() const
BlobTextFlowType flow() const
void NeighbourGaps(int gaps[BND_COUNT]) const
bool UniquelyHorizontal() const
void set_owner(tesseract::ColPartition *new_owner)
bool horz_possible() const
static bool UnMergeableType(BlobRegionType type)
void set_diacritic_box(const TBOX &diacritic_box)
bool UniquelyVertical() const
void DeleteUnownedNoise()
BLOBNBOX_LIST noise_blobs
BLOBNBOX_LIST large_blobs
BLOBNBOX_LIST small_blobs
void set_y(float yin)
rewrite function
void set_x(float xin)
rewrite function
bool overlap(const TBOX &box) const
bool y_overlap(const TBOX &box) const
int y_gap(const TBOX &box) const
bool contains(const FCOORD pt) const
void pad(int xpad, int ypad)
bool major_y_overlap(const TBOX &box) const
int x_gap(const TBOX &box) const
C_OUTLINE_LIST * out_list()
static bool WithinTestRegion(int detail_level, int x, int y)
void StartRadSearch(int x, int y, int max_radius)
void StartRectSearch(const TBOX &rect)
const ICOORD & bleft() const
void GridCoords(int x, int y, int *grid_x, int *grid_y) const
const ICOORD & tright() const
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
virtual void HandleClick(int x, int y)
ScrollView * MakeWindow(int x, int y, const char *window_name)
void RemoveBBox(BLOBNBOX *bbox)
BlobGrid(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void InsertBlobList(BLOBNBOX_LIST *blobs)
static ColPartition * MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list)
void AddBox(BLOBNBOX *box)
bool IsVerticalType() const
int median_height() const
static bool BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box, const FCOORD &rotation, Pix *pix)
void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, bool cjk_merge, TO_BLOCK *input_block)
void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid)
void HandleClick(int x, int y) override
StrokeWidth(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void RemoveLineResidue(ColPartition_LIST *big_part_list)
void SetNeighboursOnMediumBlobs(TO_BLOCK *block)
void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid)
bool TestVerticalTextDirection(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block, Pix *nontext_pix, const DENORM *denorm, bool cjk_script, TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
static bool DifferentSizes(int size1, int size2)
static bool VeryDifferentSizes(int size1, int size2)
void MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs, BLOBNBOX_LIST *small_blobs) const
int EvaluateColPartition(const ColPartition &part, const DENORM *denorm, bool debug) const
void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Pix *nontext_map)
int DistanceOfBoxFromBox(const TBOX &from_box, const TBOX &to_box, bool horizontal_textline, const DENORM *denorm, bool debug) const
void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win)
void DisplayProjection() const
SVEvent * AwaitEvent(SVEventType type)
void Line(int x1, int y1, int x2, int y2)
void Rectangle(int x1, int y1, int x2, int y2)