80 {
81 SplitStrategy split_strategy = split_for_pageseg ? pageseg_split_strategy_ :
82 ocr_split_strategy_;
84 return false;
85 }
90 tprintf(
"Splitting shiro-rekha ...\n");
91 tprintf(
"Split strategy = %s\n",
93 tprintf(
"Initial pageseg available = %s\n",
94 segmentation_block_list_ ? "yes" : "no");
95 }
96
97 pixDestroy(&splitted_image_);
98 splitted_image_ = pixCopy(nullptr, orig_pix_);
99
100
102 pixDestroy(&debug_image_);
103 debug_image_ = pixConvertTo32(orig_pix_);
104 }
105
106
107
108 Pix* pix_for_ccs = pixClone(orig_pix_);
110 !segmentation_block_list_) {
112 tprintf(
"Performing a global close operation..\n");
113 }
114
115
116 pixDestroy(&pix_for_ccs);
117 pix_for_ccs = pixCopy(nullptr, orig_pix_);
118 PerformClose(pix_for_ccs, global_xheight_);
119 }
120 Pixa* ccs;
121 Boxa* tmp_boxa = pixConnComp(pix_for_ccs, &ccs, 8);
122 boxaDestroy(&tmp_boxa);
123 pixDestroy(&pix_for_ccs);
124
125
126
127
128 Boxa* regions_to_clear = boxaCreate(0);
129 int num_ccs = 0;
130 if (ccs != nullptr) num_ccs = pixaGetCount(ccs);
131 for (int i = 0; i < num_ccs; ++i) {
132 Box* box = ccs->boxa->box[i];
133 Pix* word_pix = pixClipRectangle(orig_pix_, box, nullptr);
135 int xheight = GetXheightForCC(box);
138 pixRenderBoxArb(debug_image_, box, 1, 255, 0, 0);
139 }
140
141
142
143
145 (box->w > xheight / 3 && box->h > xheight / 2)) {
146 SplitWordShiroRekha(split_strategy, word_pix, xheight,
147 box->x, box->y, regions_to_clear);
149 tprintf(
"CC dropped from splitting: %d,%d (%d, %d)\n",
150 box->x, box->y, box->w, box->h);
151 }
152 pixDestroy(&word_pix);
153 }
154
155 for (int i = 0; i < boxaGetCount(regions_to_clear); ++i) {
156 Box* box = boxaGetBox(regions_to_clear, i, L_CLONE);
157 pixClearInRect(splitted_image_, box);
158 boxDestroy(&box);
159 }
160 boxaDestroy(®ions_to_clear);
161 pixaDestroy(&ccs);
163 pixa_debug->AddPix(debug_image_,
164 split_for_pageseg ? "pageseg_split" : "ocr_split");
165 }
166 return true;
167}