tesseract 4.1.1
Loading...
Searching...
No Matches
cjkpitch.cpp
Go to the documentation of this file.
1
2// File: cjkpitch.cpp
3// Description: Code to determine fixed pitchness and the pitch if fixed,
4// for CJK text.
5// Author: takenaka@google.com (Hiroshi Takenaka)
6//
7// Copyright 2011 Google Inc. All Rights Reserved.
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17//
19
20#include "cjkpitch.h"
21#include "genericvector.h"
22#include "topitch.h"
23#include "tovars.h"
24
25#include <algorithm>
26#include <vector> // for std::vector
27
28static BOOL_VAR(textord_space_size_is_variable, false,
29 "If true, word delimiter spaces are assumed to have "
30 "variable width, even though characters have fixed pitch.");
31
32namespace {
33
34// Allow +/-10% error for character pitch / body size.
35static const float kFPTolerance = 0.1;
36
37// Minimum ratio of "good" character pitch for a row to be considered
38// to be fixed-pitch.
39static const float kFixedPitchThreshold = 0.35;
40
41// rank statistics for a small collection of float values.
42class SimpleStats {
43 public:
44 SimpleStats(): finalized_(false), values_() { }
45 ~SimpleStats() { }
46
47 void Clear() {
48 values_.clear();
49 finalized_ = false;
50 }
51
52 void Add(float value) {
53 values_.push_back(value);
54 finalized_ = false;
55 }
56
57 void Finish() {
58 values_.sort(float_compare);
59 finalized_ = true;
60 }
61
62 float ile(double frac) {
63 if (!finalized_) Finish();
64 if (values_.empty()) return 0.0;
65 if (frac >= 1.0) return values_.back();
66 if (frac <= 0.0 || values_.size() == 1) return values_[0];
67 int index = static_cast<int>((values_.size() - 1) * frac);
68 float reminder = (values_.size() - 1) * frac - index;
69
70 return values_[index] * (1.0 - reminder) +
71 values_[index + 1] * reminder;
72 }
73
74 float median() {
75 return ile(0.5);
76 }
77
78 float minimum() {
79 if (!finalized_) Finish();
80 if (values_.empty()) return 0.0;
81 return values_[0];
82 }
83
84 int size() const {
85 return values_.size();
86 }
87
88 private:
89 static int float_compare(const void* a, const void* b) {
90 const auto* f_a = static_cast<const float*>(a);
91 const auto* f_b = static_cast<const float*>(b);
92 return (*f_a > *f_b) ? 1 : ((*f_a < *f_b) ? -1 : 0);
93 }
94
95 bool finalized_;
97};
98
99// statistics for a small collection of float pairs (x, y).
100// EstimateYFor(x, r) returns the estimated y at x, based on
101// existing samples between x*(1-r) ~ x*(1+r).
102class LocalCorrelation {
103 public:
104 struct float_pair {
105 float x, y;
106 int vote;
107 };
108
109 LocalCorrelation(): finalized_(false) { }
110 ~LocalCorrelation() { }
111
112 void Finish() {
113 values_.sort(float_pair_compare);
114 finalized_ = true;
115 }
116
117 void Clear() {
118 finalized_ = false;
119 }
120
121 void Add(float x, float y, int v) {
122 struct float_pair value;
123 value.x = x;
124 value.y = y;
125 value.vote = v;
126 values_.push_back(value);
127 finalized_ = false;
128 }
129
130 float EstimateYFor(float x, float r) {
131 ASSERT_HOST(finalized_);
132 int start = 0, end = values_.size();
133 // Because the number of samples (used_) is assumed to be small,
134 // just use linear search to find values within the range.
135 while (start < values_.size() && values_[start].x < x * (1.0 - r)) start++;
136 while (end - 1 >= 0 && values_[end - 1].x > x * (1.0 + r)) end--;
137
138 // Fall back to the global average if there are no data within r
139 // of x.
140 if (start >= end) {
141 start = 0;
142 end = values_.size();
143 }
144
145 // Compute weighted average of the values.
146 float rc = 0;
147 int vote = 0;
148 for (int i = start; i < end; i++) {
149 rc += values_[i].vote * x * values_[i].y / values_[i].x;
150 vote += values_[i].vote;
151 }
152
153 return rc / vote;
154 }
155
156 private:
157 static int float_pair_compare(const void* a, const void* b) {
158 const auto* f_a = static_cast<const float_pair*>(a);
159 const auto* f_b = static_cast<const float_pair*>(b);
160 return (f_a->x > f_b->x) ? 1 : ((f_a->x < f_b->x) ? -1 : 0);
161 }
162
163 bool finalized_;
165};
166
167// Class to represent a character on a fixed pitch row. A FPChar may
168// consist of multiple blobs (BLOBNBOX's).
169class FPChar {
170 public:
171 enum Alignment {
172 ALIGN_UNKNOWN, ALIGN_GOOD, ALIGN_BAD
173 };
174
175 FPChar(): box_(), real_body_(),
176 from_(nullptr), to_(nullptr), num_blobs_(0), max_gap_(0),
177 final_(false), alignment_(ALIGN_UNKNOWN),
178 merge_to_prev_(false), delete_flag_(false) {
179 }
180
181 // Initialize from blob.
182 void Init(BLOBNBOX *blob) {
183 box_ = blob->bounding_box();
184 real_body_ = box_;
185 from_ = to_ = blob;
186 num_blobs_ = 1;
187 }
188
189 // Merge this character with "next". The "next" character should
190 // consist of succeeding blobs on the same row.
191 void Merge(const FPChar &next) {
192 int gap = real_body_.x_gap(next.real_body_);
193 if (gap > max_gap_) max_gap_ = gap;
194
195 box_ += next.box_;
196 real_body_ += next.real_body_;
197 to_ = next.to_;
198 num_blobs_ += next.num_blobs_;
199 }
200
201 // Accessors.
202 const TBOX &box() const { return box_; }
203 void set_box(const TBOX &box) {
204 box_ = box;
205 }
206 const TBOX &real_body() const { return real_body_; }
207
208 bool is_final() const { return final_; }
209 void set_final(bool flag) {
210 final_ = flag;
211 }
212
213 const Alignment& alignment() const {
214 return alignment_;
215 }
216 void set_alignment(Alignment alignment) {
217 alignment_ = alignment;
218 }
219
220 bool merge_to_prev() const {
221 return merge_to_prev_;
222 }
223 void set_merge_to_prev(bool flag) {
224 merge_to_prev_ = flag;
225 }
226
227 bool delete_flag() const {
228 return delete_flag_;
229 }
230 void set_delete_flag(bool flag) {
231 delete_flag_ = flag;
232 }
233
234 int max_gap() const {
235 return max_gap_;
236 }
237
238 int num_blobs() const {
239 return num_blobs_;
240 }
241
242 private:
243 TBOX box_; // Rectangle region considered to be occupied by this
244 // character. It could be bigger than the bounding box.
245 TBOX real_body_; // Real bounding box of this character.
246 BLOBNBOX *from_; // The first blob of this character.
247 BLOBNBOX *to_; // The last blob of this character.
248 int num_blobs_; // Number of blobs that belong to this character.
249 int max_gap_; // Maximum x gap between the blobs.
250
251 bool final_; // True if alignment/fragmentation decision for this
252 // character is finalized.
253
254 Alignment alignment_; // Alignment status.
255 bool merge_to_prev_; // True if this is a fragmented blob that
256 // needs to be merged to the previous
257 // character.
258
259 int delete_flag_; // True if this character is merged to another
260 // one and needs to be deleted.
261};
262
263// Class to represent a fixed pitch row, as a linear collection of
264// FPChar's.
265class FPRow {
266 public:
267 FPRow() : all_pitches_(), all_gaps_(), good_pitches_(), good_gaps_(),
268 heights_(), characters_() {
269 }
270
271 ~FPRow() { }
272
273 // Initialize from TD_ROW.
274 void Init(TO_ROW *row);
275
276 // Estimate character pitch of this row, based on current alignment
277 // status of underlying FPChar's. The argument pass1 can be set to
278 // true if the function is called after Pass1Analyze(), to eliminate
279 // some redundant computation.
280 void EstimatePitch(bool pass1);
281
282 // Check each character if it has good character pitches between its
283 // predecessor and its successor and set its alignment status. If
284 // we already calculated the estimated pitch for this row, the value
285 // is used. If we didn't, a character is considered to be good, if
286 // the pitches between its predecessor and its successor are almost
287 // equal.
288 void Pass1Analyze();
289
290 // Find characters that fit nicely into one imaginary body next to a
291 // character which is already finalized. Then mark them as character
292 // fragments.
293 bool Pass2Analyze();
294
295 // Merge FPChars marked as character fragments into one.
296 void MergeFragments();
297
298 // Finalize characters that are already large enough and cannot be
299 // merged with others any more.
300 void FinalizeLargeChars();
301
302 // Output pitch estimation results to attributes of TD_ROW.
303 void OutputEstimations();
304
305 void DebugOutputResult(int row_index);
306
307 int good_pitches() {
308 return good_pitches_.size();
309 }
310
311 float pitch() {
312 return pitch_;
313 }
314
315 float estimated_pitch() {
316 return estimated_pitch_;
317 }
318
319 void set_estimated_pitch(float v) {
320 estimated_pitch_ = v;
321 }
322
323 float height() {
324 return height_;
325 }
326
327 float height_pitch_ratio() {
328 if (good_pitches_.size() < 2) return -1.0;
329 return height_ / good_pitches_.median();
330 }
331
332 float gap() {
333 return gap_;
334 }
335
336 size_t num_chars() {
337 return characters_.size();
338 }
339 FPChar *character(int i) {
340 return &characters_[i];
341 }
342
343 const TBOX &box(int i) {
344 return characters_[i].box();
345 }
346
347 const TBOX &real_body(int i) {
348 return characters_[i].real_body();
349 }
350
351 bool is_box_modified(int i) {
352 return !(characters_[i].box() == characters_[i].real_body());
353 }
354
355 float center_x(int i) {
356 return (characters_[i].box().left() + characters_[i].box().right()) / 2.0;
357 }
358
359 bool is_final(int i) {
360 return characters_[i].is_final();
361 }
362
363 void finalize(int i) {
364 characters_[i].set_final(true);
365 }
366
367 bool is_good(int i) {
368 return characters_[i].alignment() == FPChar::ALIGN_GOOD;
369 }
370
371 void mark_good(int i) {
372 characters_[i].set_alignment(FPChar::ALIGN_GOOD);
373 }
374
375 void mark_bad(int i) {
376 characters_[i].set_alignment(FPChar::ALIGN_BAD);
377 }
378
379 void clear_alignment(int i) {
380 characters_[i].set_alignment(FPChar::ALIGN_UNKNOWN);
381 }
382
383 private:
384 static float x_overlap_fraction(const TBOX& box1, const TBOX& box2) {
385 if (std::min(box1.width(), box2.width()) == 0) return 0.0;
386 return -box1.x_gap(box2) / static_cast<float>(std::min(box1.width(), box2.width()));
387 }
388
389 static bool mostly_overlap(const TBOX& box1, const TBOX& box2) {
390 return x_overlap_fraction(box1, box2) > 0.9;
391 }
392
393 static bool significant_overlap(const TBOX& box1, const TBOX& box2) {
394 if (std::min(box1.width(), box2.width()) == 0) return false;
395 int overlap = -box1.x_gap(box2);
396 return overlap > 1 || x_overlap_fraction(box1, box2) > 0.1;
397 }
398
399 static float box_pitch(const TBOX& ref, const TBOX& box) {
400 return abs(ref.left() + ref.right() - box.left() - box.right()) / 2.0;
401 }
402
403 // Check if two neighboring characters satisfy the fixed pitch model.
404 static bool is_good_pitch(float pitch, const TBOX& box1, const TBOX& box2) {
405 // Character box shouldn't exceed pitch.
406 if (box1.width() >= pitch * (1.0 + kFPTolerance) ||
407 box2.width() >= pitch * (1.0 + kFPTolerance) ||
408 box1.height() >= pitch * (1.0 + kFPTolerance) ||
409 box2.height() >= pitch * (1.0 + kFPTolerance)) return false;
410
411 const float real_pitch = box_pitch(box1, box2);
412 if (fabs(real_pitch - pitch) < pitch * kFPTolerance) return true;
413
414 if (textord_space_size_is_variable) {
415 // Hangul characters usually have fixed pitch, but words are
416 // delimited by space which can be narrower than characters.
417 if (real_pitch > pitch && real_pitch < pitch * 2.0 &&
418 real_pitch - box1.x_gap(box2) < pitch) {
419 return true;
420 }
421 }
422 return false;
423 }
424
425 static bool is_interesting_blob(const BLOBNBOX *blob) {
426 return !blob->joined_to_prev() && blob->flow() != BTFT_LEADER;
427 }
428
429 // Cleanup chars that are already merged to others.
430 void DeleteChars() {
431 int index = 0;
432 for (int i = 0; i < characters_.size(); ++i) {
433 if (!characters_[i].delete_flag()) {
434 if (index != i) characters_[index] = characters_[i];
435 index++;
436 }
437 }
438 characters_.truncate(index);
439 }
440
441 float pitch_ = 0.0f; // Character pitch.
442 float estimated_pitch_ = 0.0f; // equal to pitch_ if pitch_ is considered
443 // to be good enough.
444 float height_ = 0.0f; // Character height.
445 float gap_ = 0.0f; // Minimum gap between characters.
446
447 // Pitches between any two successive characters.
448 SimpleStats all_pitches_;
449 // Gaps between any two successive characters.
450 SimpleStats all_gaps_;
451 // Pitches between any two successive characters that are consistent
452 // with the fixed pitch model.
453 SimpleStats good_pitches_;
454 // Gaps between any two successive characters that are consistent
455 // with the fixed pitch model.
456 SimpleStats good_gaps_;
457
458 SimpleStats heights_;
459
460 GenericVector<FPChar> characters_;
461 TO_ROW *real_row_ = nullptr; // Underlying TD_ROW for this row.
462};
463
464void FPRow::Init(TO_ROW *row) {
465 ASSERT_HOST(row != nullptr);
466 ASSERT_HOST(row->xheight > 0);
467 real_row_ = row;
468 real_row_->pitch_decision = PITCH_CORR_PROP; // Default decision.
469
470 BLOBNBOX_IT blob_it = row->blob_list();
471 // Initialize characters_ and compute the initial estimation of
472 // character height.
473 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
474 if (is_interesting_blob(blob_it.data())) {
475 FPChar fp_char;
476 fp_char.Init(blob_it.data());
477 // Merge unconditionally if two blobs overlap.
478 if (!characters_.empty() &&
479 significant_overlap(fp_char.box(), characters_.back().box())) {
480 characters_.back().Merge(fp_char);
481 } else {
482 characters_.push_back(fp_char);
483 }
484 TBOX bound = blob_it.data()->bounding_box();
485 if (bound.height() * 3.0 > bound.width()) {
486 heights_.Add(bound.height());
487 }
488 }
489 }
490 heights_.Finish();
491 height_ = heights_.ile(0.875);
492}
493
494void FPRow::OutputEstimations() {
495 if (good_pitches_.size() == 0) {
496 pitch_ = 0.0f;
497 real_row_->pitch_decision = PITCH_CORR_PROP;
498 return;
499 }
500
501 pitch_ = good_pitches_.median();
502 real_row_->fixed_pitch = pitch_;
503 // good_gaps_.ile(0.125) can be large if most characters on the row
504 // are skinny. Use pitch_ - height_ instead if it's smaller, but
505 // positive.
506 real_row_->kern_size = real_row_->pr_nonsp =
507 std::min(good_gaps_.ile(0.125), std::max(pitch_ - height_, 0.0f));
508 real_row_->body_size = pitch_ - real_row_->kern_size;
509
510 if (good_pitches_.size() < all_pitches_.size() * kFixedPitchThreshold) {
511 // If more than half of the characters of a line don't fit to the
512 // fixed pitch model, consider the line to be proportional. 50%
513 // seems to be a good threshold in practice as well.
514 // Anyway we store estimated values (fixed_pitch, kern_size, etc.) in
515 // real_row_ as a partial estimation result and try to use them in the
516 // normalization process.
517 real_row_->pitch_decision = PITCH_CORR_PROP;
518 return;
519 } else if (good_pitches_.size() > all_pitches_.size() * 0.75) {
520 real_row_->pitch_decision = PITCH_DEF_FIXED;
521 } else {
522 real_row_->pitch_decision = PITCH_CORR_FIXED;
523 }
524
525 real_row_->space_size = real_row_->pr_space = pitch_;
526 // Set min_space to 50% of character pitch so that we can break CJK
527 // text at a half-width space after punctuation.
528 real_row_->min_space = (pitch_ + good_gaps_.minimum()) * 0.5;
529
530 // Don't consider a quarter space as a real space, because it's used
531 // for line justification in traditional Japanese books.
532 real_row_->max_nonspace = std::max(pitch_ * 0.25 + good_gaps_.minimum(),
533 static_cast<double>(good_gaps_.ile(0.875)));
534
535 int space_threshold =
536 std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
537 static_cast<int>(real_row_->xheight));
538
539 // Make max_nonspace larger than any intra-character gap so that
540 // make_prop_words() won't break a row at the middle of a character.
541 for (size_t i = 0; i < num_chars(); ++i) {
542 if (characters_[i].max_gap() > real_row_->max_nonspace) {
543 real_row_->max_nonspace = characters_[i].max_gap();
544 }
545 }
546 real_row_->space_threshold =
547 std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
548 static_cast<int>(real_row_->xheight));
549 real_row_->used_dm_model = false;
550
551 // Setup char_cells.
552 ICOORDELT_IT cell_it = &real_row_->char_cells;
553 auto *cell = new ICOORDELT(real_body(0).left(), 0);
554 cell_it.add_after_then_move(cell);
555
556 int right = real_body(0).right();
557 for (size_t i = 1; i < num_chars(); ++i) {
558 // Put a word break if gap between two characters is bigger than
559 // space_threshold. Don't break if none of two characters
560 // couldn't be "finalized", because maybe they need to be merged
561 // to one character.
562 if ((is_final(i - 1) || is_final(i)) &&
563 real_body(i - 1).x_gap(real_body(i)) > space_threshold) {
564 cell = new ICOORDELT(right + 1, 0);
565 cell_it.add_after_then_move(cell);
566 while (right + pitch_ < box(i).left()) {
567 right += pitch_;
568 cell = new ICOORDELT(right + 1, 0);
569 cell_it.add_after_then_move(cell);
570 }
571 right = box(i).left();
572 }
573 cell = new ICOORDELT((right + real_body(i).left()) / 2, 0);
574 cell_it.add_after_then_move(cell);
575 right = real_body(i).right();
576 }
577
578 cell = new ICOORDELT(right + 1, 0);
579 cell_it.add_after_then_move(cell);
580
581 // TODO(takenaka): add code to store alignment/fragmentation
582 // information to blobs so that it can be reused later, e.g. in
583 // recognition phase.
584}
585
586void FPRow::EstimatePitch(bool pass1) {
587 good_pitches_.Clear();
588 all_pitches_.Clear();
589 good_gaps_.Clear();
590 all_gaps_.Clear();
591 heights_.Clear();
592 if (num_chars() == 0) return;
593
594 int32_t cx0, cx1;
595 bool prev_was_good = is_good(0);
596 cx0 = center_x(0);
597
598 heights_.Add(box(0).height());
599 for (size_t i = 1; i < num_chars(); i++) {
600 cx1 = center_x(i);
601 int32_t pitch = cx1 - cx0;
602 int32_t gap = std::max(0, real_body(i - 1).x_gap(real_body(i)));
603
604 heights_.Add(box(i).height());
605 // Ignore if the pitch is too close. But don't ignore wide pitch
606 // may be the result of large tracking.
607 if (pitch > height_ * 0.5) {
608 all_pitches_.Add(pitch);
609 all_gaps_.Add(gap);
610 if (is_good(i)) {
611 // In pass1 (after Pass1Analyze()), all characters marked as
612 // "good" have a good consistent pitch with their previous
613 // characters. However, it's not true in pass2 and a good
614 // character may have a good pitch only between its successor.
615 // So we collect only pitch values between two good
616 // characters. and within tolerance in pass2.
617 if (pass1 || (prev_was_good &&
618 fabs(estimated_pitch_ - pitch) <
619 kFPTolerance * estimated_pitch_)) {
620 good_pitches_.Add(pitch);
621 if (!is_box_modified(i - 1) && !is_box_modified(i)) {
622 good_gaps_.Add(gap);
623 }
624 }
625 prev_was_good = true;
626 } else {
627 prev_was_good = false;
628 }
629 }
630 cx0 = cx1;
631 }
632
633 good_pitches_.Finish();
634 all_pitches_.Finish();
635 good_gaps_.Finish();
636 all_gaps_.Finish();
637 heights_.Finish();
638
639 height_ = heights_.ile(0.875);
640 if (all_pitches_.size() == 0) {
641 pitch_ = 0.0f;
642 gap_ = 0.0f;
643 } else if (good_pitches_.size() < 2) {
644 // We don't have enough data to estimate the pitch of this row yet.
645 // Use median of all pitches as the initial guess.
646 pitch_ = all_pitches_.median();
647 ASSERT_HOST(pitch_ > 0.0f);
648 gap_ = all_gaps_.ile(0.125);
649 } else {
650 pitch_ = good_pitches_.median();
651 ASSERT_HOST(pitch_ > 0.0f);
652 gap_ = good_gaps_.ile(0.125);
653 }
654}
655
656void FPRow::DebugOutputResult(int row_index) {
657 if (num_chars() > 0) {
658 tprintf("Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, "
659 "space_size=%f, space_threshold=%d, xheight=%f\n",
660 row_index, static_cast<int>(real_row_->pitch_decision),
661 real_row_->fixed_pitch, real_row_->max_nonspace,
662 real_row_->space_size, real_row_->space_threshold,
663 real_row_->xheight);
664
665 for (unsigned i = 0; i < num_chars(); i++) {
666 tprintf("Char %u: is_final=%d is_good=%d num_blobs=%d: ",
667 i, is_final(i), is_good(i), character(i)->num_blobs());
668 box(i).print();
669 }
670 }
671}
672
673void FPRow::Pass1Analyze() {
674 if (num_chars() < 2) return;
675
676 if (estimated_pitch_ > 0.0f) {
677 for (size_t i = 2; i < num_chars(); i++) {
678 if (is_good_pitch(estimated_pitch_, box(i - 2), box(i-1)) &&
679 is_good_pitch(estimated_pitch_, box(i - 1), box(i))) {
680 mark_good(i - 1);
681 }
682 }
683 } else {
684 for (size_t i = 2; i < num_chars(); i++) {
685 if (is_good_pitch(box_pitch(box(i-2), box(i-1)), box(i - 1), box(i))) {
686 mark_good(i - 1);
687 }
688 }
689 }
690 character(0)->set_alignment(character(1)->alignment());
691 character(num_chars() - 1)->set_alignment(
692 character(num_chars() - 2)->alignment());
693}
694
695bool FPRow::Pass2Analyze() {
696 bool changed = false;
697 if (num_chars() <= 1 || estimated_pitch_ == 0.0f) {
698 return false;
699 }
700 for (size_t i = 0; i < num_chars(); i++) {
701 if (is_final(i)) continue;
702
703 FPChar::Alignment alignment = character(i)->alignment();
704 bool intersecting = false;
705 bool not_intersecting = false;
706
707 if (i < num_chars() - 1 && is_final(i + 1)) {
708 // Next character is already finalized. Estimate the imaginary
709 // body including this character based on the character. Skip
710 // whitespace if necessary.
711 bool skipped_whitespaces = false;
712 float c1 = center_x(i + 1) - 1.5 * estimated_pitch_;
713 while (c1 > box(i).right()) {
714 skipped_whitespaces = true;
715 c1 -= estimated_pitch_;
716 }
717 TBOX ibody(c1, box(i).bottom(), c1 + estimated_pitch_, box(i).top());
718
719 // Collect all characters that mostly fit in the region.
720 // Also, their union height shouldn't be too big.
721 int j = i;
722 TBOX merged;
723 while (j >= 0 && !is_final(j) && mostly_overlap(ibody, box(j)) &&
724 merged.bounding_union(box(j)).height() <
725 estimated_pitch_ * (1 + kFPTolerance)) {
726 merged += box(j);
727 j--;
728 }
729
730 if (j >= 0 && significant_overlap(ibody, box(j))) {
731 // character(j) lies on the character boundary and doesn't fit
732 // well into the imaginary body.
733 if (!is_final(j)) intersecting = true;
734 } else {
735 not_intersecting = true;
736 if (i - j > 0) {
737 // Merge character(j+1) ... character(i) because they fit
738 // into the body nicely.
739 if (i - j == 1) {
740 // Only one char in the imaginary body.
741 if (!skipped_whitespaces) mark_good(i);
742 // set ibody as bounding box of this character to get
743 // better pitch analysis result for halfwidth glyphs
744 // followed by a halfwidth space.
745 if (box(i).width() <= estimated_pitch_ * 0.5) {
746 ibody += box(i);
747 character(i)->set_box(ibody);
748 }
749 character(i)->set_merge_to_prev(false);
750 finalize(i);
751 } else {
752 for (int k = i; k > j + 1; k--) {
753 character(k)->set_merge_to_prev(true);
754 }
755 }
756 }
757 }
758 }
759 if (i > 0 && is_final(i - 1)) {
760 // Now we repeat everything from the opposite side. Previous
761 // character is already finalized. Estimate the imaginary body
762 // including this character based on the character.
763 bool skipped_whitespaces = false;
764 float c1 = center_x(i - 1) + 1.5 * estimated_pitch_;
765 while (c1 < box(i).left()) {
766 skipped_whitespaces = true;
767 c1 += estimated_pitch_;
768 }
769 TBOX ibody(c1 - estimated_pitch_, box(i).bottom(), c1, box(i).top());
770
771 size_t j = i;
772 TBOX merged;
773 while (j < num_chars() && !is_final(j) && mostly_overlap(ibody, box(j)) &&
774 merged.bounding_union(box(j)).height() <
775 estimated_pitch_ * (1 + kFPTolerance)) {
776 merged += box(j);
777 j++;
778 }
779
780 if (j < num_chars() && significant_overlap(ibody, box(j))) {
781 if (!is_final(j)) intersecting = true;
782 } else {
783 not_intersecting = true;
784 if (j - i > 0) {
785 if (j - i == 1) {
786 if (!skipped_whitespaces) mark_good(i);
787 if (box(i).width() <= estimated_pitch_ * 0.5) {
788 ibody += box(i);
789 character(i)->set_box(ibody);
790 }
791 character(i)->set_merge_to_prev(false);
792 finalize(i);
793 } else {
794 for (size_t k = i + 1; k < j; k++) {
795 character(k)->set_merge_to_prev(true);
796 }
797 }
798 }
799 }
800 }
801
802 // This character doesn't fit well into the estimated imaginary
803 // bodies. Mark it as bad.
804 if (intersecting && !not_intersecting) mark_bad(i);
805 if (character(i)->alignment() != alignment ||
806 character(i)->merge_to_prev()) {
807 changed = true;
808 }
809 }
810
811 return changed;
812}
813
814void FPRow::MergeFragments() {
815 int last_char = 0;
816
817 for (size_t j = 0; j < num_chars(); ++j) {
818 if (character(j)->merge_to_prev()) {
819 character(last_char)->Merge(*character(j));
820 character(j)->set_delete_flag(true);
821 clear_alignment(last_char);
822 character(j-1)->set_merge_to_prev(false);
823 } else {
824 last_char = j;
825 }
826 }
827 DeleteChars();
828}
829
830void FPRow::FinalizeLargeChars() {
831 float row_pitch = estimated_pitch();
832 for (size_t i = 0; i < num_chars(); i++) {
833 if (is_final(i)) continue;
834
835 // Finalize if both neighbors are finalized. We have no other choice.
836 if (i > 0 && is_final(i - 1) && i < num_chars() - 1 && is_final(i + 1)) {
837 finalize(i);
838 continue;
839 }
840
841 float cx = center_x(i);
842 TBOX ibody(cx - 0.5 * row_pitch, 0, cx + 0.5 * row_pitch, 1);
843 if (i > 0) {
844 // The preceding character significantly intersects with the
845 // imaginary body of this character. Let Pass2Analyze() handle
846 // this case.
847 if (x_overlap_fraction(ibody, box(i - 1)) > 0.1) continue;
848 if (!is_final(i - 1)) {
849 TBOX merged = box(i);
850 merged += box(i - 1);
851 if (merged.width() < row_pitch) continue;
852 // This character cannot be finalized yet because it can be
853 // merged with the previous one. Again, let Pass2Analyze()
854 // handle this case.
855 }
856 }
857 if (i < num_chars() - 1) {
858 if (x_overlap_fraction(ibody, box(i + 1)) > 0.1) continue;
859 if (!is_final(i + 1)) {
860 TBOX merged = box(i);
861 merged += box(i + 1);
862 if (merged.width() < row_pitch) continue;
863 }
864 }
865 finalize(i);
866 }
867
868 // Update alignment decision. We only consider finalized characters
869 // in pass2. E.g. if a finalized character C has another finalized
870 // character L on its left and a not-finalized character R on its
871 // right, we mark C as good if the pitch between C and L is good,
872 // regardless of the pitch between C and R.
873 for (size_t i = 0; i < num_chars(); i++) {
874 if (!is_final(i)) continue;
875 bool good_pitch = false;
876 bool bad_pitch = false;
877 if (i > 0 && is_final(i - 1)) {
878 if (is_good_pitch(row_pitch, box(i - 1), box(i))) {
879 good_pitch = true;
880 } else {
881 bad_pitch = true;
882 }
883 }
884 if (i < num_chars() - 1 && is_final(i + 1)) {
885 if (is_good_pitch(row_pitch, box(i), box(i + 1))) {
886 good_pitch = true;
887 } else {
888 bad_pitch = true;
889 }
890 }
891 if (good_pitch && !bad_pitch) mark_good(i);
892 else if (!good_pitch && bad_pitch) mark_bad(i);
893 }
894}
895
896class FPAnalyzer {
897 public:
898 FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks);
899 ~FPAnalyzer() { }
900
901 void Pass1Analyze() {
902 for (auto & row : rows_) row.Pass1Analyze();
903 }
904
905 // Estimate character pitch for each row. The argument pass1 can be
906 // set to true if the function is called after Pass1Analyze(), to
907 // eliminate some redundant computation.
908 void EstimatePitch(bool pass1);
909
910 bool maybe_fixed_pitch() {
911 if (rows_.empty() ||
912 rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) return false;
913 return true;
914 }
915
916 void MergeFragments() {
917 for (auto & row : rows_) row.MergeFragments();
918 }
919
920 void FinalizeLargeChars() {
921 for (auto & row : rows_) row.FinalizeLargeChars();
922 }
923
924 bool Pass2Analyze() {
925 bool changed = false;
926 for (auto & row : rows_) {
927 if (row.Pass2Analyze()) {
928 changed = true;
929 }
930 }
931 return changed;
932 }
933
934 void OutputEstimations() {
935 for (auto & row : rows_) row.OutputEstimations();
936 // Don't we need page-level estimation of gaps/spaces?
937 }
938
939 void DebugOutputResult() {
940 tprintf("FPAnalyzer: final result\n");
941 for (size_t i = 0; i < rows_.size(); i++) rows_[i].DebugOutputResult(i);
942 }
943
944 size_t num_rows() {
945 return rows_.size();
946 }
947
948 // Returns the upper limit for pass2 loop iteration.
949 unsigned max_iteration() {
950 // We're fixing at least one character per iteration. So basically
951 // we shouldn't require more than max_chars_per_row_ iterations.
952 return max_chars_per_row_ + 100;
953 }
954
955 private:
956 ICOORD page_tr_;
957 std::vector<FPRow> rows_;
958 unsigned num_tall_rows_;
959 unsigned num_bad_rows_;
960 // TODO: num_empty_rows_ is incremented, but never used otherwise.
961 unsigned num_empty_rows_;
962 unsigned max_chars_per_row_;
963};
964
965FPAnalyzer::FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
966: page_tr_(page_tr),
967 num_tall_rows_(0),
968 num_bad_rows_(0),
969 num_empty_rows_(0),
970 max_chars_per_row_(0)
971{
972 TO_BLOCK_IT block_it(port_blocks);
973
974 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
975 block_it.forward()) {
976 TO_BLOCK *block = block_it.data();
977 if (!block->get_rows()->empty()) {
978 ASSERT_HOST(block->xheight > 0);
979 find_repeated_chars(block, false);
980 }
981 }
982
983 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
984 block_it.forward()) {
985 TO_ROW_IT row_it = block_it.data()->get_rows();
986 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
987 FPRow row;
988 row.Init(row_it.data());
989 rows_.push_back(row);
990 size_t num_chars = rows_.back().num_chars();
991 if (num_chars <= 1) num_empty_rows_++;
992 if (num_chars > max_chars_per_row_) max_chars_per_row_ = num_chars;
993 }
994 }
995}
996
997void FPAnalyzer::EstimatePitch(bool pass1) {
998 LocalCorrelation pitch_height_stats;
999
1000 num_tall_rows_ = 0;
1001 num_bad_rows_ = 0;
1002 pitch_height_stats.Clear();
1003 for (auto & row : rows_) {
1004 row.EstimatePitch(pass1);
1005 if (row.good_pitches()) {
1006 pitch_height_stats.Add(row.height() + row.gap(),
1007 row.pitch(), row.good_pitches());
1008 if (row.height_pitch_ratio() > 1.1) num_tall_rows_++;
1009 } else {
1010 num_bad_rows_++;
1011 }
1012 }
1013
1014 pitch_height_stats.Finish();
1015 for (auto & row : rows_) {
1016 if (row.good_pitches() >= 5) {
1017 // We have enough evidences. Just use the pitch estimation
1018 // from this row.
1019 row.set_estimated_pitch(row.pitch());
1020 } else if (row.num_chars() > 1) {
1021 float estimated_pitch =
1022 pitch_height_stats.EstimateYFor(row.height() + row.gap(),
1023 0.1);
1024 // CJK characters are more likely to be fragmented than poorly
1025 // chopped. So trust the page-level estimation of character
1026 // pitch only if it's larger than row-level estimation or
1027 // row-level estimation is too large (2x bigger than row height).
1028 if (estimated_pitch > row.pitch() ||
1029 row.pitch() > row.height() * 2.0) {
1030 row.set_estimated_pitch(estimated_pitch);
1031 } else {
1032 row.set_estimated_pitch(row.pitch());
1033 }
1034 }
1035 }
1036}
1037
1038} // namespace
1039
1041 TO_BLOCK_LIST *port_blocks) {
1042 FPAnalyzer analyzer(page_tr, port_blocks);
1043 if (analyzer.num_rows() == 0) return;
1044
1045 analyzer.Pass1Analyze();
1046 analyzer.EstimatePitch(true);
1047
1048 // Perform pass1 analysis again with the initial estimation of row
1049 // pitches, for better estimation.
1050 analyzer.Pass1Analyze();
1051 analyzer.EstimatePitch(true);
1052
1053 // Early exit if the page doesn't seem to contain fixed pitch rows.
1054 if (!analyzer.maybe_fixed_pitch()) {
1056 tprintf("Page doesn't seem to contain fixed pitch rows\n");
1057 }
1058 return;
1059 }
1060
1061 unsigned iteration = 0;
1062 do {
1063 analyzer.MergeFragments();
1064 analyzer.FinalizeLargeChars();
1065 analyzer.EstimatePitch(false);
1066 iteration++;
1067 } while (analyzer.Pass2Analyze() && iteration < analyzer.max_iteration());
1068
1070 tprintf("compute_fixed_pitch_cjk finished after %u iteration (limit=%u)\n",
1071 iteration, analyzer.max_iteration());
1072 }
1073
1074 analyzer.OutputEstimations();
1075 if (textord_debug_pitch_test) analyzer.DebugOutputResult();
1076}
@ PITCH_DEF_FIXED
Definition: blobbox.h:47
@ PITCH_CORR_FIXED
Definition: blobbox.h:51
@ PITCH_CORR_PROP
Definition: blobbox.h:52
@ BTFT_LEADER
Definition: blobbox.h:121
#define ASSERT_HOST(x)
Definition: errcode.h:88
#define BOOL_VAR(name, val, comment)
Definition: params.h:306
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
@ character
Definition: mfoutline.h:63
void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: cjkpitch.cpp:1040
void find_repeated_chars(TO_BLOCK *block, bool testing_on)
Definition: topitch.cpp:1759
bool textord_debug_pitch_test
Definition: topitch.cpp:39
const TBOX & bounding_box() const
Definition: blobbox.h:230
bool joined_to_prev() const
Definition: blobbox.h:256
BlobTextFlowType flow() const
Definition: blobbox.h:295
float xheight
Definition: blobbox.h:657
PITCH_TYPE pitch_decision
Definition: blobbox.h:650
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:600
float xheight
Definition: blobbox.h:788
TO_ROW_LIST * get_rows()
Definition: blobbox.h:704
integer coordinate
Definition: points.h:32
Definition: rect.h:34
int16_t width() const
Definition: rect.h:115
int16_t height() const
Definition: rect.h:108
int16_t left() const
Definition: rect.h:72
TBOX bounding_union(const TBOX &box) const
Definition: rect.cpp:129
int x_gap(const TBOX &box) const
Definition: rect.h:225
int16_t right() const
Definition: rect.h:79