tesseract 4.1.1
Loading...
Searching...
No Matches
PAGE_RES_IT Class Reference

#include <pageres.h>

Public Member Functions

 PAGE_RES_IT ()=default
 
 PAGE_RES_IT (PAGE_RES *the_page_res)
 
bool operator== (const PAGE_RES_IT &other) const
 
bool operator!= (const PAGE_RES_IT &other) const
 
int cmp (const PAGE_RES_IT &other) const
 
WERD_RES * restart_page ()
 
WERD_RES * restart_page_with_empties ()
 
WERD_RES * start_page (bool empty_ok)
 
WERD_RES * restart_row ()
 
WERD_RES * InsertSimpleCloneWord (const WERD_RES &clone_res, WERD *new_word)
 
void ReplaceCurrentWord (tesseract::PointerVector< WERD_RES > *words)
 
void DeleteCurrentWord ()
 
void MakeCurrentWordFuzzy ()
 
WERD_RES * forward ()
 
WERD_RES * forward_with_empties ()
 
WERD_RES * forward_paragraph ()
 
WERD_RES * forward_block ()
 
WERD_RES * prev_word () const
 
ROW_RES * prev_row () const
 
BLOCK_RES * prev_block () const
 
WERD_RES * word () const
 
ROW_RES * row () const
 
BLOCK_RES * block () const
 
WERD_RES * next_word () const
 
ROW_RES * next_row () const
 
BLOCK_RES * next_block () const
 
void rej_stat_word ()
 
void ResetWordIterator ()
 

Public Attributes

PAGE_RES * page_res
 

Detailed Description

Definition at line 675 of file pageres.h.

Constructor & Destructor Documentation

◆ PAGE_RES_IT() [1/2]

PAGE_RES_IT::PAGE_RES_IT ( )
default

◆ PAGE_RES_IT() [2/2]

PAGE_RES_IT::PAGE_RES_IT ( PAGE_RES * the_page_res)
inline

Definition at line 681 of file pageres.h.

681 { // page result
682 page_res = the_page_res;
683 restart_page(); // ready to scan
684 }
WERD_RES * restart_page()
Definition: pageres.h:701
PAGE_RES * page_res
Definition: pageres.h:677

Member Function Documentation

◆ block()

BLOCK_RES * PAGE_RES_IT::block ( ) const
inline

Definition at line 760 of file pageres.h.

760 { // block of cur. word
761 return block_res;
762 }

◆ cmp()

int PAGE_RES_IT::cmp ( const PAGE_RES_IT & other) const

Definition at line 1145 of file pageres.cpp.

1145 {
1146 ASSERT_HOST(page_res == other.page_res);
1147 if (other.block_res == nullptr) {
1148 // other points to the end of the page.
1149 if (block_res == nullptr)
1150 return 0;
1151 return -1;
1152 }
1153 if (block_res == nullptr) {
1154 return 1; // we point to the end of the page.
1155 }
1156 if (block_res == other.block_res) {
1157 if (other.row_res == nullptr || row_res == nullptr) {
1158 // this should only happen if we hit an image block.
1159 return 0;
1160 }
1161 if (row_res == other.row_res) {
1162 // we point to the same block and row.
1163 ASSERT_HOST(other.word_res != nullptr && word_res != nullptr);
1164 if (word_res == other.word_res) {
1165 // we point to the same word!
1166 return 0;
1167 }
1168
1169 WERD_RES_IT word_res_it(&row_res->word_res_list);
1170 for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
1171 word_res_it.forward()) {
1172 if (word_res_it.data() == word_res) {
1173 return -1;
1174 } else if (word_res_it.data() == other.word_res) {
1175 return 1;
1176 }
1177 }
1178 ASSERT_HOST("Error: Incomparable PAGE_RES_ITs" == nullptr);
1179 }
1180
1181 // we both point to the same block, but different rows.
1182 ROW_RES_IT row_res_it(&block_res->row_res_list);
1183 for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list();
1184 row_res_it.forward()) {
1185 if (row_res_it.data() == row_res) {
1186 return -1;
1187 } else if (row_res_it.data() == other.row_res) {
1188 return 1;
1189 }
1190 }
1191 ASSERT_HOST("Error: Incomparable PAGE_RES_ITs" == nullptr);
1192 }
1193
1194 // We point to different blocks.
1195 BLOCK_RES_IT block_res_it(&page_res->block_res_list);
1196 for (block_res_it.mark_cycle_pt();
1197 !block_res_it.cycled_list(); block_res_it.forward()) {
1198 if (block_res_it.data() == block_res) {
1199 return -1;
1200 } else if (block_res_it.data() == other.block_res) {
1201 return 1;
1202 }
1203 }
1204 // Shouldn't happen...
1205 ASSERT_HOST("Error: Incomparable PAGE_RES_ITs" == nullptr);
1206 return 0;
1207}
#define ASSERT_HOST(x)
Definition: errcode.h:88
BLOCK_RES_LIST block_res_list
Definition: pageres.h:80
ROW_RES_LIST row_res_list
Definition: pageres.h:125
WERD_RES_LIST word_res_list
Definition: pageres.h:144

◆ DeleteCurrentWord()

void PAGE_RES_IT::DeleteCurrentWord ( )

Definition at line 1440 of file pageres.cpp.

1440 {
1441 // Check that this word is as we expect. part_of_combos are NEVER iterated
1442 // by the normal iterator, so we should never be trying to delete them.
1443 ASSERT_HOST(!word_res->part_of_combo);
1444 if (!word_res->combination) {
1445 // Combinations own their own word, so we won't find the word on the
1446 // row's word_list, but it is legitimate to try to delete them.
1447 // Delete word from the ROW when not a combination.
1448 WERD_IT w_it(row()->row->word_list());
1449 for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
1450 if (w_it.data() == word_res->word) {
1451 break;
1452 }
1453 }
1454 ASSERT_HOST(!w_it.cycled_list());
1455 delete w_it.extract();
1456 }
1457 // Remove the WERD_RES for the new_word.
1458 // Remove the WORD_RES from the ROW_RES.
1459 WERD_RES_IT wr_it(&row()->word_res_list);
1460 for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1461 if (wr_it.data() == word_res) {
1462 word_res = nullptr;
1463 break;
1464 }
1465 }
1466 ASSERT_HOST(!wr_it.cycled_list());
1467 delete wr_it.extract();
1468 ResetWordIterator();
1469}
bool combination
Definition: pageres.h:339
bool part_of_combo
Definition: pageres.h:340
WERD * word
Definition: pageres.h:186
ROW_RES * row() const
Definition: pageres.h:757
void ResetWordIterator()
Definition: pageres.cpp:1523

◆ forward()

WERD_RES * PAGE_RES_IT::forward ( )
inline

Definition at line 734 of file pageres.h.

734 { // Get next word.
735 return internal_forward(false, false);
736 }

◆ forward_block()

WERD_RES * PAGE_RES_IT::forward_block ( )

Definition at line 1660 of file pageres.cpp.

1660 {
1661 while (block_res == next_block_res) {
1662 internal_forward(false, true);
1663 }
1664 return internal_forward(false, true);
1665}

◆ forward_paragraph()

WERD_RES * PAGE_RES_IT::forward_paragraph ( )

Definition at line 1645 of file pageres.cpp.

1645 {
1646 while (block_res == next_block_res &&
1647 (next_row_res != nullptr && next_row_res->row != nullptr &&
1648 row_res->row->para() == next_row_res->row->para())) {
1649 internal_forward(false, true);
1650 }
1651 return internal_forward(false, true);
1652}
PARA * para() const
Definition: ocrrow.h:118
ROW * row
Definition: pageres.h:140

◆ forward_with_empties()

WERD_RES * PAGE_RES_IT::forward_with_empties ( )
inline

Definition at line 738 of file pageres.h.

738 {
739 return internal_forward(false, true);
740 }

◆ InsertSimpleCloneWord()

WERD_RES * PAGE_RES_IT::InsertSimpleCloneWord ( const WERD_RES & clone_res,
WERD * new_word 
)

Definition at line 1213 of file pageres.cpp.

1214 {
1215 // Make a WERD_RES for the new_word.
1216 auto* new_res = new WERD_RES(new_word);
1217 new_res->CopySimpleFields(clone_res);
1218 new_res->combination = true;
1219 // Insert into the appropriate place in the ROW_RES.
1220 WERD_RES_IT wr_it(&row()->word_res_list);
1221 for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1222 WERD_RES* word = wr_it.data();
1223 if (word == word_res)
1224 break;
1225 }
1226 ASSERT_HOST(!wr_it.cycled_list());
1227 wr_it.add_before_then_move(new_res);
1228 if (wr_it.at_first()) {
1229 // This is the new first word, so reset the member iterator so it
1230 // detects the cycled_list state correctly.
1231 ResetWordIterator();
1232 }
1233 return new_res;
1234}
WERD_RES * word() const
Definition: pageres.h:754

◆ MakeCurrentWordFuzzy()

void PAGE_RES_IT::MakeCurrentWordFuzzy ( )

Definition at line 1473 of file pageres.cpp.

1473 {
1474 WERD* real_word = word_res->word;
1475 if (!real_word->flag(W_FUZZY_SP) && !real_word->flag(W_FUZZY_NON)) {
1476 real_word->set_flag(W_FUZZY_SP, true);
1477 if (word_res->combination) {
1478 // The next word should be the corresponding part of combo, but we have
1479 // already stepped past it, so find it by search.
1480 WERD_RES_IT wr_it(&row()->word_res_list);
1481 for (wr_it.mark_cycle_pt();
1482 !wr_it.cycled_list() && wr_it.data() != word_res; wr_it.forward()) {
1483 }
1484 wr_it.forward();
1485 ASSERT_HOST(wr_it.data()->part_of_combo);
1486 real_word = wr_it.data()->word;
1487 ASSERT_HOST(!real_word->flag(W_FUZZY_SP) &&
1488 !real_word->flag(W_FUZZY_NON));
1489 real_word->set_flag(W_FUZZY_SP, true);
1490 }
1491 }
1492}
@ W_FUZZY_SP
fuzzy space
Definition: werd.h:39
@ W_FUZZY_NON
fuzzy nonspace
Definition: werd.h:40
WERD
Definition: werd.h:56
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:118
bool flag(WERD_FLAGS mask) const
Definition: werd.h:117

◆ next_block()

BLOCK_RES * PAGE_RES_IT::next_block ( ) const
inline

Definition at line 769 of file pageres.h.

769 { // block of next word
770 return next_block_res;
771 }

◆ next_row()

ROW_RES * PAGE_RES_IT::next_row ( ) const
inline

Definition at line 766 of file pageres.h.

766 { // row of next word
767 return next_row_res;
768 }

◆ next_word()

WERD_RES * PAGE_RES_IT::next_word ( ) const
inline

Definition at line 763 of file pageres.h.

763 { // next word
764 return next_word_res;
765 }

◆ operator!=()

bool PAGE_RES_IT::operator!= ( const PAGE_RES_IT & other) const
inline

Definition at line 693 of file pageres.h.

693{return !(*this == other); }

◆ operator==()

bool PAGE_RES_IT::operator== ( const PAGE_RES_IT & other) const
inline

Definition at line 688 of file pageres.h.

688 {
689 return word_res == other.word_res && row_res == other.row_res &&
690 block_res == other.block_res;
691 }

◆ prev_block()

BLOCK_RES * PAGE_RES_IT::prev_block ( ) const
inline

Definition at line 751 of file pageres.h.

751 { // block of prev word
752 return prev_block_res;
753 }

◆ prev_row()

ROW_RES * PAGE_RES_IT::prev_row ( ) const
inline

Definition at line 748 of file pageres.h.

748 { // row of prev word
749 return prev_row_res;
750 }

◆ prev_word()

WERD_RES * PAGE_RES_IT::prev_word ( ) const
inline

Definition at line 745 of file pageres.h.

745 { // previous word
746 return prev_word_res;
747 }

◆ rej_stat_word()

void PAGE_RES_IT::rej_stat_word ( )

Definition at line 1667 of file pageres.cpp.

1667 {
1668 int16_t chars_in_word;
1669 int16_t rejects_in_word = 0;
1670
1671 chars_in_word = word_res->reject_map.length ();
1672 page_res->char_count += chars_in_word;
1673 block_res->char_count += chars_in_word;
1674 row_res->char_count += chars_in_word;
1675
1676 rejects_in_word = word_res->reject_map.reject_count ();
1677
1678 page_res->rej_count += rejects_in_word;
1679 block_res->rej_count += rejects_in_word;
1680 row_res->rej_count += rejects_in_word;
1681 if (chars_in_word == rejects_in_word)
1682 row_res->whole_word_rej_count += rejects_in_word;
1683}
int32_t rej_count
Definition: pageres.h:79
int32_t char_count
Definition: pageres.h:78
int32_t rej_count
Definition: pageres.h:118
int32_t char_count
Definition: pageres.h:117
int32_t whole_word_rej_count
Definition: pageres.h:143
int32_t rej_count
Definition: pageres.h:142
int32_t char_count
Definition: pageres.h:141
REJMAP reject_map
Definition: pageres.h:294
int16_t reject_count()
Definition: rejctmap.h:229
int32_t length() const
Definition: rejctmap.h:223

◆ ReplaceCurrentWord()

void PAGE_RES_IT::ReplaceCurrentWord ( tesseract::PointerVector< WERD_RES > *  words)

Definition at line 1333 of file pageres.cpp.

1334 {
1335 if (words->empty()) {
1336 DeleteCurrentWord();
1337 return;
1338 }
1339 WERD_RES* input_word = word();
1340 // Set the BOL/EOL flags on the words from the input word.
1341 if (input_word->word->flag(W_BOL)) {
1342 (*words)[0]->word->set_flag(W_BOL, true);
1343 } else {
1344 (*words)[0]->word->set_blanks(input_word->word->space());
1345 }
1346 words->back()->word->set_flag(W_EOL, input_word->word->flag(W_EOL));
1347
1348 // Move the blobs from the input word to the new set of words.
1349 // If the input word_res is a combination, then the replacements will also be
1350 // combinations, and will own their own words. If the input word_res is not a
1351 // combination, then the final replacements will not be either, (although it
1352 // is allowed for the input words to be combinations) and their words
1353 // will get put on the row list. This maintains the ownership rules.
1354 WERD_IT w_it(row()->row->word_list());
1355 if (!input_word->combination) {
1356 for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
1357 WERD* word = w_it.data();
1358 if (word == input_word->word)
1359 break;
1360 }
1361 // w_it is now set to the input_word's word.
1362 ASSERT_HOST(!w_it.cycled_list());
1363 }
1364 // Insert into the appropriate place in the ROW_RES.
1365 WERD_RES_IT wr_it(&row()->word_res_list);
1366 for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1367 WERD_RES* word = wr_it.data();
1368 if (word == input_word)
1369 break;
1370 }
1371 ASSERT_HOST(!wr_it.cycled_list());
1372 // Since we only have an estimate of the bounds between blobs, use the blob
1373 // x-middle as the determiner of where to put the blobs
1374 C_BLOB_IT src_b_it(input_word->word->cblob_list());
1375 src_b_it.sort(&C_BLOB::SortByXMiddle);
1376 C_BLOB_IT rej_b_it(input_word->word->rej_cblob_list());
1377 rej_b_it.sort(&C_BLOB::SortByXMiddle);
1378 TBOX clip_box;
1379 for (int w = 0; w < words->size(); ++w) {
1380 WERD_RES* word_w = (*words)[w];
1381 clip_box = ComputeWordBounds(*words, w, clip_box, wr_it_of_current_word);
1382 // Compute blob boundaries.
1383 GenericVector<int> blob_ends;
1384 C_BLOB_LIST* next_word_blobs =
1385 w + 1 < words->size() ? (*words)[w + 1]->word->cblob_list() : nullptr;
1386 ComputeBlobEnds(*word_w, clip_box, next_word_blobs, &blob_ends);
1387 // Remove the fake blobs on the current word, but keep safe for back-up if
1388 // no blob can be found.
1389 C_BLOB_LIST fake_blobs;
1390 C_BLOB_IT fake_b_it(&fake_blobs);
1391 fake_b_it.add_list_after(word_w->word->cblob_list());
1392 fake_b_it.move_to_first();
1393 word_w->word->cblob_list()->clear();
1394 C_BLOB_IT dest_it(word_w->word->cblob_list());
1395 // Build the box word as we move the blobs.
1396 auto* box_word = new tesseract::BoxWord;
1397 for (int i = 0; i < blob_ends.size(); ++i, fake_b_it.forward()) {
1398 int end_x = blob_ends[i];
1399 TBOX blob_box;
1400 // Add the blobs up to end_x.
1401 while (!src_b_it.empty() &&
1402 src_b_it.data()->bounding_box().x_middle() < end_x) {
1403 blob_box += MoveAndClipBlob(&src_b_it, &dest_it, clip_box);
1404 src_b_it.forward();
1405 }
1406 while (!rej_b_it.empty() &&
1407 rej_b_it.data()->bounding_box().x_middle() < end_x) {
1408 blob_box += MoveAndClipBlob(&rej_b_it, &dest_it, clip_box);
1409 rej_b_it.forward();
1410 }
1411 if (blob_box.null_box()) {
1412 // Use the original box as a back-up.
1413 blob_box = MoveAndClipBlob(&fake_b_it, &dest_it, clip_box);
1414 }
1415 box_word->InsertBox(i, blob_box);
1416 }
1417 delete word_w->box_word;
1418 word_w->box_word = box_word;
1419 if (!input_word->combination) {
1420 // Insert word_w->word into the ROW. It doesn't own its word, so the
1421 // ROW needs to own it.
1422 w_it.add_before_stay_put(word_w->word);
1423 word_w->combination = false;
1424 }
1425 (*words)[w] = nullptr; // We are taking ownership.
1426 wr_it.add_before_stay_put(word_w);
1427 }
1428 // We have taken ownership of the words.
1429 words->clear();
1430 // Delete the current word, which has been replaced. We could just call
1431 // DeleteCurrentWord, but that would iterate both lists again, and we know
1432 // we are already in the right place.
1433 if (!input_word->combination)
1434 delete w_it.extract();
1435 delete wr_it.extract();
1436 ResetWordIterator();
1437}
@ W_EOL
end of line
Definition: werd.h:33
@ W_BOL
start of line
Definition: werd.h:32
bool empty() const
Definition: genericvector.h:91
int size() const
Definition: genericvector.h:72
T & back() const
tesseract::BoxWord * box_word
Definition: pageres.h:272
void DeleteCurrentWord()
Definition: pageres.cpp:1440
TBOX
Definition: rect.h:34
bool null_box() const
Definition: rect.h:50
static int SortByXMiddle(const void *v1, const void *v2)
Definition: stepblob.h:125
C_BLOB_LIST * rej_cblob_list()
Definition: werd.h:90
C_BLOB_LIST * cblob_list()
Definition: werd.h:95
uint8_t space()
Definition: werd.h:99

◆ ResetWordIterator()

void PAGE_RES_IT::ResetWordIterator ( )

Definition at line 1523 of file pageres.cpp.

1523 {
1524 if (row_res == next_row_res) {
1525 // Reset the member iterator so it can move forward and detect the
1526 // cycled_list state correctly.
1527 word_res_it.move_to_first();
1528 for (word_res_it.mark_cycle_pt();
1529 !word_res_it.cycled_list() && word_res_it.data() != next_word_res;
1530 word_res_it.forward()) {
1531 if (!word_res_it.data()->part_of_combo) {
1532 if (prev_row_res == row_res) prev_word_res = word_res;
1533 word_res = word_res_it.data();
1534 }
1535 }
1536 ASSERT_HOST(!word_res_it.cycled_list());
1537 wr_it_of_next_word = word_res_it;
1538 word_res_it.forward();
1539 } else {
1540 // word_res_it is OK, but reset word_res and prev_word_res if needed.
1541 WERD_RES_IT wr_it(&row_res->word_res_list);
1542 for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1543 if (!wr_it.data()->part_of_combo) {
1544 if (prev_row_res == row_res) prev_word_res = word_res;
1545 word_res = wr_it.data();
1546 }
1547 }
1548 }
1549}

◆ restart_page()

WERD_RES * PAGE_RES_IT::restart_page ( )
inline

Definition at line 701 of file pageres.h.

701 {
702 return start_page(false); // Skip empty blocks.
703 }
WERD_RES * start_page(bool empty_ok)
Definition: pageres.cpp:1500

◆ restart_page_with_empties()

WERD_RES * PAGE_RES_IT::restart_page_with_empties ( )
inline

Definition at line 704 of file pageres.h.

704 {
705 return start_page(true); // Allow empty blocks.
706 }

◆ restart_row()

WERD_RES * PAGE_RES_IT::restart_row ( )

Definition at line 1630 of file pageres.cpp.

1630 {
1631 ROW_RES *row = this->row();
1632 if (!row) return nullptr;
1633 for (restart_page(); this->row() != row; forward()) {
1634 // pass
1635 }
1636 return word();
1637}
WERD_RES * forward()
Definition: pageres.h:734

◆ row()

ROW_RES * PAGE_RES_IT::row ( ) const
inline

Definition at line 757 of file pageres.h.

757 { // row of current word
758 return row_res;
759 }

◆ start_page()

WERD_RES * PAGE_RES_IT::start_page ( bool  empty_ok)

Definition at line 1500 of file pageres.cpp.

1500 {
1501 block_res_it.set_to_list(&page_res->block_res_list);
1502 block_res_it.mark_cycle_pt();
1503 prev_block_res = nullptr;
1504 prev_row_res = nullptr;
1505 prev_word_res = nullptr;
1506 block_res = nullptr;
1507 row_res = nullptr;
1508 word_res = nullptr;
1509 next_block_res = nullptr;
1510 next_row_res = nullptr;
1511 next_word_res = nullptr;
1512 internal_forward(true, empty_ok);
1513 return internal_forward(false, empty_ok);
1514}

◆ word()

WERD_RES * PAGE_RES_IT::word ( ) const
inline

Definition at line 754 of file pageres.h.

754 { // current word
755 return word_res;
756 }

Member Data Documentation

◆ page_res

PAGE_RES* PAGE_RES_IT::page_res

Definition at line 677 of file pageres.h.


The documentation for this class was generated from the following files: