tesseract 4.1.1
Loading...
Searching...
No Matches
ocrblock.h File Reference
#include "ocrpara.h"
#include "ocrrow.h"
#include "pdblock.h"

Go to the source code of this file.

Classes

class  BLOCK
 

Functions

void PrintSegmentationStats (BLOCK_LIST *block_list)
 
void ExtractBlobsFromSegmentation (BLOCK_LIST *blocks, C_BLOB_LIST *output_blob_list)
 
void RefreshWordBlobsFromNewBlobs (BLOCK_LIST *block_list, C_BLOB_LIST *new_blobs, C_BLOB_LIST *not_found_blobs)
 

Function Documentation

◆ ExtractBlobsFromSegmentation()

void ExtractBlobsFromSegmentation ( BLOCK_LIST *  blocks,
C_BLOB_LIST *  output_blob_list 
)

Definition at line 438 of file ocrblock.cpp.

// Body of ExtractBlobsFromSegmentation(blocks, output_blob_list).
// Walks every block in `blocks`, every row of each block and every word of
// each row, and hands each word's cblob_list() followed by its
// rej_cblob_list() to an iterator over `output_blob_list`.
// NOTE(review): add_list_after() presumably moves the elements out of the
// word's own lists rather than copying them -- confirm against the list
// iterator documentation before relying on the words' blob lists afterwards.
439 {
440 C_BLOB_IT return_list_it(output_blob_list);
441 BLOCK_IT block_it(blocks);
442 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
443 BLOCK* block = block_it.data();
444 ROW_IT row_it(block->row_list());
445 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
446 ROW* row = row_it.data();
447 // Iterate over all werds in the row.
448 WERD_IT werd_it(row->word_list());
449 for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
450 WERD* werd = werd_it.data();
// Reposition on the last output element before each insertion; presumably
// this makes the blobs land at the tail in visiting order -- TODO confirm.
451 return_list_it.move_to_last();
452 return_list_it.add_list_after(werd->cblob_list());
// The word's rejected blobs are added right after its regular blobs.
453 return_list_it.move_to_last();
454 return_list_it.add_list_after(werd->rej_cblob_list());
455 }
456 }
457 }
458}
Definition: ocrblock.h:31
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:116
Definition: ocrrow.h:37
WERD_LIST * word_list()
Definition: ocrrow.h:55
Definition: werd.h:56
C_BLOB_LIST * rej_cblob_list()
Definition: werd.h:90
C_BLOB_LIST * cblob_list()
Definition: werd.h:95

◆ PrintSegmentationStats()

void PrintSegmentationStats ( BLOCK_LIST *  block_list)

Definition at line 405 of file ocrblock.cpp.

// Body of PrintSegmentationStats(block_list).
// Counts the blocks, rows, words and blobs reachable from `block_list` and
// reports the four totals with a single tprintf() call.
// Only each word's cblob_list() contributes to the blob total; the
// rej_cblob_list() of a word is not included.
405 {
406 int num_blocks = 0;
407 int num_rows = 0;
408 int num_words = 0;
409 int num_blobs = 0;
410 BLOCK_IT block_it(block_list);
411 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
412 BLOCK* block = block_it.data();
413 ++num_blocks;
414 ROW_IT row_it(block->row_list());
415 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
416 ++num_rows;
417 ROW* row = row_it.data();
418 // Iterate over all werds in the row.
419 WERD_IT werd_it(row->word_list());
420 for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
421 WERD* werd = werd_it.data();
422 ++num_words;
// Add this word's blob count to the running total.
423 num_blobs += werd->cblob_list()->length();
424 }
425 }
426 }
// Emit all four counters in one formatted message.
427 tprintf("Block list stats:\nBlocks = %d\nRows = %d\nWords = %d\nBlobs = %d\n",
428 num_blocks, num_rows, num_words, num_blobs);
429}
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35

◆ RefreshWordBlobsFromNewBlobs()

void RefreshWordBlobsFromNewBlobs ( BLOCK_LIST *  block_list,
C_BLOB_LIST *  new_blobs,
C_BLOB_LIST *  not_found_blobs 
)

Definition at line 473 of file ocrblock.cpp.

// Body of RefreshWordBlobsFromNewBlobs(block_list, new_blobs, not_found_blobs).
// For every row of every eligible block, rebuilds the row's word list: each
// existing word is asked to construct a replacement through
// ConstructWerdWithNewBlobs(new_blobs, not_found_blobs). The replacement is
// kept when one is returned; otherwise the original word is kept.
// NOTE(review): how ConstructWerdWithNewBlobs() uses `new_blobs` and what it
// places in `not_found_blobs` is not visible here -- see werd.cpp.
475 {
476 // Visit every word of every row in block_list and swap in a word rebuilt
477 // from the new c-blobs wherever one can be constructed.
478 BLOCK_IT block_it(block_list);
479 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
480 BLOCK* block = block_it.data();
// A block with no poly_block() is processed; only a block whose poly_block()
// exists and reports !IsText() is skipped.
481 if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText())
482 continue; // Don't touch non-text blocks.
483 // Iterate over all rows in the block.
484 ROW_IT row_it(block->row_list());
485 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
486 ROW* row = row_it.data();
487 // Iterate over all werds in the row.
488 WERD_IT werd_it(row->word_list());
// Replacement words are gathered in a temporary list, in the original order.
489 WERD_LIST new_words;
490 WERD_IT new_words_it(&new_words);
491 for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
// extract() takes the word out of the row's list, so from here on this code
// must either delete it or put it into new_words.
492 WERD* werd = werd_it.extract();
493 WERD* new_werd = werd->ConstructWerdWithNewBlobs(new_blobs,
494 not_found_blobs);
495 if (new_werd) {
496 // Insert this new werd into the actual row's werd-list. Remove the
497 // existing one.
498 new_words_it.add_after_then_move(new_werd);
499 delete werd;
500 } else {
501 // Reinsert the older word back, for lack of better options.
502 // This is critical since dropping the words messes up segmentation:
503 // eg. 1st word in the row might otherwise have W_FUZZY_NON turned on.
504 new_words_it.add_after_then_move(werd);
505 }
506 }
507 // Get rid of the old word list & replace it with the new one.
// NOTE(review): every word was already extracted above, so clear() is
// presumably operating on an empty list here -- confirm.
508 row->word_list()->clear();
509 werd_it.move_to_first();
510 werd_it.add_list_after(&new_words);
511 }
512 }
513}
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:190
POLY_BLOCK * poly_block() const
Definition: pdblock.h:55
bool IsText() const
Definition: polyblk.h:49
WERD * ConstructWerdWithNewBlobs(C_BLOB_LIST *all_blobs, C_BLOB_LIST *orphan_blobs)
Definition: werd.cpp:388