tesseract 4.1.1
Loading...
Searching...
No Matches
blobbox.h File Reference
#include <cinttypes>
#include <cmath>
#include <cstdint>
#include "elst.h"
#include "elst2.h"
#include "errcode.h"
#include "ocrblock.h"
#include "params.h"
#include "pdblock.h"
#include "points.h"
#include "quspline.h"
#include "rect.h"
#include "scrollview.h"
#include "statistc.h"
#include "stepblob.h"
#include "tprintf.h"
#include "werd.h"

Go to the source code of this file.

Classes

class  BLOBNBOX
 
class  TO_ROW
 
class  TO_BLOCK
 

Namespaces

namespace  tesseract
 

Enumerations

enum  PITCH_TYPE {
  PITCH_DUNNO , PITCH_DEF_FIXED , PITCH_MAYBE_FIXED , PITCH_DEF_PROP ,
  PITCH_MAYBE_PROP , PITCH_CORR_FIXED , PITCH_CORR_PROP
}
 
enum  TabType {
  TT_NONE , TT_DELETED , TT_MAYBE_RAGGED , TT_MAYBE_ALIGNED ,
  TT_CONFIRMED , TT_VLINE
}
 
enum  BlobRegionType {
  BRT_NOISE , BRT_HLINE , BRT_VLINE , BRT_RECTIMAGE ,
  BRT_POLYIMAGE , BRT_UNKNOWN , BRT_VERT_TEXT , BRT_TEXT ,
  BRT_COUNT
}
 
enum  BlobNeighbourDir {
  BND_LEFT , BND_BELOW , BND_RIGHT , BND_ABOVE ,
  BND_COUNT
}
 
enum  BlobSpecialTextType {
  BSTT_NONE , BSTT_ITALIC , BSTT_DIGIT , BSTT_MATH ,
  BSTT_UNCLEAR , BSTT_SKIP , BSTT_COUNT
}
 
enum  BlobTextFlowType {
  BTFT_NONE , BTFT_NONTEXT , BTFT_NEIGHBOURS , BTFT_CHAIN ,
  BTFT_STRONG_CHAIN , BTFT_TEXT_ON_IMAGE , BTFT_LEADER , BTFT_COUNT
}
 

Functions

BlobNeighbourDir DirOtherWay (BlobNeighbourDir dir)
 
bool DominatesInMerge (BlobTextFlowType type1, BlobTextFlowType type2)
 
void find_cblob_limits (C_BLOB *blob, float leftx, float rightx, FCOORD rotation, float &ymin, float &ymax)
 
void find_cblob_vlimits (C_BLOB *blob, float leftx, float rightx, float &ymin, float &ymax)
 
void find_cblob_hlimits (C_BLOB *blob, float bottomy, float topy, float &xmin, float &xmax)
 
C_BLOB * crotate_cblob (C_BLOB *blob, FCOORD rotation)
 
TBOX box_next (BLOBNBOX_IT *it)
 
TBOX box_next_pre_chopped (BLOBNBOX_IT *it)
 
void vertical_cblob_projection (C_BLOB *blob, STATS *stats)
 
void vertical_coutline_projection (C_OUTLINE *outline, STATS *stats)
 
void plot_blob_list (ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour, ScrollView::Color child_colour)
 

Variables

double textord_error_weight = 3
 

Enumeration Type Documentation

◆ BlobNeighbourDir

Enumerator
BND_LEFT 
BND_BELOW 
BND_RIGHT 
BND_ABOVE 
BND_COUNT 

Definition at line 87 of file blobbox.h.

87 {
88 BND_LEFT,
89 BND_BELOW,
90 BND_RIGHT,
91 BND_ABOVE,
92 BND_COUNT
93};
@ BND_COUNT
Definition: blobbox.h:92
@ BND_ABOVE
Definition: blobbox.h:91
@ BND_LEFT
Definition: blobbox.h:88
@ BND_BELOW
Definition: blobbox.h:89
@ BND_RIGHT
Definition: blobbox.h:90

◆ BlobRegionType

Enumerator
BRT_NOISE 
BRT_HLINE 
BRT_VLINE 
BRT_RECTIMAGE 
BRT_POLYIMAGE 
BRT_UNKNOWN 
BRT_VERT_TEXT 
BRT_TEXT 
BRT_COUNT 

Definition at line 72 of file blobbox.h.

72 {
73 BRT_NOISE, // Neither text nor image.
74 BRT_HLINE, // Horizontal separator line.
75 BRT_VLINE, // Vertical separator line.
76 BRT_RECTIMAGE, // Rectangular image.
77 BRT_POLYIMAGE, // Non-rectangular image.
78 BRT_UNKNOWN, // Not determined yet.
79 BRT_VERT_TEXT, // Vertical alignment, not necessarily vertically oriented.
80 BRT_TEXT, // Convincing text.
81
82 BRT_COUNT // Number of possibilities.
83};
@ BRT_RECTIMAGE
Definition: blobbox.h:76
@ BRT_COUNT
Definition: blobbox.h:82
@ BRT_POLYIMAGE
Definition: blobbox.h:77
@ BRT_TEXT
Definition: blobbox.h:80
@ BRT_HLINE
Definition: blobbox.h:74
@ BRT_VLINE
Definition: blobbox.h:75
@ BRT_UNKNOWN
Definition: blobbox.h:78
@ BRT_NOISE
Definition: blobbox.h:73
@ BRT_VERT_TEXT
Definition: blobbox.h:79

◆ BlobSpecialTextType

Enumerator
BSTT_NONE 
BSTT_ITALIC 
BSTT_DIGIT 
BSTT_MATH 
BSTT_UNCLEAR 
BSTT_SKIP 
BSTT_COUNT 

Definition at line 96 of file blobbox.h.

96 {
97 BSTT_NONE, // No special.
98 BSTT_ITALIC, // Italic style.
99 BSTT_DIGIT, // Digit symbols.
100 BSTT_MATH, // Mathematical symbols (not including digits).
101 BSTT_UNCLEAR, // Characters with low recognition rate.
102 BSTT_SKIP, // Characters that we skip labeling (usually too small).
103 BSTT_COUNT
104};
@ BSTT_NONE
Definition: blobbox.h:97
@ BSTT_MATH
Definition: blobbox.h:100
@ BSTT_UNCLEAR
Definition: blobbox.h:101
@ BSTT_SKIP
Definition: blobbox.h:102
@ BSTT_ITALIC
Definition: blobbox.h:98
@ BSTT_DIGIT
Definition: blobbox.h:99
@ BSTT_COUNT
Definition: blobbox.h:103

◆ BlobTextFlowType

Enumerator
BTFT_NONE 
BTFT_NONTEXT 
BTFT_NEIGHBOURS 
BTFT_CHAIN 
BTFT_STRONG_CHAIN 
BTFT_TEXT_ON_IMAGE 
BTFT_LEADER 
BTFT_COUNT 

Definition at line 114 of file blobbox.h.

114 {
115 BTFT_NONE, // No text flow set yet.
116 BTFT_NONTEXT, // Flow too poor to be likely text.
117 BTFT_NEIGHBOURS, // Neighbours support flow in this direction.
118 BTFT_CHAIN, // There is a weak chain of text in this direction.
119 BTFT_STRONG_CHAIN, // There is a strong chain of text in this direction.
120 BTFT_TEXT_ON_IMAGE, // There is a strong chain of text on an image.
121 BTFT_LEADER, // Leader dots/dashes etc.
122 BTFT_COUNT
123};
@ BTFT_TEXT_ON_IMAGE
Definition: blobbox.h:120
@ BTFT_COUNT
Definition: blobbox.h:122
@ BTFT_LEADER
Definition: blobbox.h:121
@ BTFT_NONE
Definition: blobbox.h:115
@ BTFT_CHAIN
Definition: blobbox.h:118
@ BTFT_STRONG_CHAIN
Definition: blobbox.h:119
@ BTFT_NEIGHBOURS
Definition: blobbox.h:117
@ BTFT_NONTEXT
Definition: blobbox.h:116

◆ PITCH_TYPE

enum PITCH_TYPE
Enumerator
PITCH_DUNNO 
PITCH_DEF_FIXED 
PITCH_MAYBE_FIXED 
PITCH_DEF_PROP 
PITCH_MAYBE_PROP 
PITCH_CORR_FIXED 
PITCH_CORR_PROP 

Definition at line 44 of file blobbox.h.

45{
46 PITCH_DUNNO, // insufficient data
47 PITCH_DEF_FIXED, // definitely fixed
48 PITCH_MAYBE_FIXED, // could be
49 PITCH_DEF_PROP,
50 PITCH_MAYBE_PROP,
51 PITCH_CORR_FIXED,
52 PITCH_CORR_PROP
53};
@ PITCH_DEF_FIXED
Definition: blobbox.h:47
@ PITCH_DUNNO
Definition: blobbox.h:46
@ PITCH_MAYBE_FIXED
Definition: blobbox.h:48
@ PITCH_CORR_FIXED
Definition: blobbox.h:51
@ PITCH_DEF_PROP
Definition: blobbox.h:49
@ PITCH_CORR_PROP
Definition: blobbox.h:52
@ PITCH_MAYBE_PROP
Definition: blobbox.h:50

◆ TabType

enum TabType
Enumerator
TT_NONE 
TT_DELETED 
TT_MAYBE_RAGGED 
TT_MAYBE_ALIGNED 
TT_CONFIRMED 
TT_VLINE 

Definition at line 59 of file blobbox.h.

59 {
60 TT_NONE, // Not a tab.
61 TT_DELETED, // Not a tab after detailed analysis.
62 TT_MAYBE_RAGGED, // Initial designation of a tab-stop candidate.
63 TT_MAYBE_ALIGNED, // Initial designation of a tab-stop candidate.
64 TT_CONFIRMED, // Aligned with neighbours.
65 TT_VLINE // Detected as a vertical line.
66};
@ TT_MAYBE_RAGGED
Definition: blobbox.h:62
@ TT_VLINE
Definition: blobbox.h:65
@ TT_DELETED
Definition: blobbox.h:61
@ TT_CONFIRMED
Definition: blobbox.h:64
@ TT_MAYBE_ALIGNED
Definition: blobbox.h:63
@ TT_NONE
Definition: blobbox.h:60

Function Documentation

◆ box_next()

TBOX box_next ( BLOBNBOX_IT *  it)

Definition at line 636 of file blobbox.cpp.

638 {
639 BLOBNBOX *blob; //current blob
640 TBOX result; //total box
641
642 blob = it->data ();
643 result = blob->bounding_box ();
644 do {
645 it->forward ();
646 blob = it->data ();
647 if (blob->cblob() == nullptr)
648 //was pre-chopped
649 result += blob->bounding_box ();
650 }
651 //until next real blob
652 while ((blob->cblob() == nullptr) || blob->joined_to_prev());
653 return result;
654}
const TBOX & bounding_box() const
Definition: blobbox.h:230
C_BLOB * cblob() const
Definition: blobbox.h:268
bool joined_to_prev() const
Definition: blobbox.h:256
Definition: rect.h:34

◆ box_next_pre_chopped()

TBOX box_next_pre_chopped ( BLOBNBOX_IT *  it)

Definition at line 665 of file blobbox.cpp.

667 {
668 BLOBNBOX *blob; //current blob
669 TBOX result; //total box
670
671 blob = it->data ();
672 result = blob->bounding_box ();
673 do {
674 it->forward ();
675 blob = it->data ();
676 }
677 //until next real blob
678 while (blob->joined_to_prev ());
679 return result;
680}

◆ crotate_cblob()

C_BLOB * crotate_cblob ( C_BLOB *  blob,
FCOORD  rotation 
)

Definition at line 611 of file blobbox.cpp.

614 {
615 C_OUTLINE_LIST out_list; //output outlines
616 //input outlines
617 C_OUTLINE_IT in_it = blob->out_list ();
618 //output outlines
619 C_OUTLINE_IT out_it = &out_list;
620
621 for (in_it.mark_cycle_pt (); !in_it.cycled_list (); in_it.forward ()) {
622 out_it.add_after_then_move (new C_OUTLINE (in_it.data (), rotation));
623 }
624 return new C_BLOB (&out_list);
625}
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70

◆ DirOtherWay()

BlobNeighbourDir DirOtherWay ( BlobNeighbourDir  dir)
inline

Definition at line 106 of file blobbox.h.

106 {
107 return static_cast<BlobNeighbourDir>(dir ^ 2);
108}
BlobNeighbourDir
Definition: blobbox.h:87

◆ DominatesInMerge()

bool DominatesInMerge ( BlobTextFlowType  type1,
BlobTextFlowType  type2 
)
inline

Definition at line 129 of file blobbox.h.

129 {
130 // LEADER always loses.
131 if (type1 == BTFT_LEADER) return false;
132 if (type2 == BTFT_LEADER) return true;
133 // With those out of the way, the ordering of the enum determines the result.
134 return type1 >= type2;
135}

◆ find_cblob_hlimits()

void find_cblob_hlimits ( C_BLOB *  blob,
float  bottomy,
float  topy,
float &  xmin,
float &  xmax 
)

Definition at line 576 of file blobbox.cpp.

581 {
582 int16_t stepindex; //current point
583 ICOORD pos; //current coords
584 ICOORD vec; //rotated step
585 C_OUTLINE *outline; //current outline
586 //outlines
587 C_OUTLINE_IT out_it = blob->out_list ();
588
589 xmin = static_cast<float>(INT32_MAX);
590 xmax = static_cast<float>(-INT32_MAX);
591 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
592 outline = out_it.data ();
593 pos = outline->start_pos (); //get coords
594 for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
595 //inside
596 if (pos.y () >= bottomy && pos.y () <= topy) {
597 UpdateRange(pos.x(), &xmin, &xmax);
598 }
599 vec = outline->step (stepindex);
600 pos += vec; //move to next
601 }
602 }
603}
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:120
ICOORD step(int index) const
Definition: coutln.h:144
const ICOORD & start_pos() const
Definition: coutln.h:148
int32_t pathlength() const
Definition: coutln.h:135
integer coordinate
Definition: points.h:32
int16_t y() const
access function
Definition: points.h:56
int16_t x() const
access function
Definition: points.h:52

◆ find_cblob_limits()

void find_cblob_limits ( C_BLOB *  blob,
float  leftx,
float  rightx,
FCOORD  rotation,
float &  ymin,
float &  ymax 
)

Definition at line 499 of file blobbox.cpp.

505 {
506 int16_t stepindex; //current point
507 ICOORD pos; //current coords
508 ICOORD vec; //rotated step
509 C_OUTLINE *outline; //current outline
510 //outlines
511 C_OUTLINE_IT out_it = blob->out_list ();
512
513 ymin = static_cast<float>(INT32_MAX);
514 ymax = static_cast<float>(-INT32_MAX);
515 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
516 outline = out_it.data ();
517 pos = outline->start_pos (); //get coords
518 pos.rotate (rotation);
519 for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
520 //inside
521 if (pos.x () >= leftx && pos.x () <= rightx) {
522 UpdateRange(pos.y(), &ymin, &ymax);
523 }
524 vec = outline->step (stepindex);
525 vec.rotate (rotation);
526 pos += vec; //move to next
527 }
528 }
529}
void rotate(const FCOORD &vec)
Definition: points.h:536

◆ find_cblob_vlimits()

void find_cblob_vlimits ( C_BLOB *  blob,
float  leftx,
float  rightx,
float &  ymin,
float &  ymax 
)

Definition at line 539 of file blobbox.cpp.

544 {
545 int16_t stepindex; //current point
546 ICOORD pos; //current coords
547 ICOORD vec; //rotated step
548 C_OUTLINE *outline; //current outline
549 //outlines
550 C_OUTLINE_IT out_it = blob->out_list ();
551
552 ymin = static_cast<float>(INT32_MAX);
553 ymax = static_cast<float>(-INT32_MAX);
554 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
555 outline = out_it.data ();
556 pos = outline->start_pos (); //get coords
557 for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
558 //inside
559 if (pos.x () >= leftx && pos.x () <= rightx) {
560 UpdateRange(pos.y(), &ymin, &ymax);
561 }
562 vec = outline->step (stepindex);
563 pos += vec; //move to next
564 }
565 }
566}

◆ plot_blob_list()

void plot_blob_list ( ScrollView *  win,
BLOBNBOX_LIST *  list,
ScrollView::Color  body_colour,
ScrollView::Color  child_colour 
)

Definition at line 1086 of file blobbox.cpp.

1089 { // colour of child
1090 BLOBNBOX_IT it = list;
1091 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1092 it.data()->plot(win, body_colour, child_colour);
1093 }
1094}

◆ vertical_cblob_projection()

void vertical_cblob_projection ( C_BLOB *  blob,
STATS *  stats 
)

Definition at line 868 of file blobbox.cpp.

871 {
872 //outlines of blob
873 C_OUTLINE_IT out_it = blob->out_list ();
874
875 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
876 vertical_coutline_projection (out_it.data (), stats);
877 }
878}
void vertical_coutline_projection(C_OUTLINE *outline, STATS *stats)
Definition: blobbox.cpp:888

◆ vertical_coutline_projection()

void vertical_coutline_projection ( C_OUTLINE *  outline,
STATS *  stats 
)

Definition at line 888 of file blobbox.cpp.

891 {
892 ICOORD pos; //current point
893 ICOORD step; //edge step
894 int32_t length; //of outline
895 int16_t stepindex; //current step
896 C_OUTLINE_IT out_it = outline->child ();
897
898 pos = outline->start_pos ();
899 length = outline->pathlength ();
900 for (stepindex = 0; stepindex < length; stepindex++) {
901 step = outline->step (stepindex);
902 if (step.x () > 0) {
903 stats->add (pos.x (), -pos.y ());
904 } else if (step.x () < 0) {
905 stats->add (pos.x () - 1, pos.y ());
906 }
907 pos += step;
908 }
909
910 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
911 vertical_coutline_projection (out_it.data (), stats);
912 }
913}
C_OUTLINE_LIST * child()
Definition: coutln.h:108
void add(int32_t value, int32_t count)
Definition: statistc.cpp:93

Variable Documentation

◆ textord_error_weight

double textord_error_weight = 3
extern

"Weighting for error in believability"