tesseract 4.1.1
Loading...
Searching...
No Matches
tesseract::ParagraphModelSmearer Class Reference

#include <paragraphs_internal.h>

Public Member Functions

 ParagraphModelSmearer (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)
 
void Smear ()
 

Detailed Description

Definition at line 237 of file paragraphs_internal.h.

Constructor & Destructor Documentation

◆ ParagraphModelSmearer()

tesseract::ParagraphModelSmearer::ParagraphModelSmearer ( GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end,
ParagraphTheory *  theory 
)

Definition at line 1320 of file paragraphs.cpp.

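1320ParagraphModelSmearer::ParagraphModelSmearer(
1321 GenericVector<RowScratchRegisters> *rows,
1322 int row_start, int row_end, ParagraphTheory *theory)
1323 : theory_(theory), rows_(rows), row_start_(row_start),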
1324 row_end_(row_end) {
1325 if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end)) {
1326 row_start_ = 0;
1327 row_end_ = 0;
1328 return;
1329 }
1330 SetOfModels no_models;
1331 for (int row = row_start - 1; row <= row_end; row++) {
1332 open_models_.push_back(no_models);
1333 }
1334}
GenericVectorEqEq< const ParagraphModel * > SetOfModels

Member Function Documentation

◆ Smear()

void tesseract::ParagraphModelSmearer::Smear ( )

Definition at line 1367 of file paragraphs.cpp.

1367void ParagraphModelSmearer::Smear() {
1368 CalculateOpenModels(row_start_, row_end_);
1369
1370 // For each row which we're unsure about (that is, it is LT_UNKNOWN or
1371 // we have multiple LT_START hypotheses), see if there's a model that
1372 // was recently used (an "open" model) which might model it well.
1373 for (int i = row_start_; i < row_end_; i++) {
1374 RowScratchRegisters &row = (*rows_)[i];
1375 if (row.ri_->num_words == 0)
1376 continue;
1377
1378 // Step One:
1379 // Figure out if there are "open" models which are left-aligned or
1380 // right-aligned. This is important for determining whether the
1381 // "first" word in a row would fit at the "end" of the previous row.
1382 bool left_align_open = false;
1383 bool right_align_open = false;
1384 for (int m = 0; m < OpenModels(i).size(); m++) {
1385 switch (OpenModels(i)[m]->justification()) {
1386 case JUSTIFICATION_LEFT: left_align_open = true; break;
1387 case JUSTIFICATION_RIGHT: right_align_open = true; break;
1388 default: left_align_open = right_align_open = true;
1389 }
1390 }
1391 // Step Two:
1392 // Use that knowledge to figure out if this row is likely to
1393 // start a paragraph.
1394 bool likely_start;
1395 if (i == 0) {
1396 likely_start = true;
1397 } else {
1398 if ((left_align_open && right_align_open) ||
1399 (!left_align_open && !right_align_open)) {
1400 likely_start = LikelyParagraphStart((*rows_)[i - 1], row,
1401 JUSTIFICATION_LEFT) ||
1402 LikelyParagraphStart((*rows_)[i - 1], row,
1403 JUSTIFICATION_RIGHT);
1404 } else if (left_align_open) {
1405 likely_start = LikelyParagraphStart((*rows_)[i - 1], row,
1406 JUSTIFICATION_LEFT);
1407 } else {
1408 likely_start = LikelyParagraphStart((*rows_)[i - 1], row,
1409 JUSTIFICATION_RIGHT);
1410 }
1411 }
1412
1413 // Step Three:
1414 // If this text line seems like an obvious first line of an
1415 // open model, or an obvious continuation of an existing
1416 // modelled paragraph, mark it up.
1417 if (likely_start) {
1418 // Add Start Hypotheses for all Open models that fit.
1419 for (int m = 0; m < OpenModels(i).size(); m++) {
1420 if (ValidFirstLine(rows_, i, OpenModels(i)[m])) {
1421 row.AddStartLine(OpenModels(i)[m]);
1422 }
1423 }
1424 } else {
1425 // Add relevant body line hypotheses.
1426 SetOfModels last_line_models;
1427 if (i > 0) {
1428 (*rows_)[i - 1].StrongHypotheses(&last_line_models);
1429 } else {
1430 theory_->NonCenteredModels(&last_line_models);
1431 }
1432 for (int m = 0; m < last_line_models.size(); m++) {
1433 const ParagraphModel *model = last_line_models[m];
1434 if (ValidBodyLine(rows_, i, model))
1435 row.AddBodyLine(model);
1436 }
1437 }
1438
1439 // Step Four:
1440 // If we're still quite unsure about this line, go through all
1441 // models in our theory and see if this row could be the start
1442 // of any of our models.
1443 if (row.GetLineType() == LT_UNKNOWN ||
1444 (row.GetLineType() == LT_START && !row.UniqueStartHypothesis())) {
1445 SetOfModels all_models;
1446 theory_->NonCenteredModels(&all_models);
1447 for (int m = 0; m < all_models.size(); m++) {
1448 if (ValidFirstLine(rows_, i, all_models[m])) {
1449 row.AddStartLine(all_models[m]);
1450 }
1451 }
1452 }
1453 // Step Five:
1454 // Since we may have updated the hypotheses about this row, we need
1455 // to recalculate the Open models for the rest of rows[i + 1, row_end)
1456 if (row.GetLineType() != LT_UNKNOWN) {
1457 CalculateOpenModels(i + 1, row_end_);
1458 }
1459 }
1460}
@ JUSTIFICATION_LEFT
Definition: publictypes.h:253
@ JUSTIFICATION_RIGHT
Definition: publictypes.h:255
bool ValidBodyLine(const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
bool ValidFirstLine(const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
int size() const
Definition: genericvector.h:72
void NonCenteredModels(SetOfModels *models)

The documentation for this class was generated from the following files: