tesseract 4.1.1
Loading...
Searching...
No Matches
tesseract::DocumentData Class Reference

#include <imagedata.h>

Public Member Functions

 DocumentData (const STRING &name)
 
 ~DocumentData ()
 
bool LoadDocument (const char *filename, int start_page, int64_t max_memory, FileReader reader)
 
void SetDocument (const char *filename, int64_t max_memory, FileReader reader)
 
bool SaveDocument (const char *filename, FileWriter writer)
 
bool SaveToBuffer (GenericVector< char > *buffer)
 
void AddPageToDocument (ImageData *page)
 
const STRING & document_name () const
 
int NumPages () const
 
size_t PagesSize () const
 
int64_t memory_used () const
 
void LoadPageInBackground (int index)
 
const ImageData * GetPage (int index)
 
bool IsPageAvailable (int index, ImageData **page)
 
ImageData * TakePage (int index)
 
bool IsCached () const
 
int64_t UnCache ()
 
void Shuffle ()
 

Friends

void * ReCachePagesFunc (void *data)
 

Detailed Description

Definition at line 209 of file imagedata.h.

Constructor & Destructor Documentation

◆ DocumentData()

tesseract::DocumentData::DocumentData ( const STRING & name)
explicit

Definition at line 383 of file imagedata.cpp.

384 : document_name_(name),
385 pages_offset_(-1),
386 total_pages_(-1),
387 memory_used_(0),
388 max_memory_(0),
389 reader_(nullptr) {}

◆ ~DocumentData()

tesseract::DocumentData::~DocumentData ( )

Definition at line 391 of file imagedata.cpp.

391 {
392 SVAutoLock lock_p(&pages_mutex_);
393 SVAutoLock lock_g(&general_mutex_);
394}

Member Function Documentation

◆ AddPageToDocument()

void tesseract::DocumentData::AddPageToDocument ( ImageData * page)

Definition at line 435 of file imagedata.cpp.

435 {
436 SVAutoLock lock(&pages_mutex_);
437 pages_.push_back(page);
438 set_memory_used(memory_used() + page->MemoryUsed());
439}
int64_t memory_used() const
Definition: imagedata.h:240

◆ document_name()

const STRING & tesseract::DocumentData::document_name ( ) const
inline

Definition at line 229 of file imagedata.h.

229 {
230 SVAutoLock lock(&general_mutex_);
231 return document_name_;
232 }

◆ GetPage()

const ImageData * tesseract::DocumentData::GetPage ( int  index)

Definition at line 455 of file imagedata.cpp.

455 {
456 ImageData* page = nullptr;
457 while (!IsPageAvailable(index, &page)) {
458 // If there is no background load scheduled, schedule one now.
459 pages_mutex_.Lock();
460 bool needs_loading = pages_offset_ != index;
461 pages_mutex_.Unlock();
462 if (needs_loading) LoadPageInBackground(index);
463 // We can't directly load the page, or the background load will delete it
464 // while the caller is using it, so give it a chance to work.
465#if defined(__MINGW32__)
466 sleep(1);
467#else
468 std::this_thread::sleep_for(std::chrono::seconds(1));
469#endif
470 }
471 return page;
472}
bool IsPageAvailable(int index, ImageData **page)
Definition: imagedata.cpp:477
void LoadPageInBackground(int index)
Definition: imagedata.cpp:443
void Unlock()
Unlocks on a mutex.
Definition: svutil.cpp:72
void Lock()
Locks on a mutex.
Definition: svutil.cpp:64

◆ IsCached()

bool tesseract::DocumentData::IsCached ( ) const
inline

Definition at line 272 of file imagedata.h.

272{ return NumPages() >= 0; }
int NumPages() const
Definition: imagedata.h:233

◆ IsPageAvailable()

bool tesseract::DocumentData::IsPageAvailable ( int  index,
ImageData **  page 
)

Definition at line 477 of file imagedata.cpp.

477 {
478 SVAutoLock lock(&pages_mutex_);
479 int num_pages = NumPages();
480 if (num_pages == 0 || index < 0) {
481 *page = nullptr; // Empty Document.
482 return true;
483 }
484 if (num_pages > 0) {
485 index = Modulo(index, num_pages);
486 if (pages_offset_ <= index && index < pages_offset_ + pages_.size()) {
487 *page = pages_[index - pages_offset_]; // Page is available already.
488 return true;
489 }
490 }
491 return false;
492}
int Modulo(int a, int b)
Definition: helpers.h:158

◆ LoadDocument()

bool tesseract::DocumentData::LoadDocument ( const char *  filename,
int  start_page,
int64_t  max_memory,
FileReader  reader 
)

Definition at line 398 of file imagedata.cpp.

399 {
400 SetDocument(filename, max_memory, reader);
401 pages_offset_ = start_page;
402 return ReCachePages();
403}
void SetDocument(const char *filename, int64_t max_memory, FileReader reader)
Definition: imagedata.cpp:406

◆ LoadPageInBackground()

void tesseract::DocumentData::LoadPageInBackground ( int  index)

Definition at line 443 of file imagedata.cpp.

443 {
444 ImageData* page = nullptr;
445 if (IsPageAvailable(index, &page)) return;
446 SVAutoLock lock(&pages_mutex_);
447 if (pages_offset_ == index) return;
448 pages_offset_ = index;
449 pages_.clear();
450 SVSync::StartThread(ReCachePagesFunc, this);
451}
friend void * ReCachePagesFunc(void *data)
Definition: imagedata.cpp:377
static void StartThread(void *(*func)(void *), void *arg)
Create new thread.
Definition: svutil.cpp:81

◆ memory_used()

int64_t tesseract::DocumentData::memory_used ( ) const
inline

Definition at line 240 of file imagedata.h.

240 {
241 SVAutoLock lock(&general_mutex_);
242 return memory_used_;
243 }

◆ NumPages()

int tesseract::DocumentData::NumPages ( ) const
inline

Definition at line 233 of file imagedata.h.

233 {
234 SVAutoLock lock(&general_mutex_);
235 return total_pages_;
236 }

◆ PagesSize()

size_t tesseract::DocumentData::PagesSize ( ) const
inline

Definition at line 237 of file imagedata.h.

237 {
238 return pages_.size();
239 }

◆ SaveDocument()

bool tesseract::DocumentData::SaveDocument ( const char *  filename,
FileWriter  writer 
)

Definition at line 417 of file imagedata.cpp.

417 {
418 SVAutoLock lock(&pages_mutex_);
419 TFile fp;
420 fp.OpenWrite(nullptr);
421 if (!pages_.Serialize(&fp) || !fp.CloseWrite(filename, writer)) {
422 tprintf("Serialize failed: %s\n", filename);
423 return false;
424 }
425 return true;
426}
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35

◆ SaveToBuffer()

bool tesseract::DocumentData::SaveToBuffer ( GenericVector< char > *  buffer)

Definition at line 427 of file imagedata.cpp.

427 {
428 SVAutoLock lock(&pages_mutex_);
429 TFile fp;
430 fp.OpenWrite(buffer);
431 return pages_.Serialize(&fp);
432}

◆ SetDocument()

void tesseract::DocumentData::SetDocument ( const char *  filename,
int64_t  max_memory,
FileReader  reader 
)

Definition at line 406 of file imagedata.cpp.

407 {
408 SVAutoLock lock_p(&pages_mutex_);
409 SVAutoLock lock(&general_mutex_);
410 document_name_ = filename;
411 pages_offset_ = -1;
412 max_memory_ = max_memory;
413 reader_ = reader;
414}

◆ Shuffle()

void tesseract::DocumentData::Shuffle ( )

Definition at line 509 of file imagedata.cpp.

509 {
510 TRand random;
511 // Different documents get shuffled differently, but the same for the same
512 // name.
513 random.set_seed(document_name_.string());
514 int num_pages = pages_.size();
515 // Execute one random swap for each page in the document.
516 for (int i = 0; i < num_pages; ++i) {
517 int src = random.IntRand() % num_pages;
518 int dest = random.IntRand() % num_pages;
519 std::swap(pages_[src], pages_[dest]);
520 }
521}
const char * string() const
Definition: strngs.cpp:194

◆ TakePage()

ImageData * tesseract::DocumentData::TakePage ( int  index)
inline

Definition at line 264 of file imagedata.h.

264 {
265 SVAutoLock lock(&pages_mutex_);
266 ImageData* page = pages_[index];
267 pages_[index] = nullptr;
268 return page;
269 }

◆ UnCache()

int64_t tesseract::DocumentData::UnCache ( )

Definition at line 496 of file imagedata.cpp.

496 {
497 SVAutoLock lock(&pages_mutex_);
498 int64_t memory_saved = memory_used();
499 pages_.clear();
500 pages_offset_ = -1;
501 set_total_pages(-1);
502 set_memory_used(0);
503 tprintf("Unloaded document %s, saving %" PRId64 " memory\n",
504 document_name_.string(), memory_saved);
505 return memory_saved;
506}

Friends And Related Function Documentation

◆ ReCachePagesFunc

void * ReCachePagesFunc ( void *  data)
friend

Definition at line 377 of file imagedata.cpp.

377 {
378 auto* document_data = static_cast<DocumentData*>(data);
379 document_data->ReCachePages();
380 return nullptr;
381}
DocumentData(const STRING &name)
Definition: imagedata.cpp:383

The documentation for this class was generated from the following files: