21#ifndef TESSERACT_CCUTIL_NORMSTRNGS_H_
22#define TESSERACT_CCUTIL_NORMSTRNGS_H_
59 std::string* normalized);
67 std::vector<std::string>* graphemes);
86unsigned int SpanUTF8Whitespace(
const char* text);
90unsigned int SpanUTF8NotWhitespace(
const char* text);
bool IsOCREquivalent(char32 ch1, char32 ch2)
bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
bool IsUTF8Whitespace(const char *text)
char32 OCRNormalize(char32 ch)
bool NormalizeCleanAndSegmentUTF8(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)
bool IsInterchangeValid(const char32 ch)
char32 FullwidthToHalfwidth(const char32 ch)
bool IsWhitespace(const char32 ch)
bool IsInterchangeValid7BitAscii(const char32 ch)
bool IsValidCodepoint(const char32 ch)