tesseract 4.1.1
Loading...
Searching...
No Matches
lstm.h
Go to the documentation of this file.
1
2// File: lstm.h
3// Description: Long short-term memory (LSTM) recurrent neural network.
4// Author: Ray Smith
5// Created: Wed May 01 17:33:06 PST 2013
6//
7// (C) Copyright 2013, Google Inc.
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
18
19#ifndef TESSERACT_LSTM_LSTM_H_
20#define TESSERACT_LSTM_LSTM_H_
21
22#include "network.h"
23#include "fullyconnected.h"
24
25namespace tesseract {
26
27// C++ Implementation of the LSTM class from lstm.py.
28class LSTM : public Network {
29 public:
30 // Enum for the different weights in LSTM, to reduce some of the I/O and
31 // setup code to loops. The elements of the enum correspond to elements of an
32 // array of WeightMatrix or a corresponding array of NetworkIO.
34 CI, // Cell Inputs.
35 GI, // Gate at the input.
36 GF1, // Forget gate at the memory (1-d or looking back 1 timestep).
37 GO, // Gate at the output.
38 GFS, // Forget gate at the memory, looking back in the other dimension.
39
40 WT_COUNT // Number of WeightTypes.
41 };
42
43 // Constructor for NT_LSTM (regular 1 or 2-d LSTM), NT_LSTM_SOFTMAX (LSTM with
44 // additional softmax layer included and fed back into the input at the next
45 // timestep), or NT_LSTM_SOFTMAX_ENCODED (as LSTM_SOFTMAX, but the feedback
46 // is binary encoded instead of categorical) only.
47 // 2-d and bidi softmax LSTMs are not rejected, but are impossible to build
48 // in the conventional way because the output feedback both forwards and
49 // backwards in time does become impossible.
50 LSTM(const STRING& name, int num_inputs, int num_states, int num_outputs,
51 bool two_dimensional, NetworkType type);
52 ~LSTM() override;
53
54 // Returns the shape output from the network given an input shape (which may
55 // be partially unknown ie zero).
56 StaticShape OutputShape(const StaticShape& input_shape) const override;
57
58 STRING spec() const override {
60 if (type_ == NT_LSTM)
61 spec.add_str_int("Lfx", ns_);
62 else if (type_ == NT_LSTM_SUMMARY)
63 spec.add_str_int("Lfxs", ns_);
64 else if (type_ == NT_LSTM_SOFTMAX)
65 spec.add_str_int("LS", ns_);
67 spec.add_str_int("LE", ns_);
68 if (softmax_ != nullptr) spec += softmax_->spec();
69 return spec;
70 }
71
72 // Suspends/Enables training by setting the training_ flag. Serialize and
73 // DeSerialize only operate on the run-time data if state is false.
74 void SetEnableTraining(TrainingState state) override;
75
76 // Sets up the network for training. Initializes weights using weights of
77 // scale `range` picked according to the random number generator `randomizer`.
78 int InitWeights(float range, TRand* randomizer) override;
79 // Recursively searches the network for softmaxes with old_no outputs,
80 // and remaps their outputs according to code_map. See network.h for details.
81 int RemapOutputs(int old_no, const std::vector<int>& code_map) override;
82
83 // Converts a float network to an int network.
84 void ConvertToInt() override;
85
86 // Provides debug output on the weights.
87 void DebugWeights() override;
88
89 // Writes to the given file. Returns false in case of error.
90 bool Serialize(TFile* fp) const override;
91 // Reads from the given file. Returns false in case of error.
92 bool DeSerialize(TFile* fp) override;
93
94 // Runs forward propagation of activations on the input line.
95 // See Network for a detailed discussion of the arguments.
96 void Forward(bool debug, const NetworkIO& input,
97 const TransposedArray* input_transpose, NetworkScratch* scratch,
98 NetworkIO* output) override;
99
100 // Runs backward propagation of errors on the deltas line.
101 // See Network for a detailed discussion of the arguments.
102 bool Backward(bool debug, const NetworkIO& fwd_deltas,
103 NetworkScratch* scratch, NetworkIO* back_deltas) override;
104 // Updates the weights using the given learning rate, momentum and adam_beta.
105 // num_samples is used in the adam computation iff use_adam_ is true.
106 void Update(float learning_rate, float momentum, float adam_beta,
107 int num_samples) override;
108 // Sums the products of weight updates in *this and other, splitting into
109 // positive (same direction) in *same and negative (different direction) in
110 // *changed.
111 void CountAlternators(const Network& other, double* same,
112 double* changed) const override;
113 // Prints the weights for debug purposes.
114 void PrintW();
115 // Prints the weight deltas for debug purposes.
116 void PrintDW();
117
118 // Returns true of this is a 2-d lstm.
119 bool Is2D() const {
120 return is_2d_;
121 }
122
123 private:
124 // Resizes forward data to cope with an input image of the given width.
125 void ResizeForward(const NetworkIO& input);
126
127 private:
128 // Size of padded input to weight matrices = ni_ + no_ for 1-D operation
129 // and ni_ + 2 * no_ for 2-D operation. Note that there is a phantom 1 input
130 // for the bias that makes the weight matrices of size [na + 1][no].
131 int32_t na_;
132 // Number of internal states. Equal to no_ except for a softmax LSTM.
133 // ns_ is NOT serialized, but is calculated from gate_weights_.
134 int32_t ns_;
135 // Number of additional feedback states. The softmax types feed back
136 // additional output information on top of the ns_ internal states.
137 // In the case of a binary-coded (EMBEDDED) softmax, nf_ < no_.
138 int32_t nf_;
139 // Flag indicating 2-D operation.
140 bool is_2d_;
141
142 // Gate weight arrays of size [na + 1, no].
143 WeightMatrix gate_weights_[WT_COUNT];
144 // Used only if this is a softmax LSTM.
145 FullyConnected* softmax_;
146 // Input padded with previous output of size [width, na].
147 NetworkIO source_;
148 // Internal state used during forward operation, of size [width, ns].
149 NetworkIO state_;
150 // State of the 2-d maxpool, generated during forward, used during backward.
151 GENERIC_2D_ARRAY<int8_t> which_fg_;
152 // Internal state saved from forward, but used only during backward.
153 NetworkIO node_values_[WT_COUNT];
154 // Preserved input stride_map used for Backward when NT_LSTM_SQUASHED.
155 StrideMap input_map_;
156 int input_width_;
157};
158
159} // namespace tesseract.
160
161
162#endif // TESSERACT_LSTM_LSTM_H_
TrainingState
Definition: network.h:92
NetworkType
Definition: network.h:43
@ NT_LSTM
Definition: network.h:60
@ NT_LSTM_SOFTMAX_ENCODED
Definition: network.h:76
@ NT_LSTM_SUMMARY
Definition: network.h:61
@ NT_LSTM_SOFTMAX
Definition: network.h:75
Definition: strngs.h:45
void add_str_int(const char *str, int number)
Definition: strngs.cpp:377
STRING spec() const override
void PrintDW()
Definition: lstm.cpp:727
bool Is2D() const
Definition: lstm.h:119
bool Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scratch, NetworkIO *back_deltas) override
Definition: lstm.cpp:441
~LSTM() override
Definition: lstm.cpp:123
int InitWeights(float range, TRand *randomizer) override
Definition: lstm.cpp:158
void PrintW()
Definition: lstm.cpp:701
void DebugWeights() override
Definition: lstm.cpp:194
int RemapOutputs(int old_no, const std::vector< int > &code_map) override
Definition: lstm.cpp:174
bool DeSerialize(TFile *fp) override
Definition: lstm.cpp:220
bool Serialize(TFile *fp) const override
Definition: lstm.cpp:207
void ConvertToInt() override
Definition: lstm.cpp:183
void SetEnableTraining(TrainingState state) override
Definition: lstm.cpp:137
void Forward(bool debug, const NetworkIO &input, const TransposedArray *input_transpose, NetworkScratch *scratch, NetworkIO *output) override
Definition: lstm.cpp:250
void Update(float learning_rate, float momentum, float adam_beta, int num_samples) override
Definition: lstm.cpp:667
STRING spec() const override
Definition: lstm.h:58
StaticShape OutputShape(const StaticShape &input_shape) const override
Definition: lstm.cpp:127
void CountAlternators(const Network &other, double *same, double *changed) const override
Definition: lstm.cpp:687
NetworkType type_
Definition: network.h:293
const STRING & name() const
Definition: network.h:138
NetworkType type() const
Definition: network.h:112