ATLAS Offline Software
Loading...
Searching...
No Matches
Stack.h
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3*/
4
5#ifndef STACK_HH_TAURECTOOLS
6#define STACK_HH_TAURECTOOLS
7
8// Stack classes
9//
10// These are the low-level classes that implement feed-forward and
11// recurrent neural networks. All the Eigen-dependent code in this
12// library should live in this file.
13//
14// To keep the Eigen code out of the high-level interface, the STL ->
15// Eigen ``preprocessor'' classes are also defined here.
16//
17// The ordering of classes is as follows:
18// - Feed-forward Stack class
19// - Feed-forward Layer classes
20// - RecurrentStack class
21// - Recurrent layers
22// - Activation functions
23// - Various utility functions
24
25
26#include "Exceptions.h"
27#include "NNLayerConfig.h"
28
29#include <Eigen/Dense>
30
31#include <vector>
32#include <functional>
33#include <memory> //for unique_ptr
34
35namespace lwtDev {
36
37 using Eigen::VectorXd;
38 using Eigen::MatrixXd;
39
40 class ILayer;
41 class IRecurrentLayer;
42
43
44 // ______________________________________________________________________
45 // Feed forward Stack class
46
47 class Stack
48 {
49 public:
50 // constructor for dummy net
51 Stack();
52 // constructor for real net
53 Stack(size_t n_inputs, const std::vector<LayerConfig>& layers,
54 size_t skip_layers = 0);
55 ~Stack();
56
57 // make non-copyable for now
58 Stack(Stack&) = delete;
59 Stack& operator=(Stack&) = delete;
60
61 VectorXd compute(VectorXd) const;
62 size_t n_outputs() const;
63
64 private:
65 // returns the size of the next layer
66 size_t add_layers(size_t n_inputs, const LayerConfig&);
67 size_t add_dense_layers(size_t n_inputs, const LayerConfig&);
68 size_t add_normalization_layers(size_t n_inputs, const LayerConfig&);
69 size_t add_highway_layers(size_t n_inputs, const LayerConfig&);
70 size_t add_maxout_layers(size_t n_inputs, const LayerConfig&);
71 std::vector<ILayer*> m_layers;
73 };
74
75 // _______________________________________________________________________
76 // Feed-forward layers
77
78 class ILayer
79 {
80 public:
81 virtual ~ILayer() {}
82 virtual VectorXd compute(const VectorXd&) const = 0;
83 };
84
85 class DummyLayer: public ILayer
86 {
87 public:
88 virtual VectorXd compute(const VectorXd&) const override;
89 };
90
92 {
93 public:
95 virtual VectorXd compute(const VectorXd&) const override;
96 private:
97 std::function<double(double)> m_func;
98 };
99
100 class SoftmaxLayer: public ILayer
101 {
102 public:
103 virtual VectorXd compute(const VectorXd&) const override;
104 };
105
106 class BiasLayer: public ILayer
107 {
108 public:
109 BiasLayer(const VectorXd& bias);
110 BiasLayer(const std::vector<double>& bias);
111 virtual VectorXd compute(const VectorXd&) const override;
112 private:
113 VectorXd m_bias;
114 };
115
116 class MatrixLayer: public ILayer
117 {
118 public:
119 MatrixLayer(const MatrixXd& matrix);
120 virtual VectorXd compute(const VectorXd&) const override;
121 private:
122 MatrixXd m_matrix;
123 };
124
125 class MaxoutLayer: public ILayer
126 {
127 public:
128 typedef std::pair<MatrixXd, VectorXd> InitUnit;
129 MaxoutLayer(const std::vector<InitUnit>& maxout_tensor);
130 virtual VectorXd compute(const VectorXd&) const override;
131 private:
132 std::vector<MatrixXd> m_matrices;
133 MatrixXd m_bias;
134 };
135
136
140 {
141
142 public:
143 NormalizationLayer(const VectorXd& W,const VectorXd& b);
144 virtual VectorXd compute(const VectorXd&) const override;
145
146 private:
147 VectorXd m_W;
148 VectorXd m_b;
149
150 };
151
152 //http://arxiv.org/pdf/1505.00387v2.pdf
153 class HighwayLayer: public ILayer
154 {
155 public:
156 HighwayLayer(const MatrixXd& W,
157 const VectorXd& b,
158 const MatrixXd& W_carry,
159 const VectorXd& b_carry,
160 ActivationConfig activation);
161 virtual VectorXd compute(const VectorXd&) const override;
162 private:
163 MatrixXd m_w_t;
164 VectorXd m_b_t;
165 MatrixXd m_w_c;
166 VectorXd m_b_c;
167 std::function<double(double)> m_act;
168 };
169
170 // ______________________________________________________________________
171 // Recurrent Stack
172
174 {
175 public:
176 RecurrentStack(size_t n_inputs, const std::vector<LayerConfig>& layers);
180 MatrixXd scan(MatrixXd inputs) const;
181 size_t n_outputs() const;
182 private:
183 std::vector<IRecurrentLayer*> m_layers;
184 size_t add_lstm_layers(size_t n_inputs, const LayerConfig&);
185 size_t add_gru_layers(size_t n_inputs, const LayerConfig&);
186 size_t add_bidirectional_layers(size_t n_inputs, const LayerConfig&);
187 size_t add_embedding_layers(size_t n_inputs, const LayerConfig&);
189 };
190
191 // This is the old RecurrentStack. Should probably absorb this into
192 // the high-level interface in LightweightRNN, since all it does is
193 // provide a slightly higher-level interface to a network which
194 // combines recurrent + ff layers.
196 {
197 public:
198 ReductionStack(size_t n_in, const std::vector<LayerConfig>& layers);
202 VectorXd reduce(MatrixXd inputs) const;
203 size_t n_outputs() const;
204 private:
207 };
208
209 // __________________________________________________________________
210 // Recurrent layers
211
213 {
214 public:
215 virtual ~IRecurrentLayer() {}
216 virtual MatrixXd scan( const MatrixXd&) const = 0;
217
218 bool m_go_backwards = false;
219 bool m_return_sequence = false;
220 };
221
223 {
224 public:
225 EmbeddingLayer(int var_row_index, const MatrixXd & W);
226 virtual ~EmbeddingLayer() {};
227 virtual MatrixXd scan( const MatrixXd&) const override;
228
229 private:
231 MatrixXd m_W;
232 };
233
235 struct LSTMState;
237 {
238 public:
239 LSTMLayer(const ActivationConfig & activation,
240 const ActivationConfig & inner_activation,
241 const MatrixXd & W_i, const MatrixXd & U_i, const VectorXd & b_i,
242 const MatrixXd & W_f, const MatrixXd & U_f, const VectorXd & b_f,
243 const MatrixXd & W_o, const MatrixXd & U_o, const VectorXd & b_o,
244 const MatrixXd & W_c, const MatrixXd & U_c, const VectorXd & b_c,
245 bool go_backwards,
246 bool return_sequence);
247
248 virtual ~LSTMLayer() {};
249 virtual MatrixXd scan( const MatrixXd&) const override;
250 void step( const VectorXd& input, LSTMState& ) const;
251
252 private:
253 std::function<double(double)> m_activation_fun;
254 std::function<double(double)> m_inner_activation_fun;
255
256 MatrixXd m_W_i;
257 MatrixXd m_U_i;
258 VectorXd m_b_i;
259
260 MatrixXd m_W_f;
261 MatrixXd m_U_f;
262 VectorXd m_b_f;
263
264 MatrixXd m_W_o;
265 MatrixXd m_U_o;
266 VectorXd m_b_o;
267
268 MatrixXd m_W_c;
269 MatrixXd m_U_c;
270 VectorXd m_b_c;
271
273 };
274
276 struct GRUState;
278 {
279 public:
280 GRULayer(const ActivationConfig & activation,
281 const ActivationConfig & inner_activation,
282 const MatrixXd & W_z, const MatrixXd & U_z, const VectorXd & b_z,
283 const MatrixXd & W_r, const MatrixXd & U_r, const VectorXd & b_r,
284 const MatrixXd & W_h, const MatrixXd & U_h, const VectorXd & b_h);
285
286 virtual ~GRULayer() {};
287 virtual MatrixXd scan( const MatrixXd&) const override;
288 void step( const VectorXd& input, GRUState& ) const;
289
290 private:
291 std::function<double(double)> m_activation_fun;
292 std::function<double(double)> m_inner_activation_fun;
293
294 MatrixXd m_W_z;
295 MatrixXd m_U_z;
296 VectorXd m_b_z;
297
298 MatrixXd m_W_r;
299 MatrixXd m_U_r;
300 VectorXd m_b_r;
301
302 MatrixXd m_W_h;
303 MatrixXd m_U_h;
304 VectorXd m_b_h;
305
307 };
308
311 {
312 public:
313 BidirectionalLayer(std::unique_ptr<IRecurrentLayer> forward_layer,
314 std::unique_ptr<IRecurrentLayer> backward_layer,
315 const std::string& merge_mode,
316 bool return_sequence);
317
319 virtual MatrixXd scan( const MatrixXd&) const override;
320
321 private:
322 std::unique_ptr<const IRecurrentLayer> m_forward_layer;
323 std::unique_ptr<const IRecurrentLayer> m_backward_layer;
324
325 std::string m_merge_mode;
326 };
327
328 // ______________________________________________________________________
329 // Activation functions
330
// Scalar activation functions. Other activations are supported as well,
// but they are too simple to require a special function.
double nn_sigmoid(double x);
double nn_hard_sigmoid(double x);
double nn_tanh(double x);
double nn_relu(double x);
// Activation function object carrying one parameter, alpha.
// Callable as double(double).
// NOTE(review): presumably the exponential linear unit; the formula lives
// in Stack.cxx.
class ELU
{
public:
  // Explicit: a plain double must not silently convert into an ELU.
  explicit ELU(double alpha);

  double operator()(double) const;

private:
  double m_alpha;
};
// Activation function object carrying one parameter, alpha.
// Callable as double(double).
// NOTE(review): presumably a leaky rectified linear unit; the formula
// lives in Stack.cxx.
class LeakyReLU
{
public:
  // Explicit: a plain double must not silently convert into a LeakyReLU.
  explicit LeakyReLU(double alpha);

  double operator()(double) const;

private:
  double m_alpha;
};
// Activation function object carrying one parameter, alpha.
// Callable as double(double).
// NOTE(review): presumably the Swish activation; the formula lives in
// Stack.cxx.
class Swish
{
public:
  // Explicit: a plain double must not silently convert into a Swish.
  explicit Swish(double alpha);

  double operator()(double) const;

private:
  double m_alpha;
};
361 std::function<double(double)> get_activation(lwtDev::ActivationConfig);
362
363 // WARNING: you own this pointer! Only call when assigning to member data!
365
366 // ______________________________________________________________________
367 // utility functions
368
369 // functions to build up basic units from vectors
370 MatrixXd build_matrix(const std::vector<double>& weights, size_t n_inputs);
371 VectorXd build_vector(const std::vector<double>& bias);
372
373 // consistency checks
374 void throw_if_not_maxout(const LayerConfig& layer);
375 void throw_if_not_dense(const LayerConfig& layer);
376 void throw_if_not_normalization(const LayerConfig& layer);
377
378 // LSTM component for convenience in some layers
380 {
381 Eigen::MatrixXd W;
382 Eigen::MatrixXd U;
383 Eigen::VectorXd b;
384 };
385 DenseComponents get_component(const lwtDev::LayerConfig& layer, size_t n_in);
386
387
388}
389
390#endif // STACK_HH_TAURECTOOLS
#define x
BiasLayer(const VectorXd &bias)
Definition Stack.cxx:196
virtual VectorXd compute(const VectorXd &) const override
Definition Stack.cxx:203
VectorXd m_bias
Definition Stack.h:113
std::unique_ptr< const IRecurrentLayer > m_backward_layer
Definition Stack.h:323
virtual MatrixXd scan(const MatrixXd &) const override
Definition Stack.cxx:623
BidirectionalLayer(std::unique_ptr< IRecurrentLayer > forward_layer, std::unique_ptr< IRecurrentLayer > backward_layer, const std::string &merge_mode, bool return_sequence)
bidirectional layer ///
Definition Stack.cxx:611
std::string m_merge_mode
Definition Stack.h:325
std::unique_ptr< const IRecurrentLayer > m_forward_layer
Definition Stack.h:322
virtual ~BidirectionalLayer()
Definition Stack.h:318
virtual VectorXd compute(const VectorXd &) const override
Definition Stack.cxx:169
ELU(double alpha)
Definition Stack.cxx:721
double m_alpha
Definition Stack.h:343
double operator()(double) const
Definition Stack.cxx:724
virtual ~EmbeddingLayer()
Definition Stack.h:226
EmbeddingLayer(int var_row_index, const MatrixXd &W)
Definition Stack.cxx:427
virtual MatrixXd scan(const MatrixXd &) const override
Definition Stack.cxx:437
std::function< double(double)> m_inner_activation_fun
Definition Stack.h:292
MatrixXd m_W_z
Definition Stack.h:294
GRULayer(const ActivationConfig &activation, const ActivationConfig &inner_activation, const MatrixXd &W_z, const MatrixXd &U_z, const VectorXd &b_z, const MatrixXd &W_r, const MatrixXd &U_r, const VectorXd &b_r, const MatrixXd &W_h, const MatrixXd &U_h, const VectorXd &b_h)
Definition Stack.cxx:550
MatrixXd m_W_h
Definition Stack.h:302
void step(const VectorXd &input, GRUState &) const
Definition Stack.cxx:582
std::function< double(double)> m_activation_fun
Definition Stack.h:291
MatrixXd m_W_r
Definition Stack.h:298
MatrixXd m_U_z
Definition Stack.h:295
MatrixXd m_U_h
Definition Stack.h:303
VectorXd m_b_h
Definition Stack.h:304
MatrixXd m_U_r
Definition Stack.h:299
VectorXd m_b_r
Definition Stack.h:300
virtual ~GRULayer()
Definition Stack.h:286
virtual MatrixXd scan(const MatrixXd &) const override
Definition Stack.cxx:598
VectorXd m_b_z
Definition Stack.h:296
std::function< double(double)> m_act
Definition Stack.h:167
virtual VectorXd compute(const VectorXd &) const override
Definition Stack.cxx:261
HighwayLayer(const MatrixXd &W, const VectorXd &b, const MatrixXd &W_carry, const VectorXd &b_carry, ActivationConfig activation)
Definition Stack.cxx:252
virtual VectorXd compute(const VectorXd &) const =0
virtual ~ILayer()
Definition Stack.h:81
virtual MatrixXd scan(const MatrixXd &) const =0
virtual ~IRecurrentLayer()
Definition Stack.h:215
MatrixXd m_U_c
Definition Stack.h:269
void step(const VectorXd &input, LSTMState &) const
Definition Stack.cxx:516
VectorXd m_b_i
Definition Stack.h:258
MatrixXd m_W_i
Definition Stack.h:256
std::function< double(double)> m_inner_activation_fun
Definition Stack.h:254
MatrixXd m_W_c
Definition Stack.h:268
LSTMLayer(const ActivationConfig &activation, const ActivationConfig &inner_activation, const MatrixXd &W_i, const MatrixXd &U_i, const VectorXd &b_i, const MatrixXd &W_f, const MatrixXd &U_f, const VectorXd &b_f, const MatrixXd &W_o, const MatrixXd &U_o, const VectorXd &b_o, const MatrixXd &W_c, const MatrixXd &U_c, const VectorXd &b_c, bool go_backwards, bool return_sequence)
Definition Stack.cxx:473
MatrixXd m_U_i
Definition Stack.h:257
MatrixXd m_W_f
Definition Stack.h:260
std::function< double(double)> m_activation_fun
Definition Stack.h:253
MatrixXd m_U_o
Definition Stack.h:265
virtual MatrixXd scan(const MatrixXd &) const override
Definition Stack.cxx:535
VectorXd m_b_o
Definition Stack.h:266
MatrixXd m_W_o
Definition Stack.h:264
virtual ~LSTMLayer()
Definition Stack.h:248
VectorXd m_b_f
Definition Stack.h:262
MatrixXd m_U_f
Definition Stack.h:261
VectorXd m_b_c
Definition Stack.h:270
LeakyReLU(double alpha)
Definition Stack.cxx:732
double operator()(double) const
Definition Stack.cxx:735
double m_alpha
Definition Stack.h:351
MatrixLayer(const MatrixXd &matrix)
Definition Stack.cxx:208
virtual VectorXd compute(const VectorXd &) const override
Definition Stack.cxx:212
MatrixXd m_matrix
Definition Stack.h:122
MatrixXd m_bias
Definition Stack.h:133
std::vector< MatrixXd > m_matrices
Definition Stack.h:132
std::pair< MatrixXd, VectorXd > InitUnit
Definition Stack.h:128
virtual VectorXd compute(const VectorXd &) const override
Definition Stack.cxx:227
MaxoutLayer(const std::vector< InitUnit > &maxout_tensor)
Definition Stack.cxx:217
NormalizationLayer(const VectorXd &W, const VectorXd &b)
Definition Stack.cxx:241
virtual VectorXd compute(const VectorXd &) const override
Definition Stack.cxx:246
size_t add_embedding_layers(size_t n_inputs, const LayerConfig &)
Definition Stack.cxx:384
MatrixXd scan(MatrixXd inputs) const
Definition Stack.cxx:300
RecurrentStack & operator=(RecurrentStack &)=delete
RecurrentStack(size_t n_inputs, const std::vector< LayerConfig > &layers)
Definition Stack.cxx:271
size_t n_outputs() const
Definition Stack.cxx:306
size_t add_gru_layers(size_t n_inputs, const LayerConfig &)
Definition Stack.cxx:330
size_t add_bidirectional_layers(size_t n_inputs, const LayerConfig &)
Definition Stack.cxx:344
RecurrentStack(RecurrentStack &)=delete
std::vector< IRecurrentLayer * > m_layers
Definition Stack.h:183
size_t add_lstm_layers(size_t n_inputs, const LayerConfig &)
Definition Stack.cxx:310
size_t n_outputs() const
Definition Stack.cxx:420
ReductionStack & operator=(ReductionStack &)=delete
ReductionStack(ReductionStack &)=delete
RecurrentStack * m_recurrent
Definition Stack.h:205
VectorXd reduce(MatrixXd inputs) const
Definition Stack.cxx:416
ReductionStack(size_t n_in, const std::vector< LayerConfig > &layers)
Definition Stack.cxx:396
virtual VectorXd compute(const VectorXd &) const override
Definition Stack.cxx:182
size_t m_n_outputs
Definition Stack.h:72
std::vector< ILayer * > m_layers
Definition Stack.h:71
size_t add_maxout_layers(size_t n_inputs, const LayerConfig &)
Definition Stack.cxx:144
size_t n_outputs() const
Definition Stack.cxx:57
size_t add_normalization_layers(size_t n_inputs, const LayerConfig &)
Definition Stack.cxx:111
size_t add_dense_layers(size_t n_inputs, const LayerConfig &)
Definition Stack.cxx:79
size_t add_layers(size_t n_inputs, const LayerConfig &)
Definition Stack.cxx:66
Stack(Stack &)=delete
VectorXd compute(VectorXd) const
Definition Stack.cxx:51
Stack & operator=(Stack &)=delete
size_t add_highway_layers(size_t n_inputs, const LayerConfig &)
Definition Stack.cxx:133
double operator()(double) const
Definition Stack.cxx:708
Swish(double alpha)
Definition Stack.cxx:705
double m_alpha
Definition Stack.h:359
UnaryActivationLayer(ActivationConfig)
Definition Stack.cxx:174
virtual VectorXd compute(const VectorXd &) const override
Definition Stack.cxx:178
std::function< double(double)> m_func
Definition Stack.h:97
MatrixXd build_matrix(const std::vector< double > &weights, size_t n_inputs)
Definition Stack.cxx:741
ILayer * get_raw_activation_layer(ActivationConfig activation)
Definition Stack.cxx:661
void throw_if_not_maxout(const LayerConfig &layer)
Definition Stack.cxx:771
double nn_sigmoid(double x)
Definition Stack.cxx:690
double nn_hard_sigmoid(double x)
Definition Stack.cxx:697
DenseComponents get_component(const lwtDev::LayerConfig &layer, size_t n_in)
Definition Stack.cxx:792
VectorXd build_vector(const std::vector< double > &bias)
Definition Stack.cxx:760
void throw_if_not_normalization(const LayerConfig &layer)
Definition Stack.cxx:785
std::function< double(double)> get_activation(lwtDev::ActivationConfig act)
Definition Stack.cxx:671
double nn_relu(double x)
Definition Stack.cxx:716
void throw_if_not_dense(const LayerConfig &layer)
Definition Stack.cxx:779
double nn_tanh(double x)
Definition Stack.cxx:712
Eigen::VectorXd b
Definition Stack.h:383
Eigen::MatrixXd U
Definition Stack.h:382
Eigen::MatrixXd W
Definition Stack.h:381