ATLAS Offline Software
Stack.cxx
/*
  Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
*/

#include "Stack.h"
#include <Eigen/Dense>
#include <set>

// internal utility functions
namespace {
  using namespace Eigen;
  using namespace lwtDev;
}
namespace lwtDev {

  // ______________________________________________________________________
  // Feed forward Stack class

  // dummy construction routine
  Stack::Stack() {
    m_layers.push_back(new DummyLayer);
    m_layers.push_back(new UnaryActivationLayer({ Activation::SIGMOID, 0.0 }));

    m_layers.push_back(new BiasLayer(std::vector<double>{1, 1, 1, 1}));
    MatrixXd mat(4, 4);
    mat <<
      0, 0, 0, 1,
      0, 0, 1, 0,
      0, 1, 0, 0,
      1, 0, 0, 0;
    m_layers.push_back(new MatrixLayer(mat));
    m_n_outputs = 4;
  }

  // construct from LayerConfig
  Stack::Stack(size_t n_inputs, const std::vector<LayerConfig>& layers,
               size_t skip) {
    for (size_t nnn = skip; nnn < layers.size(); nnn++) {
      n_inputs = add_layers(n_inputs, layers.at(nnn));
    }
    // the final assigned n_inputs is the number of output nodes
    m_n_outputs = n_inputs;
  }

  Stack::~Stack() {
    for (auto& layer: m_layers) {
      delete layer;
      layer = 0;
    }
  }
  VectorXd Stack::compute(VectorXd in) const {
    for (const auto& layer: m_layers) {
      in = layer->compute(in);
    }
    return in;
  }
  size_t Stack::n_outputs() const {
    return m_n_outputs;
  }
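
  // Illustrative usage (a sketch, not part of this file; `layer_configs`
  // and `inputs` are hypothetical names):
  //
  //   lwtDev::Stack stack(n_inputs, layer_configs);
  //   Eigen::VectorXd outputs = stack.compute(inputs); // inputs has n_inputs rows
  //   assert(outputs.rows() == static_cast<int>(stack.n_outputs()));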


  // Private Stack methods to add various types of layers
  //
  // top level add_layers method. This delegates to the other methods
  // below
  size_t Stack::add_layers(size_t n_inputs, const LayerConfig& layer) {
    if (layer.architecture == Architecture::DENSE) {
      return add_dense_layers(n_inputs, layer);
    } else if (layer.architecture == Architecture::NORMALIZATION) {
      return add_normalization_layers(n_inputs, layer);
    } else if (layer.architecture == Architecture::HIGHWAY) {
      return add_highway_layers(n_inputs, layer);
    } else if (layer.architecture == Architecture::MAXOUT) {
      return add_maxout_layers(n_inputs, layer);
    }
    throw NNConfigurationException("unknown architecture");
  }

  size_t Stack::add_dense_layers(size_t n_inputs, const LayerConfig& layer) {
    assert(layer.architecture == Architecture::DENSE);
    throw_if_not_dense(layer);

    size_t n_outputs = n_inputs;

    // add matrix layer
    if (layer.weights.size() > 0) {
      MatrixXd matrix = build_matrix(layer.weights, n_inputs);
      n_outputs = matrix.rows();
      m_layers.push_back(new MatrixLayer(matrix));
    }

    // add bias layer
    if (layer.bias.size() > 0) {
      if (n_outputs != layer.bias.size()) {
        std::string problem = "tried to add a bias layer with " +
          std::to_string(layer.bias.size()) + " entries, previous layer"
          " had " + std::to_string(n_outputs) + " outputs";
        throw NNConfigurationException(problem);
      }
      m_layers.push_back(new BiasLayer(layer.bias));
    }

    // add activation layer
    if (layer.activation.function != Activation::LINEAR) {
      m_layers.push_back(get_raw_activation_layer(layer.activation));
    }

    return n_outputs;
  }
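
  // In other words, a dense layer computing activation(W*x + b) is unrolled
  // into up to three primitive layers. A hand-built equivalent would look
  // roughly like this (sketch only, `cfg` is a hypothetical LayerConfig):
  //
  //   MatrixXd W = build_matrix(cfg.weights, n_inputs);     // (n_out x n_in)
  //   m_layers.push_back(new MatrixLayer(W));
  //   m_layers.push_back(new BiasLayer(cfg.bias));
  //   m_layers.push_back(get_raw_activation_layer(cfg.activation));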

  size_t Stack::add_normalization_layers(size_t n_inputs, const LayerConfig& layer) {
    assert(layer.architecture == Architecture::NORMALIZATION);
    throw_if_not_normalization(layer);

    // Do some checks
    if ( layer.weights.size() < 1 || layer.bias.size() < 1 ) {
      std::string problem = "Either weights or bias layer size is < 1";
      throw NNConfigurationException(problem);
    }
    if ( layer.weights.size() != layer.bias.size() ) {
      std::string problem = "weights and bias layer are not equal in size!";
      throw NNConfigurationException(problem);
    }
    VectorXd v_weights = build_vector(layer.weights);
    VectorXd v_bias = build_vector(layer.bias);

    m_layers.push_back(
      new NormalizationLayer(v_weights, v_bias));
    return n_inputs;
  }


  size_t Stack::add_highway_layers(size_t n_inputs, const LayerConfig& layer) {
    auto& comps = layer.components;
    const auto& t = get_component(comps.at(Component::T), n_inputs);
    const auto& c = get_component(comps.at(Component::CARRY), n_inputs);

    m_layers.push_back(
      new HighwayLayer(t.W, t.b, c.W, c.b, layer.activation));
    return n_inputs;
  }


  size_t Stack::add_maxout_layers(size_t n_inputs, const LayerConfig& layer) {
    assert(layer.architecture == Architecture::MAXOUT);
    throw_if_not_maxout(layer);
    std::vector<MaxoutLayer::InitUnit> matrices;
    std::set<size_t> n_outputs;
    for (const auto& sublayer: layer.sublayers) {
      MatrixXd matrix = build_matrix(sublayer.weights, n_inputs);
      VectorXd bias = build_vector(sublayer.bias);
      n_outputs.insert(matrix.rows());
      matrices.push_back(std::make_pair(matrix, bias));
    }
    if (n_outputs.size() == 0) {
      throw NNConfigurationException("tried to build maxout without weights!");
    }
    else if (n_outputs.size() != 1) {
      throw NNConfigurationException("uneven matrices for maxout");
    }
    m_layers.push_back(new MaxoutLayer(matrices));
    return *n_outputs.begin();
  }


  // _______________________________________________________________________
  // Feed-forward layers

  VectorXd DummyLayer::compute(const VectorXd& in) const {
    return in;
  }

  // activation functions
  UnaryActivationLayer::UnaryActivationLayer(ActivationConfig act):
    m_func(get_activation(act))
  {
  }
  VectorXd UnaryActivationLayer::compute(const VectorXd& in) const {
    return in.unaryExpr(m_func);
  }

  VectorXd SoftmaxLayer::compute(const VectorXd& in) const {
    // More numerically stable softmax, as suggested in
    // http://stackoverflow.com/a/34969389
    size_t n_elements = in.rows();
    VectorXd exp(n_elements);
    double max = in.maxCoeff();
    for (size_t iii = 0; iii < n_elements; iii++) {
      exp(iii) = std::exp(in(iii) - max);
    }
    double sum_exp = exp.sum();
    return exp / sum_exp;
  }
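
  // Quick worked check (illustrative): for in = (1, 2, 3) the code computes
  // exp = (e^-2, e^-1, e^0) ~ (0.135, 0.368, 1.000) and sum_exp ~ 1.503,
  // giving ~ (0.090, 0.245, 0.665). Subtracting the max leaves the ratios,
  // and hence the softmax, unchanged, but keeps std::exp from overflowing
  // for large inputs.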

  // bias layer
  BiasLayer::BiasLayer(const VectorXd& bias): m_bias(bias)
  {
  }
  BiasLayer::BiasLayer(const std::vector<double>& bias):
    m_bias(build_vector(bias))
  {
  }
  VectorXd BiasLayer::compute(const VectorXd& in) const {
    return in + m_bias;
  }

  // basic dense matrix layer
  MatrixLayer::MatrixLayer(const MatrixXd& matrix):
    m_matrix(matrix)
  {
  }
  VectorXd MatrixLayer::compute(const VectorXd& in) const {
    return m_matrix * in;
  }

  // maxout layer
  MaxoutLayer::MaxoutLayer(const std::vector<MaxoutLayer::InitUnit>& units):
    m_bias(units.size(), units.front().first.rows())
  {
    int out_pos = 0;
    for (const auto& unit: units) {
      m_matrices.push_back(unit.first);
      m_bias.row(out_pos) = unit.second;
      out_pos++;
    }
  }
  VectorXd MaxoutLayer::compute(const VectorXd& in) const {
    // eigen supports tensors, but only in the experimental component
    // for now just stick to matrix and vector classes
    const size_t n_mat = m_matrices.size();
    const size_t out_dim = m_matrices.front().rows();
    MatrixXd outputs(n_mat, out_dim);
    for (size_t mat_n = 0; mat_n < n_mat; mat_n++) {
      outputs.row(mat_n) = m_matrices.at(mat_n) * in;
    }
    outputs += m_bias;
    return outputs.colwise().maxCoeff();
  }
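
  // In formula form: output element j is max over units i of
  // (W_i * x + b_i)_j. Row mat_n of `outputs` holds one affine map of the
  // input, and colwise().maxCoeff() takes the element-wise maximum across
  // the units.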

  // Normalization layer
  NormalizationLayer::NormalizationLayer(const VectorXd& W,
                                         const VectorXd& b):
    m_W(W), m_b(b)
  {
  }
  VectorXd NormalizationLayer::compute(const VectorXd& in) const {
    VectorXd shift = in + m_b;
    return m_W.cwiseProduct(shift);
  }
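
  // i.e. out = W .* (in + b), an element-wise affine map. For the
  // batch-normalization use case referenced in Stack.h
  // (https://arxiv.org/abs/1502.03167) one would typically load
  // b = -mean and W = 1/stddev, but any per-element scale and offset
  // fits this form.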

  // highway layer
  HighwayLayer::HighwayLayer(const MatrixXd& W,
                             const VectorXd& b,
                             const MatrixXd& W_carry,
                             const VectorXd& b_carry,
                             ActivationConfig activation):
    m_w_t(W), m_b_t(b), m_w_c(W_carry), m_b_c(b_carry),
    m_act(get_activation(activation))
  {
  }
  VectorXd HighwayLayer::compute(const VectorXd& in) const {
    const std::function<double(double)> sig(nn_sigmoid);
    ArrayXd c = (m_w_c * in + m_b_c).unaryExpr(sig);
    ArrayXd t = (m_w_t * in + m_b_t).unaryExpr(m_act);
    return c * t + (1 - c) * in.array();
  }
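
  // This is the standard highway unit: with carry gate
  // c = sigmoid(W_c * x + b_c) and transform t = act(W_t * x + b_t), the
  // output c .* t + (1 - c) .* x interpolates element-wise between the
  // transformed and the untouched input. Both branches must have the input
  // dimension, which is why add_highway_layers returns n_inputs unchanged.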

  // ______________________________________________________________________
  // Recurrent Stack

  RecurrentStack::RecurrentStack(size_t n_inputs,
                                 const std::vector<lwtDev::LayerConfig>& layers)
  {
    using namespace lwtDev;
    const size_t n_layers = layers.size();
    for (size_t layer_n = 0; layer_n < n_layers; layer_n++) {
      auto& layer = layers.at(layer_n);

      // add recurrent layers (LSTM, GRU, bidirectional, and embedding)
      if (layer.architecture == Architecture::LSTM) {
        n_inputs = add_lstm_layers(n_inputs, layer);
      } else if (layer.architecture == Architecture::GRU) {
        n_inputs = add_gru_layers(n_inputs, layer);
      } else if (layer.architecture == Architecture::BIDIRECTIONAL) {
        n_inputs = add_bidirectional_layers(n_inputs, layer);
      } else if (layer.architecture == Architecture::EMBEDDING) {
        n_inputs = add_embedding_layers(n_inputs, layer);
      } else {
        throw NNConfigurationException("found non-recurrent layer");
      }
    }
    m_n_outputs = n_inputs;
  }
  RecurrentStack::~RecurrentStack() {
    for (auto& layer: m_layers) {
      delete layer;
      layer = 0;
    }
  }
  MatrixXd RecurrentStack::scan(MatrixXd in) const {
    for (auto* layer: m_layers) {
      in = layer->scan(in);
    }
    return in;
  }
  size_t RecurrentStack::n_outputs() const {
    return m_n_outputs;
  }

  size_t RecurrentStack::add_lstm_layers(size_t n_inputs,
                                         const LayerConfig& layer) {
    auto& comps = layer.components;
    const auto& i = get_component(comps.at(Component::I), n_inputs);
    const auto& o = get_component(comps.at(Component::O), n_inputs);
    const auto& f = get_component(comps.at(Component::F), n_inputs);
    const auto& c = get_component(comps.at(Component::C), n_inputs);
    const bool& go_backwards = layer.go_backwards;
    const bool& return_sequence = layer.return_sequence;
    m_layers.push_back(
      new LSTMLayer(layer.activation, layer.inner_activation,
                    i.W, i.U, i.b,
                    f.W, f.U, f.b,
                    o.W, o.U, o.b,
                    c.W, c.U, c.b,
                    go_backwards,
                    return_sequence));
    return o.b.rows();
  }

  size_t RecurrentStack::add_gru_layers(size_t n_inputs,
                                        const LayerConfig& layer) {
    auto& comps = layer.components;
    const auto& z = get_component(comps.at(Component::Z), n_inputs);
    const auto& r = get_component(comps.at(Component::R), n_inputs);
    const auto& h = get_component(comps.at(Component::H), n_inputs);
    m_layers.push_back(
      new GRULayer(layer.activation, layer.inner_activation,
                   z.W, z.U, z.b,
                   r.W, r.U, r.b,
                   h.W, h.U, h.b));
    return h.b.rows();
  }

  size_t RecurrentStack::add_bidirectional_layers(size_t n_inputs,
                                                  const LayerConfig& layer) {
    // nasty hack to get our hands on the RNNs: create each RNN, fetch it
    // from m_layers, and finally pop it
    if(layer.sublayers.size() != 2)
      throw NNConfigurationException("Number of sublayers not matching expected number of 2 for bidirectional layers");
    const LayerConfig forward_layer_conf = layer.sublayers[0];
    const LayerConfig backward_layer_conf = layer.sublayers[1];
    size_t n_forward = 0;
    // fixing nasty -Wunused-but-set-variable warning
    (void) n_forward;
    size_t n_backward = 0;
    if(forward_layer_conf.architecture == Architecture::LSTM)
      n_forward = add_lstm_layers(n_inputs, forward_layer_conf);
    else if(forward_layer_conf.architecture == Architecture::GRU)
      n_forward = add_gru_layers(n_inputs, forward_layer_conf);
    else
      throw NNConfigurationException("Bidirectional forward layer type not supported");

    std::unique_ptr<IRecurrentLayer> forward_layer(m_layers.back());
    m_layers.pop_back();

    if(backward_layer_conf.architecture == Architecture::LSTM)
      n_backward = add_lstm_layers(n_inputs, backward_layer_conf);
    else if(backward_layer_conf.architecture == Architecture::GRU)
      n_backward = add_gru_layers(n_inputs, backward_layer_conf);
    else
      throw NNConfigurationException("Bidirectional backward layer type not supported");

    std::unique_ptr<IRecurrentLayer> backward_layer(m_layers.back());
    backward_layer->m_go_backwards = (!forward_layer->m_go_backwards);
    m_layers.pop_back();

    m_layers.push_back(new BidirectionalLayer(std::move(forward_layer),
                                              std::move(backward_layer),
                                              layer.merge_mode,
                                              layer.return_sequence));
    return n_backward;
  }

  size_t RecurrentStack::add_embedding_layers(size_t n_inputs,
                                              const LayerConfig& layer) {
    for (const auto& emb: layer.embedding) {
      size_t n_wt = emb.weights.size();
      size_t n_cats = n_wt / emb.n_out;
      MatrixXd mat = build_matrix(emb.weights, n_cats);
      m_layers.push_back(new EmbeddingLayer(emb.index, mat));
      n_inputs += emb.n_out - 1;
    }
    return n_inputs;
  }

  ReductionStack::ReductionStack(size_t n_in,
                                 const std::vector<LayerConfig>& layers) {
    std::vector<LayerConfig> recurrent;
    std::vector<LayerConfig> feed_forward;
    std::set<Architecture> recurrent_arcs{
      Architecture::LSTM, Architecture::GRU, Architecture::BIDIRECTIONAL,
      Architecture::EMBEDDING};
    for (const auto& layer: layers) {
      if (recurrent_arcs.count(layer.architecture)) {
        recurrent.push_back(layer);
      } else {
        feed_forward.push_back(layer);
      }
    }
    m_recurrent = new RecurrentStack(n_in, recurrent);
    m_stack = new Stack(m_recurrent->n_outputs(), feed_forward);
  }
  ReductionStack::~ReductionStack() {
    delete m_recurrent;
    delete m_stack;
  }
  VectorXd ReductionStack::reduce(MatrixXd in) const {
    in = m_recurrent->scan(in);
    return m_stack->compute(in.col(in.cols() - 1));
  }
  size_t ReductionStack::n_outputs() const {
    return m_stack->n_outputs();
  }
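
  // Sketch of the intended flow (illustrative; `seq` and `layer_configs`
  // are hypothetical): given a matrix with one input variable per row and
  // one time step per column,
  //
  //   lwtDev::ReductionStack rstack(n_in, layer_configs);
  //   Eigen::VectorXd summary = rstack.reduce(seq);
  //
  // scans the recurrent layers over all time steps, then applies the
  // feed-forward stack to the final column only, reducing a sequence to a
  // fixed-size vector.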

  // __________________________________________________________________
  // Recurrent layers

  EmbeddingLayer::EmbeddingLayer(int var_row_index, const MatrixXd & W):
    m_var_row_index(var_row_index),
    m_W(W)
  {
    if(var_row_index < 0)
      throw NNConfigurationException(
        "EmbeddingLayer::EmbeddingLayer - can not set var_row_index<0,"
        " it is an index for a matrix row!");
  }

  MatrixXd EmbeddingLayer::scan( const MatrixXd& x) const {

    if( m_var_row_index >= x.rows() )
      throw NNEvaluationException(
        "EmbeddingLayer::scan - var_row_index is larger than input matrix"
        " number of rows!");

    MatrixXd embedded(m_W.rows(), x.cols());

    for(int icol=0; icol<x.cols(); icol++) {
      double vector_idx = x(m_var_row_index, icol);
      bool is_int = std::floor(vector_idx) == vector_idx;
      bool is_valid = (vector_idx >= 0) && (vector_idx < m_W.cols());
      if (!is_int || !is_valid) throw NNEvaluationException(
        "Invalid embedded index: " + std::to_string(vector_idx));
      embedded.col(icol) = m_W.col( vector_idx );
    }

    // only embed 1 variable at a time, so this should be correct size
    MatrixXd out(m_W.rows() + (x.rows() - 1), x.cols());

    // assuming m_var_row_index is an index with first possible value of 0
    if(m_var_row_index > 0)
      out.topRows(m_var_row_index) = x.topRows(m_var_row_index);

    out.block(m_var_row_index, 0, embedded.rows(), embedded.cols()) = embedded;

    // copy any rows below the embedded variable (there are
    // x.rows() - 1 - m_var_row_index of them)
    if( m_var_row_index < (x.rows()-1) )
      out.bottomRows( x.rows() - 1 - m_var_row_index)
        = x.bottomRows( x.rows() - 1 - m_var_row_index);

    return out;
  }
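
  // Shape example (illustrative): with 5 input rows, row 2 holding the
  // categorical variable, and a 3-row embedding matrix m_W, the output has
  // 5 - 1 + 3 = 7 rows per time step: rows 0-1 are copied, rows 2-4 hold
  // the embedding column selected by the category, and the former rows 3-4
  // move down to rows 5-6.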


  // LSTM layer
  LSTMLayer::LSTMLayer(const ActivationConfig & activation,
                       const ActivationConfig & inner_activation,
                       const MatrixXd & W_i, const MatrixXd & U_i, const VectorXd & b_i,
                       const MatrixXd & W_f, const MatrixXd & U_f, const VectorXd & b_f,
                       const MatrixXd & W_o, const MatrixXd & U_o, const VectorXd & b_o,
                       const MatrixXd & W_c, const MatrixXd & U_c, const VectorXd & b_c,
                       bool go_backwards,
                       bool return_sequence):
    m_W_i(W_i),
    m_U_i(U_i),
    m_b_i(b_i),
    m_W_f(W_f),
    m_U_f(U_f),
    m_b_f(b_f),
    m_W_o(W_o),
    m_U_o(U_o),
    m_b_o(b_o),
    m_W_c(W_c),
    m_U_c(U_c),
    m_b_c(b_c)
  {
    m_n_outputs = m_W_o.rows();

    m_activation_fun = get_activation(activation);
    m_inner_activation_fun = get_activation(inner_activation);
    m_go_backwards = go_backwards;
    m_return_sequence = return_sequence;
  }

  // internal structure created on each scan call
  struct LSTMState {
    LSTMState(size_t n_input, size_t n_outputs);
    MatrixXd C_t;
    MatrixXd h_t;
    int time;
  };
  LSTMState::LSTMState(size_t n_input, size_t n_output):
    C_t(MatrixXd::Zero(n_output, n_input)),
    h_t(MatrixXd::Zero(n_output, n_input)),
    time(0)
  {
  }

  void LSTMLayer::step(const VectorXd& x_t, LSTMState& s) const {
    // https://github.com/fchollet/keras/blob/master/keras/layers/recurrent.py#L740

    const auto& act_fun = m_activation_fun;
    const auto& in_act_fun = m_inner_activation_fun;

    int tm1 = s.time == 0 ? 0 : s.time - 1;
    VectorXd h_tm1 = s.h_t.col(tm1);
    VectorXd C_tm1 = s.C_t.col(tm1);

    VectorXd i  = (m_W_i*x_t + m_b_i + m_U_i*h_tm1).unaryExpr(in_act_fun);
    VectorXd f  = (m_W_f*x_t + m_b_f + m_U_f*h_tm1).unaryExpr(in_act_fun);
    VectorXd o  = (m_W_o*x_t + m_b_o + m_U_o*h_tm1).unaryExpr(in_act_fun);
    VectorXd ct = (m_W_c*x_t + m_b_c + m_U_c*h_tm1).unaryExpr(act_fun);

    s.C_t.col(s.time) = f.cwiseProduct(C_tm1) + i.cwiseProduct(ct);
    s.h_t.col(s.time) = o.cwiseProduct(s.C_t.col(s.time).unaryExpr(act_fun));
  }
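
  // These are the usual LSTM update equations, with sigma the configured
  // inner activation and act the configured outer activation:
  //
  //   i_t  = sigma(W_i x_t + U_i h_{t-1} + b_i)   (input gate)
  //   f_t  = sigma(W_f x_t + U_f h_{t-1} + b_f)   (forget gate)
  //   o_t  = sigma(W_o x_t + U_o h_{t-1} + b_o)   (output gate)
  //   c~_t = act(W_c x_t + U_c h_{t-1} + b_c)     (candidate cell state)
  //   C_t  = f_t .* C_{t-1} + i_t .* c~_t
  //   h_t  = o_t .* act(C_t)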

  MatrixXd LSTMLayer::scan( const MatrixXd& x ) const {
    LSTMState state(x.cols(), m_n_outputs);

    for(state.time = 0; state.time < x.cols(); state.time++) {
      if (m_go_backwards)
        step( x.col( x.cols() - 1 - state.time ), state );
      else
        step( x.col( state.time ), state );
    }

    return state.h_t;
  }


  // GRU layer
  GRULayer::GRULayer(const ActivationConfig & activation,
                     const ActivationConfig & inner_activation,
                     const MatrixXd & W_z, const MatrixXd & U_z, const VectorXd & b_z,
                     const MatrixXd & W_r, const MatrixXd & U_r, const VectorXd & b_r,
                     const MatrixXd & W_h, const MatrixXd & U_h, const VectorXd & b_h):
    m_W_z(W_z),
    m_U_z(U_z),
    m_b_z(b_z),
    m_W_r(W_r),
    m_U_r(U_r),
    m_b_r(b_r),
    m_W_h(W_h),
    m_U_h(U_h),
    m_b_h(b_h)
  {
    m_n_outputs = m_W_h.rows();

    m_activation_fun = get_activation(activation);
    m_inner_activation_fun = get_activation(inner_activation);
  }
  // internal structure created on each scan call
  struct GRUState {
    GRUState(size_t n_input, size_t n_outputs);
    MatrixXd h_t;
    int time;
  };
  GRUState::GRUState(size_t n_input, size_t n_output):
    h_t(MatrixXd::Zero(n_output, n_input)),
    time(0)
  {
  }

  void GRULayer::step( const VectorXd& x_t, GRUState& s) const {
    // https://github.com/fchollet/keras/blob/master/keras/layers/recurrent.py#L547

    const auto& act_fun = m_activation_fun;
    const auto& in_act_fun = m_inner_activation_fun;

    int tm1 = s.time == 0 ? 0 : s.time - 1;
    VectorXd h_tm1 = s.h_t.col(tm1);
    VectorXd z  = (m_W_z*x_t + m_b_z + m_U_z*h_tm1).unaryExpr(in_act_fun);
    VectorXd r  = (m_W_r*x_t + m_b_r + m_U_r*h_tm1).unaryExpr(in_act_fun);
    VectorXd rh = r.cwiseProduct(h_tm1);
    VectorXd hh = (m_W_h*x_t + m_b_h + m_U_h*rh).unaryExpr(act_fun);
    VectorXd one = VectorXd::Ones(z.size());
    s.h_t.col(s.time) = z.cwiseProduct(h_tm1) + (one - z).cwiseProduct(hh);
  }

  MatrixXd GRULayer::scan( const MatrixXd& x ) const {

    GRUState state(x.cols(), m_n_outputs);

    for(state.time = 0; state.time < x.cols(); state.time++) {
      step( x.col( state.time ), state );
    }

    return state.h_t;
  }
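
  // GRU update in equation form, with z the update gate and r the reset
  // gate (sigma is the inner activation, act the outer one):
  //
  //   z_t  = sigma(W_z x_t + U_z h_{t-1} + b_z)
  //   r_t  = sigma(W_r x_t + U_r h_{t-1} + b_r)
  //   h~_t = act(W_h x_t + U_h (r_t .* h_{t-1}) + b_h)
  //   h_t  = z_t .* h_{t-1} + (1 - z_t) .* h~_t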


  // bidirectional layer
  BidirectionalLayer::BidirectionalLayer(std::unique_ptr<IRecurrentLayer> forward_layer,
                                         std::unique_ptr<IRecurrentLayer> backward_layer,
                                         const std::string& merge_mode,
                                         bool return_sequence):
    m_forward_layer(std::move(forward_layer)),
    m_backward_layer(std::move(backward_layer)),
    m_merge_mode(merge_mode)
  {
    // base class variable
    m_return_sequence = return_sequence;
  }

  MatrixXd BidirectionalLayer::scan( const MatrixXd& x) const{
    const MatrixXd & forward = m_forward_layer->scan(x);
    const MatrixXd & backward = m_backward_layer->scan(x);
    MatrixXd backward_rev;
    if(m_return_sequence){
      backward_rev = backward.rowwise().reverse();
    }else{
      backward_rev = backward;
    }

    if(m_merge_mode == "mul")
      return forward.array()*backward_rev.array();
    else if(m_merge_mode == "sum")
      return forward.array() + backward_rev.array();
    else if(m_merge_mode == "ave")
      return (forward.array() + backward_rev.array())/2.;
    else if(m_merge_mode == "concat"){
      MatrixXd concatMatr(forward.rows(), forward.cols()+backward_rev.cols());
      concatMatr << forward, backward_rev;
      return concatMatr;
    }else
      throw NNConfigurationException(
        "Merge mode "+m_merge_mode+" not implemented. Choose one of [mul, sum, ave, concat]");

    // mute compiler
    return forward;
  }
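
  // Merge-mode behavior, as implemented above: "mul", "sum" and "ave"
  // combine the two scans element-wise (shapes must match), while "concat"
  // places the backward output next to the forward one, doubling the number
  // of columns. When a full sequence is returned, the backward scan is first
  // reversed in time so the two directions line up step by step.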


  // _____________________________________________________________________
  // Activation functions
  //
  // There are two functions below. In most cases the activation layer
  // can be implemented as a unary function, but in some cases
  // (e.g. softmax) something more complicated is required.

  // Note that in the first case you own this layer! It's your
  // responsibility to delete it.
  ILayer* get_raw_activation_layer(ActivationConfig activation) {
    // Check for special cases. If it's not one, use
    // UnaryActivationLayer
    switch (activation.function) {
    case Activation::SOFTMAX: return new SoftmaxLayer;
    default: return new UnaryActivationLayer(activation);
    }
  }

  // Most activation functions should be handled here.
  std::function<double(double)> get_activation(lwtDev::ActivationConfig act) {
    using namespace lwtDev;
    switch (act.function) {
    case Activation::SIGMOID: return nn_sigmoid;
    case Activation::HARD_SIGMOID: return nn_hard_sigmoid;
    case Activation::SWISH: return Swish(act.alpha);
    case Activation::TANH: return nn_tanh;
    case Activation::RECTIFIED: return nn_relu;
    case Activation::ELU: return ELU(act.alpha);
    case Activation::LEAKY_RELU: return LeakyReLU(act.alpha);
    case Activation::LINEAR: return [](double x){return x;};
    case Activation::ABS: return [](double x){return std::abs(x);};
    default: {
      throw NNConfigurationException("Got undefined activation function");
    }
    }
  }
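
  // Usage sketch (illustrative): the returned std::function is a plain
  // scalar map, suitable for Eigen's unaryExpr:
  //
  //   auto elu = lwtDev::get_activation({Activation::ELU, 1.0});
  //   double y = elu(-2.0);              // alpha * (exp(-2) - 1) ~ -0.865
  //   VectorXd out = in.unaryExpr(elu);  // element-wise over a vector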


  double nn_sigmoid( double x ){
    //github.com/Theano/Theano/blob/master/theano/tensor/nnet/sigm.py#L35
    if (x < -30.0) return 0.0;
    if (x > 30.0) return 1.0;
    return 1.0 / (1.0 + std::exp(-1.0*x));
  }

  double nn_hard_sigmoid( double x ){
    //github.com/Theano/Theano/blob/master/theano/tensor/nnet/sigm.py#L279
    double out = 0.2*x + 0.5;
    if (out < 0) return 0.0;
    if (out > 1) return 1.0;
    return out;
  }

  Swish::Swish(double alpha):
    m_alpha(alpha)
  {}
  double Swish::operator()(double x) const {
    return x * nn_sigmoid(m_alpha * x);
  }

  double nn_tanh( double x ){
    return std::tanh(x);
  }

  double nn_relu( double x) {
    if (std::isnan(x)) return x;
    else return x > 0 ? x : 0;
  }

  ELU::ELU(double alpha):
    m_alpha(alpha)
  {}
  double ELU::operator()( double x ) const {
    /* ELU function : https://arxiv.org/pdf/1511.07289.pdf
       f(x) = (x >= 0)*x + (x < 0)*alpha*(exp(x) - 1)
    */
    double exp_term = m_alpha * (std::exp(x)-1);
    return x>=0 ? x : exp_term;
  }

  LeakyReLU::LeakyReLU(double alpha):
    m_alpha(alpha)
  {}
  double LeakyReLU::operator()(double x) const {
    return x > 0 ? x : m_alpha * x;
  }

  // ________________________________________________________________________
  // utility functions
  MatrixXd build_matrix(const std::vector<double>& weights, size_t n_inputs)
  {
    size_t n_elements = weights.size();
    if ((n_elements % n_inputs) != 0) {
      std::string problem = "matrix elements not divisible by number"
        " of columns. Elements: " + std::to_string(n_elements) +
        ", Inputs: " + std::to_string(n_inputs);
      throw NNConfigurationException(problem);
    }
    size_t n_outputs = n_elements / n_inputs;
    MatrixXd matrix(n_outputs, n_inputs);
    for (size_t row = 0; row < n_outputs; row++) {
      for (size_t col = 0; col < n_inputs; col++) {
        double element = weights.at(col + row * n_inputs);
        matrix(row, col) = element;
      }
    }
    return matrix;
  }
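
  // Worked example (illustrative): weights = {1, 2, 3, 4, 5, 6} with
  // n_inputs = 3 is unpacked row-major into the 2 x 3 matrix
  //
  //   | 1 2 3 |
  //   | 4 5 6 |
  //
  // since element (row, col) is read from weights[col + row * n_inputs].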
  VectorXd build_vector(const std::vector<double>& bias) {
    VectorXd out(bias.size());
    size_t idx = 0;
    for (const auto& val: bias) {
      out(idx) = val;
      idx++;
    }
    return out;
  }

  // consistency checks
  void throw_if_not_maxout(const LayerConfig& layer) {
    bool wt_ok = layer.weights.size() == 0;
    bool bias_ok = layer.bias.size() == 0;
    bool maxout_ok = layer.sublayers.size() > 0;
    bool act_ok = layer.activation.function == Activation::NONE;
    if (wt_ok && bias_ok && maxout_ok && act_ok) return;
    throw NNConfigurationException("layer has wrong info for maxout");
  }
  void throw_if_not_dense(const LayerConfig& layer) {
    if (layer.sublayers.size() > 0) {
      throw NNConfigurationException("sublayers in dense layer");
    }
  }

  void throw_if_not_normalization(const LayerConfig& layer) {
    if (layer.sublayers.size() > 0) {
      throw NNConfigurationException("sublayers in normalization layer");
    }
  }

  // component-wise getters (for Highway, LSTM, etc)
  DenseComponents get_component(const lwtDev::LayerConfig& layer, size_t n_in) {
    using namespace Eigen;
    using namespace lwtDev;
    MatrixXd weights = build_matrix(layer.weights, n_in);
    size_t n_out = weights.rows();
    VectorXd bias = build_vector(layer.bias);

    // the u element is optional
    size_t u_el = layer.U.size();
    MatrixXd U = u_el ? build_matrix(layer.U, n_out) : MatrixXd::Zero(0,0);

    size_t u_out = U.rows();
    size_t b_out = bias.rows();
    bool u_mismatch = (u_out != n_out) && (u_out > 0);
    if ( u_mismatch || b_out != n_out) {
      throw NNConfigurationException(
        "Output dims mismatch, W: " + std::to_string(n_out) +
        ", U: " + std::to_string(u_out) + ", b: " + std::to_string(b_out));
    }
    return {std::move(weights), std::move(U), std::move(bias)};
  }


}