using namespace Eigen;
Stack::Stack(size_t n_inputs, const std::vector<LayerConfig>& layers,
             size_t skip) {
  for (size_t nnn = skip; nnn < layers.size(); nnn++) {
    n_inputs = add_layers(n_inputs, layers.at(nnn));
  }
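// Note: add_layers() returns the just-added layer's output width, which the
// loop feeds back in as the input width of the next layer, so the Stack
// chains layer dimensions automatically.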
    in = layer->compute(in);
  if (layer.weights.size() > 0) {
  if (layer.bias.size() > 0) {
    if (n_outputs != layer.bias.size()) {
      std::string problem = "tried to add a bias layer with " +
        std::to_string(layer.bias.size()) + " entries, previous layer"
        " had " + std::to_string(n_outputs) + " outputs";
  if ( layer.weights.size() < 1 || layer.bias.size() < 1 ) {
    std::string problem = "Either weights or bias layer size is < 1";

  if ( layer.weights.size() != layer.bias.size() ) {
    std::string problem = "weights and bias layer are not equal in size!";
  auto& comps = layer.components;
  m_layers.push_back(
    new HighwayLayer(t.W, t.b, c.W, c.b, layer.activation));
  std::vector<MaxoutLayer::InitUnit> matrices;
  for (const auto& sublayer: layer.sublayers) {
    MatrixXd matrix = build_matrix(sublayer.weights, n_inputs);
    VectorXd bias = build_vector(sublayer.bias);
    matrices.push_back(std::make_pair(matrix, bias));
  return in.unaryExpr(m_func);
  size_t n_elements = in.rows();
  VectorXd exp(n_elements);
  double max = in.maxCoeff();
  for (size_t iii = 0; iii < n_elements; iii++) {
    exp(iii) = std::exp(in(iii) - max);
  }
  double sum_exp = exp.sum();
  return exp / sum_exp;
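// The loop above is the numerically stable softmax:
//   softmax(in)_i = exp(in_i - max(in)) / sum_j exp(in_j - max(in)).
// Subtracting the maximum does not change the result (the common factor
// exp(-max) cancels between numerator and denominator) but keeps the
// exponent non-positive, so std::exp() cannot overflow for large inputs.
//
// Illustration only (added, not part of the original file): the same
// computation written with Eigen array expressions; the helper name is
// hypothetical.
inline Eigen::VectorXd stable_softmax_sketch(const Eigen::VectorXd& in) {
  // shift by the maximum, exponentiate element-wise, then normalize
  Eigen::ArrayXd shifted = (in.array() - in.maxCoeff()).exp();
  return (shifted / shifted.sum()).matrix();
}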
    m_bias(units.size(), units.front().first.rows())
  {
    for (const auto& unit: units) {
  const size_t out_dim = m_matrices.front().rows();
  MatrixXd outputs(n_mat, out_dim);
  for (size_t mat_n = 0; mat_n < n_mat; mat_n++) {
    outputs.row(mat_n) = m_matrices.at(mat_n) * in;
  }
  return outputs.colwise().maxCoeff();
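// Maxout: every stored matrix produces one candidate output row (one row of
// `outputs` per maxout unit) and the layer returns the column-wise maximum,
// i.e. out_j = max_k (W_k * in + b_k)_j; the per-unit biases are the rows of
// m_bias filled in the constructor above and are presumably added to
// `outputs` before the maximum is taken.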
  VectorXd shift = in + m_b;
  return m_W.cwiseProduct(shift);
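// Normalization layer: an element-wise affine transform,
//   out = m_W.cwiseProduct(in + m_b),
// i.e. each feature is shifted by a per-feature offset and scaled by a
// per-feature weight, which is how a trained batch-normalization layer is
// applied at inference time.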
                             const MatrixXd& W_carry,
                             const VectorXd& b_carry,
  const std::function<double(double)> sig(nn_sigmoid);
  ArrayXd c = (m_w_c * in + m_b_c).unaryExpr(sig);
  ArrayXd t = (m_w_t * in + m_b_t).unaryExpr(m_act);
  return c * t + (1 - c) * in.array();
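// Highway layer: c is the sigmoid "carry" gate and t the activated
// "transform" branch, so the output interpolates element-wise between the
// transformed value and the untouched input:
//   out = c * t + (1 - c) * in.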
                                 const std::vector<lwtDev::LayerConfig>& layers)
  const size_t n_layers = layers.size();
  for (size_t layer_n = 0; layer_n < n_layers; layer_n++) {
    auto& layer = layers.at(layer_n);
    in = layer->scan(in);
  auto& comps = layer.components;
  const bool& go_backwards = layer.go_backwards;
  const bool& return_sequence = layer.return_sequence;
  m_layers.push_back(
    new LSTMLayer(layer.activation, layer.inner_activation,
  auto& comps = layer.components;
  m_layers.push_back(
    new GRULayer(layer.activation, layer.inner_activation,
  if (layer.sublayers.size() != 2)

  const LayerConfig forward_layer_conf = layer.sublayers[0];
  const LayerConfig backward_layer_conf = layer.sublayers[1];
  size_t n_forward = 0;
  size_t n_backward = 0;

  std::unique_ptr<IRecurrentLayer> forward_layer(m_layers.back());
  std::unique_ptr<IRecurrentLayer> backward_layer(m_layers.back());
  backward_layer->m_go_backwards = (!forward_layer->m_go_backwards);
  m_layers.push_back(new BidirectionalLayer(std::move(forward_layer),
                                            std::move(backward_layer),
                                            layer.merge_mode,
                                            layer.return_sequence));
  for (const auto& emb: layer.embedding) {
    size_t n_wt = emb.weights.size();
    size_t n_cats = n_wt / emb.n_out;
    n_inputs += emb.n_out - 1;
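// Each embedding replaces one integer-valued (categorical) input row with
// emb.n_out real-valued rows, so the stack's input width grows by
// emb.n_out - 1 per embedded variable; n_cats, the number of categories,
// follows from dividing the flattened weight count by the output width.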
                                 const std::vector<LayerConfig>& layers) {
  std::vector<LayerConfig> recurrent;
  std::vector<LayerConfig> feed_forward;
  std::set<Architecture> recurrent_arcs{
  for (const auto& layer: layers) {
    if (recurrent_arcs.count(layer.architecture)) {
      recurrent.push_back(layer);
    } else {
      feed_forward.push_back(layer);
  return m_stack->compute(in.col(in.cols() - 1));
  if (var_row_index < 0)
      "EmbeddingLayer::EmbeddingLayer - can not set var_row_index<0,"
      " it is an index for a matrix row!");
441 "EmbeddingLayer::scan - var_row_index is larger than input matrix"
  MatrixXd embedded(m_W.rows(), x.cols());

  for (int icol = 0; icol < x.cols(); icol++) {
    double vector_idx = x(m_var_row_index, icol);
    bool is_int = std::floor(vector_idx) == vector_idx;
    bool is_valid = (vector_idx >= 0) && (vector_idx < m_W.cols());
      "Invalid embedded index: " + std::to_string(vector_idx));
    embedded.col(icol) = m_W.col(vector_idx);
  MatrixXd out(m_W.rows() + (x.rows() - 1), x.cols());

  out.block(m_var_row_index, 0, embedded.rows(), embedded.cols()) = embedded;
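// The scanned output keeps every non-embedded input row and replaces the
// single categorical row with the m_W.rows() embedding rows, so the feature
// dimension goes from x.rows() to m_W.rows() + x.rows() - 1 while the number
// of time steps (columns) stays the same.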
                       const MatrixXd & W_i, const MatrixXd & U_i, const VectorXd & b_i,
                       const MatrixXd & W_f, const MatrixXd & U_f, const VectorXd & b_f,
                       const MatrixXd & W_o, const MatrixXd & U_o, const VectorXd & b_o,
                       const MatrixXd & W_c, const MatrixXd & U_c, const VectorXd & b_c,
                       bool return_sequence):
  LSTMState(size_t n_input, size_t n_outputs);

    C_t(MatrixXd::Zero(n_output, n_input)),
    h_t(MatrixXd::Zero(n_output, n_input)),
  int tm1 = s.time == 0 ? 0 : s.time - 1;
  VectorXd h_tm1 = s.h_t.col(tm1);
  VectorXd C_tm1 = s.C_t.col(tm1);

  s.C_t.col(s.time) = f.cwiseProduct(C_tm1) + i.cwiseProduct(ct);
  s.h_t.col(s.time) = o.cwiseProduct(s.C_t.col(s.time).unaryExpr(act_fun));
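// Standard (Keras-style) LSTM update: the gate vectors i, f, o and the cell
// candidate ct are presumably computed as
//   i = inner_act(W_i*x + U_i*h_tm1 + b_i), likewise f and o with their own
//   weights, and ct = act(W_c*x + U_c*h_tm1 + b_c);
// the two lines above then give, element-wise,
//   C_t = f * C_tm1 + i * ct   and   h_t = o * act(C_t),
// written into column s.time of the state matrices.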
      step(x.col(x.cols() - 1 - state.time), state);
                     const MatrixXd & W_z, const MatrixXd & U_z, const VectorXd & b_z,
                     const MatrixXd & W_r, const MatrixXd & U_r, const VectorXd & b_r,
                     const MatrixXd & W_h, const MatrixXd & U_h, const VectorXd & b_h):
  GRUState(size_t n_input, size_t n_outputs);

    h_t(MatrixXd::Zero(n_output, n_input)),
  int tm1 = s.time == 0 ? 0 : s.time - 1;
  VectorXd h_tm1 = s.h_t.col(tm1);

  VectorXd rh = r.cwiseProduct(h_tm1);

  VectorXd one = VectorXd::Ones(z.size());
  s.h_t.col(s.time) = z.cwiseProduct(h_tm1) + (one - z).cwiseProduct(hh);
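// Standard GRU update: z and r are the update and reset gates (presumably
// z = inner_act(W_z*x + U_z*h_tm1 + b_z), and similarly for r), rh gates the
// previous state before it enters the candidate hh through U_h, and the new
// state blends old and candidate element-wise:
//   h_t = z * h_tm1 + (1 - z) * hh.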
                                         std::unique_ptr<IRecurrentLayer> backward_layer,
                                         const std::string& merge_mode,
                                         bool return_sequence):
  MatrixXd backward_rev;
    backward_rev = backward.rowwise().reverse();
    backward_rev = backward;
  if (m_merge_mode == "mul")
    return forward.array() * backward_rev.array();
  else if (m_merge_mode == "sum")
    return forward.array() + backward_rev.array();
  else if (m_merge_mode == "ave")
    return (forward.array() + backward_rev.array()) / 2.;
  else if (m_merge_mode == "concat") {
    MatrixXd concatMatr(forward.rows(), forward.cols() + backward_rev.cols());
    concatMatr << forward, backward_rev;
645 "Merge mode "+
m_merge_mode+
"not implemented. Choose one of [mul, sum, ave, concat]");
  if (x < -30.0) return 0.0;
  if (x > 30.0) return 1.0;
  return 1.0 / (1.0 + std::exp(-1.0 * x));
  double out = 0.2 * x + 0.5;
  if (out < 0) return 0.0;
  if (out > 1) return 1.0;
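// Hard sigmoid: the piecewise-linear approximation
//   hard_sigmoid(x) = clip(0.2*x + 0.5, 0, 1),
// which saturates at 0 and 1 like the exact sigmoid but is cheaper to
// evaluate.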
  if (std::isnan(x)) return x;
  else return x > 0 ? x : 0;
  double exp_term = m_alpha * (std::exp(x) - 1);
  return x >= 0 ? x : exp_term;
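// ELU activation: f(x) = x for x >= 0 and f(x) = alpha * (exp(x) - 1) for
// x < 0, so the function is continuous at zero and saturates at -alpha for
// large negative inputs.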
  size_t n_elements = weights.size();
  if ((n_elements % n_inputs) != 0) {
    std::string problem = "matrix elements not divisible by number"
      " of columns. Elements: " + std::to_string(n_elements) +
      ", Inputs: " + std::to_string(n_inputs);

  size_t n_outputs = n_elements / n_inputs;
  MatrixXd matrix(n_outputs, n_inputs);
  for (size_t row = 0; row < n_outputs; row++) {
    for (size_t col = 0; col < n_inputs; col++) {
      double element = weights.at(col + row * n_inputs);
      matrix(row, col) = element;
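// The flat weight vector is read in row-major order: entry (row, col) of the
// n_outputs x n_inputs matrix comes from weights[col + row * n_inputs], so
// with n_inputs = 3 the vector {1, 2, 3, 4, 5, 6} becomes
//   [ 1 2 3 ]
//   [ 4 5 6 ].
//
// Illustration only (added, not part of the original file): the same
// row-major interpretation expressed with an Eigen::Map; the helper name is
// hypothetical.
inline Eigen::MatrixXd build_matrix_sketch(const std::vector<double>& weights,
                                           size_t n_inputs) {
  using RowMajorMat =
    Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
  size_t n_outputs = weights.size() / n_inputs;
  // map the flat storage as a row-major matrix, then copy into a MatrixXd
  return Eigen::Map<const RowMajorMat>(weights.data(), n_outputs, n_inputs);
}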
  VectorXd out(bias.size());
  for (const auto& val: bias) {
  bool wt_ok = layer.weights.size() == 0;
  bool bias_ok = layer.bias.size() == 0;
  bool maxout_ok = layer.sublayers.size() > 0;
  if (wt_ok && bias_ok && maxout_ok && act_ok) return;

  if (layer.sublayers.size() > 0) {

  if (layer.sublayers.size() > 0) {
  using namespace Eigen;
  size_t u_el = layer.U.size();
  MatrixXd U = u_el ? build_matrix(layer.U, n_out) : MatrixXd::Zero(0, 0);

  size_t u_out = U.rows();
  size_t b_out = bias.rows();
  bool u_mismatch = (u_out != n_out) && (u_out > 0);
  if ( u_mismatch || b_out != n_out) {
      "Output dims mismatch, W: " + std::to_string(n_out) +
      ", U: " + std::to_string(u_out) + ", b: " + std::to_string(b_out));

  return {std::move(weights), std::move(U), std::move(bias)};
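// get_component() bundles one gate's parameters into a DenseComponents
// struct: the input weights W (presumably built from layer.weights with
// build_matrix), the optional recurrent weights U (a 0x0 matrix when
// layer.U is empty), and the bias b, after checking that their output
// dimensions agree.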