using namespace Eigen;
m_layers.push_back(new BiasLayer(std::vector<double>{1, 1, 1, 1}));
Stack::Stack(size_t n_inputs, const std::vector<LayerConfig>& layers,
    n_inputs = add_layers(n_inputs, layers.at(nnn));
  m_n_outputs = n_inputs;
for (auto& layer: m_layers) {
VectorXd Stack::compute(VectorXd in) const {
  for (const auto& layer: m_layers) {
    in = layer->compute(in);
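// The stack applies its layers in sequence, so for layers f1 ... fN the result
// is fN(...f2(f1(x))...). A minimal usage sketch (the JSON-parsing step is an
// assumption, not part of this file):
//
//   std::vector<LayerConfig> cfg = /* parsed from the network JSON */;
//   Stack net(n_inputs, cfg);
//   VectorXd y = net.compute(x);  // x has n_inputs rows, y has net.n_outputs()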
size_t Stack::n_outputs() const {
if (layer.architecture == Architecture::DENSE) {
  return add_dense_layers(n_inputs, layer);
} else if (layer.architecture == Architecture::NORMALIZATION) {
  return add_normalization_layers(n_inputs, layer);
} else if (layer.architecture == Architecture::HIGHWAY) {
  return add_highway_layers(n_inputs, layer);
} else if (layer.architecture == Architecture::MAXOUT) {
  return add_maxout_layers(n_inputs, layer);
assert(layer.architecture == Architecture::DENSE);
size_t n_outputs = n_inputs;
if (layer.weights.size() > 0) {
if (layer.bias.size() > 0) {
  if (n_outputs != layer.bias.size()) {
    std::string problem = "tried to add a bias layer with " +
assert(layer.architecture == Architecture::NORMALIZATION);
if (layer.weights.size() < 1 || layer.bias.size() < 1) {
  std::string problem = "Either weights or bias layer size is < 1";
if (layer.weights.size() != layer.bias.size()) {
  std::string problem = "weights and bias layers are not equal in size!";
auto& comps = layer.components;
const auto& c = get_component(comps.at(Component::CARRY), n_inputs);
assert(layer.architecture == Architecture::MAXOUT);
std::vector<MaxoutLayer::InitUnit> matrices;
std::set<size_t> n_outputs;
for (const auto& sublayer: layer.sublayers) {
  n_outputs.insert(matrix.rows());
  matrices.push_back(std::make_pair(matrix, bias));
if (n_outputs.size() == 0) {
else if (n_outputs.size() != 1) {
return *n_outputs.begin();
VectorXd DummyLayer::compute(const VectorXd& in) const {
return in.unaryExpr(m_func);
size_t n_elements = in.rows();
VectorXd exp(n_elements);
double max = in.maxCoeff();
for (size_t iii = 0; iii < n_elements; iii++) {
double sum_exp = exp.sum();
return exp / sum_exp;
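// Numerically stable softmax: subtracting the maximum before exponentiating
// avoids overflow without changing the result, since
//   softmax(x)_i = exp(x_i - max(x)) / sum_j exp(x_j - max(x)).
// The same computation as a compact Eigen sketch (not the form used above):
//   ArrayXd p = (in.array() - in.maxCoeff()).exp();
//   return (p / p.sum()).matrix();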
const size_t out_dim = m_matrices.front().rows();
MatrixXd outputs(n_mat, out_dim);
for (size_t mat_n = 0; mat_n < n_mat; mat_n++) {
return outputs.colwise().maxCoeff();
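// Maxout: each sublayer contributes one row of `outputs` with its affine
// response, and the column-wise maximum then picks, per output unit j,
//   y_j = max_k (W_k * in + b_k)_j.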
VectorXd shift = in + m_b;
return m_W.cwiseProduct(shift);
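// The normalization layer is a purely element-wise affine map,
//   y_i = w_i * (x_i + b_i),
// which reproduces the usual (x - mean) / std input scaling when
// b_i = -mean_i and w_i = 1 / std_i.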
const MatrixXd& W_carry,
const VectorXd& b_carry,
m_w_t(W), m_b_t(b), m_w_c(W_carry), m_b_c(b_carry),
return c * t + (1 - c) * in.array();
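// Highway layer: with a transform output t and a carry gate c (in the standard
// formulation c is a sigmoid of its own affine map of the input), the output
// mixes the transformed and the raw input element-wise,
//   y = c .* t + (1 - c) .* x,
// so a gate value near zero lets the input pass through unchanged.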
const std::vector<lwtDev::LayerConfig>& layers)
const size_t n_layers = layers.size();
for (size_t layer_n = 0; layer_n < n_layers; layer_n++) {
in = layer->scan(in);
auto& comps = layer.components;
const bool& go_backwards = layer.go_backwards;
const bool& return_sequence = layer.return_sequence;
auto& comps = layer.components;
if (layer.sublayers.size() != 2)
size_t n_forward = 0;
size_t n_backward = 0;
std::unique_ptr<IRecurrentLayer> forward_layer(m_layers.back());
std::unique_ptr<IRecurrentLayer> backward_layer(m_layers.back());
std::move(backward_layer),
layer.return_sequence));
for (const auto& emb: layer.embedding) {
  size_t n_wt = emb.weights.size();
  size_t n_cats = n_wt / emb.n_out;
  n_inputs += emb.n_out - 1;
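// Input-width bookkeeping for embeddings: a table stored as a flat weight
// vector of n_out * n_cats entries replaces one categorical input row with
// n_out real-valued rows, so each embedding grows the downstream input count
// by n_out - 1.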
const std::vector<LayerConfig>& layers) {
std::vector<LayerConfig> recurrent;
std::vector<LayerConfig> feed_forward;
std::set<Architecture> recurrent_arcs{
if (recurrent_arcs.count(layer.architecture)) {
  recurrent.push_back(layer);
  feed_forward.push_back(layer);
m_stack = new Stack(m_recurrent->n_outputs(), feed_forward);
in = m_recurrent->scan(in);
return m_stack->compute(in.col(in.cols() - 1));
return m_stack->n_outputs();
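// The recurrent layers are scanned over the whole sequence and only the final
// time step (the last column of the scan output) is passed to the dense stack,
// reducing a (features x time) matrix to a single output vector.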
m_var_row_index(var_row_index),
if (var_row_index < 0)
  "EmbeddingLayer::EmbeddingLayer - cannot set var_row_index < 0,"
  " it is an index for a matrix row!");
"EmbeddingLayer::scan - var_row_index is larger than input matrix"
MatrixXd embedded(m_W.rows(), x.cols());
bool is_int = std::floor(vector_idx) == vector_idx;
bool is_valid = (vector_idx >= 0) && (vector_idx < m_W.cols());
embedded.col(icol) = m_W.col(vector_idx);
MatrixXd out(m_W.rows() + (x.rows() - 1), x.cols());
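// Shape bookkeeping in EmbeddingLayer::scan: the designated row of x carries
// one category index per time step, `embedded` collects the looked-up column
// of m_W for each step, and the output keeps the remaining x.rows() - 1
// pass-through rows, hence m_W.rows() + (x.rows() - 1) rows in total.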
const MatrixXd& W_i, const MatrixXd& U_i, const VectorXd& b_i,
const MatrixXd& W_f, const MatrixXd& U_f, const VectorXd& b_f,
const MatrixXd& W_o, const MatrixXd& U_o, const VectorXd& b_o,
const MatrixXd& W_c, const MatrixXd& U_c, const VectorXd& b_c,
bool return_sequence):
LSTMState(size_t n_input, size_t n_output);
C_t(MatrixXd::Zero(n_output, n_input)),
h_t(MatrixXd::Zero(n_output, n_input)),
int tm1 = s.time == 0 ? 0 : s.time - 1;
VectorXd h_tm1 = s.h_t.col(tm1);
VectorXd C_tm1 = s.C_t.col(tm1);
s.C_t.col(s.time) = f.cwiseProduct(C_tm1) + i.cwiseProduct(ct);
s.h_t.col(s.time) = o.cwiseProduct(s.C_t.col(s.time).unaryExpr(act_fun));
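// One LSTM time step in the usual notation, with input, forget and output
// gates i, f, o and candidate cell state ct computed from x_t and h_{t-1} in
// the elided lines above:
//   C_t = f .* C_{t-1} + i .* ct
//   h_t = o .* act(C_t)
// Column `time` of s.C_t / s.h_t holds the current state; at the first step
// tm1 points at the zero-initialized column 0, giving a zero initial state.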
step(x.col(x.cols() - 1 - state.time), state);
const MatrixXd& W_z, const MatrixXd& U_z, const VectorXd& b_z,
const MatrixXd& W_r, const MatrixXd& U_r, const VectorXd& b_r,
const MatrixXd& W_h, const MatrixXd& U_h, const VectorXd& b_h):
GRUState(size_t n_input, size_t n_output);
h_t(MatrixXd::Zero(n_output, n_input)),
int tm1 = s.time == 0 ? 0 : s.time - 1;
VectorXd h_tm1 = s.h_t.col(tm1);
VectorXd rh = r.cwiseProduct(h_tm1);
VectorXd one = VectorXd::Ones(z.size());
s.h_t.col(s.time) = z.cwiseProduct(h_tm1) + (one - z).cwiseProduct(hh);
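// One GRU time step: with update gate z, reset gate r and candidate hh
// (built from x_t and r .* h_{t-1} in the elided lines above),
//   h_t = z .* h_{t-1} + (1 - z) .* hh,
// where the `one` vector spells out the (1 - z) factor in Eigen.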
std::unique_ptr<IRecurrentLayer> backward_layer,
const std::string& merge_mode,
bool return_sequence):
m_forward_layer(std::move(forward_layer)),
m_backward_layer(std::move(backward_layer)),
m_merge_mode(merge_mode)
MatrixXd backward_rev;
backward_rev = backward.rowwise().reverse();
backward_rev = backward;
return forward.array() * backward_rev.array();
return forward.array() + backward_rev.array();
return (forward.array() + backward_rev.array()) / 2.;
MatrixXd concatMatr(forward.rows(), forward.cols() + backward_rev.cols());
concatMatr << forward, backward_rev;
"Merge mode " + m_merge_mode +
" not implemented. Choose one of [mul, sum, ave, concat]");
if (x < -30.0) return 0.0;
if (x > 30.0) return 1.0;
double out = 0.2 * x + 0.5;
if (out < 0) return 0.0;
if (out > 1) return 1.0;
if (std::isnan(x)) return x;
else return x > 0 ? x : 0;
return x >= 0 ? x : exp_term;
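// The activation helpers above, summarized (the elided pieces follow the
// standard definitions):
//   sigmoid:      1 / (1 + exp(-x)), pinned to 0 or 1 once |x| > 30 to avoid
//                 pointless exp() overflow;
//   hard sigmoid: clip(0.2 * x + 0.5, 0, 1);
//   ReLU:         max(x, 0), with NaN inputs passed through unchanged;
//   ELU:          x for x >= 0, and below zero the exp_term, which in the
//                 standard ELU is alpha * (exp(x) - 1).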
size_t n_elements = weights.size();
if ((n_elements % n_inputs) != 0) {
  std::string problem = "matrix elements not divisible by number"
size_t n_outputs = n_elements / n_inputs;
MatrixXd matrix(n_outputs, n_inputs);
for (size_t row = 0; row < n_outputs; row++) {
  for (size_t col = 0; col < n_inputs; col++) {
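// build_matrix reshapes a flat weight vector into an (n_outputs x n_inputs)
// matrix; the divisibility check above guarantees the reshape is exact.
// Assuming the usual row-major flattening, the element copied in the elided
// inner-loop body is weights[row * n_inputs + col], so e.g. a 6-element vector
// with n_inputs = 3 becomes a 2 x 3 matrix.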
VectorXd out(bias.size());
for (const auto& val: bias) {
bool wt_ok = layer.weights.size() == 0;
bool bias_ok = layer.bias.size() == 0;
bool maxout_ok = layer.sublayers.size() > 0;
if (wt_ok && bias_ok && maxout_ok && act_ok) return;
if (layer.sublayers.size() > 0) {
if (layer.sublayers.size() > 0) {
using namespace Eigen;
size_t u_el = layer.U.size();
size_t u_out = U.rows();
size_t b_out = bias.rows();
bool u_mismatch = (u_out != n_out) && (u_out > 0);
if (u_mismatch || b_out != n_out) {