game-math/SGD_8h_source.html

#pragma once


#include "../Matrix.h"

#include "../matrix_utils.h"

#include <functional>


class SGD

{

  size_t n_iter;

  double eta;

  bool shuffle = false;

  std::function<double(const Matrix<double>&, const Matrix<double>&)> weight_update = nullptr;

  std::function<Matrix<double>(const Matrix<double>&)> net_input_fun = nullptr;


public:

  Matrix<double> cost;


  explicit SGD(

  double _eta                                                                                   = 0.01,

  size_t iter                                                                                   = 10,

  bool _shuffle                                                                                 = false,

  const std::function<double(const Matrix<double>&, const Matrix<double>&)>& update_weights_fun = nullptr,

  const std::function<Matrix<double>(const Matrix<double>&)>& netInputFun                       = nullptr)

    : n_iter(iter)

    , eta(_eta)

    , shuffle(_shuffle) {

    if(update_weights_fun != nullptr) { weight_update = update_weights_fun; }

    if(netInputFun != nullptr) { net_input_fun = netInputFun; }

    cost = Matrix<double>(0.0, 0, 0);

  }


  void fit(const Matrix<double>& X, const Matrix<double>& y, Matrix<double>& weights) {

    cost       = Matrix<double>(0, n_iter, 1);

    auto xCopy = X;

    auto yCopy = y;

    for(size_t iter = 0; iter < n_iter; iter++) {

      if(shuffle) {

        auto fooPair = shuffleData(X, y);

        xCopy        = fooPair.first;

        yCopy        = fooPair.second;

      }

      double costSum = 0;

      for(const auto& elem : zip(xCopy, yCopy)) {

        if(weight_update != nullptr) {

          costSum += weight_update(elem.first, elem.second);

        } else {

          costSum += update_weights(elem.first, elem.second, weights);

        }

      }


      cost(iter, 0) = costSum;

    }

  }


  void partial_fit(const Matrix<double>& X, const Matrix<double>& y, Matrix<double>& weights) const {

    if(y.rows() > 1) {

      for(const auto& elem : zip(X, y)) { update_weights(elem.first, elem.second, weights); }

    } else {

      update_weights(X, y, weights);

    }

  }


  double update_weights(const Matrix<double>& xi, const Matrix<double>& target, Matrix<double>& weights) const {

    auto output = Matrix<double>();

    if(net_input_fun != nullptr) {

      output = net_input_fun(xi);

    } else {

      output = netInput(xi, weights);

    }

    auto error = target - output;

    weights.SetRow(0, weights(0) + eta * error);

    auto delta = eta * (xi.Transpose() * error);

    for(size_t i = 0; i < weights.rows() - 1; i++) { weights.SetRow(i + 1, weights(i + 1) + delta(i)); }

    return ((error * error) * 0.5).sumElements() / (double)target.rows();

  }


  std::pair<Matrix<double>, Matrix<double>>

  shuffleData([[maybe_unused]] const Matrix<double>& X, [[maybe_unused]] const Matrix<double>& y) {

    return { X, y };

  }


  [[nodiscard]] static Matrix<double> netInput(const Matrix<double>& X, const Matrix<double>& weights) {

    Matrix<double> A(0, weights.rows() - 1, weights.columns());

    Matrix<double> B(0, 1, weights.columns());

    for(size_t i = 0; i < weights.rows(); i++) {

      for(size_t j = 0; j < weights.columns(); j++) {

        if(i == 0) {

          B(i, j) = weights(i, j);

        } else {

          A(i - 1, j) = weights(i, j);

        }

      }

    }


    // Note: since Matrix<T> will never allow Matrix-Scalar Addition

    //       we need to create a vector of size of rows of the input

    //       values (jeez, horrible sentence). Therefore we rescale

    //       B and assign the bias to each element.

    B = Matrix<double>(B(0, 0), X.rows(), 1);

    return (X * A) + B;

  }

};

Matrix
Definition: Matrix.h:42

Matrix::Transpose
constexpr Matrix< T > Transpose() const
Definition: Matrix.h:256

Matrix::SetRow
void SetRow(size_t index, const Matrix< T > &other)
Definition: Matrix.h:541

Matrix::rows
size_t rows() const
Definition: Matrix.h:193

Matrix::columns
size_t columns() const
Definition: Matrix.h:198

SGD
Definition: SGD.h:12

SGD::update_weights
double update_weights(const Matrix< double > &xi, const Matrix< double > &target, Matrix< double > &weights) const
Definition: SGD.h:101

SGD::net_input_fun
std::function< Matrix< double >(const Matrix< double > &)> net_input_fun
represents net input function
Definition: SGD.h:22

SGD::shuffle
bool shuffle
shuffle during training
Definition: SGD.h:18

SGD::n_iter
size_t n_iter
number of iterations (epochs) during fit
Definition: SGD.h:14

SGD::partial_fit
void partial_fit(const Matrix< double > &X, const Matrix< double > &y, Matrix< double > &weights) const
Definition: SGD.h:86

SGD::shuffleData
std::pair< Matrix< double >, Matrix< double > > shuffleData(const Matrix< double > &X, const Matrix< double > &y)
Definition: SGD.h:122

SGD::netInput
static Matrix< double > netInput(const Matrix< double > &X, const Matrix< double > &weights)
Definition: SGD.h:132

SGD::cost
Matrix< double > cost
matrix holding cost per epoch
Definition: SGD.h:26

SGD::weight_update
std::function< double(const Matrix< double > &, const Matrix< double > &)> weight_update
represents weight update function
Definition: SGD.h:20

SGD::SGD
SGD(double _eta=0.01, size_t iter=10, bool _shuffle=false, const std::function< double(const Matrix< double > &, const Matrix< double > &)> &update_weights_fun=nullptr, const std::function< Matrix< double >(const Matrix< double > &)> &netInputFun=nullptr)
Definition: SGD.h:36

SGD::fit
void fit(const Matrix< double > &X, const Matrix< double > &y, Matrix< double > &weights)
Definition: SGD.h:57

SGD::eta
double eta
learning rate (step size that scales every weight update)
Definition: SGD.h:16

zip
std::vector< std::pair< Matrix< T >, Matrix< T > > > zip(const Matrix< T > &a, const Matrix< T > &b)
Definition: matrix_utils.h:271