src/supervised/neural-network/fully-connected.js

// Internal dependencies
import { Classifier } from '../base';
import * as Arrays from '../../arrays';
import * as Random from '../../random';

/**
 * Calculate the logistic sigmoid function for an input.
 *
 * @param {number} x - Input number
 * @return {number} Output of the sigmoid function applied to the input
 */
function sigmoid(x) {
  return 1 / (1 + Math.exp(-x));
}
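
// Note: the derivative of the sigmoid can be written in terms of the function itself,
// sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)); this identity is what
// FullyConnected#activationFunctionDerivative below implements.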

export default class FullyConnected extends Classifier {
  /**
   * Constructor. Initialize class members and store user-defined options.
   *
   * @param {Object} [optionsUser] User-defined options
   * @param {number|string} [optionsUser.numInputs = 'auto'] Number of features each input sample
   *   has. The first layer of the network has this (plus one bias node) as its number of nodes.
   *   Defaults to 'auto', which determines the number of input nodes based on the dimensionality
   *   of the training data at training time
   * @param {number|string} [optionsUser.numOutputs = 'auto'] Number of possible outputs for the
   *   network. The final layer of the network has this as its number of nodes. Defaults to 'auto',
   *   which determines the number of output nodes based on the number of unique labels in the
   *   training data at training time
   * @param {Array.<number>} [optionsUser.hiddenLayers = []] Number of nodes in the hidden layers.
   *   Each entry in this array corresponds to a single hidden layer
   * @param {number} [optionsUser.numEpochs = 20] Number of epochs (i.e., passes over all training
   *   data) to train the network for
   * @param {number} [optionsUser.learningRate = 0.01] Learning rate for training
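   *
   * @example
   * // A minimal construction sketch; the option values are illustrative, not tuned
   * // recommendations
   * const network = new FullyConnected({ hiddenLayers: [10], numEpochs: 50, learningRate: 0.05 });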
   */
  constructor(optionsUser = {}) {
    super();

    // Parse options
    const optionsDefault = {
      numInputs: 'auto',
      numOutputs: 'auto',
      hiddenLayers: [],
      numEpochs: 20,
      learningRate: 0.01,
    };

    const options = {
      ...optionsDefault,
      ...optionsUser,
    };

    this.numInputs = options.numInputs;
    this.numOutputs = options.numOutputs;
    this.hiddenLayers = options.hiddenLayers;
    this.numEpochs = options.numEpochs;
    this.learningRate = options.learningRate;

    // Initialize layers, connectivity, and weights

    /**
     * Number of nodes (including bias nodes) in each layer of the network. Filled at the start of
     * training.
     *
     * @type {Array.<number>}
     */
    this.layers = [];

    /**
     * Weights between each pair of nodes in subsequent layers. Each entry in the main array
     * contains a matrix of weights between the nodes in that layer and the nodes in the next
     * layer. This includes entries for unconnected pairs of nodes (e.g., pairs where the target
     * node is a bias node)
     *
     * @type {Array.<Array.<Array.<number>>>}
     */
    this.weights = [];

    /**
     * Boolean matrix of connectivity between each pair of nodes in subsequent layers. For format,
     * see {@link FullyConnected#weights}.
     *
     * @type {Array.<Array.<Array.<boolean>>>}
     */
    this.connectivity = [];
  }

  /**
   * Randomly initialize the weights for the neural network. For each subsequent pair of layers,
   * where the first has n nodes and the second n' nodes, initialize a matrix with n rows and n'
   * columns. Each cell in the matrix is assigned a random value in the range [-1, 1]. Furthermore,
   * the connectivity of each pair of nodes in subsequent layers is stored (where all nodes in each
   * layer are connected to all non-bias nodes in the next layer).
   *
   * The weights between layer k and layer k + 1 are stored in element k (starting at k = 0) of the
   * weights array.
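   *
   * For example, with this.layers = [3, 2], this.weights becomes a single 3x2 matrix of random
   * values in [-1, 1]; this.weights[0][2][1] then holds the weight from node 2 of layer 0 to
   * node 1 of layer 1.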
   */
  initializeWeights() {
    this.weights = [];
    this.connectivity = [];

    // Initialize weights for each subsequent pair of layers
    for (let i = 0; i < this.layers.length - 1; i++) {
      // Shape of the weight and connectivity matrices for the weights between the nodes in this and
      // the next layer
      const shape = [this.layers[i], this.layers[i + 1]];

      // Initialize weights from this layer to the next layer to a random real number in the
      // range [-1, 1]
      this.weights.push(Arrays.full(shape, () => Random.rand(-1, 1)));

      // Initialize connectivity between nodes by connecting all nodes (including bias nodes; these
      // are removed in the next few lines)
      const connectivity = Arrays.full(shape, true);

      // All layers but the last layer have a bias node: remove the connections between all nodes
      // and bias nodes in the next layer
      if (i < this.layers.length - 2) {
        connectivity.forEach(x => x[0] = false);
      }

      this.connectivity.push(connectivity);
    }
  }

  /**
   * @see {@link Classifier#train}
   */
  train(X, y) {
    // Determine the number of inputs (one input node for each sample feature) and the number of
    // outputs (one output node for each possible class) automatically, depending on user settings
    const numInputs = this.numInputs === 'auto' ? X[0].length : this.numInputs;
    const numOutputs = this.numOutputs === 'auto' ? Arrays.unique(y).length : this.numOutputs;

    // Initialize layers
    this.layers = [numInputs + 1, ...this.hiddenLayers, numOutputs];
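
    // For example, with 4 input features, hiddenLayers = [5], and 3 unique labels, the layer
    // sizes become [5, 5, 3]: the 4 features plus a bias node, then the hidden layer (in which
    // node 0 also acts as a bias node), then one output node per class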

    // Initialize weights arrays
    this.initializeWeights();

    // Train for specified number of epochs
    for (let i = 0; i < this.numEpochs; i++) {
      this.trainEpoch(X, y);
    }
  }

  /**
   * Train the network for one epoch. Samples will be shuffled inside this function before training.
   *
   * @param {Array.<Array.<number>>} X - Features of samples to train with
   * @param {Array.<mixed>} y - Labels of samples
   */
  trainEpoch(X, y) {
    // Shuffle data points
    const [XUse, yUse] = Arrays.shuffle(X, y);

    // Train for each sample individually
    for (let i = 0; i < XUse.length; i += 1) {
      this.trainSample(XUse[i].slice(), yUse[i]);
    }
  }

  /**
   * Calculate root-mean-square error of the network on some data set.
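   * Conceptually, RMSE = sqrt((e_1 ^ 2 + ... + e_m ^ 2) / m), where e_i is the value of
   * {@link FullyConnected#calculateError} for sample i and m is the number of samples.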
   *
   * @param {Array.<Array.<number>>} X - Features of samples to calculate RMSE for
   * @param {Array.<mixed>} y - Labels of samples
   * @return {number} Root-mean-square error of the network on the given samples
   */
  calculateRMSE(X, y) {
    return Math.sqrt(
      X.reduce((a, x, i) => a + this.calculateError(x, y[i]) ** 2, 0) / X.length
    );
  }

  /**
   * Calculate the squared error between the network outputs for a sample and the one-hot encoded
   * targets for its label.
   *
   * @param {Array.<number>} x - Input sample
   * @param {number} y - Sample label
   * @return {number} Half the sum of squared differences between the one-hot encoded targets for
   *   the sample label and the outputs obtained by passing the sample through the network
   */
  calculateError(x, y) {
    const [, outputs] = this.forwardPass(x);
    return outputs[outputs.length - 1]
      .reduce((a, o, i) => a + 0.5 * ((o - (i == y ? 1 : 0)) ** 2), 0);
  }

  /**
   * Apply the delta rule to the result of a forward pass through the network, expressed by the
   * specified activations and outputs. The network targets corresponding to the forward pass need
   * to be specified too.
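   *
   * Concretely, for an output-layer node the delta is f'(activation) * (output - target), and
   * for a hidden-layer node it is f'(activation) times the weighted sum of the deltas of the
   * nodes in the next layer, where f' is the derivative of the activation function.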
   *
   * @param {Array.<Array.<number>>} activations - Network activations for each node in each layer
   * @param {Array.<Array.<number>>} outputs - Network outputs (i.e., the activations passed through
   *   the activation function) for each node in each layer
   * @param {Array.<number>} targets - Network targets for the final layer
   * @return {Array.<Array.<number>>} Deltas calculated for each node in each layer. The deltas
   *   for the bias nodes are not calculated, and set to 0
   */
  deltaRule(activations, outputs, targets) {
    // Calculate deltas using the generalized delta rule
    const deltas = this.layers.map(x => Arrays.zeros(x));

    // Start at the final layer, and calculate deltas going backward until the second layer
    for (let k = this.layers.length - 1; k > 0; k--) {
      // Index of first regular node in this layer
      const startNode = (k < this.layers.length - 1) ? 1 : 0;

      // Loop over all non-bias nodes in the layer
      for (let i = startNode; i < this.layers[k]; i++) {
        // Extract output and activation for this node
        const activation = activations[k][i];
        const output = outputs[k][i];

        // Last layer: compare the node's output to its target
        if (k === this.layers.length - 1) {
          deltas[k][i] = this.activationFunctionDerivative(activation) * (output - targets[i]);
        }
        // Earlier layers
        else {
          // Calculate sum of weighted deltas in next layer
          const nextDeltaSum = deltas[k + 1].reduce((r, a, j) => r + a * this.weights[k][i][j], 0);
          deltas[k][i] = this.activationFunctionDerivative(activation) * nextDeltaSum;
        }
      }
    }

    return deltas;
  }

  /**
   * Train the network on a single sample
   *
   * @param {Array.<number>} x - Input sample
   * @param {number} y - Sample label
   */
  trainSample(x, y) {
    // Pass the sample through the network
    const [activations, outputs] = this.forwardPass(x);

    // Apply one-hot encoding to the sample label
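    // (e.g., label y = 2 with four output nodes yields targets [0, 0, 1, 0])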
    const targets = [...Array(this.layers[this.layers.length - 1])].map((a, i) => i == y ? 1 : 0);

    // Calculate the delta for each node in each layer
    const deltas = this.deltaRule(activations, outputs, targets);

    // Update weights
    for (let k = 0; k < this.layers.length - 1; k++) {
      // Loop over all pairs of connected nodes in layers k and k + 1
      for (let i = 0; i < this.layers[k]; i++) {
        for (let j = 0; j < this.layers[k + 1]; j++) {
          if (!this.connectivity[k][i][j]) {
            continue;
          }

          // Gradient descent step: move the weight against the error gradient, which for this
          // weight is the source node's output times the target node's delta
          this.weights[k][i][j] -= this.learningRate * outputs[k][i] * deltas[k + 1][j];
        }
      }
    }
  }

  /**
   * Pass a sample through the network, calculating the activations and outputs for all nodes in the
   * network.
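   *
   * For each non-bias node, the activation is the weighted sum of the previous layer's outputs
   * (activation_j = sum over i of output_i * weight_ij), and the node's output is the activation
   * function applied to that activation.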
   *
   * @param {Array.<number>} x - Data point features
   * @return {Array} Array with two elements containing, respectively, the activations and the
   *   outputs for each node in the network
   */
  forwardPass(x) {
    if (x.length != this.layers[0] - 1) {
      throw new Error('Number of features of samples should match the number of network inputs.');
    }

    // Outputs and activations of the nodes in each layer (node 0 of each non-final layer is a
    // bias node)
    const activations = this.layers.map(a => Arrays.zeros(a));
    const outputs = this.layers.map(a => Arrays.zeros(a));

    // Fill the outputs of the first layer with the sample features, and initialize the activations
    // of the first layer to an empty list
    activations[0] = [];
    outputs[0] = [1, ...x];

    // Propagate the inputs layer-by-layer
    for (let layer = 1; layer < this.layers.length; layer++) {
      // Index of first regular node in this layer
      let startNode = 0;

      // If this is not the output layer, set the output of the bias node to 1
      if (layer < this.layers.length - 1) {
        startNode = 1;

        // Bias node
        outputs[layer][0] = 1;
      }

      // Calculate the activation and output of each (non-bias) node in the layer
      for (let node = startNode; node < this.layers[layer]; node++) {
        // Calculate the activation as the weighted sum of the outputs (including the bias node) of
        // the previous layer
        activations[layer][node] = outputs[layer - 1].reduce((r, a, i) => {
          return r + a * this.weights[layer - 1][i][node];
        }, 0);

        // Calculate the output of this node by applying the activation function to the activation
        outputs[layer][node] = this.activationFunction(activations[layer][node]);
      }
    }

    return [activations, outputs];
  }

  /**
   * Get the activation function value for the specified input.
   *
   * @param {number} a - Input value
   * @return {number} Return value of activation function applied to input value
   */
  activationFunction(a) {
    return sigmoid(a);
  }

  /**
   * Get the function value for the derivative of the activation function for the specified input.
   *
   * @param {number} a - Input value
   * @return {number} Return value of derivative of activation function applied to input value
   */
  activationFunctionDerivative(a) {
    return sigmoid(a) * (1 - sigmoid(a));
  }

  /**
   * Manually set the weight matrices of the network.
   *
   * @param {Array.<Array.<Array.<number>>>} weights - Weight matrix for each pair of subsequent
   *   layers. For more information, see {@link FullyConnected#weights}
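   *
   * @example
   * // Hypothetical two-layer network with this.layers = [3, 1]: a single 3x1 weight matrix
   * network.setWeights([[[0.5], [-0.25], [0.75]]]);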
   */
  setWeights(weights) {
    this.weights = weights;
  }

  /**
   * @see {@link Classifier#predict}
   */
  predict(X) {
    return X.map(x => {
      const [, outputs] = this.forwardPass(x);
      return Arrays.argMax(outputs[outputs.length - 1]);
    });
  }
}
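
// A minimal end-to-end usage sketch (the dataset values are illustrative only; train and predict
// follow the Classifier interface this class extends):
//
//   const network = new FullyConnected({ hiddenLayers: [4], numEpochs: 500, learningRate: 0.5 });
//   network.train([[0, 0], [0, 1], [1, 0], [1, 1]], [0, 1, 1, 0]);
//   const predictions = network.predict([[1, 0], [1, 1]]); // one predicted label per input sample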