src/supervised/svm/svm.js
// Internal dependencies
import { OneVsAllClassifier, Classifier } from '../base';
import LinearKernel from '../../kernel/linear';
import * as Arrays from '../../arrays';
import * as Random from '../../random';
/**
* SVM learner for binary classification problem.
*/
export class BinarySVM extends Classifier {
/**
* Constructor. Initialize class members and store user-defined options.
*
* @param {Object} [optionsUser] - User-defined options for SVM
* @param {number} [optionsUser.C = 100] - Regularization (i.e. penalty for slack variables)
* @param {Object} [optionsUser.kernel = null] - Kernel. Defaults to the linear kernel
* @param {number} [optionsUser.convergenceNumPasses = 20] - Number of passes without alphas
* changing to treat the algorithm as converged
* @param {number} [optionsUser.numericalTolerance = 1e-6] - Numerical tolerance for a
* value in the to be equal to another SMO algorithm to be equal to another value
* @param {boolean} [optionsUser.useKernelCache = true] - Whether to cache calculated kernel
* values for training sample pairs. Enabling this option (which is the default) generally
* improves the performance in terms of speed at the cost of memory
*/
constructor(optionsUser = {}) {
super();
// Parse options
const optionsDefault = {
C: 100.0,
kernel: null,
convergenceNumPasses: 20,
numericalTolerance: 1e-6,
useKernelCache: true,
};
const options = {
...optionsDefault,
...optionsUser,
};
// Set options
this.C = options.C;
this.kernel = options.kernel === null ? new LinearKernel() : options.kernel;
this.convergenceNumPasses = options.convergenceNumPasses;
this.numericalTolerance = options.numericalTolerance;
this.useKernelCache = options.useKernelCache;
// Set properties
this.isTraining = false;
}
/**
* Get the signed value of the class index. Returns -1 for class index 0, 1 for class index 1.
*
* @param {number} classIndex - Class index
* @return {number} Sign corresponding to class index
*/
getClassIndexSign(classIndex) {
return classIndex * 2 - 1;
}
/**
* Get the class index corresponding to a sign.
*
* @param {number} sign - Sign
* @return {number} Class index corresponding to sign
*/
getSignClassIndex(sign) {
return (sign + 1) / 2;
}
/**
* @see {@link Classifier#train}
*/
train(X, y) {
// Mark that the SVM is currently in the training procedure
this.isTraining = true;
// Number of training samples
const numSamples = X.length;
// Alphas (Lagrange multipliers)
this.alphas = Arrays.zeros(numSamples);
// Bias term
this.b = 0.0;
// Kernel cache
this.kernelCache = Arrays.full([numSamples, numSamples], 0.0);
this.kernelCacheStatus = Arrays.full([numSamples, numSamples], false);
// Number of passes of the algorithm without any alphas changing
let numPasses = 0;
// Shorthand notation for features and labels
this.training = { X, y };
const ySigns = y.map(x => this.getClassIndexSign(x));
while (numPasses < this.convergenceNumPasses) {
let alphasChanged = 0;
// Loop over all training samples
for (let i = 0; i < numSamples; i += 1) {
// Calculate offset to the 1-margin of sample i
const ei = this.sampleMargin(i) - ySigns[i];
// Check whether the KKT constraints were violated
if ((ySigns[i] * ei < -this.numericalTolerance && this.alphas[i] < this.C)
|| (ySigns[i] * ei > this.numericalTolerance && this.alphas[i] > 0)) {
/* Now, we need to update \alpha_i as it violates the KKT constraints */
// Thus, we pick a random \alpha_j such that j does not equal i
let j = Random.randint(0, numSamples - 1);
if (j >= i) j += 1;
// Calculate offset to the 1-margin of sample j
const ej = this.sampleMargin(j) - ySigns[j];
// Calculate lower and upper bounds for \alpha_j
const [boundL, boundH] = this.calculateAlphaBounds(i, j);
if (Math.abs(boundH - boundL) < this.numericalTolerance) {
// Difference between bounds is practically zero, so there's not much to optimize.
// Continue to next sample.
continue;
}
// Calculate second derivative of cost function from Lagrange dual problem. Note
// that a_i = (g - a_j * y_j) / y_i, where g is the negative sum of all a_k * y_k where
// k is not equal to i or j
const Kij = this.applyKernel(i, j);
const Kii = this.applyKernel(i, i);
const Kjj = this.applyKernel(j, j);
const eta = 2 * Kij - Kii - Kjj;
if (eta >= 0) {
continue;
}
// Compute new \alpha_j
const oldAlphaJ = this.alphas[j];
let newAlphaJ = oldAlphaJ - ySigns[j] * (ei - ej) / eta;
newAlphaJ = Math.min(newAlphaJ, boundH);
newAlphaJ = Math.max(newAlphaJ, boundL);
// Don't update if the difference is too small
if (Math.abs(newAlphaJ - oldAlphaJ) < this.numericalTolerance) {
continue;
}
// Compute new \alpha_i
const oldAlphaI = this.alphas[i];
const newAlphaI = oldAlphaI + ySigns[i] * ySigns[j] * (oldAlphaJ - newAlphaJ);
// Update \alpha_j and \alpha_i
this.alphas[j] = newAlphaJ;
this.alphas[i] = newAlphaI;
// Update the bias term, interpolating between the bias terms for \alpha_i and \alpha_j
const b1 = this.b - ei - ySigns[i] * (newAlphaI - oldAlphaI) * Kii
- ySigns[j] * (newAlphaJ - oldAlphaJ) * Kij;
const b2 = this.b - ej - ySigns[i] * (newAlphaI - oldAlphaI) * Kij
- ySigns[j] * (newAlphaJ - oldAlphaJ) * Kjj;
if (newAlphaJ > 0 && newAlphaJ < this.C) {
this.b = b2;
} else if (newAlphaI > 0 && newAlphaI < this.C) {
this.b = b1;
} else {
this.b = (b1 + b2) / 2;
}
alphasChanged += 1;
}
}
if (alphasChanged === 0) {
numPasses += 1;
} else {
numPasses = 0;
}
}
// Store indices of support vectors (where alpha > 0, or, in this case, where alpha is greater
// than some numerical tolerance)
this.supportVectors = Arrays.argFilter(this.alphas, x => x > 1e-3);
// Mark that training has completed
this.isTraining = false;
}
/**
* Calculate the margin (distance to the decision boundary) for a single sample.
*
* @param {Array.<number>|number} sample - Sample features array or training sample index
* @return {number} Distance to decision boundary
*/
sampleMargin(sample) {
let rval = this.b;
if (this.isTraining) {
// If we're in the training phase, we have to loop over all elements
for (let i = 0; i < this.training.X.length; i += 1) {
const k = this.applyKernel(sample, i);
rval += this.getClassIndexSign(this.training.y[i]) * this.alphas[i] * k;
}
} else {
// If training is done, we only loop over the support vectors
for (const sv of this.supportVectors) {
const k = this.applyKernel(sample, this.training.X[sv]);
rval += this.getClassIndexSign(this.training.y[sv]) * this.alphas[sv] * k;
}
}
return rval;
}
/**
* Apply the kernel to two data points. Accepts both feature arrays and training data point
* indices for x and y. If x and y are integers, attempts to fetch the kernel result for the
* corresponding training data points from cache, and computes and stores the result in cache if
* it isn't found
*
* @param {Array.<number>|number} x - Feature vector or data point index for first data point.
* Arrays are treated as feature vectors, integers as training data point indices
* @param {Array.<number>|number} y - Feature vector or data point index for second data point.
* Arrays are treated as feature vectors, integers as training data point indices
* @return {number} Kernel result
*/
applyKernel(x, y) {
const fromCache = (this.useKernelCache && typeof x === 'number' && typeof y === 'number');
if (fromCache && this.kernelCacheStatus[x][y] === true) {
return this.kernelCache[x][y];
}
const xf = (typeof x === 'number') ? this.training.X[x] : x;
const yf = (typeof y === 'number') ? this.training.X[y] : y;
const result = this.kernel.apply(xf, yf);
if (fromCache) {
this.kernelCache[x][y] = result;
this.kernelCacheStatus[x][y] = true;
}
return result;
}
/**
* Calculate the bounds on \alpha_j to make sure it can be clipped to the [0,C] box and that it
* can be chosen to satisfy the linear equality constraint stemming from the fact that the sum of
* all products y_i * a_i should equal 0.
*
* @param {number} i Index of \alpha_i
* @param {number} j Index of \alpha_j
* @return {Array.<number>} Two-dimensional array containing the lower and upper bound
*/
calculateAlphaBounds(i, j) {
let boundL;
let boundH;
if (this.training.y[i] === this.training.y[j]) {
// The alphas lie on a line with slope -1
boundL = this.alphas[j] - (this.C - this.alphas[i]);
boundH = this.alphas[j] + this.alphas[i];
} else {
// The alphas lie on a line with slope 1
boundL = this.alphas[j] - this.alphas[i];
boundH = this.alphas[j] + (this.C - this.alphas[i]);
}
boundL = Math.max(0, boundL);
boundH = Math.min(this.C, boundH);
return [boundL, boundH];
}
/**
* Make a prediction for a data set.
*
* @param {Array.<Array.<mixed>>} features - Features for each data point. Each array element
* should be an array containing the features of the data point
* @param {Object} [optionsUser] - Options for prediction
* @param {string} [optionsUser.output = 'classLabels'] - Output for predictions. Either
* "classLabels" (default, output predicted class label), "raw" or "normalized" (both output
* margin (distance to decision boundary) for each sample)
* @return {Array.<mixed>} Predictions. Output dependent on options.output, defaults to class
* labels
*/
predict(features, optionsUser = {}) {
// Options
const optionsDefault = {
output: 'classLabels', // 'classLabels', 'normalized' or 'raw'
};
const options = {
...optionsDefault,
...optionsUser,
};
return features.map((x) => {
let output = this.sampleMargin(x);
// Store prediction
if (options.output === 'raw' || options.output === 'normalized') {
// Raw output: do nothing
} else {
// Class label output
output = this.getSignClassIndex(output > 0 ? 1 : -1);
}
return output;
});
}
/**
* Retrieve the indices of the support vector samples.
*
* @return {Array.<number>} List of sample indices of the support vectors
*/
getSupportVectors() {
return this.supportVectors;
}
}
/**
* Support Vector Machine (SVM) classification model for 2 or more classes. The model is a
* one-vs-all classifier and uses the {@link BinarySVM} classifier as its base model. For training
* individual models, a simplified version of John Platt's
* [SMO algorithm](@link https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/tr-98-14.pdf)
* is used.
*
* ## Support Vector Machines (SVM)
* Support Vector Machines train a classifier by finding the decision boundary between the classes
* that maximizes the margin between the boundary and the data points on either side of it. It is a
* very intuitive approach to classification. The soft-margin SVM is a modification of this approach
* where samples are allowed to be misclasiffied, but at some cost. Furthermore, SVMs can implement
* the "kernel trick", where an implicit feature transformation is applied.
*
* This is all implemented in the SVM implementation in JSMLT. For more information about Support
* Vector Machines, you can start by checking out the [Wikipedia](https://en.wikipedia.org/wiki/Support_vector_machine)
* page on SVMs.
*
* ## Examples
* **Example 1:** Training a multiclass SVM on a well-known three-class dataset, the
* [Iris dataset](https://github.com/jsmlt/datasets-repository/tree/master/iris#readme).
*
* @example <caption>Example 1. SVM training on a multiclass classification task.</caption>
* // Import JSMLT
* var jsmlt = require('@jsmlt/jsmlt');
*
* // Load the iris dataset. When loading is completed, process the data and run the classifier
* jsmlt.Datasets.loadIris((X, y_raw) => {
* // Encode the labels (which are strings) into integers
* var labelencoder = new jsmlt.Preprocessing.LabelEncoder();
* var y = labelencoder.encode(y_raw);
*
* // Split the data into a training set and a test set
* [X_train, y_train, X_test, y_test] = jsmlt.ModelSelection.trainTestSplit([X, y]);
*
* // Create and train classifier
* var clf = new jsmlt.Supervised.SVM.SVM({
* kernel: new jsmlt.Kernel.Gaussian(1),
* });
* clf.train(X_train, y_train);
*
* // Make predictions on test data
* var predictions = clf.predict(X_test);
*
* // Evaluate and output the classifier's accuracy
* var accuracy = jsmlt.Validation.Metrics.accuracy(predictions, y_test);
* console.log(`Accuracy: ${accuracy}`);
* });
*
* @see {@link BinarySVM}
*/
export default class SVM extends OneVsAllClassifier {
/**
* Constructor. Initialize class members and store user-defined options.
*
* @see {@link BinarySVM#constructor}
*
* @param {Object} optionsUser User-defined options for SVM. Options are passed to created
* BinarySVM objects. See BinarySVM.constructor() for more details
* @param {Object} [optionsUser] - User-defined options for SVM
* @param {number} [optionsUser.C = 100] - Regularization (i.e. penalty for slack variables)
* @param {Object} [optionsUser.kernel] - Kernel. Defaults to the linear kernel
* @param {number} [optionsUser.convergenceNumPasses = 10] - Number of passes without alphas
* changing to treat the algorithm as converged
* @param {number} [optionsUser.numericalTolerance = 1e-6] - Numerical tolerance for a
* value in the to be equal to another SMO algorithm to be equal to another value
* @param {boolean} [optionsUser.useKernelCache = true] - Whether to cache calculated kernel
* values for training sample pairs. Enabling this option (which is the default) generally
* improves the performance in terms of speed at the cost of memory
*/
constructor(optionsUser = {}) {
super();
/**
* Number of errors per iteration. Only used if accuracy tracking is enabled
*
* @type {Array.<mixed>}
*/
this.numErrors = null;
// Set options
this.optionsUser = optionsUser;
}
/**
* @see {@link OneVsAll#createClassifier}
*/
createClassifier() {
return new BinarySVM(this.optionsUser);
}
/**
* @see {@link Estimator#train}
*/
train(X, y) {
this.training = { X, y };
this.createClassifiers(y);
this.trainBatch(X, y);
}
/**
* Retrieve the indices of the support vector samples.
*
* @return {Array.<number>} List of sample indices of the support vectors
*/
getSupportVectors() {
const classifiersSupportVectors = this.getClassifiers().map(
x => x.classifier.getSupportVectors()
);
return Arrays.unique(Array.prototype.concat(...classifiersSupportVectors));
}
}