
Commit cc2430b

Rebuild dev docs at master=0ba5c24

1 parent 7e810c4, commit cc2430b

300 files changed: +6494 additions, -1505 deletions


dev/_downloads/plot_classifier_comparison.py

Lines changed: 4 additions & 2 deletions
@@ -34,6 +34,7 @@
 from sklearn.cross_validation import train_test_split
 from sklearn.preprocessing import StandardScaler
 from sklearn.datasets import make_moons, make_circles, make_classification
+from sklearn.neural_network import MLPClassifier
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.svm import SVC
 from sklearn.gaussian_process import GaussianProcessClassifier
@@ -46,8 +47,8 @@
 h = .02  # step size in the mesh
 
 names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
-         "Decision Tree", "Random Forest", "AdaBoost", "Naive Bayes",
-         "QDA"]
+         "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
+         "Naive Bayes", "QDA"]
 
 classifiers = [
     KNeighborsClassifier(3),
@@ -56,6 +57,7 @@
     GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
     DecisionTreeClassifier(max_depth=5),
     RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
+    MLPClassifier(alpha=1),
     AdaBoostClassifier(),
     GaussianNB(),
     QuadraticDiscriminantAnalysis()]
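
For context, the added MLPClassifier(alpha=1) is fit and scored like every other estimator in this example's per-dataset loop. Below is a minimal sketch of that pattern, restricted to the moons dataset for brevity; it uses sklearn.model_selection.train_test_split in place of the deprecated sklearn.cross_validation import shown above, and the split's random_state is illustrative, not taken from the example.

# Minimal sketch: how the newly added "Neural Net" entry is used in the comparison.
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

X, y = make_moons(noise=0.3, random_state=0)
X = StandardScaler().fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.4, random_state=42)

clf = MLPClassifier(alpha=1)  # the estimator added in this diff
clf.fit(X_train, y_train)
print("Neural Net test accuracy: %.2f" % clf.score(X_test, y_test))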

dev/_downloads/plot_mlp_alpha.py

Lines changed: 113 additions & 0 deletions
"""
================================================
Varying regularization in Multi-layer Perceptron
================================================

A comparison of different values for the regularization parameter 'alpha' on
synthetic datasets. The plot shows that different alphas yield different
decision functions.

Alpha is a parameter for the regularization term, aka penalty term, that
combats overfitting by constraining the size of the weights. Increasing alpha
may fix high variance (a sign of overfitting) by encouraging smaller weights,
resulting in a decision boundary plot with less curvature.
Similarly, decreasing alpha may fix high bias (a sign of underfitting) by
allowing larger weights, potentially resulting in a more complicated
decision boundary.
"""
print(__doc__)


# Author: Issam H. Laradji
# License: BSD 3 clause

import numpy as np
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier

h = .02  # step size in the mesh

alphas = np.logspace(-5, 3, 5)
names = []
for i in alphas:
    names.append('alpha ' + str(i))

classifiers = []
for i in alphas:
    classifiers.append(MLPClassifier(alpha=i, random_state=1))

X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                           random_state=0, n_clusters_per_class=1)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
linearly_separable = (X, y)

datasets = [make_moons(noise=0.3, random_state=0),
            make_circles(noise=0.2, factor=0.5, random_state=1),
            linearly_separable]

figure = plt.figure(figsize=(17, 9))
i = 1
# iterate over datasets
for X, y in datasets:
    # preprocess dataset, split into training and test part
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)

    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # just plot the dataset first
    cm = plt.cm.RdBu
    cm_bright = ListedColormap(['#FF0000', '#0000FF'])
    ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
    # Plot the training points
    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
    # and testing points
    ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xticks(())
    ax.set_yticks(())
    i += 1

    # iterate over classifiers
    for name, clf in zip(names, classifiers):
        ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)

        # Plot the decision boundary. For that, we will assign a color to each
        # point in the mesh [x_min, x_max]x[y_min, y_max].
        if hasattr(clf, "decision_function"):
            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
        else:
            Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

        # Put the result into a color plot
        Z = Z.reshape(xx.shape)
        ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

        # Plot also the training points
        ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
        # and testing points
        ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                   alpha=0.6)

        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())
        ax.set_title(name)
        ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),
                size=15, horizontalalignment='right')
        i += 1

figure.subplots_adjust(left=.02, right=.98)
plt.show()
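
To make the claim that alpha constrains the size of the weights concrete, here is a minimal sketch (an addition, not part of the example) that reuses the same alpha grid and prints the mean absolute weight of each fitted network via the coefs_ attribute:

import numpy as np
from sklearn.datasets import make_moons
from sklearn.neural_network import MLPClassifier

X, y = make_moons(noise=0.3, random_state=0)

# Larger alpha penalizes large weights more strongly, so the mean absolute
# weight should shrink as alpha grows.
for alpha in np.logspace(-5, 3, 5):
    clf = MLPClassifier(alpha=alpha, random_state=1).fit(X, y)
    mean_abs_weight = np.mean([np.abs(w).mean() for w in clf.coefs_])
    print("alpha=%g  mean |weight| = %.3f" % (alpha, mean_abs_weight))
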
Lines changed: 86 additions & 0 deletions
"""
========================================================
Compare Stochastic learning strategies for MLPClassifier
========================================================

This example visualizes some training loss curves for different stochastic
learning strategies, including SGD and Adam. Because of time constraints, we
use several small datasets, for which L-BFGS might be more suitable. The
general trend shown in these examples seems to carry over to larger datasets,
however.
"""

print(__doc__)
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn import datasets

# different learning rate schedules and momentum parameters
params = [{'algorithm': 'sgd', 'learning_rate': 'constant', 'momentum': 0,
           'learning_rate_init': 0.2},
          {'algorithm': 'sgd', 'learning_rate': 'constant', 'momentum': .9,
           'nesterovs_momentum': False, 'learning_rate_init': 0.2},
          {'algorithm': 'sgd', 'learning_rate': 'constant', 'momentum': .9,
           'nesterovs_momentum': True, 'learning_rate_init': 0.2},
          {'algorithm': 'sgd', 'learning_rate': 'invscaling', 'momentum': 0,
           'learning_rate_init': 0.2},
          {'algorithm': 'sgd', 'learning_rate': 'invscaling', 'momentum': .9,
           'nesterovs_momentum': True, 'learning_rate_init': 0.2},
          {'algorithm': 'sgd', 'learning_rate': 'invscaling', 'momentum': .9,
           'nesterovs_momentum': False, 'learning_rate_init': 0.2},
          {'algorithm': 'adam'}]

labels = ["constant learning-rate", "constant with momentum",
          "constant with Nesterov's momentum",
          "inv-scaling learning-rate", "inv-scaling with momentum",
          "inv-scaling with Nesterov's momentum", "adam"]

plot_args = [{'c': 'red', 'linestyle': '-'},
             {'c': 'green', 'linestyle': '-'},
             {'c': 'blue', 'linestyle': '-'},
             {'c': 'red', 'linestyle': '--'},
             {'c': 'green', 'linestyle': '--'},
             {'c': 'blue', 'linestyle': '--'},
             {'c': 'black', 'linestyle': '-'}]


def plot_on_dataset(X, y, ax, name):
    # for each dataset, plot learning for each learning strategy
    print("\nlearning on dataset %s" % name)
    ax.set_title(name)
    X = MinMaxScaler().fit_transform(X)
    mlps = []
    if name == "digits":
        # digits is larger but converges fairly quickly
        max_iter = 15
    else:
        max_iter = 400

    for label, param in zip(labels, params):
        print("training: %s" % label)
        mlp = MLPClassifier(verbose=0, random_state=0,
                            max_iter=max_iter, **param)
        mlp.fit(X, y)
        mlps.append(mlp)
        print("Training set score: %f" % mlp.score(X, y))
        print("Training set loss: %f" % mlp.loss_)
    for mlp, label, args in zip(mlps, labels, plot_args):
        ax.plot(mlp.loss_curve_, label=label, **args)


fig, axes = plt.subplots(2, 2, figsize=(15, 10))
# load / generate some toy datasets
iris = datasets.load_iris()
digits = datasets.load_digits()
data_sets = [(iris.data, iris.target),
             (digits.data, digits.target),
             datasets.make_circles(noise=0.2, factor=0.5, random_state=1),
             datasets.make_moons(noise=0.3, random_state=0)]

for ax, data, name in zip(axes.ravel(), data_sets, ['iris', 'digits',
                                                    'circles', 'moons']):
    plot_on_dataset(*data, ax=ax, name=name)

fig.legend(ax.get_lines(), labels=labels, ncol=3, loc="upper center")
plt.show()
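
As a quick way to inspect a single strategy outside the plotting helper, here is a minimal sketch that fits only the 'adam' configuration on iris and reads back the loss_curve_ attribute the example plots. It keeps this snapshot's 'algorithm' keyword, which later scikit-learn releases renamed to 'solver'.

from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPClassifier

iris = datasets.load_iris()
X = MinMaxScaler().fit_transform(iris.data)

# 'algorithm' matches the dev-version API used above; later releases use 'solver'.
mlp = MLPClassifier(algorithm='adam', max_iter=400, random_state=0)
mlp.fit(X, iris.target)
print("iterations run: %d" % len(mlp.loss_curve_))
print("final training loss: %f" % mlp.loss_curve_[-1])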

dev/_downloads/plot_mnist_filters.py

Lines changed: 54 additions & 0 deletions
"""
=====================================
Visualization of MLP weights on MNIST
=====================================

Sometimes looking at the learned coefficients of a neural network can provide
insight into the learning behavior. For example, if weights look unstructured,
maybe some were not used at all, or if very large coefficients exist, maybe
regularization was too low or the learning rate too high.

This example shows how to plot some of the first-layer weights in an
MLPClassifier trained on the MNIST dataset.

The input data consists of 28x28 pixel handwritten digits, leading to 784
features in the dataset. Therefore the first-layer weight matrix has the shape
(784, hidden_layer_sizes[0]). We can therefore visualize a single column of
the weight matrix as a 28x28 pixel image.

To make the example run faster, we use very few hidden units, and train only
for a very short time. Training longer would result in weights with a much
smoother spatial appearance.
"""
print(__doc__)

import matplotlib.pyplot as plt
from sklearn.datasets import fetch_mldata
from sklearn.neural_network import MLPClassifier

mnist = fetch_mldata("MNIST original")
# rescale the data, use the traditional train/test split
X, y = mnist.data / 255., mnist.target
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

# mlp = MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
#                     algorithm='sgd', verbose=10, tol=1e-4, random_state=1)
mlp = MLPClassifier(hidden_layer_sizes=(50,), max_iter=10, alpha=1e-4,
                    algorithm='sgd', verbose=10, tol=1e-4, random_state=1,
                    learning_rate_init=.1)

mlp.fit(X_train, y_train)
print("Training set score: %f" % mlp.score(X_train, y_train))
print("Test set score: %f" % mlp.score(X_test, y_test))

fig, axes = plt.subplots(4, 4)
# use global min / max to ensure all weights are shown on the same scale
vmin, vmax = mlp.coefs_[0].min(), mlp.coefs_[0].max()
for coef, ax in zip(mlp.coefs_[0].T, axes.ravel()):
    ax.matshow(coef.reshape(28, 28), cmap=plt.cm.gray, vmin=.5 * vmin,
               vmax=.5 * vmax)
    ax.set_xticks(())
    ax.set_yticks(())

plt.show()
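
The shape relationship described in the docstring can be checked with a small self-contained fit. This sketch uses load_digits (8x8 images, so a (64, 50) first-layer matrix) as a stand-in to avoid the MNIST download:

from sklearn.datasets import load_digits
from sklearn.neural_network import MLPClassifier

digits = load_digits()
mlp = MLPClassifier(hidden_layer_sizes=(50,), max_iter=10, random_state=1)
mlp.fit(digits.data / 16., digits.target)

# One weight matrix per layer: inputs -> hidden units, hidden units -> 10 classes.
print(mlp.coefs_[0].shape)  # (64, 50); each column reshapes to an 8x8 image
print(mlp.coefs_[1].shape)  # (50, 10)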
