
Commit f994376

Pushing the docs to dev/ for branch: master, commit 5147fd09c6a063188efde444f47bd006fa5f95f0
1 parent 8389059 commit f994376

917 files changed: 4426 additions, 2918 deletions
Two binary files changed (9.2 KB and 7.14 KB); contents not shown.
Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
{
  "nbformat_minor": 0,
  "nbformat": 4,
  "cells": [
    {
      "execution_count": null,
      "cell_type": "code",
      "source": [
        "%matplotlib inline"
      ],
      "outputs": [],
      "metadata": {
        "collapsed": false
      }
    },
    {
      "source": [
        "\n# Multiclass sparse logistic regression on newsgroups20\n\n\nComparison of multinomial logistic L1 vs one-versus-rest L1 logistic regression\nto classify documents from the newsgroups20 dataset. Multinomial logistic\nregression yields more accurate results and is faster to train on the larger\nscale dataset.\n\nHere we use the l1 penalty, which trims the weights of uninformative\nfeatures to zero. This is good if the goal is to extract the strongly\ndiscriminative vocabulary of each class. If the goal is to get the best\npredictive accuracy, it is better to use the non-sparsity-inducing l2 penalty\ninstead.\n\nA more traditional (and possibly better) way to predict on a sparse subset of\ninput features would be to use univariate feature selection followed by a\ntraditional (l2-penalised) logistic regression model.\n\n"
      ],
      "cell_type": "markdown",
      "metadata": {}
    },
    {
      "execution_count": null,
      "cell_type": "code",
      "source": [
        "import time\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.datasets import fetch_20newsgroups_vectorized\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split\n\nprint(__doc__)\n# Author: Arthur Mensch\n\nt0 = time.clock()\n\n# We use SAGA solver\nsolver = 'saga'\n\n# Turn down for faster run time\nn_samples = 10000\n\n# Memoized fetch_20newsgroups_vectorized for faster access\ndataset = fetch_20newsgroups_vectorized('all')\nX = dataset.data\ny = dataset.target\nX = X[:n_samples]\ny = y[:n_samples]\n\nX_train, X_test, y_train, y_test = train_test_split(X, y,\n                                                     random_state=42,\n                                                     stratify=y,\n                                                     test_size=0.1)\ntrain_samples, n_features = X_train.shape\nn_classes = np.unique(y).shape[0]\n\nprint('Dataset 20newsgroup, train_samples=%i, n_features=%i, n_classes=%i'\n      % (train_samples, n_features, n_classes))\n\nmodels = {'ovr': {'name': 'One versus Rest', 'iters': [1, 3]},\n          'multinomial': {'name': 'Multinomial', 'iters': [1, 3, 7]}}\n\nfor model in models:\n    # Add initial chance-level values for plotting purpose\n    accuracies = [1 / n_classes]\n    times = [0]\n    densities = [1]\n\n    model_params = models[model]\n\n    # Small number of epochs for fast runtime\n    for this_max_iter in model_params['iters']:\n        print('[model=%s, solver=%s] Number of epochs: %s' %\n              (model_params['name'], solver, this_max_iter))\n        lr = LogisticRegression(solver=solver,\n                                multi_class=model,\n                                C=1,\n                                penalty='l1',\n                                fit_intercept=True,\n                                max_iter=this_max_iter,\n                                random_state=42,\n                                )\n        t1 = time.clock()\n        lr.fit(X_train, y_train)\n        train_time = time.clock() - t1\n\n        y_pred = lr.predict(X_test)\n        accuracy = np.sum(y_pred == y_test) / y_test.shape[0]\n        density = np.mean(lr.coef_ != 0, axis=1) * 100\n        accuracies.append(accuracy)\n        densities.append(density)\n        times.append(train_time)\n    models[model]['times'] = times\n    models[model]['densities'] = densities\n    models[model]['accuracies'] = accuracies\n    print('Test accuracy for model %s: %.4f' % (model, accuracies[-1]))\n    print('%% non-zero coefficients for model %s, '\n          'per class:\\n %s' % (model, densities[-1]))\n    print('Run time (%i epochs) for model %s:'\n          '%.2f' % (model_params['iters'][-1], model, times[-1]))\n\nfig = plt.figure()\nax = fig.add_subplot(111)\n\nfor model in models:\n    name = models[model]['name']\n    times = models[model]['times']\n    accuracies = models[model]['accuracies']\n    ax.plot(times, accuracies, marker='o',\n            label='Model: %s' % name)\n    ax.set_xlabel('Train time (s)')\n    ax.set_ylabel('Test accuracy')\nax.legend()\nfig.suptitle('Multinomial vs One-vs-Rest Logistic L1\\n'\n             'Dataset %s' % '20newsgroups')\nfig.tight_layout()\nfig.subplots_adjust(top=0.85)\nrun_time = time.clock() - t0\nprint('Example run in %.3f s' % run_time)\nplt.show()"
      ],
      "outputs": [],
      "metadata": {
        "collapsed": false
      }
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 2",
      "name": "python2",
      "language": "python"
    },
    "language_info": {
      "mimetype": "text/x-python",
      "nbconvert_exporter": "python",
      "name": "python",
      "file_extension": ".py",
      "version": "2.7.13",
      "pygments_lexer": "ipython2",
      "codemirror_mode": {
        "version": 2,
        "name": "ipython"
      }
    }
  }
}
Lines changed: 118 additions & 0 deletions
@@ -0,0 +1,118 @@
"""
=====================================================
Multiclass sparse logistic regression on newsgroups20
=====================================================

Comparison of multinomial logistic L1 vs one-versus-rest L1 logistic regression
to classify documents from the newsgroups20 dataset. Multinomial logistic
regression yields more accurate results and is faster to train on the larger
scale dataset.

Here we use the l1 penalty, which trims the weights of uninformative
features to zero. This is good if the goal is to extract the strongly
discriminative vocabulary of each class. If the goal is to get the best
predictive accuracy, it is better to use the non-sparsity-inducing l2 penalty
instead.

A more traditional (and possibly better) way to predict on a sparse subset of
input features would be to use univariate feature selection followed by a
traditional (l2-penalised) logistic regression model.
"""
import time

import matplotlib.pyplot as plt
import numpy as np

from sklearn.datasets import fetch_20newsgroups_vectorized
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

print(__doc__)
# Author: Arthur Mensch

t0 = time.clock()

# We use SAGA solver
solver = 'saga'

# Turn down for faster run time
n_samples = 10000

# Memoized fetch_20newsgroups_vectorized for faster access
dataset = fetch_20newsgroups_vectorized('all')
X = dataset.data
y = dataset.target
X = X[:n_samples]
y = y[:n_samples]

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                     random_state=42,
                                                     stratify=y,
                                                     test_size=0.1)
train_samples, n_features = X_train.shape
n_classes = np.unique(y).shape[0]

print('Dataset 20newsgroup, train_samples=%i, n_features=%i, n_classes=%i'
      % (train_samples, n_features, n_classes))

models = {'ovr': {'name': 'One versus Rest', 'iters': [1, 3]},
          'multinomial': {'name': 'Multinomial', 'iters': [1, 3, 7]}}

for model in models:
    # Add initial chance-level values for plotting purpose
    accuracies = [1 / n_classes]
    times = [0]
    densities = [1]

    model_params = models[model]

    # Small number of epochs for fast runtime
    for this_max_iter in model_params['iters']:
        print('[model=%s, solver=%s] Number of epochs: %s' %
              (model_params['name'], solver, this_max_iter))
        lr = LogisticRegression(solver=solver,
                                multi_class=model,
                                C=1,
                                penalty='l1',
                                fit_intercept=True,
                                max_iter=this_max_iter,
                                random_state=42,
                                )
        t1 = time.clock()
        lr.fit(X_train, y_train)
        train_time = time.clock() - t1

        y_pred = lr.predict(X_test)
        accuracy = np.sum(y_pred == y_test) / y_test.shape[0]
        density = np.mean(lr.coef_ != 0, axis=1) * 100
        accuracies.append(accuracy)
        densities.append(density)
        times.append(train_time)
    models[model]['times'] = times
    models[model]['densities'] = densities
    models[model]['accuracies'] = accuracies
    print('Test accuracy for model %s: %.4f' % (model, accuracies[-1]))
    print('%% non-zero coefficients for model %s, '
          'per class:\n %s' % (model, densities[-1]))
    print('Run time (%i epochs) for model %s:'
          '%.2f' % (model_params['iters'][-1], model, times[-1]))

fig = plt.figure()
ax = fig.add_subplot(111)

for model in models:
    name = models[model]['name']
    times = models[model]['times']
    accuracies = models[model]['accuracies']
    ax.plot(times, accuracies, marker='o',
            label='Model: %s' % name)
    ax.set_xlabel('Train time (s)')
    ax.set_ylabel('Test accuracy')
ax.legend()
fig.suptitle('Multinomial vs One-vs-Rest Logistic L1\n'
             'Dataset %s' % '20newsgroups')
fig.tight_layout()
fig.subplots_adjust(top=0.85)
run_time = time.clock() - t0
print('Example run in %.3f s' % run_time)
plt.show()
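
The docstring above mentions, as an alternative, univariate feature selection followed by a traditional l2-penalised logistic regression. That variant is not part of this commit; the sketch below shows one way it could look, and the choice of SelectKBest with chi2, k=5000, and the other parameter values are illustrative assumptions rather than tuned settings.

# Sketch only (not part of the committed example): univariate feature
# selection (chi2 scores, valid for the non-negative tf-idf features)
# followed by an l2-penalised multinomial logistic regression.
from sklearn.datasets import fetch_20newsgroups_vectorized
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

dataset = fetch_20newsgroups_vectorized('all')
X, y = dataset.data[:10000], dataset.target[:10000]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, stratify=y, test_size=0.1)

# Keep the 5000 features most associated with the target, then fit a
# dense-weight l2 model on that reduced vocabulary.
clf = make_pipeline(
    SelectKBest(chi2, k=5000),
    LogisticRegression(penalty='l2', C=1, solver='saga',
                       multi_class='multinomial', max_iter=10,
                       random_state=42))
clf.fit(X_train, y_train)
print('Test accuracy with feature selection + l2: %.4f'
      % clf.score(X_test, y_test))

The trade-off is the one the docstring describes: sparsity comes from the selection step rather than from the penalty, and the downstream model stays dense on the retained vocabulary.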
Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
{
  "nbformat_minor": 0,
  "nbformat": 4,
  "cells": [
    {
      "execution_count": null,
      "cell_type": "code",
      "source": [
        "%matplotlib inline"
      ],
      "outputs": [],
      "metadata": {
        "collapsed": false
      }
    },
    {
      "source": [
        "\n=====================================================\nMNIST classification using multinomial logistic + L1\n=====================================================\n\nHere we fit a multinomial logistic regression with L1 penalty on a subset of\nthe MNIST digits classification task. We use the SAGA algorithm for this\npurpose: this is a solver that is fast when the number of samples is\nsignificantly larger than the number of features and is able to finely\noptimize non-smooth objective functions, which is the case with the\nl1-penalty. Test accuracy reaches > 0.8, while weight vectors remain *sparse*\nand therefore more easily *interpretable*.\n\nNote that the accuracy of this l1-penalized linear model is significantly\nbelow what can be reached by an l2-penalized linear model or a non-linear\nmulti-layer perceptron model on this dataset.\n\n\n"
      ],
      "cell_type": "markdown",
      "metadata": {}
    },
    {
      "execution_count": null,
      "cell_type": "code",
      "source": [
        "import time\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.datasets import fetch_mldata\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.utils import check_random_state\n\nprint(__doc__)\n\n# Author: Arthur Mensch <[email protected]>\n# License: BSD 3 clause\n\n# Turn down for faster convergence\nt0 = time.time()\ntrain_samples = 5000\n\nmnist = fetch_mldata('MNIST original')\nX = mnist.data.astype('float64')\ny = mnist.target\nrandom_state = check_random_state(0)\npermutation = random_state.permutation(X.shape[0])\nX = X[permutation]\ny = y[permutation]\nX = X.reshape((X.shape[0], -1))\n\nX_train, X_test, y_train, y_test = train_test_split(\n    X, y, train_size=train_samples, test_size=10000)\n\nscaler = StandardScaler()\nX_train = scaler.fit_transform(X_train)\nX_test = scaler.transform(X_test)\n\n# Turn up tolerance for faster convergence\nclf = LogisticRegression(C=50 / train_samples,\n                         multi_class='multinomial',\n                         penalty='l1', solver='saga', tol=0.1)\nclf.fit(X_train, y_train)\nsparsity = np.mean(clf.coef_ == 0) * 100\nscore = clf.score(X_test, y_test)\n# print('Best C % .4f' % clf.C_)\nprint(\"Sparsity with L1 penalty: %.2f%%\" % sparsity)\nprint(\"Test score with L1 penalty: %.4f\" % score)\n\ncoef = clf.coef_.copy()\nplt.figure(figsize=(10, 5))\nscale = np.abs(coef).max()\nfor i in range(10):\n    l1_plot = plt.subplot(2, 5, i + 1)\n    l1_plot.imshow(coef[i].reshape(28, 28), interpolation='nearest',\n                   cmap=plt.cm.RdBu, vmin=-scale, vmax=scale)\n    l1_plot.set_xticks(())\n    l1_plot.set_yticks(())\n    l1_plot.set_xlabel('Class %i' % i)\nplt.suptitle('Classification vector for...')\n\nrun_time = time.time() - t0\nprint('Example run in %.3f s' % run_time)\nplt.show()"
      ],
      "outputs": [],
      "metadata": {
        "collapsed": false
      }
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 2",
      "name": "python2",
      "language": "python"
    },
    "language_info": {
      "mimetype": "text/x-python",
      "nbconvert_exporter": "python",
      "name": "python",
      "file_extension": ".py",
      "version": "2.7.13",
      "pygments_lexer": "ipython2",
      "codemirror_mode": {
        "version": 2,
        "name": "ipython"
      }
    }
  }
}
Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,79 @@
"""
=====================================================
MNIST classification using multinomial logistic + L1
=====================================================

Here we fit a multinomial logistic regression with L1 penalty on a subset of
the MNIST digits classification task. We use the SAGA algorithm for this
purpose: this is a solver that is fast when the number of samples is
significantly larger than the number of features and is able to finely
optimize non-smooth objective functions, which is the case with the
l1-penalty. Test accuracy reaches > 0.8, while weight vectors remain *sparse*
and therefore more easily *interpretable*.

Note that the accuracy of this l1-penalized linear model is significantly
below what can be reached by an l2-penalized linear model or a non-linear
multi-layer perceptron model on this dataset.

"""
import time
import matplotlib.pyplot as plt
import numpy as np

from sklearn.datasets import fetch_mldata
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_random_state

print(__doc__)

# Author: Arthur Mensch <[email protected]>
# License: BSD 3 clause

# Turn down for faster convergence
t0 = time.time()
train_samples = 5000

mnist = fetch_mldata('MNIST original')
X = mnist.data.astype('float64')
y = mnist.target
random_state = check_random_state(0)
permutation = random_state.permutation(X.shape[0])
X = X[permutation]
y = y[permutation]
X = X.reshape((X.shape[0], -1))

X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=train_samples, test_size=10000)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Turn up tolerance for faster convergence
clf = LogisticRegression(C=50 / train_samples,
                         multi_class='multinomial',
                         penalty='l1', solver='saga', tol=0.1)
clf.fit(X_train, y_train)
sparsity = np.mean(clf.coef_ == 0) * 100
score = clf.score(X_test, y_test)
# print('Best C % .4f' % clf.C_)
print("Sparsity with L1 penalty: %.2f%%" % sparsity)
print("Test score with L1 penalty: %.4f" % score)

coef = clf.coef_.copy()
plt.figure(figsize=(10, 5))
scale = np.abs(coef).max()
for i in range(10):
    l1_plot = plt.subplot(2, 5, i + 1)
    l1_plot.imshow(coef[i].reshape(28, 28), interpolation='nearest',
                   cmap=plt.cm.RdBu, vmin=-scale, vmax=scale)
    l1_plot.set_xticks(())
    l1_plot.set_yticks(())
    l1_plot.set_xlabel('Class %i' % i)
plt.suptitle('Classification vector for...')

run_time = time.time() - t0
print('Example run in %.3f s' % run_time)
plt.show()
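
The note in the docstring says this l1-penalized model stays well below what an l2-penalized linear model can reach. That comparison is not part of the commit; the sketch below is one way to run it. It substitutes fetch_openml('mnist_784') for the example's fetch_mldata purely so the data loads on current scikit-learn, and that substitution, along with the untuned hyperparameters, is an assumption of the sketch.

# Sketch only (not in the commit): the l2-penalised comparison point the
# docstring mentions. fetch_openml replaces fetch_mldata here as an assumed
# data loader; hyperparameters mirror the l1 example but are not tuned.
import numpy as np

from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
X = X.astype('float64')

X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=5000, test_size=10000, random_state=0)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Same C and tolerance as the l1 example above, only the penalty changes.
clf_l2 = LogisticRegression(C=50.0 / 5000, multi_class='multinomial',
                            penalty='l2', solver='saga', tol=0.1)
clf_l2.fit(X_train, y_train)
print("Sparsity with L2 penalty: %.2f%%" % (np.mean(clf_l2.coef_ == 0) * 100))
print("Test score with L2 penalty: %.4f" % clf_l2.score(X_test, y_test))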

dev/_downloads/scikit-learn-docs.pdf: binary file changed (42.9 KB); contents not shown.
