
Commit 699443f

Pushing the docs to 0.22/ for branch: 0.22.X, commit e5698bde9a8b719514bf39e6e5d58f90cfe5bc01
1 parent 641fdc1 commit 699443f

1,390 files changed (+15,868 / -15,484 lines)

0.22/.buildinfo

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 8c8d8d75cf4566af76f5b5290a417e14
+config: c548bd7f3af31ba24d4445857b1ee136
 tags: 645f666f9bcd5a90fca523b33c5a78b7

0.22/_downloads/0805c1058fba7383113f44df060e3b88/plot_changed_only_pprint_parameter.py

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 
 This example illustrates the use of the print_changed_only global parameter.
 
-Setting print_changed_only to True will alterate the representation of
+Setting print_changed_only to True will alternate the representation of
 estimators to only show the parameters that have been set to non-default
 values. This can be used to have more compact representations.
 """

0.22/_downloads/0e54710c34c6326f0886857069d1cc1f/plot_permutation_importance_multicollinear.ipynb

Lines changed: 1 addition & 1 deletion
@@ -62,7 +62,7 @@
 },
 "outputs": [],
 "source": [
- (removed):
"result = permutation_importance(clf, X_train, y_train, n_repeats=10,\n random_state=42)\nperm_sorted_idx = result.importances_mean.argsort()\n\ntree_importance_sorted_idx = np.argsort(clf.feature_importances_)\ntree_indices = np.arange(0, len(clf.feature_importances_)) + 0.5\n\nfig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8))\nax1.barh(tree_indices,\n clf.feature_importances_[tree_importance_sorted_idx], height=0.7)\nax1.set_yticklabels(data.feature_names)\nax1.set_yticks(tree_indices)\nax1.set_ylim((0, len(clf.feature_importances_)))\nax2.boxplot(result.importances[perm_sorted_idx].T, vert=False,\n labels=data.feature_names)\nfig.tight_layout()\nplt.show()"
+ (added):
"result = permutation_importance(clf, X_train, y_train, n_repeats=10,\n random_state=42)\nperm_sorted_idx = result.importances_mean.argsort()\n\ntree_importance_sorted_idx = np.argsort(clf.feature_importances_)\ntree_indices = np.arange(0, len(clf.feature_importances_)) + 0.5\n\nfig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8))\nax1.barh(tree_indices,\n clf.feature_importances_[tree_importance_sorted_idx], height=0.7)\nax1.set_yticklabels(data.feature_names[tree_importance_sorted_idx])\nax1.set_yticks(tree_indices)\nax1.set_ylim((0, len(clf.feature_importances_)))\nax2.boxplot(result.importances[perm_sorted_idx].T, vert=False,\n labels=data.feature_names[perm_sorted_idx])\nfig.tight_layout()\nplt.show()"
 ]
 },
 {

0.22/_downloads/22b5d928415782a90ca8864871748096/plot_sparse_logistic_regression_mnist.py

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 """
 =====================================================
-MNIST classfification using multinomial logistic + L1
+MNIST classification using multinomial logistic + L1
 =====================================================
 
 Here we fit a multinomial logistic regression with L1 penalty on a subset of
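
For reference, the technique named in that title: a minimal sketch (not part of this commit) of an L1-penalised multinomial logistic regression, using scikit-learn's digits dataset as a small stand-in for MNIST:

from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X, y = load_digits(return_X_y=True)
X = StandardScaler().fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# 'saga' is the only solver that supports the L1 penalty with the multinomial loss
clf = LogisticRegression(penalty='l1', solver='saga', multi_class='multinomial',
                         C=0.1, max_iter=200, tol=0.01)
clf.fit(X_train, y_train)
print("Test accuracy: %.3f" % clf.score(X_test, y_test))
print("Zero weights: %.1f%%" % (100 * (clf.coef_ == 0).mean()))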

0.22/_downloads/28477181ee2a477248e703cf646f97f1/plot_sparse_logistic_regression_20newsgroups.ipynb

Lines changed: 2 additions & 2 deletions
@@ -15,7 +15,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
- (removed):
"\n# Multiclass sparse logisitic regression on newgroups20\n\n\nComparison of multinomial logistic L1 vs one-versus-rest L1 logistic regression\nto classify documents from the newgroups20 dataset. Multinomial logistic\nregression yields more accurate results and is faster to train on the larger\nscale dataset.\n\nHere we use the l1 sparsity that trims the weights of not informative\nfeatures to zero. This is good if the goal is to extract the strongly\ndiscriminative vocabulary of each class. If the goal is to get the best\npredictive accuracy, it is better to use the non sparsity-inducing l2 penalty\ninstead.\n\nA more traditional (and possibly better) way to predict on a sparse subset of\ninput features would be to use univariate feature selection followed by a\ntraditional (l2-penalised) logistic regression model.\n"
+ (added):
"\n# Multiclass sparse logistic regression on 20newgroups\n\n\nComparison of multinomial logistic L1 vs one-versus-rest L1 logistic regression\nto classify documents from the newgroups20 dataset. Multinomial logistic\nregression yields more accurate results and is faster to train on the larger\nscale dataset.\n\nHere we use the l1 sparsity that trims the weights of not informative\nfeatures to zero. This is good if the goal is to extract the strongly\ndiscriminative vocabulary of each class. If the goal is to get the best\npredictive accuracy, it is better to use the non sparsity-inducing l2 penalty\ninstead.\n\nA more traditional (and possibly better) way to predict on a sparse subset of\ninput features would be to use univariate feature selection followed by a\ntraditional (l2-penalised) logistic regression model.\n"
 ]
 },
 {
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
- (removed):
"import timeit\nimport warnings\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.datasets import fetch_20newsgroups_vectorized\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.exceptions import ConvergenceWarning\n\nprint(__doc__)\n# Author: Arthur Mensch\n\nwarnings.filterwarnings(\"ignore\", category=ConvergenceWarning,\n module=\"sklearn\")\nt0 = timeit.default_timer()\n\n# We use SAGA solver\nsolver = 'saga'\n\n# Turn down for faster run time\nn_samples = 10000\n\n# Memorized fetch_rcv1 for faster access\nX, y = fetch_20newsgroups_vectorized('all', return_X_y=True)\nX = X[:n_samples]\ny = y[:n_samples]\n\nX_train, X_test, y_train, y_test = train_test_split(X, y,\n random_state=42,\n stratify=y,\n test_size=0.1)\ntrain_samples, n_features = X_train.shape\nn_classes = np.unique(y).shape[0]\n\nprint('Dataset 20newsgroup, train_samples=%i, n_features=%i, n_classes=%i'\n % (train_samples, n_features, n_classes))\n\nmodels = {'ovr': {'name': 'One versus Rest', 'iters': [1, 2, 4]},\n 'multinomial': {'name': 'Multinomial', 'iters': [1, 3, 7]}}\n\nfor model in models:\n # Add initial chance-level values for plotting purpose\n accuracies = [1 / n_classes]\n times = [0]\n densities = [1]\n\n model_params = models[model]\n\n # Small number of epochs for fast runtime\n for this_max_iter in model_params['iters']:\n print('[model=%s, solver=%s] Number of epochs: %s' %\n (model_params['name'], solver, this_max_iter))\n lr = LogisticRegression(solver=solver,\n multi_class=model,\n penalty='l1',\n max_iter=this_max_iter,\n random_state=42,\n )\n t1 = timeit.default_timer()\n lr.fit(X_train, y_train)\n train_time = timeit.default_timer() - t1\n\n y_pred = lr.predict(X_test)\n accuracy = np.sum(y_pred == y_test) / y_test.shape[0]\n density = np.mean(lr.coef_ != 0, axis=1) * 100\n accuracies.append(accuracy)\n densities.append(density)\n times.append(train_time)\n models[model]['times'] = times\n models[model]['densities'] = densities\n models[model]['accuracies'] = accuracies\n print('Test accuracy for model %s: %.4f' % (model, accuracies[-1]))\n print('%% non-zero coefficients for model %s, '\n 'per class:\\n %s' % (model, densities[-1]))\n print('Run time (%i epochs) for model %s:'\n '%.2f' % (model_params['iters'][-1], model, times[-1]))\n\nfig = plt.figure()\nax = fig.add_subplot(111)\n\nfor model in models:\n name = models[model]['name']\n times = models[model]['times']\n accuracies = models[model]['accuracies']\n ax.plot(times, accuracies, marker='o',\n label='Model: %s' % name)\n ax.set_xlabel('Train time (s)')\n ax.set_ylabel('Test accuracy')\nax.legend()\nfig.suptitle('Multinomial vs One-vs-Rest Logistic L1\\n'\n 'Dataset %s' % '20newsgroups')\nfig.tight_layout()\nfig.subplots_adjust(top=0.85)\nrun_time = timeit.default_timer() - t0\nprint('Example run in %.3f s' % run_time)\nplt.show()"
+ (added):
"import timeit\nimport warnings\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.datasets import fetch_20newsgroups_vectorized\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.exceptions import ConvergenceWarning\n\nprint(__doc__)\n# Author: Arthur Mensch\n\nwarnings.filterwarnings(\"ignore\", category=ConvergenceWarning,\n module=\"sklearn\")\nt0 = timeit.default_timer()\n\n# We use SAGA solver\nsolver = 'saga'\n\n# Turn down for faster run time\nn_samples = 10000\n\nX, y = fetch_20newsgroups_vectorized('all', return_X_y=True)\nX = X[:n_samples]\ny = y[:n_samples]\n\nX_train, X_test, y_train, y_test = train_test_split(X, y,\n random_state=42,\n stratify=y,\n test_size=0.1)\ntrain_samples, n_features = X_train.shape\nn_classes = np.unique(y).shape[0]\n\nprint('Dataset 20newsgroup, train_samples=%i, n_features=%i, n_classes=%i'\n % (train_samples, n_features, n_classes))\n\nmodels = {'ovr': {'name': 'One versus Rest', 'iters': [1, 2, 4]},\n 'multinomial': {'name': 'Multinomial', 'iters': [1, 3, 7]}}\n\nfor model in models:\n # Add initial chance-level values for plotting purpose\n accuracies = [1 / n_classes]\n times = [0]\n densities = [1]\n\n model_params = models[model]\n\n # Small number of epochs for fast runtime\n for this_max_iter in model_params['iters']:\n print('[model=%s, solver=%s] Number of epochs: %s' %\n (model_params['name'], solver, this_max_iter))\n lr = LogisticRegression(solver=solver,\n multi_class=model,\n penalty='l1',\n max_iter=this_max_iter,\n random_state=42,\n )\n t1 = timeit.default_timer()\n lr.fit(X_train, y_train)\n train_time = timeit.default_timer() - t1\n\n y_pred = lr.predict(X_test)\n accuracy = np.sum(y_pred == y_test) / y_test.shape[0]\n density = np.mean(lr.coef_ != 0, axis=1) * 100\n accuracies.append(accuracy)\n densities.append(density)\n times.append(train_time)\n models[model]['times'] = times\n models[model]['densities'] = densities\n models[model]['accuracies'] = accuracies\n print('Test accuracy for model %s: %.4f' % (model, accuracies[-1]))\n print('%% non-zero coefficients for model %s, '\n 'per class:\\n %s' % (model, densities[-1]))\n print('Run time (%i epochs) for model %s:'\n '%.2f' % (model_params['iters'][-1], model, times[-1]))\n\nfig = plt.figure()\nax = fig.add_subplot(111)\n\nfor model in models:\n name = models[model]['name']\n times = models[model]['times']\n accuracies = models[model]['accuracies']\n ax.plot(times, accuracies, marker='o',\n label='Model: %s' % name)\n ax.set_xlabel('Train time (s)')\n ax.set_ylabel('Test accuracy')\nax.legend()\nfig.suptitle('Multinomial vs One-vs-Rest Logistic L1\\n'\n 'Dataset %s' % '20newsgroups')\nfig.tight_layout()\nfig.subplots_adjust(top=0.85)\nrun_time = timeit.default_timer() - t0\nprint('Example run in %.3f s' % run_time)\nplt.show()"
 ]
 }
 ],
Binary file not shown.

0.22/_downloads/34da82a8792cf79d06c7598ae139cc1c/plot_permutation_importance_multicollinear.py

Lines changed: 2 additions & 2 deletions
@@ -60,11 +60,11 @@
 fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8))
 ax1.barh(tree_indices,
          clf.feature_importances_[tree_importance_sorted_idx], height=0.7)
-ax1.set_yticklabels(data.feature_names)
+ax1.set_yticklabels(data.feature_names[tree_importance_sorted_idx])
 ax1.set_yticks(tree_indices)
 ax1.set_ylim((0, len(clf.feature_importances_)))
 ax2.boxplot(result.importances[perm_sorted_idx].T, vert=False,
-            labels=data.feature_names)
+            labels=data.feature_names[perm_sorted_idx])
 fig.tight_layout()
 plt.show()
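
The two changes above reorder the y tick labels with the same indices used to sort the plotted importances, so each bar and box is labelled with its own feature. A minimal sketch of the same pattern (not from this commit, using the iris data rather than the example's dataset), assuming scikit-learn >= 0.22 for sklearn.inspection.permutation_importance:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance

data = load_iris()
X, y, feature_names = data.data, data.target, np.array(data.feature_names)
clf = RandomForestClassifier(random_state=0).fit(X, y)

result = permutation_importance(clf, X, y, n_repeats=10, random_state=0)
sorted_idx = result.importances_mean.argsort()

fig, ax = plt.subplots()
ax.boxplot(result.importances[sorted_idx].T, vert=False,
           labels=feature_names[sorted_idx])  # labels reordered to match the boxes
fig.tight_layout()
plt.show()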

0.22/_downloads/50620cb5a6bfe6c3bcb899f31a3348a3/plot_compare_methods.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
- (removed):
"# Author: Jake Vanderplas -- <[email protected]>\n\nprint(__doc__)\n\nfrom time import time\n\nimport matplotlib.pyplot as plt\nfrom mpl_toolkits.mplot3d import Axes3D\nfrom matplotlib.ticker import NullFormatter\n\nfrom sklearn import manifold, datasets\n\n# Next line to silence pyflakes. This import is needed.\nAxes3D\n\nn_points = 1000\nX, color = datasets.make_s_curve(n_points, random_state=0)\nn_neighbors = 10\nn_components = 2\n\nfig = plt.figure(figsize=(15, 8))\nplt.suptitle(\"Manifold Learning with %i points, %i neighbors\"\n % (1000, n_neighbors), fontsize=14)\n\n\nax = fig.add_subplot(251, projection='3d')\nax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral)\nax.view_init(4, -72)\n\nmethods = ['standard', 'ltsa', 'hessian', 'modified']\nlabels = ['LLE', 'LTSA', 'Hessian LLE', 'Modified LLE']\n\nfor i, method in enumerate(methods):\n t0 = time()\n Y = manifold.LocallyLinearEmbedding(n_neighbors, n_components,\n eigen_solver='auto',\n method=method).fit_transform(X)\n t1 = time()\n print(\"%s: %.2g sec\" % (methods[i], t1 - t0))\n\n ax = fig.add_subplot(252 + i)\n plt.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)\n plt.title(\"%s (%.2g sec)\" % (labels[i], t1 - t0))\n ax.xaxis.set_major_formatter(NullFormatter())\n ax.yaxis.set_major_formatter(NullFormatter())\n plt.axis('tight')\n\nt0 = time()\nY = manifold.Isomap(n_neighbors, n_components).fit_transform(X)\nt1 = time()\nprint(\"Isomap: %.2g sec\" % (t1 - t0))\nax = fig.add_subplot(257)\nplt.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)\nplt.title(\"Isomap (%.2g sec)\" % (t1 - t0))\nax.xaxis.set_major_formatter(NullFormatter())\nax.yaxis.set_major_formatter(NullFormatter())\nplt.axis('tight')\n\n\nt0 = time()\nmds = manifold.MDS(n_components, max_iter=100, n_init=1)\nY = mds.fit_transform(X)\nt1 = time()\nprint(\"MDS: %.2g sec\" % (t1 - t0))\nax = fig.add_subplot(258)\nplt.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)\nplt.title(\"MDS (%.2g sec)\" % (t1 - t0))\nax.xaxis.set_major_formatter(NullFormatter())\nax.yaxis.set_major_formatter(NullFormatter())\nplt.axis('tight')\n\n\nt0 = time()\nse = manifold.SpectralEmbedding(n_components=n_components,\n n_neighbors=n_neighbors)\nY = se.fit_transform(X)\nt1 = time()\nprint(\"SpectralEmbedding: %.2g sec\" % (t1 - t0))\nax = fig.add_subplot(259)\nplt.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)\nplt.title(\"SpectralEmbedding (%.2g sec)\" % (t1 - t0))\nax.xaxis.set_major_formatter(NullFormatter())\nax.yaxis.set_major_formatter(NullFormatter())\nplt.axis('tight')\n\nt0 = time()\ntsne = manifold.TSNE(n_components=n_components, init='pca', random_state=0)\nY = tsne.fit_transform(X)\nt1 = time()\nprint(\"t-SNE: %.2g sec\" % (t1 - t0))\nax = fig.add_subplot(2, 5, 10)\nplt.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)\nplt.title(\"t-SNE (%.2g sec)\" % (t1 - t0))\nax.xaxis.set_major_formatter(NullFormatter())\nax.yaxis.set_major_formatter(NullFormatter())\nplt.axis('tight')\n\nplt.show()"
+ (added):
"# Author: Jake Vanderplas -- <[email protected]>\n\nprint(__doc__)\n\nfrom collections import OrderedDict\nfrom functools import partial\nfrom time import time\n\nimport matplotlib.pyplot as plt\nfrom mpl_toolkits.mplot3d import Axes3D\nfrom matplotlib.ticker import NullFormatter\n\nfrom sklearn import manifold, datasets\n\n# Next line to silence pyflakes. This import is needed.\nAxes3D\n\nn_points = 1000\nX, color = datasets.make_s_curve(n_points, random_state=0)\nn_neighbors = 10\nn_components = 2\n\n# Create figure\nfig = plt.figure(figsize=(15, 8))\nfig.suptitle(\"Manifold Learning with %i points, %i neighbors\"\n % (1000, n_neighbors), fontsize=14)\n\n# Add 3d scatter plot\nax = fig.add_subplot(251, projection='3d')\nax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral)\nax.view_init(4, -72)\n\n# Set-up manifold methods\nLLE = partial(manifold.LocallyLinearEmbedding,\n n_neighbors, n_components, eigen_solver='auto')\n\nmethods = OrderedDict()\nmethods['LLE'] = LLE(method='standard')\nmethods['LTSA'] = LLE(method='ltsa')\nmethods['Hessian LLE'] = LLE(method='hessian')\nmethods['Modified LLE'] = LLE(method='modified')\nmethods['Isomap'] = manifold.Isomap(n_neighbors, n_components)\nmethods['MDS'] = manifold.MDS(n_components, max_iter=100, n_init=1)\nmethods['SE'] = manifold.SpectralEmbedding(n_components=n_components,\n n_neighbors=n_neighbors)\nmethods['t-SNE'] = manifold.TSNE(n_components=n_components, init='pca',\n random_state=0)\n\n# Plot results\nfor i, (label, method) in enumerate(methods.items()):\n t0 = time()\n Y = method.fit_transform(X)\n t1 = time()\n print(\"%s: %.2g sec\" % (label, t1 - t0))\n ax = fig.add_subplot(2, 5, 2 + i + (i > 3))\n ax.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)\n ax.set_title(\"%s (%.2g sec)\" % (label, t1 - t0))\n ax.xaxis.set_major_formatter(NullFormatter())\n ax.yaxis.set_major_formatter(NullFormatter())\n ax.axis('tight')\n\nplt.show()"
 ]
 }
 ],

0.22/_downloads/60b2a2c4441794028ceb207b0614746f/plot_roc.py

Lines changed: 1 addition & 1 deletion
@@ -151,7 +151,7 @@
 # .........................................
 # The :func:`sklearn.metrics.roc_auc_score` function can be used for
 # multi-class classification. The multi-class One-vs-One scheme compares every
-# unique pairwise combination of classes. In this section, we calcuate the AUC
+# unique pairwise combination of classes. In this section, we calculate the AUC
 # using the OvR and OvO schemes. We report a macro average, and a
 # prevalence-weighted average.
 y_prob = classifier.predict_proba(X_test)
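
The comment block above describes the multi-class usage of roc_auc_score. A minimal sketch (not part of this commit) of the OvR/OvO schemes and averages it mentions, assuming scikit-learn >= 0.22 where roc_auc_score accepts the multi_class argument:

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
y_prob = LogisticRegression(max_iter=1000).fit(X_train, y_train).predict_proba(X_test)

# One-vs-Rest and One-vs-One, with macro and prevalence-weighted averages
print(roc_auc_score(y_test, y_prob, multi_class='ovr', average='macro'))
print(roc_auc_score(y_test, y_prob, multi_class='ovr', average='weighted'))
print(roc_auc_score(y_test, y_prob, multi_class='ovo', average='macro'))
print(roc_auc_score(y_test, y_prob, multi_class='ovo', average='weighted'))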

0.22/_downloads/6530c9428bacd72600c7516d4bc1b117/plot_t_sne_perplexity.py

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@
 An illustration of t-SNE on the two concentric circles and the S-curve
 datasets for different perplexity values.
 
-We observe a tendency towards clearer shapes as the preplexity value increases.
+We observe a tendency towards clearer shapes as the perplexity value increases.
 
 The size, the distance and the shape of clusters may vary upon initialization,
 perplexity values and does not always convey a meaning.
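
A minimal sketch (not part of this commit) of the perplexity sweep this docstring describes, using one of the example's datasets (two concentric circles) and sklearn.manifold.TSNE:

import matplotlib.pyplot as plt
from sklearn import datasets, manifold

X, y = datasets.make_circles(n_samples=300, factor=0.5, noise=0.05, random_state=0)

perplexities = [5, 30, 50, 100]
fig, axes = plt.subplots(1, len(perplexities), figsize=(16, 4))
for ax, perplexity in zip(axes, perplexities):
    # Larger perplexity -> more neighbours considered -> typically clearer shapes
    Y = manifold.TSNE(n_components=2, init='random', random_state=0,
                      perplexity=perplexity).fit_transform(X)
    ax.scatter(Y[:, 0], Y[:, 1], c=y)
    ax.set_title("Perplexity=%d" % perplexity)
plt.show()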
