Skip to content

Commit 40f5978

Browse files
committed
Pushing the docs to 0.23/ for branch: 0.23.X, commit fd237278e895b42abe8d8d09105cbb82dc2cbba7
1 parent 14bb9dc commit 40f5978

File tree

1,372 files changed

+8317
-9109
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,372 files changed

+8317
-9109
lines changed

0.23/.buildinfo

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# Sphinx build info version 1
22
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3-
config: d7433eaf5230c5568f23548ac6841c3b
3+
config: 08776a34286b30b06d1e26673b1db986
44
tags: 645f666f9bcd5a90fca523b33c5a78b7

0.23/_downloads/28477181ee2a477248e703cf646f97f1/plot_sparse_logistic_regression_20newsgroups.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"import timeit\nimport warnings\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.datasets import fetch_20newsgroups_vectorized\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.exceptions import ConvergenceWarning\n\nprint(__doc__)\n# Author: Arthur Mensch\n\nwarnings.filterwarnings(\"ignore\", category=ConvergenceWarning,\n module=\"sklearn\")\nt0 = timeit.default_timer()\n\n# We use SAGA solver\nsolver = 'saga'\n\n# Turn down for faster run time\nn_samples = 10000\n\nX, y = fetch_20newsgroups_vectorized('all', return_X_y=True)\nX = X[:n_samples]\ny = y[:n_samples]\n\nX_train, X_test, y_train, y_test = train_test_split(X, y,\n random_state=42,\n stratify=y,\n test_size=0.1)\ntrain_samples, n_features = X_train.shape\nn_classes = np.unique(y).shape[0]\n\nprint('Dataset 20newsgroup, train_samples=%i, n_features=%i, n_classes=%i'\n % (train_samples, n_features, n_classes))\n\nmodels = {'ovr': {'name': 'One versus Rest', 'iters': [1, 2, 4]},\n 'multinomial': {'name': 'Multinomial', 'iters': [1, 3, 7]}}\n\nfor model in models:\n # Add initial chance-level values for plotting purpose\n accuracies = [1 / n_classes]\n times = [0]\n densities = [1]\n\n model_params = models[model]\n\n # Small number of epochs for fast runtime\n for this_max_iter in model_params['iters']:\n print('[model=%s, solver=%s] Number of epochs: %s' %\n (model_params['name'], solver, this_max_iter))\n lr = LogisticRegression(solver=solver,\n multi_class=model,\n penalty='l1',\n max_iter=this_max_iter,\n random_state=42,\n )\n t1 = timeit.default_timer()\n lr.fit(X_train, y_train)\n train_time = timeit.default_timer() - t1\n\n y_pred = lr.predict(X_test)\n accuracy = np.sum(y_pred == y_test) / y_test.shape[0]\n density = np.mean(lr.coef_ != 0, axis=1) * 100\n accuracies.append(accuracy)\n densities.append(density)\n times.append(train_time)\n models[model]['times'] = times\n models[model]['densities'] = 
densities\n models[model]['accuracies'] = accuracies\n print('Test accuracy for model %s: %.4f' % (model, accuracies[-1]))\n print('%% non-zero coefficients for model %s, '\n 'per class:\\n %s' % (model, densities[-1]))\n print('Run time (%i epochs) for model %s:'\n '%.2f' % (model_params['iters'][-1], model, times[-1]))\n\nfig = plt.figure()\nax = fig.add_subplot(111)\n\nfor model in models:\n name = models[model]['name']\n times = models[model]['times']\n accuracies = models[model]['accuracies']\n ax.plot(times, accuracies, marker='o',\n label='Model: %s' % name)\n ax.set_xlabel('Train time (s)')\n ax.set_ylabel('Test accuracy')\nax.legend()\nfig.suptitle('Multinomial vs One-vs-Rest Logistic L1\\n'\n 'Dataset %s' % '20newsgroups')\nfig.tight_layout()\nfig.subplots_adjust(top=0.85)\nrun_time = timeit.default_timer() - t0\nprint('Example run in %.3f s' % run_time)\nplt.show()"
29+
"import timeit\nimport warnings\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.datasets import fetch_20newsgroups_vectorized\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.exceptions import ConvergenceWarning\n\nprint(__doc__)\n# Author: Arthur Mensch\n\nwarnings.filterwarnings(\"ignore\", category=ConvergenceWarning,\n module=\"sklearn\")\nt0 = timeit.default_timer()\n\n# We use SAGA solver\nsolver = 'saga'\n\n# Turn down for faster run time\nn_samples = 10000\n\nX, y = fetch_20newsgroups_vectorized(subset='all', return_X_y=True)\nX = X[:n_samples]\ny = y[:n_samples]\n\nX_train, X_test, y_train, y_test = train_test_split(X, y,\n random_state=42,\n stratify=y,\n test_size=0.1)\ntrain_samples, n_features = X_train.shape\nn_classes = np.unique(y).shape[0]\n\nprint('Dataset 20newsgroup, train_samples=%i, n_features=%i, n_classes=%i'\n % (train_samples, n_features, n_classes))\n\nmodels = {'ovr': {'name': 'One versus Rest', 'iters': [1, 2, 4]},\n 'multinomial': {'name': 'Multinomial', 'iters': [1, 3, 7]}}\n\nfor model in models:\n # Add initial chance-level values for plotting purpose\n accuracies = [1 / n_classes]\n times = [0]\n densities = [1]\n\n model_params = models[model]\n\n # Small number of epochs for fast runtime\n for this_max_iter in model_params['iters']:\n print('[model=%s, solver=%s] Number of epochs: %s' %\n (model_params['name'], solver, this_max_iter))\n lr = LogisticRegression(solver=solver,\n multi_class=model,\n penalty='l1',\n max_iter=this_max_iter,\n random_state=42,\n )\n t1 = timeit.default_timer()\n lr.fit(X_train, y_train)\n train_time = timeit.default_timer() - t1\n\n y_pred = lr.predict(X_test)\n accuracy = np.sum(y_pred == y_test) / y_test.shape[0]\n density = np.mean(lr.coef_ != 0, axis=1) * 100\n accuracies.append(accuracy)\n densities.append(density)\n times.append(train_time)\n models[model]['times'] = times\n models[model]['densities'] 
= densities\n models[model]['accuracies'] = accuracies\n print('Test accuracy for model %s: %.4f' % (model, accuracies[-1]))\n print('%% non-zero coefficients for model %s, '\n 'per class:\\n %s' % (model, densities[-1]))\n print('Run time (%i epochs) for model %s:'\n '%.2f' % (model_params['iters'][-1], model, times[-1]))\n\nfig = plt.figure()\nax = fig.add_subplot(111)\n\nfor model in models:\n name = models[model]['name']\n times = models[model]['times']\n accuracies = models[model]['accuracies']\n ax.plot(times, accuracies, marker='o',\n label='Model: %s' % name)\n ax.set_xlabel('Train time (s)')\n ax.set_ylabel('Test accuracy')\nax.legend()\nfig.suptitle('Multinomial vs One-vs-Rest Logistic L1\\n'\n 'Dataset %s' % '20newsgroups')\nfig.tight_layout()\nfig.subplots_adjust(top=0.85)\nrun_time = timeit.default_timer() - t0\nprint('Example run in %.3f s' % run_time)\nplt.show()"
3030
]
3131
}
3232
],

0.23/_downloads/2f467d575019742cbcf00d6b53858c0d/plot_lle_digits.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

0.23/_downloads/33bc25666e895f6720c86dffe127d651/plot_randomized_search.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
"cell_type": "markdown",
1616
"metadata": {},
1717
"source": [
18-
"\n# Comparing randomized search and grid search for hyperparameter estimation\n\n\nCompare randomized search and grid search for optimizing hyperparameters of a\nrandom forest.\nAll parameters that influence the learning are searched simultaneously\n(except for the number of estimators, which poses a time / quality tradeoff).\n\nThe randomized search and the grid search explore exactly the same space of\nparameters. The result in parameter settings is quite similar, while the run\ntime for randomized search is drastically lower.\n\nThe performance may be slightly worse for the randomized search, and is likely\ndue to a noise effect and would not carry over to a held-out test set.\n\nNote that in practice, one would not search over this many different parameters\nsimultaneously using grid search, but pick only the ones deemed most important.\n"
18+
"\n# Comparing randomized search and grid search for hyperparameter estimation\n\n\nCompare randomized search and grid search for optimizing hyperparameters of a\nlinear SVM with SGD training.\nAll parameters that influence the learning are searched simultaneously\n(except for the number of estimators, which poses a time / quality tradeoff).\n\nThe randomized search and the grid search explore exactly the same space of\nparameters. The result in parameter settings is quite similar, while the run\ntime for randomized search is drastically lower.\n\nThe performance may be slightly worse for the randomized search, and is likely\ndue to a noise effect and would not carry over to a held-out test set.\n\nNote that in practice, one would not search over this many different parameters\nsimultaneously using grid search, but pick only the ones deemed most important.\n"
1919
]
2020
},
2121
{
Binary file not shown.

0.23/_downloads/38082c4eb06099bc72a5e5fbfff35118/plot_calibration.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"print(__doc__)\n\n# Author: Mathieu Blondel <[email protected]>\n# Alexandre Gramfort <[email protected]>\n# Balazs Kegl <[email protected]>\n# Jan Hendrik Metzen <[email protected]>\n# License: BSD Style.\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib import cm\n\nfrom sklearn.datasets import make_blobs\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.metrics import brier_score_loss\nfrom sklearn.calibration import CalibratedClassifierCV\nfrom sklearn.model_selection import train_test_split\n\n\nn_samples = 50000\nn_bins = 3 # use 3 bins for calibration_curve as we have 3 clusters here\n\n# Generate 3 blobs with 2 classes where the second blob contains\n# half positive samples and half negative samples. Probability in this\n# blob is therefore 0.5.\ncenters = [(-5, -5), (0, 0), (5, 5)]\nX, y = make_blobs(n_samples=n_samples, centers=centers, shuffle=False,\n random_state=42)\n\ny[:n_samples // 2] = 0\ny[n_samples // 2:] = 1\nsample_weight = np.random.RandomState(42).rand(y.shape[0])\n\n# split train, test for calibration\nX_train, X_test, y_train, y_test, sw_train, sw_test = \\\n train_test_split(X, y, sample_weight, test_size=0.9, random_state=42)\n\n# Gaussian Naive-Bayes with no calibration\nclf = GaussianNB()\nclf.fit(X_train, y_train) # GaussianNB itself does not support sample-weights\nprob_pos_clf = clf.predict_proba(X_test)[:, 1]\n\n# Gaussian Naive-Bayes with isotonic calibration\nclf_isotonic = CalibratedClassifierCV(clf, cv=2, method='isotonic')\nclf_isotonic.fit(X_train, y_train, sw_train)\nprob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1]\n\n# Gaussian Naive-Bayes with sigmoid calibration\nclf_sigmoid = CalibratedClassifierCV(clf, cv=2, method='sigmoid')\nclf_sigmoid.fit(X_train, y_train, sw_train)\nprob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1]\n\nprint(\"Brier scores: (the smaller the better)\")\n\nclf_score = brier_score_loss(y_test, prob_pos_clf, sw_test)\nprint(\"No calibration: %1.3f\" % 
clf_score)\n\nclf_isotonic_score = brier_score_loss(y_test, prob_pos_isotonic, sw_test)\nprint(\"With isotonic calibration: %1.3f\" % clf_isotonic_score)\n\nclf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid, sw_test)\nprint(\"With sigmoid calibration: %1.3f\" % clf_sigmoid_score)\n\n# #############################################################################\n# Plot the data and the predicted probabilities\nplt.figure()\ny_unique = np.unique(y)\ncolors = cm.rainbow(np.linspace(0.0, 1.0, y_unique.size))\nfor this_y, color in zip(y_unique, colors):\n this_X = X_train[y_train == this_y]\n this_sw = sw_train[y_train == this_y]\n plt.scatter(this_X[:, 0], this_X[:, 1], s=this_sw * 50,\n c=color[np.newaxis, :],\n alpha=0.5, edgecolor='k',\n label=\"Class %s\" % this_y)\nplt.legend(loc=\"best\")\nplt.title(\"Data\")\n\nplt.figure()\norder = np.lexsort((prob_pos_clf, ))\nplt.plot(prob_pos_clf[order], 'r', label='No calibration (%1.3f)' % clf_score)\nplt.plot(prob_pos_isotonic[order], 'g', linewidth=3,\n label='Isotonic calibration (%1.3f)' % clf_isotonic_score)\nplt.plot(prob_pos_sigmoid[order], 'b', linewidth=3,\n label='Sigmoid calibration (%1.3f)' % clf_sigmoid_score)\nplt.plot(np.linspace(0, y_test.size, 51)[1::2],\n y_test[order].reshape(25, -1).mean(1),\n 'k', linewidth=3, label=r'Empirical')\nplt.ylim([-0.05, 1.05])\nplt.xlabel(\"Instances sorted according to predicted probability \"\n \"(uncalibrated GNB)\")\nplt.ylabel(\"P(y=1)\")\nplt.legend(loc=\"upper left\")\nplt.title(\"Gaussian naive Bayes probabilities\")\n\nplt.show()"
29+
"print(__doc__)\n\n# Author: Mathieu Blondel <[email protected]>\n# Alexandre Gramfort <[email protected]>\n# Balazs Kegl <[email protected]>\n# Jan Hendrik Metzen <[email protected]>\n# License: BSD Style.\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib import cm\n\nfrom sklearn.datasets import make_blobs\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.metrics import brier_score_loss\nfrom sklearn.calibration import CalibratedClassifierCV\nfrom sklearn.model_selection import train_test_split\n\n\nn_samples = 50000\nn_bins = 3 # use 3 bins for calibration_curve as we have 3 clusters here\n\n# Generate 3 blobs with 2 classes where the second blob contains\n# half positive samples and half negative samples. Probability in this\n# blob is therefore 0.5.\ncenters = [(-5, -5), (0, 0), (5, 5)]\nX, y = make_blobs(n_samples=n_samples, centers=centers, shuffle=False,\n random_state=42)\n\ny[:n_samples // 2] = 0\ny[n_samples // 2:] = 1\nsample_weight = np.random.RandomState(42).rand(y.shape[0])\n\n# split train, test for calibration\nX_train, X_test, y_train, y_test, sw_train, sw_test = \\\n train_test_split(X, y, sample_weight, test_size=0.9, random_state=42)\n\n# Gaussian Naive-Bayes with no calibration\nclf = GaussianNB()\nclf.fit(X_train, y_train) # GaussianNB itself does not support sample-weights\nprob_pos_clf = clf.predict_proba(X_test)[:, 1]\n\n# Gaussian Naive-Bayes with isotonic calibration\nclf_isotonic = CalibratedClassifierCV(clf, cv=2, method='isotonic')\nclf_isotonic.fit(X_train, y_train, sample_weight=sw_train)\nprob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1]\n\n# Gaussian Naive-Bayes with sigmoid calibration\nclf_sigmoid = CalibratedClassifierCV(clf, cv=2, method='sigmoid')\nclf_sigmoid.fit(X_train, y_train, sample_weight=sw_train)\nprob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1]\n\nprint(\"Brier scores: (the smaller the better)\")\n\nclf_score = brier_score_loss(y_test, prob_pos_clf, 
sample_weight=sw_test)\nprint(\"No calibration: %1.3f\" % clf_score)\n\nclf_isotonic_score = brier_score_loss(y_test, prob_pos_isotonic,\n sample_weight=sw_test)\nprint(\"With isotonic calibration: %1.3f\" % clf_isotonic_score)\n\nclf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid,\n sample_weight=sw_test)\nprint(\"With sigmoid calibration: %1.3f\" % clf_sigmoid_score)\n\n# #############################################################################\n# Plot the data and the predicted probabilities\nplt.figure()\ny_unique = np.unique(y)\ncolors = cm.rainbow(np.linspace(0.0, 1.0, y_unique.size))\nfor this_y, color in zip(y_unique, colors):\n this_X = X_train[y_train == this_y]\n this_sw = sw_train[y_train == this_y]\n plt.scatter(this_X[:, 0], this_X[:, 1], s=this_sw * 50,\n c=color[np.newaxis, :],\n alpha=0.5, edgecolor='k',\n label=\"Class %s\" % this_y)\nplt.legend(loc=\"best\")\nplt.title(\"Data\")\n\nplt.figure()\norder = np.lexsort((prob_pos_clf, ))\nplt.plot(prob_pos_clf[order], 'r', label='No calibration (%1.3f)' % clf_score)\nplt.plot(prob_pos_isotonic[order], 'g', linewidth=3,\n label='Isotonic calibration (%1.3f)' % clf_isotonic_score)\nplt.plot(prob_pos_sigmoid[order], 'b', linewidth=3,\n label='Sigmoid calibration (%1.3f)' % clf_sigmoid_score)\nplt.plot(np.linspace(0, y_test.size, 51)[1::2],\n y_test[order].reshape(25, -1).mean(1),\n 'k', linewidth=3, label=r'Empirical')\nplt.ylim([-0.05, 1.05])\nplt.xlabel(\"Instances sorted according to predicted probability \"\n \"(uncalibrated GNB)\")\nplt.ylabel(\"P(y=1)\")\nplt.legend(loc=\"upper left\")\nplt.title(\"Gaussian naive Bayes probabilities\")\n\nplt.show()"
3030
]
3131
}
3232
],

0 commit comments

Comments
 (0)