
Commit 0f4c21f

Pushing the docs to dev/ for branch: master, commit fc7d6e698668b983cee2867b1bf3c65f1384e4cf
1 parent 2a02da1 commit 0f4c21f


1,132 files changed: +4166 / -3981 lines

Two binary files changed (2.98 KB and 2.94 KB); binary contents not shown.

dev/_downloads/plot_classification_probability.ipynb

Lines changed: 2 additions & 2 deletions
@@ -15,7 +15,7 @@
  "cell_type": "markdown",
  "metadata": {},
  "source": [
-
"\n# Plot classification probability\n\n\nPlot the classification probability for different classifiers. We use a 3\nclass dataset, and we classify it with a Support Vector classifier, L1\nand L2 penalized logistic regression with either a One-Vs-Rest or multinomial\nsetting, and Gaussian process classification.\n\nThe logistic regression is not a multiclass classifier out of the box. As\na result it can identify only the first class.\n\n"
+
"\n# Plot classification probability\n\n\nPlot the classification probability for different classifiers. We use a 3 class\ndataset, and we classify it with a Support Vector classifier, L1 and L2\npenalized logistic regression with either a One-Vs-Rest or multinomial setting,\nand Gaussian process classification.\n\nLinear SVC is not a probabilistic classifier by default but it has a built-in\ncalibration option enabled in this example (`probability=True`).\n\nThe logistic regression with One-Vs-Rest is not a multiclass classifier out of\nthe box. As a result it has more trouble in separating class 2 and 3 than the\nother estimators.\n\n"
  ]
 },
 {
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-
"print(__doc__)\n\n# Author: Alexandre Gramfort <[email protected]>\n# License: BSD 3 clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.svm import SVC\nfrom sklearn.gaussian_process import GaussianProcessClassifier\nfrom sklearn.gaussian_process.kernels import RBF\nfrom sklearn import datasets\n\niris = datasets.load_iris()\nX = iris.data[:, 0:2] # we only take the first two features for visualization\ny = iris.target\n\nn_features = X.shape[1]\n\nC = 1.0\nkernel = 1.0 * RBF([1.0, 1.0]) # for GPC\n\n# Create different classifiers. The logistic regression cannot do\n# multiclass out of the box.\nclassifiers = {'L1 logistic': LogisticRegression(C=C, penalty='l1'),\n 'L2 logistic (OvR)': LogisticRegression(C=C, penalty='l2'),\n 'Linear SVC': SVC(kernel='linear', C=C, probability=True,\n random_state=0),\n 'L2 logistic (Multinomial)': LogisticRegression(\n C=C, solver='lbfgs', multi_class='multinomial'),\n 'GPC': GaussianProcessClassifier(kernel)\n }\n\nn_classifiers = len(classifiers)\n\nplt.figure(figsize=(3 * 2, n_classifiers * 2))\nplt.subplots_adjust(bottom=.2, top=.95)\n\nxx = np.linspace(3, 9, 100)\nyy = np.linspace(1, 5, 100).T\nxx, yy = np.meshgrid(xx, yy)\nXfull = np.c_[xx.ravel(), yy.ravel()]\n\nfor index, (name, classifier) in enumerate(classifiers.items()):\n classifier.fit(X, y)\n\n y_pred = classifier.predict(X)\n classif_rate = np.mean(y_pred.ravel() == y.ravel()) * 100\n print(\"classif_rate for %s : %f \" % (name, classif_rate))\n\n # View probabilities=\n probas = classifier.predict_proba(Xfull)\n n_classes = np.unique(y_pred).size\n for k in range(n_classes):\n plt.subplot(n_classifiers, n_classes, index * n_classes + k + 1)\n plt.title(\"Class %d\" % k)\n if k == 0:\n plt.ylabel(name)\n imshow_handle = plt.imshow(probas[:, k].reshape((100, 100)),\n extent=(3, 9, 1, 5), origin='lower')\n plt.xticks(())\n plt.yticks(())\n idx = (y_pred == k)\n if idx.any():\n plt.scatter(X[idx, 0], X[idx, 1], marker='o', c='w', edgecolor='k')\n\nax = plt.axes([0.15, 0.04, 0.7, 0.05])\nplt.title(\"Probability\")\nplt.colorbar(imshow_handle, cax=ax, orientation='horizontal')\n\nplt.show()"
+
"print(__doc__)\n\n# Author: Alexandre Gramfort <[email protected]>\n# License: BSD 3 clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.svm import SVC\nfrom sklearn.gaussian_process import GaussianProcessClassifier\nfrom sklearn.gaussian_process.kernels import RBF\nfrom sklearn import datasets\n\niris = datasets.load_iris()\nX = iris.data[:, 0:2] # we only take the first two features for visualization\ny = iris.target\n\nn_features = X.shape[1]\n\nC = 10\nkernel = 1.0 * RBF([1.0, 1.0]) # for GPC\n\n# Create different classifiers.\nclassifiers = {\n 'L1 logistic': LogisticRegression(C=C, penalty='l1',\n solver='saga',\n multi_class='multinomial',\n max_iter=10000),\n 'L2 logistic (Multinomial)': LogisticRegression(C=C, penalty='l2',\n solver='saga',\n multi_class='multinomial',\n max_iter=10000),\n 'L2 logistic (OvR)': LogisticRegression(C=C, penalty='l2',\n solver='saga',\n multi_class='ovr',\n max_iter=10000),\n 'Linear SVC': SVC(kernel='linear', C=C, probability=True,\n random_state=0),\n 'GPC': GaussianProcessClassifier(kernel)\n}\n\nn_classifiers = len(classifiers)\n\nplt.figure(figsize=(3 * 2, n_classifiers * 2))\nplt.subplots_adjust(bottom=.2, top=.95)\n\nxx = np.linspace(3, 9, 100)\nyy = np.linspace(1, 5, 100).T\nxx, yy = np.meshgrid(xx, yy)\nXfull = np.c_[xx.ravel(), yy.ravel()]\n\nfor index, (name, classifier) in enumerate(classifiers.items()):\n classifier.fit(X, y)\n\n y_pred = classifier.predict(X)\n accuracy = accuracy_score(y, y_pred)\n print(\"Accuracy (train) for %s: %0.1f%% \" % (name, accuracy * 100))\n\n # View probabilities:\n probas = classifier.predict_proba(Xfull)\n n_classes = np.unique(y_pred).size\n for k in range(n_classes):\n plt.subplot(n_classifiers, n_classes, index * n_classes + k + 1)\n plt.title(\"Class %d\" % k)\n if k == 0:\n plt.ylabel(name)\n imshow_handle = plt.imshow(probas[:, k].reshape((100, 100)),\n extent=(3, 9, 1, 5), origin='lower')\n plt.xticks(())\n plt.yticks(())\n idx = (y_pred == k)\n if idx.any():\n plt.scatter(X[idx, 0], X[idx, 1], marker='o', c='w', edgecolor='k')\n\nax = plt.axes([0.15, 0.04, 0.7, 0.05])\nplt.title(\"Probability\")\nplt.colorbar(imshow_handle, cax=ax, orientation='horizontal')\n\nplt.show()"
  ]
 }
],
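
The updated notebook text points out that Linear SVC is not probabilistic by default and only yields probabilities because `probability=True` turns on built-in calibration. A minimal sketch of that behaviour (illustrative, not part of the diff above; it reuses the same two iris features as the example):

# Sketch: SVC exposes predict_proba only when probability=True, which fits an
# internal cross-validated calibration on top of the decision-function margins.
from sklearn import datasets
from sklearn.svm import SVC

iris = datasets.load_iris()
X, y = iris.data[:, :2], iris.target

svc_proba = SVC(kernel='linear', C=10, probability=True, random_state=0).fit(X, y)
print(svc_proba.predict_proba(X[:3]))       # calibrated class probabilities

svc_plain = SVC(kernel='linear', C=10).fit(X, y)
print(hasattr(svc_plain, 'predict_proba'))  # False: no probabilities without calibration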

dev/_downloads/plot_classification_probability.py

Lines changed: 33 additions & 20 deletions
@@ -3,13 +3,17 @@
 Plot classification probability
 ===============================

-Plot the classification probability for different classifiers. We use a 3
-class dataset, and we classify it with a Support Vector classifier, L1
-and L2 penalized logistic regression with either a One-Vs-Rest or multinomial
-setting, and Gaussian process classification.
+Plot the classification probability for different classifiers. We use a 3 class
+dataset, and we classify it with a Support Vector classifier, L1 and L2
+penalized logistic regression with either a One-Vs-Rest or multinomial setting,
+and Gaussian process classification.

-The logistic regression is not a multiclass classifier out of the box. As
-a result it can identify only the first class.
+Linear SVC is not a probabilistic classifier by default but it has a built-in
+calibration option enabled in this example (`probability=True`).
+
+The logistic regression with One-Vs-Rest is not a multiclass classifier out of
+the box. As a result it has more trouble in separating class 2 and 3 than the
+other estimators.
 """
 print(__doc__)

@@ -19,6 +23,7 @@ class dataset, and we classify it with a Support Vector classifier, L1
 import matplotlib.pyplot as plt
 import numpy as np

+from sklearn.metrics import accuracy_score
 from sklearn.linear_model import LogisticRegression
 from sklearn.svm import SVC
 from sklearn.gaussian_process import GaussianProcessClassifier
@@ -31,19 +36,27 @@ class dataset, and we classify it with a Support Vector classifier, L1

 n_features = X.shape[1]

-C = 1.0
+C = 10
 kernel = 1.0 * RBF([1.0, 1.0])  # for GPC

-# Create different classifiers. The logistic regression cannot do
-# multiclass out of the box.
-classifiers = {'L1 logistic': LogisticRegression(C=C, penalty='l1'),
-               'L2 logistic (OvR)': LogisticRegression(C=C, penalty='l2'),
-               'Linear SVC': SVC(kernel='linear', C=C, probability=True,
-                                 random_state=0),
-               'L2 logistic (Multinomial)': LogisticRegression(
-                   C=C, solver='lbfgs', multi_class='multinomial'),
-               'GPC': GaussianProcessClassifier(kernel)
-               }
+# Create different classifiers.
+classifiers = {
+    'L1 logistic': LogisticRegression(C=C, penalty='l1',
+                                      solver='saga',
+                                      multi_class='multinomial',
+                                      max_iter=10000),
+    'L2 logistic (Multinomial)': LogisticRegression(C=C, penalty='l2',
+                                                    solver='saga',
+                                                    multi_class='multinomial',
+                                                    max_iter=10000),
+    'L2 logistic (OvR)': LogisticRegression(C=C, penalty='l2',
+                                            solver='saga',
+                                            multi_class='ovr',
+                                            max_iter=10000),
+    'Linear SVC': SVC(kernel='linear', C=C, probability=True,
+                      random_state=0),
+    'GPC': GaussianProcessClassifier(kernel)
+}

 n_classifiers = len(classifiers)

@@ -59,10 +72,10 @@ class dataset, and we classify it with a Support Vector classifier, L1
     classifier.fit(X, y)

     y_pred = classifier.predict(X)
-    classif_rate = np.mean(y_pred.ravel() == y.ravel()) * 100
-    print("classif_rate for %s : %f " % (name, classif_rate))
+    accuracy = accuracy_score(y, y_pred)
+    print("Accuracy (train) for %s: %0.1f%% " % (name, accuracy * 100))

-    # View probabilities=
+    # View probabilities:
     probas = classifier.predict_proba(Xfull)
     n_classes = np.unique(y_pred).size
     for k in range(n_classes):
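
For context on the docstring change above (One-Vs-Rest having more trouble separating classes 2 and 3), here is a small sketch contrasting the two `multi_class` settings on the same two iris features. It is illustrative only; the exact numbers printed are not part of the commit:

# Sketch: 'ovr' fits one binary model per class and renormalises the resulting
# probabilities, while 'multinomial' fits a single joint softmax model.
from sklearn import datasets
from sklearn.linear_model import LogisticRegression

iris = datasets.load_iris()
X, y = iris.data[:, :2], iris.target

ovr = LogisticRegression(C=10, solver='saga', multi_class='ovr',
                         max_iter=10000).fit(X, y)
multinomial = LogisticRegression(C=10, solver='saga', multi_class='multinomial',
                                 max_iter=10000).fit(X, y)

for name, clf in [('OvR', ovr), ('Multinomial', multinomial)]:
    print(name, 'train accuracy: %.3f' % clf.score(X, y))
    print(name, 'probabilities for one sample:', clf.predict_proba(X[:1]).round(3))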

dev/_downloads/plot_classifier_chain_yeast.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-
"# Author: Adam Kleczewski\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import fetch_openml\nfrom sklearn.multioutput import ClassifierChain\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.multiclass import OneVsRestClassifier\nfrom sklearn.metrics import jaccard_similarity_score\nfrom sklearn.linear_model import LogisticRegression\n\nprint(__doc__)\n\n# Load a multi-label dataset from https://www.openml.org/d/40597\nX, Y = fetch_openml('yeast', version=4, return_X_y=True)\nY = Y == 'TRUE'\nX_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.2,\n random_state=0)\n\n# Fit an independent logistic regression model for each class using the\n# OneVsRestClassifier wrapper.\novr = OneVsRestClassifier(LogisticRegression())\novr.fit(X_train, Y_train)\nY_pred_ovr = ovr.predict(X_test)\novr_jaccard_score = jaccard_similarity_score(Y_test, Y_pred_ovr)\n\n# Fit an ensemble of logistic regression classifier chains and take the\n# take the average prediction of all the chains.\nchains = [ClassifierChain(LogisticRegression(), order='random', random_state=i)\n for i in range(10)]\nfor chain in chains:\n chain.fit(X_train, Y_train)\n\nY_pred_chains = np.array([chain.predict(X_test) for chain in\n chains])\nchain_jaccard_scores = [jaccard_similarity_score(Y_test, Y_pred_chain >= .5)\n for Y_pred_chain in Y_pred_chains]\n\nY_pred_ensemble = Y_pred_chains.mean(axis=0)\nensemble_jaccard_score = jaccard_similarity_score(Y_test,\n Y_pred_ensemble >= .5)\n\nmodel_scores = [ovr_jaccard_score] + chain_jaccard_scores\nmodel_scores.append(ensemble_jaccard_score)\n\nmodel_names = ('Independent',\n 'Chain 1',\n 'Chain 2',\n 'Chain 3',\n 'Chain 4',\n 'Chain 5',\n 'Chain 6',\n 'Chain 7',\n 'Chain 8',\n 'Chain 9',\n 'Chain 10',\n 'Ensemble')\n\nx_pos = np.arange(len(model_names))\n\n# Plot the Jaccard similarity scores for the independent model, each of the\n# chains, and the ensemble (note that the vertical axis on this plot does\n# not begin at 0).\n\nfig, ax = plt.subplots(figsize=(7, 4))\nax.grid(True)\nax.set_title('Classifier Chain Ensemble Performance Comparison')\nax.set_xticks(x_pos)\nax.set_xticklabels(model_names, rotation='vertical')\nax.set_ylabel('Jaccard Similarity Score')\nax.set_ylim([min(model_scores) * .9, max(model_scores) * 1.1])\ncolors = ['r'] + ['b'] * len(chain_jaccard_scores) + ['g']\nax.bar(x_pos, model_scores, alpha=0.5, color=colors)\nplt.tight_layout()\nplt.show()"
+
"# Author: Adam Kleczewski\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import fetch_openml\nfrom sklearn.multioutput import ClassifierChain\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.multiclass import OneVsRestClassifier\nfrom sklearn.metrics import jaccard_similarity_score\nfrom sklearn.linear_model import LogisticRegression\n\nprint(__doc__)\n\n# Load a multi-label dataset from https://www.openml.org/d/40597\nX, Y = fetch_openml('yeast', version=4, return_X_y=True)\nY = Y == 'TRUE'\nX_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.2,\n random_state=0)\n\n# Fit an independent logistic regression model for each class using the\n# OneVsRestClassifier wrapper.\nbase_lr = LogisticRegression(solver='lbfgs')\novr = OneVsRestClassifier(base_lr)\novr.fit(X_train, Y_train)\nY_pred_ovr = ovr.predict(X_test)\novr_jaccard_score = jaccard_similarity_score(Y_test, Y_pred_ovr)\n\n# Fit an ensemble of logistic regression classifier chains and take the\n# take the average prediction of all the chains.\nchains = [ClassifierChain(base_lr, order='random', random_state=i)\n for i in range(10)]\nfor chain in chains:\n chain.fit(X_train, Y_train)\n\nY_pred_chains = np.array([chain.predict(X_test) for chain in\n chains])\nchain_jaccard_scores = [jaccard_similarity_score(Y_test, Y_pred_chain >= .5)\n for Y_pred_chain in Y_pred_chains]\n\nY_pred_ensemble = Y_pred_chains.mean(axis=0)\nensemble_jaccard_score = jaccard_similarity_score(Y_test,\n Y_pred_ensemble >= .5)\n\nmodel_scores = [ovr_jaccard_score] + chain_jaccard_scores\nmodel_scores.append(ensemble_jaccard_score)\n\nmodel_names = ('Independent',\n 'Chain 1',\n 'Chain 2',\n 'Chain 3',\n 'Chain 4',\n 'Chain 5',\n 'Chain 6',\n 'Chain 7',\n 'Chain 8',\n 'Chain 9',\n 'Chain 10',\n 'Ensemble')\n\nx_pos = np.arange(len(model_names))\n\n# Plot the Jaccard similarity scores for the independent model, each of the\n# chains, and the ensemble (note that the vertical axis on this plot does\n# not begin at 0).\n\nfig, ax = plt.subplots(figsize=(7, 4))\nax.grid(True)\nax.set_title('Classifier Chain Ensemble Performance Comparison')\nax.set_xticks(x_pos)\nax.set_xticklabels(model_names, rotation='vertical')\nax.set_ylabel('Jaccard Similarity Score')\nax.set_ylim([min(model_scores) * .9, max(model_scores) * 1.1])\ncolors = ['r'] + ['b'] * len(chain_jaccard_scores) + ['g']\nax.bar(x_pos, model_scores, alpha=0.5, color=colors)\nplt.tight_layout()\nplt.show()"
  ]
 }
],
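
The metric used throughout this example, `jaccard_similarity_score`, is (for multi-label indicator targets) the per-sample intersection-over-union of the predicted and true label sets, averaged over samples. A small hand-computed sketch with made-up labels; the helper below is illustrative, not the library implementation:

import numpy as np

# Two samples, three labels (hypothetical data).
Y_true = np.array([[1, 1, 0],
                   [0, 1, 1]])
Y_pred = np.array([[1, 0, 0],
                   [0, 1, 1]])

def multilabel_jaccard(yt, yp):
    # |intersection| / |union| per sample, then averaged (empty union counted as 1).
    inter = np.logical_and(yt, yp).sum(axis=1)
    union = np.logical_or(yt, yp).sum(axis=1)
    per_sample = np.where(union == 0, 1.0, inter / np.maximum(union, 1))
    return per_sample.mean()

print(multilabel_jaccard(Y_true, Y_pred))  # 0.75: (1/2 + 2/2) / 2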

dev/_downloads/plot_classifier_chain_yeast.py

Lines changed: 3 additions & 2 deletions
@@ -54,14 +54,15 @@

 # Fit an independent logistic regression model for each class using the
 # OneVsRestClassifier wrapper.
-ovr = OneVsRestClassifier(LogisticRegression())
+base_lr = LogisticRegression(solver='lbfgs')
+ovr = OneVsRestClassifier(base_lr)
 ovr.fit(X_train, Y_train)
 Y_pred_ovr = ovr.predict(X_test)
 ovr_jaccard_score = jaccard_similarity_score(Y_test, Y_pred_ovr)

 # Fit an ensemble of logistic regression classifier chains and take the
 # take the average prediction of all the chains.
-chains = [ClassifierChain(LogisticRegression(), order='random', random_state=i)
+chains = [ClassifierChain(base_lr, order='random', random_state=i)
           for i in range(10)]
 for chain in chains:
     chain.fit(X_train, Y_train)
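
A note on the refactor above: passing the same `base_lr` instance to both `OneVsRestClassifier` and every `ClassifierChain` is safe because scikit-learn meta-estimators clone their base estimator at fit time, so the shared object is only a parameter template. A small sketch with synthetic multi-label data (hypothetical, two labels; not part of the commit):

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.multioutput import ClassifierChain

rng = np.random.RandomState(0)
X = rng.randn(100, 5)
Y = np.column_stack([X[:, 0] > 0, X[:, 1] + X[:, 2] > 0]).astype(int)

base_lr = LogisticRegression(solver='lbfgs')
ovr = OneVsRestClassifier(base_lr).fit(X, Y)
chain = ClassifierChain(base_lr, order='random', random_state=0).fit(X, Y)

# Both wrappers fitted internal clones, so the shared template is still unfitted.
print(hasattr(base_lr, 'coef_'))   # False
print(ovr.predict(X[:2]))
print(chain.predict(X[:2]))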
