Skip to content

Commit 37e962b

Browse files
committed
Pushing the docs to dev/ for branch: master, commit 370e5452b52a90cb137513159d3d9559d77bc71d
1 parent f8f98a0 commit 37e962b

File tree

1,240 files changed

+3908
-3801
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,240 files changed

+3908
-3801
lines changed
Binary file not shown.

dev/_downloads/14f620cd922ca2c9a39ae5784034dd0d/plot_lda.py

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
"""
2-
====================================================================
3-
Normal and Shrinkage Linear Discriminant Analysis for classification
4-
====================================================================
2+
===========================================================================
3+
Normal, Ledoit-Wolf and OAS Linear Discriminant Analysis for classification
4+
===========================================================================
55
6-
Shows how shrinkage improves classification.
6+
This example illustrates how the Ledoit-Wolf and Oracle Shrinkage
7+
Approximating (OAS) estimators of covariance can improve classification.
78
"""
89
import numpy as np
910
import matplotlib.pyplot as plt
1011

1112
from sklearn.datasets import make_blobs
1213
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
14+
from sklearn.covariance import OAS
1315

1416

1517
n_train = 20 # samples for training
@@ -35,34 +37,45 @@ def generate_data(n_samples, n_features):
3537
X = np.hstack([X, np.random.randn(n_samples, n_features - 1)])
3638
return X, y
3739

38-
acc_clf1, acc_clf2 = [], []
40+
41+
acc_clf1, acc_clf2, acc_clf3 = [], [], []
3942
n_features_range = range(1, n_features_max + 1, step)
4043
for n_features in n_features_range:
41-
score_clf1, score_clf2 = 0, 0
44+
score_clf1, score_clf2, score_clf3 = 0, 0, 0
4245
for _ in range(n_averages):
4346
X, y = generate_data(n_train, n_features)
4447

45-
clf1 = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto').fit(X, y)
46-
clf2 = LinearDiscriminantAnalysis(solver='lsqr', shrinkage=None).fit(X, y)
48+
clf1 = LinearDiscriminantAnalysis(solver='lsqr',
49+
shrinkage='auto').fit(X, y)
50+
clf2 = LinearDiscriminantAnalysis(solver='lsqr',
51+
shrinkage=None).fit(X, y)
52+
oa = OAS(store_precision=False, assume_centered=False)
53+
clf3 = LinearDiscriminantAnalysis(solver='lsqr',
54+
covariance_estimator=oa).fit(X, y)
4755

4856
X, y = generate_data(n_test, n_features)
4957
score_clf1 += clf1.score(X, y)
5058
score_clf2 += clf2.score(X, y)
59+
score_clf3 += clf3.score(X, y)
5160

5261
acc_clf1.append(score_clf1 / n_averages)
5362
acc_clf2.append(score_clf2 / n_averages)
63+
acc_clf3.append(score_clf3 / n_averages)
5464

5565
features_samples_ratio = np.array(n_features_range) / n_train
5666

5767
plt.plot(features_samples_ratio, acc_clf1, linewidth=2,
58-
label="Linear Discriminant Analysis with shrinkage", color='navy')
68+
label="Linear Discriminant Analysis with Ledoit Wolf", color='navy')
5969
plt.plot(features_samples_ratio, acc_clf2, linewidth=2,
6070
label="Linear Discriminant Analysis", color='gold')
71+
plt.plot(features_samples_ratio, acc_clf3, linewidth=2,
72+
label="Linear Discriminant Analysis with OAS", color='red')
6173

6274
plt.xlabel('n_features / n_samples')
6375
plt.ylabel('Classification accuracy')
6476

65-
plt.legend(loc=1, prop={'size': 12})
66-
plt.suptitle('Linear Discriminant Analysis vs. \
67-
shrinkage Linear Discriminant Analysis (1 discriminative feature)')
77+
plt.legend(loc=3, prop={'size': 12})
78+
plt.suptitle('Linear Discriminant Analysis vs. ' + '\n'
79+
+ 'Shrinkage Linear Discriminant Analysis vs. ' + '\n'
80+
+ 'OAS Linear Discriminant Analysis (1 discriminative feature)')
6881
plt.show()
Binary file not shown.

dev/_downloads/acc912c1f80e1cb0e32675b5f7686075/plot_lda.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
"cell_type": "markdown",
1616
"metadata": {},
1717
"source": [
18-
"\n# Normal and Shrinkage Linear Discriminant Analysis for classification\n\nShows how shrinkage improves classification.\n"
18+
"\n# Normal, Ledoit-Wolf and OAS Linear Discriminant Analysis for classification\n\nThis example illustrates how the Ledoit-Wolf and Oracle Shrinkage\nApproximating (OAS) estimators of covariance can improve classification.\n"
1919
]
2020
},
2121
{
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"import numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.datasets import make_blobs\nfrom sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n\n\nn_train = 20 # samples for training\nn_test = 200 # samples for testing\nn_averages = 50 # how often to repeat classification\nn_features_max = 75 # maximum number of features\nstep = 4 # step size for the calculation\n\n\ndef generate_data(n_samples, n_features):\n \"\"\"Generate random blob-ish data with noisy features.\n\n This returns an array of input data with shape `(n_samples, n_features)`\n and an array of `n_samples` target labels.\n\n Only one feature contains discriminative information, the other features\n contain only noise.\n \"\"\"\n X, y = make_blobs(n_samples=n_samples, n_features=1, centers=[[-2], [2]])\n\n # add non-discriminative features\n if n_features > 1:\n X = np.hstack([X, np.random.randn(n_samples, n_features - 1)])\n return X, y\n\nacc_clf1, acc_clf2 = [], []\nn_features_range = range(1, n_features_max + 1, step)\nfor n_features in n_features_range:\n score_clf1, score_clf2 = 0, 0\n for _ in range(n_averages):\n X, y = generate_data(n_train, n_features)\n\n clf1 = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto').fit(X, y)\n clf2 = LinearDiscriminantAnalysis(solver='lsqr', shrinkage=None).fit(X, y)\n\n X, y = generate_data(n_test, n_features)\n score_clf1 += clf1.score(X, y)\n score_clf2 += clf2.score(X, y)\n\n acc_clf1.append(score_clf1 / n_averages)\n acc_clf2.append(score_clf2 / n_averages)\n\nfeatures_samples_ratio = np.array(n_features_range) / n_train\n\nplt.plot(features_samples_ratio, acc_clf1, linewidth=2,\n label=\"Linear Discriminant Analysis with shrinkage\", color='navy')\nplt.plot(features_samples_ratio, acc_clf2, linewidth=2,\n label=\"Linear Discriminant Analysis\", color='gold')\n\nplt.xlabel('n_features / n_samples')\nplt.ylabel('Classification accuracy')\n\nplt.legend(loc=1, prop={'size': 12})\nplt.suptitle('Linear Discriminant Analysis vs. \\\nshrinkage Linear Discriminant Analysis (1 discriminative feature)')\nplt.show()"
29+
"import numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.datasets import make_blobs\nfrom sklearn.discriminant_analysis import LinearDiscriminantAnalysis\nfrom sklearn.covariance import OAS\n\n\nn_train = 20 # samples for training\nn_test = 200 # samples for testing\nn_averages = 50 # how often to repeat classification\nn_features_max = 75 # maximum number of features\nstep = 4 # step size for the calculation\n\n\ndef generate_data(n_samples, n_features):\n \"\"\"Generate random blob-ish data with noisy features.\n\n This returns an array of input data with shape `(n_samples, n_features)`\n and an array of `n_samples` target labels.\n\n Only one feature contains discriminative information, the other features\n contain only noise.\n \"\"\"\n X, y = make_blobs(n_samples=n_samples, n_features=1, centers=[[-2], [2]])\n\n # add non-discriminative features\n if n_features > 1:\n X = np.hstack([X, np.random.randn(n_samples, n_features - 1)])\n return X, y\n\n\nacc_clf1, acc_clf2, acc_clf3 = [], [], []\nn_features_range = range(1, n_features_max + 1, step)\nfor n_features in n_features_range:\n score_clf1, score_clf2, score_clf3 = 0, 0, 0\n for _ in range(n_averages):\n X, y = generate_data(n_train, n_features)\n\n clf1 = LinearDiscriminantAnalysis(solver='lsqr',\n shrinkage='auto').fit(X, y)\n clf2 = LinearDiscriminantAnalysis(solver='lsqr',\n shrinkage=None).fit(X, y)\n oa = OAS(store_precision=False, assume_centered=False)\n clf3 = LinearDiscriminantAnalysis(solver='lsqr',\n covariance_estimator=oa).fit(X, y)\n\n X, y = generate_data(n_test, n_features)\n score_clf1 += clf1.score(X, y)\n score_clf2 += clf2.score(X, y)\n score_clf3 += clf3.score(X, y)\n\n acc_clf1.append(score_clf1 / n_averages)\n acc_clf2.append(score_clf2 / n_averages)\n acc_clf3.append(score_clf3 / n_averages)\n\nfeatures_samples_ratio = np.array(n_features_range) / n_train\n\nplt.plot(features_samples_ratio, acc_clf1, linewidth=2,\n label=\"Linear Discriminant Analysis with Ledoit Wolf\", color='navy')\nplt.plot(features_samples_ratio, acc_clf2, linewidth=2,\n label=\"Linear Discriminant Analysis\", color='gold')\nplt.plot(features_samples_ratio, acc_clf3, linewidth=2,\n label=\"Linear Discriminant Analysis with OAS\", color='red')\n\nplt.xlabel('n_features / n_samples')\nplt.ylabel('Classification accuracy')\n\nplt.legend(loc=3, prop={'size': 12})\nplt.suptitle('Linear Discriminant Analysis vs. ' + '\\n'\n + 'Shrinkage Linear Discriminant Analysis vs. ' + '\\n'\n + 'OAS Linear Discriminant Analysis (1 discriminative feature)')\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/scikit-learn-docs.pdf

19.5 KB
Binary file not shown.

dev/_images/binder_badge_logo.png

0 Bytes

dev/_images/iris.png

0 Bytes
21 Bytes
21 Bytes
232 Bytes

0 commit comments

Comments
 (0)