
Commit 9a9aa78

committed
Pushing the docs to dev/ for branch: main, commit a3893d3bdc7b63f236536b0c0f4d47b5ab575c07
1 parent 53f9630 commit 9a9aa78

File tree

1,214 files changed: +4617 -4405 lines changed


dev/_downloads/1a2ab00bbfd4eb80e0afca13d83e2a14/plot_lda_qda.ipynb

Lines changed: 62 additions & 1 deletion
@@ -18,6 +18,67 @@
     "\n# Linear and Quadratic Discriminant Analysis with covariance ellipsoid\n\nThis example plots the covariance ellipsoids of each class and\ndecision boundary learned by LDA and QDA. The ellipsoids display\nthe double standard deviation for each class. With LDA, the\nstandard deviation is the same for all the classes, while each\nclass has its own standard deviation with QDA.\n"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Colormap\n\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\nimport matplotlib as mpl\nfrom matplotlib import colors\n\ncmap = colors.LinearSegmentedColormap(\n    \"red_blue_classes\",\n    {\n        \"red\": [(0, 1, 1), (1, 0.7, 0.7)],\n        \"green\": [(0, 0.7, 0.7), (1, 0.7, 0.7)],\n        \"blue\": [(0, 0.7, 0.7), (1, 1, 1)],\n    },\n)\nplt.cm.register_cmap(cmap=cmap)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Datasets generation functions\n\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n\n\ndef dataset_fixed_cov():\n    \"\"\"Generate 2 Gaussians samples with the same covariance matrix\"\"\"\n    n, dim = 300, 2\n    np.random.seed(0)\n    C = np.array([[0.0, -0.23], [0.83, 0.23]])\n    X = np.r_[\n        np.dot(np.random.randn(n, dim), C),\n        np.dot(np.random.randn(n, dim), C) + np.array([1, 1]),\n    ]\n    y = np.hstack((np.zeros(n), np.ones(n)))\n    return X, y\n\n\ndef dataset_cov():\n    \"\"\"Generate 2 Gaussians samples with different covariance matrices\"\"\"\n    n, dim = 300, 2\n    np.random.seed(0)\n    C = np.array([[0.0, -1.0], [2.5, 0.7]]) * 2.0\n    X = np.r_[\n        np.dot(np.random.randn(n, dim), C),\n        np.dot(np.random.randn(n, dim), C.T) + np.array([1, 4]),\n    ]\n    y = np.hstack((np.zeros(n), np.ones(n)))\n    return X, y"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Plot functions\n\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from scipy import linalg\n\n\ndef plot_data(lda, X, y, y_pred, fig_index):\n    splot = plt.subplot(2, 2, fig_index)\n    if fig_index == 1:\n        plt.title(\"Linear Discriminant Analysis\")\n        plt.ylabel(\"Data with\\n fixed covariance\")\n    elif fig_index == 2:\n        plt.title(\"Quadratic Discriminant Analysis\")\n    elif fig_index == 3:\n        plt.ylabel(\"Data with\\n varying covariances\")\n\n    tp = y == y_pred  # True Positive\n    tp0, tp1 = tp[y == 0], tp[y == 1]\n    X0, X1 = X[y == 0], X[y == 1]\n    X0_tp, X0_fp = X0[tp0], X0[~tp0]\n    X1_tp, X1_fp = X1[tp1], X1[~tp1]\n\n    # class 0: dots\n    plt.scatter(X0_tp[:, 0], X0_tp[:, 1], marker=\".\", color=\"red\")\n    plt.scatter(X0_fp[:, 0], X0_fp[:, 1], marker=\"x\", s=20, color=\"#990000\")  # dark red\n\n    # class 1: dots\n    plt.scatter(X1_tp[:, 0], X1_tp[:, 1], marker=\".\", color=\"blue\")\n    plt.scatter(\n        X1_fp[:, 0], X1_fp[:, 1], marker=\"x\", s=20, color=\"#000099\"\n    )  # dark blue\n\n    # class 0 and 1 : areas\n    nx, ny = 200, 100\n    x_min, x_max = plt.xlim()\n    y_min, y_max = plt.ylim()\n    xx, yy = np.meshgrid(np.linspace(x_min, x_max, nx), np.linspace(y_min, y_max, ny))\n    Z = lda.predict_proba(np.c_[xx.ravel(), yy.ravel()])\n    Z = Z[:, 1].reshape(xx.shape)\n    plt.pcolormesh(\n        xx, yy, Z, cmap=\"red_blue_classes\", norm=colors.Normalize(0.0, 1.0), zorder=0\n    )\n    plt.contour(xx, yy, Z, [0.5], linewidths=2.0, colors=\"white\")\n\n    # means\n    plt.plot(\n        lda.means_[0][0],\n        lda.means_[0][1],\n        \"*\",\n        color=\"yellow\",\n        markersize=15,\n        markeredgecolor=\"grey\",\n    )\n    plt.plot(\n        lda.means_[1][0],\n        lda.means_[1][1],\n        \"*\",\n        color=\"yellow\",\n        markersize=15,\n        markeredgecolor=\"grey\",\n    )\n\n    return splot\n\n\ndef plot_ellipse(splot, mean, cov, color):\n    v, w = linalg.eigh(cov)\n    u = w[0] / linalg.norm(w[0])\n    angle = np.arctan(u[1] / u[0])\n    angle = 180 * angle / np.pi  # convert to degrees\n    # filled Gaussian at 2 standard deviation\n    ell = mpl.patches.Ellipse(\n        mean,\n        2 * v[0] ** 0.5,\n        2 * v[1] ** 0.5,\n        180 + angle,\n        facecolor=color,\n        edgecolor=\"black\",\n        linewidth=2,\n    )\n    ell.set_clip_box(splot.bbox)\n    ell.set_alpha(0.2)\n    splot.add_artist(ell)\n    splot.set_xticks(())\n    splot.set_yticks(())\n\n\ndef plot_lda_cov(lda, splot):\n    plot_ellipse(splot, lda.means_[0], lda.covariance_, \"red\")\n    plot_ellipse(splot, lda.means_[1], lda.covariance_, \"blue\")\n\n\ndef plot_qda_cov(qda, splot):\n    plot_ellipse(splot, qda.means_[0], qda.covariance_[0], \"red\")\n    plot_ellipse(splot, qda.means_[1], qda.covariance_[1], \"blue\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Plot\n\n"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -26,7 +87,7 @@
    },
    "outputs": [],
    "source": [
-    "from scipy import linalg\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport matplotlib as mpl\nfrom matplotlib import colors\n\nfrom sklearn.discriminant_analysis import LinearDiscriminantAnalysis\nfrom sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\n\n# #############################################################################\n# Colormap\ncmap = colors.LinearSegmentedColormap(\n    \"red_blue_classes\",\n    {\n        \"red\": [(0, 1, 1), (1, 0.7, 0.7)],\n        \"green\": [(0, 0.7, 0.7), (1, 0.7, 0.7)],\n        \"blue\": [(0, 0.7, 0.7), (1, 1, 1)],\n    },\n)\nplt.cm.register_cmap(cmap=cmap)\n\n\n# #############################################################################\n# Generate datasets\ndef dataset_fixed_cov():\n    \"\"\"Generate 2 Gaussians samples with the same covariance matrix\"\"\"\n    n, dim = 300, 2\n    np.random.seed(0)\n    C = np.array([[0.0, -0.23], [0.83, 0.23]])\n    X = np.r_[\n        np.dot(np.random.randn(n, dim), C),\n        np.dot(np.random.randn(n, dim), C) + np.array([1, 1]),\n    ]\n    y = np.hstack((np.zeros(n), np.ones(n)))\n    return X, y\n\n\ndef dataset_cov():\n    \"\"\"Generate 2 Gaussians samples with different covariance matrices\"\"\"\n    n, dim = 300, 2\n    np.random.seed(0)\n    C = np.array([[0.0, -1.0], [2.5, 0.7]]) * 2.0\n    X = np.r_[\n        np.dot(np.random.randn(n, dim), C),\n        np.dot(np.random.randn(n, dim), C.T) + np.array([1, 4]),\n    ]\n    y = np.hstack((np.zeros(n), np.ones(n)))\n    return X, y\n\n\n# #############################################################################\n# Plot functions\ndef plot_data(lda, X, y, y_pred, fig_index):\n    splot = plt.subplot(2, 2, fig_index)\n    if fig_index == 1:\n        plt.title(\"Linear Discriminant Analysis\")\n        plt.ylabel(\"Data with\\n fixed covariance\")\n    elif fig_index == 2:\n        plt.title(\"Quadratic Discriminant Analysis\")\n    elif fig_index == 3:\n        plt.ylabel(\"Data with\\n varying covariances\")\n\n    tp = y == y_pred  # True Positive\n    tp0, tp1 = tp[y == 0], tp[y == 1]\n    X0, X1 = X[y == 0], X[y == 1]\n    X0_tp, X0_fp = X0[tp0], X0[~tp0]\n    X1_tp, X1_fp = X1[tp1], X1[~tp1]\n\n    # class 0: dots\n    plt.scatter(X0_tp[:, 0], X0_tp[:, 1], marker=\".\", color=\"red\")\n    plt.scatter(X0_fp[:, 0], X0_fp[:, 1], marker=\"x\", s=20, color=\"#990000\")  # dark red\n\n    # class 1: dots\n    plt.scatter(X1_tp[:, 0], X1_tp[:, 1], marker=\".\", color=\"blue\")\n    plt.scatter(\n        X1_fp[:, 0], X1_fp[:, 1], marker=\"x\", s=20, color=\"#000099\"\n    )  # dark blue\n\n    # class 0 and 1 : areas\n    nx, ny = 200, 100\n    x_min, x_max = plt.xlim()\n    y_min, y_max = plt.ylim()\n    xx, yy = np.meshgrid(np.linspace(x_min, x_max, nx), np.linspace(y_min, y_max, ny))\n    Z = lda.predict_proba(np.c_[xx.ravel(), yy.ravel()])\n    Z = Z[:, 1].reshape(xx.shape)\n    plt.pcolormesh(\n        xx, yy, Z, cmap=\"red_blue_classes\", norm=colors.Normalize(0.0, 1.0), zorder=0\n    )\n    plt.contour(xx, yy, Z, [0.5], linewidths=2.0, colors=\"white\")\n\n    # means\n    plt.plot(\n        lda.means_[0][0],\n        lda.means_[0][1],\n        \"*\",\n        color=\"yellow\",\n        markersize=15,\n        markeredgecolor=\"grey\",\n    )\n    plt.plot(\n        lda.means_[1][0],\n        lda.means_[1][1],\n        \"*\",\n        color=\"yellow\",\n        markersize=15,\n        markeredgecolor=\"grey\",\n    )\n\n    return splot\n\n\ndef plot_ellipse(splot, mean, cov, color):\n    v, w = linalg.eigh(cov)\n    u = w[0] / linalg.norm(w[0])\n    angle = np.arctan(u[1] / u[0])\n    angle = 180 * angle / np.pi  # convert to degrees\n    # filled Gaussian at 2 standard deviation\n    ell = mpl.patches.Ellipse(\n        mean,\n        2 * v[0] ** 0.5,\n        2 * v[1] ** 0.5,\n        180 + angle,\n        facecolor=color,\n        edgecolor=\"black\",\n        linewidth=2,\n    )\n    ell.set_clip_box(splot.bbox)\n    ell.set_alpha(0.2)\n    splot.add_artist(ell)\n    splot.set_xticks(())\n    splot.set_yticks(())\n\n\ndef plot_lda_cov(lda, splot):\n    plot_ellipse(splot, lda.means_[0], lda.covariance_, \"red\")\n    plot_ellipse(splot, lda.means_[1], lda.covariance_, \"blue\")\n\n\ndef plot_qda_cov(qda, splot):\n    plot_ellipse(splot, qda.means_[0], qda.covariance_[0], \"red\")\n    plot_ellipse(splot, qda.means_[1], qda.covariance_[1], \"blue\")\n\n\nplt.figure(figsize=(10, 8), facecolor=\"white\")\nplt.suptitle(\n    \"Linear Discriminant Analysis vs Quadratic Discriminant Analysis\",\n    y=0.98,\n    fontsize=15,\n)\nfor i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]):\n    # Linear Discriminant Analysis\n    lda = LinearDiscriminantAnalysis(solver=\"svd\", store_covariance=True)\n    y_pred = lda.fit(X, y).predict(X)\n    splot = plot_data(lda, X, y, y_pred, fig_index=2 * i + 1)\n    plot_lda_cov(lda, splot)\n    plt.axis(\"tight\")\n\n    # Quadratic Discriminant Analysis\n    qda = QuadraticDiscriminantAnalysis(store_covariance=True)\n    y_pred = qda.fit(X, y).predict(X)\n    splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2)\n    plot_qda_cov(qda, splot)\n    plt.axis(\"tight\")\nplt.tight_layout()\nplt.subplots_adjust(top=0.92)\nplt.show()"
+    "plt.figure(figsize=(10, 8), facecolor=\"white\")\nplt.suptitle(\n    \"Linear Discriminant Analysis vs Quadratic Discriminant Analysis\",\n    y=0.98,\n    fontsize=15,\n)\n\nfrom sklearn.discriminant_analysis import LinearDiscriminantAnalysis\nfrom sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\n\nfor i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]):\n    # Linear Discriminant Analysis\n    lda = LinearDiscriminantAnalysis(solver=\"svd\", store_covariance=True)\n    y_pred = lda.fit(X, y).predict(X)\n    splot = plot_data(lda, X, y, y_pred, fig_index=2 * i + 1)\n    plot_lda_cov(lda, splot)\n    plt.axis(\"tight\")\n\n    # Quadratic Discriminant Analysis\n    qda = QuadraticDiscriminantAnalysis(store_covariance=True)\n    y_pred = qda.fit(X, y).predict(X)\n    splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2)\n    plot_qda_cov(qda, splot)\n    plt.axis(\"tight\")\n\nplt.tight_layout()\nplt.subplots_adjust(top=0.92)\nplt.show()"
    ]
   }
  ],
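The restructuring above only splits the notebook into titled cells; the estimators and plotting logic are unchanged. As a minimal sketch of the scikit-learn API the example relies on (the toy data below is illustrative, not part of the commit):

import numpy as np
from sklearn.discriminant_analysis import (
    LinearDiscriminantAnalysis,
    QuadraticDiscriminantAnalysis,
)

rng = np.random.RandomState(0)
X = np.r_[rng.randn(50, 2), rng.randn(50, 2) + 2]  # two Gaussian blobs
y = np.r_[np.zeros(50), np.ones(50)]

# store_covariance=True keeps the fitted covariance available for the
# ellipse plotting done by plot_lda_cov / plot_qda_cov.
lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True).fit(X, y)
qda = QuadraticDiscriminantAnalysis(store_covariance=True).fit(X, y)

print(lda.covariance_.shape)  # (2, 2): one covariance shared by all classes
print(len(qda.covariance_))   # 2: one covariance matrix per class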

dev/_downloads/d7c704916c145b9b383b87c04245efda/plot_lda_qda.py

Lines changed: 26 additions & 10 deletions
@@ -11,17 +11,14 @@ class has its own standard deviation with QDA.
 
 """
 
-from scipy import linalg
-import numpy as np
+# %%
+# Colormap
+# --------
+
 import matplotlib.pyplot as plt
 import matplotlib as mpl
 from matplotlib import colors
 
-from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
-from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
-
-# #############################################################################
-# Colormap
 cmap = colors.LinearSegmentedColormap(
     "red_blue_classes",
     {
@@ -33,8 +30,13 @@ class has its own standard deviation with QDA.
 plt.cm.register_cmap(cmap=cmap)
 
 
-# #############################################################################
-# Generate datasets
+# %%
+# Datasets generation functions
+# -----------------------------
+
+import numpy as np
+
+
 def dataset_fixed_cov():
     """Generate 2 Gaussians samples with the same covariance matrix"""
     n, dim = 300, 2
@@ -61,8 +63,13 @@ def dataset_cov():
     return X, y
 
 
-# #############################################################################
+# %%
 # Plot functions
+# --------------
+
+from scipy import linalg
+
+
 def plot_data(lda, X, y, y_pred, fig_index):
     splot = plt.subplot(2, 2, fig_index)
     if fig_index == 1:
@@ -154,12 +161,20 @@ def plot_qda_cov(qda, splot):
     plot_ellipse(splot, qda.means_[1], qda.covariance_[1], "blue")
 
 
+# %%
+# Plot
+# ----
+
 plt.figure(figsize=(10, 8), facecolor="white")
 plt.suptitle(
     "Linear Discriminant Analysis vs Quadratic Discriminant Analysis",
     y=0.98,
     fontsize=15,
 )
+
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
+
 for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]):
     # Linear Discriminant Analysis
     lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
@@ -174,6 +189,7 @@ def plot_qda_cov(qda, splot):
     splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2)
     plot_qda_cov(qda, splot)
     plt.axis("tight")
+
 plt.tight_layout()
 plt.subplots_adjust(top=0.92)
 plt.show()
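The comment blocks introduced here follow the sphinx-gallery cell convention: a line starting with "# %%" begins a new cell, the commented reST under it (a title plus a dashed underline) is rendered as a markdown cell, and the code up to the next marker becomes the following code cell. That is what produces the "## Colormap", "## Datasets generation functions", "## Plot functions" and "## Plot" markdown cells in the .ipynb diff above. A minimal sketch of the pattern (illustrative, not from this commit):

# %%
# Section title
# -------------
#
# This comment block is rendered as a markdown cell in the generated
# notebook; the code below it becomes the matching code cell.

print("one code cell")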

dev/_downloads/scikit-learn-docs.zip

-8.74 KB
Binary file not shown.
