
Commit 84d84a8

Pushing the docs to dev/ for branch: main, commit bbc73cfd7f5a85f6ac63432d3294abecdcb81d2a
1 parent: 0335386

File tree

1,361 files changed: +6539 / -7726 lines


dev/.buildinfo (1 addition, 1 deletion)

@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 0196bdeb140758540138776c646854be
+config: d0f46beb81ff4ac48bf0a44b4a9db458
 tags: 645f666f9bcd5a90fca523b33c5a78b7
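
The "config:" value is a hash of the Sphinx build configuration: as the file's own comment says, a missing or mismatched hash triggers a full rebuild, which is why this routine docs push rewrites it. A hypothetical sketch of the idea in Python (not Sphinx's actual implementation; the function name and sample settings are made up):

import hashlib

def config_hash(config: dict) -> str:
    # Serialize the settings deterministically, then hash; an MD5 digest
    # gives a 32-character hex string like the "config:" values above.
    blob = repr(sorted(config.items())).encode("utf-8")
    return hashlib.md5(blob).hexdigest()

# Any change to the effective configuration produces a new hash, so the
# next build no longer matches .buildinfo and everything is rebuilt.
print(config_hash({"html_theme": "pydata_sphinx_theme", "release": "dev"}))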
Binary file not shown.

dev/_downloads/26998096b90db15754e891c733ae032c/plot_iris_dataset.ipynb (69 additions, 1 deletion)

@@ -15,7 +15,75 @@
    },
    "outputs": [],
    "source": [
-    "# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\nimport matplotlib.pyplot as plt\n\n# unused but required import for doing 3d projections with matplotlib < 3.2\nimport mpl_toolkits.mplot3d # noqa: F401\n\nfrom sklearn import datasets\nfrom sklearn.decomposition import PCA\n\n# import some data to play with\niris = datasets.load_iris()\nX = iris.data[:, :2] # we only take the first two features.\ny = iris.target\n\nx_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5\ny_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5\n\nplt.figure(2, figsize=(8, 6))\nplt.clf()\n\n# Plot the training points\nplt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Set1, edgecolor=\"k\")\nplt.xlabel(\"Sepal length\")\nplt.ylabel(\"Sepal width\")\n\nplt.xlim(x_min, x_max)\nplt.ylim(y_min, y_max)\nplt.xticks(())\nplt.yticks(())\n\n# To getter a better understanding of interaction of the dimensions\n# plot the first three PCA dimensions\nfig = plt.figure(1, figsize=(8, 6))\nax = fig.add_subplot(111, projection=\"3d\", elev=-150, azim=110)\n\nX_reduced = PCA(n_components=3).fit_transform(iris.data)\nax.scatter(\n X_reduced[:, 0],\n X_reduced[:, 1],\n X_reduced[:, 2],\n c=y,\n cmap=plt.cm.Set1,\n edgecolor=\"k\",\n s=40,\n)\n\nax.set_title(\"First three PCA directions\")\nax.set_xlabel(\"1st eigenvector\")\nax.xaxis.set_ticklabels([])\nax.set_ylabel(\"2nd eigenvector\")\nax.yaxis.set_ticklabels([])\nax.set_zlabel(\"3rd eigenvector\")\nax.zaxis.set_ticklabels([])\n\nplt.show()"
+    "# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Loading the iris dataset\n\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn import datasets\n\niris = datasets.load_iris()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Scatter Plot of the Iris dataset\n\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n\n_, ax = plt.subplots()\nscatter = ax.scatter(iris.data[:, 0], iris.data[:, 1], c=iris.target)\nax.set(xlabel=iris.feature_names[0], ylabel=iris.feature_names[1])\n_ = ax.legend(\n scatter.legend_elements()[0], iris.target_names, loc=\"lower right\", title=\"Classes\"\n)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Each point in the scatter plot refers to one of the 150 iris flowers\nin the dataset, with the color indicating their respective type\n(Setosa, Versicolour, and Virginica).\nYou can already see a pattern regarding the Setosa type, which is\neasily identifiable based on its short and wide sepal. Only\nconsidering these 2 dimensions, sepal width and length, there's still\noverlap between the Versicolor and Virginica types.\n\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Plot a PCA representation\nLet's apply a Principal Component Analysis (PCA) to the iris dataset\nand then plot the irises across the first three PCA dimensions.\nThis will allow us to better differentiate between the three types!\n\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# unused but required import for doing 3d projections with matplotlib < 3.2\nimport mpl_toolkits.mplot3d # noqa: F401\n\nfrom sklearn.decomposition import PCA\n\nfig = plt.figure(1, figsize=(8, 6))\nax = fig.add_subplot(111, projection=\"3d\", elev=-150, azim=110)\n\nX_reduced = PCA(n_components=3).fit_transform(iris.data)\nax.scatter(\n X_reduced[:, 0],\n X_reduced[:, 1],\n X_reduced[:, 2],\n c=iris.target,\n s=40,\n)\n\nax.set_title(\"First three PCA dimensions\")\nax.set_xlabel(\"1st Eigenvector\")\nax.xaxis.set_ticklabels([])\nax.set_ylabel(\"2nd Eigenvector\")\nax.yaxis.set_ticklabels([])\nax.set_zlabel(\"3rd Eigenvector\")\nax.zaxis.set_ticklabels([])\n\nplt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "PCA will create 3 new features that are a linear combination of the\n4 original features. In addition, this transform maximizes the variance.\nWith this transformation, we see that we can identify each species using\nonly the first feature (i.e. first eigenvalues).\n\n"
    ]
   }
  ],
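
For readability, here are the new notebook cells assembled into one plain-Python script (taken directly from the JSON payload above, with the markdown headings kept as comments; it needs scikit-learn and matplotlib installed):

# Assembled from the notebook cells added in this commit.
import matplotlib.pyplot as plt

# unused but required import for doing 3d projections with matplotlib < 3.2
import mpl_toolkits.mplot3d  # noqa: F401

from sklearn import datasets
from sklearn.decomposition import PCA

# Loading the iris dataset
iris = datasets.load_iris()

# Scatter plot of the first two features, colored by species
_, ax = plt.subplots()
scatter = ax.scatter(iris.data[:, 0], iris.data[:, 1], c=iris.target)
ax.set(xlabel=iris.feature_names[0], ylabel=iris.feature_names[1])
_ = ax.legend(
    scatter.legend_elements()[0], iris.target_names, loc="lower right", title="Classes"
)

# Plot a PCA representation: project the 4 features onto the
# first three principal components and plot in 3D
fig = plt.figure(1, figsize=(8, 6))
ax = fig.add_subplot(111, projection="3d", elev=-150, azim=110)
X_reduced = PCA(n_components=3).fit_transform(iris.data)
ax.scatter(X_reduced[:, 0], X_reduced[:, 1], X_reduced[:, 2], c=iris.target, s=40)
ax.set_title("First three PCA dimensions")
ax.set_xlabel("1st Eigenvector")
ax.xaxis.set_ticklabels([])
ax.set_ylabel("2nd Eigenvector")
ax.yaxis.set_ticklabels([])
ax.set_zlabel("3rd Eigenvector")
ax.zaxis.set_ticklabels([])
plt.show()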

dev/_downloads/2c8a162a0e436f4ca9af35453585fc81/plot_adaboost_hastie_10_2.py (0 additions, 172 deletions)

This file was deleted.

dev/_downloads/2da0534ab0e0c8241033bcc2d912e419/plot_classifier_comparison.py (1 addition, 1 deletion)

@@ -66,7 +66,7 @@
         max_depth=5, n_estimators=10, max_features=1, random_state=42
     ),
     MLPClassifier(alpha=1, max_iter=1000, random_state=42),
-    AdaBoostClassifier(random_state=42),
+    AdaBoostClassifier(algorithm="SAMME", random_state=42),
     GaussianNB(),
     QuadraticDiscriminantAnalysis(),
 ]
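
The single change here pins AdaBoost's boosting variant explicitly instead of relying on the default. A minimal sketch of the updated construction, assuming the motivation is scikit-learn's deprecation of the old "SAMME.R" default (the diff itself does not state a rationale, and the deprecation timeline is my assumption):

from sklearn.datasets import make_moons
from sklearn.ensemble import AdaBoostClassifier

# Same toy data the example uses for its first panel
X, y = make_moons(noise=0.3, random_state=0)

# Passing algorithm="SAMME" explicitly, as the diff above now does,
# avoids depending on the deprecated "SAMME.R" default.
clf = AdaBoostClassifier(algorithm="SAMME", random_state=42).fit(X, y)
print(f"training accuracy: {clf.score(X, y):.2f}")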

dev/_downloads/3438aba177365cb595921cf18806dfa7/plot_classifier_comparison.ipynb (1 addition, 1 deletion)

@@ -15,7 +15,7 @@
    },
    "outputs": [],
    "source": [
-    "# Code source: Ga\u00ebl Varoquaux\n# Andreas M\u00fcller\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom matplotlib.colors import ListedColormap\n\nfrom sklearn.datasets import make_circles, make_classification, make_moons\nfrom sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\nfrom sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier\nfrom sklearn.gaussian_process import GaussianProcessClassifier\nfrom sklearn.gaussian_process.kernels import RBF\nfrom sklearn.inspection import DecisionBoundaryDisplay\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.neighbors import KNeighborsClassifier\nfrom sklearn.neural_network import MLPClassifier\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.svm import SVC\nfrom sklearn.tree import DecisionTreeClassifier\n\nnames = [\n \"Nearest Neighbors\",\n \"Linear SVM\",\n \"RBF SVM\",\n \"Gaussian Process\",\n \"Decision Tree\",\n \"Random Forest\",\n \"Neural Net\",\n \"AdaBoost\",\n \"Naive Bayes\",\n \"QDA\",\n]\n\nclassifiers = [\n KNeighborsClassifier(3),\n SVC(kernel=\"linear\", C=0.025, random_state=42),\n SVC(gamma=2, C=1, random_state=42),\n GaussianProcessClassifier(1.0 * RBF(1.0), random_state=42),\n DecisionTreeClassifier(max_depth=5, random_state=42),\n RandomForestClassifier(\n max_depth=5, n_estimators=10, max_features=1, random_state=42\n ),\n MLPClassifier(alpha=1, max_iter=1000, random_state=42),\n AdaBoostClassifier(random_state=42),\n GaussianNB(),\n QuadraticDiscriminantAnalysis(),\n]\n\nX, y = make_classification(\n n_features=2, n_redundant=0, n_informative=2, random_state=1, n_clusters_per_class=1\n)\nrng = np.random.RandomState(2)\nX += 2 * rng.uniform(size=X.shape)\nlinearly_separable = (X, y)\n\ndatasets = [\n make_moons(noise=0.3, random_state=0),\n make_circles(noise=0.2, factor=0.5, random_state=1),\n linearly_separable,\n]\n\nfigure = plt.figure(figsize=(27, 9))\ni = 1\n# iterate over datasets\nfor ds_cnt, ds in enumerate(datasets):\n # preprocess dataset, split into training and test part\n X, y = ds\n X_train, X_test, y_train, y_test = train_test_split(\n X, y, test_size=0.4, random_state=42\n )\n\n x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5\n y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5\n\n # just plot the dataset first\n cm = plt.cm.RdBu\n cm_bright = ListedColormap([\"#FF0000\", \"#0000FF\"])\n ax = plt.subplot(len(datasets), len(classifiers) + 1, i)\n if ds_cnt == 0:\n ax.set_title(\"Input data\")\n # Plot the training points\n ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright, edgecolors=\"k\")\n # Plot the testing points\n ax.scatter(\n X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6, edgecolors=\"k\"\n )\n ax.set_xlim(x_min, x_max)\n ax.set_ylim(y_min, y_max)\n ax.set_xticks(())\n ax.set_yticks(())\n i += 1\n\n # iterate over classifiers\n for name, clf in zip(names, classifiers):\n ax = plt.subplot(len(datasets), len(classifiers) + 1, i)\n\n clf = make_pipeline(StandardScaler(), clf)\n clf.fit(X_train, y_train)\n score = clf.score(X_test, y_test)\n DecisionBoundaryDisplay.from_estimator(\n clf, X, cmap=cm, alpha=0.8, ax=ax, eps=0.5\n )\n\n # Plot the training points\n ax.scatter(\n X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright, edgecolors=\"k\"\n )\n # Plot the testing points\n ax.scatter(\n X_test[:, 0],\n X_test[:, 1],\n c=y_test,\n cmap=cm_bright,\n edgecolors=\"k\",\n alpha=0.6,\n )\n\n ax.set_xlim(x_min, x_max)\n ax.set_ylim(y_min, y_max)\n ax.set_xticks(())\n ax.set_yticks(())\n if ds_cnt == 0:\n ax.set_title(name)\n ax.text(\n x_max - 0.3,\n y_min + 0.3,\n (\"%.2f\" % score).lstrip(\"0\"),\n size=15,\n horizontalalignment=\"right\",\n )\n i += 1\n\nplt.tight_layout()\nplt.show()"
+    "# Code source: Ga\u00ebl Varoquaux\n# Andreas M\u00fcller\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom matplotlib.colors import ListedColormap\n\nfrom sklearn.datasets import make_circles, make_classification, make_moons\nfrom sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\nfrom sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier\nfrom sklearn.gaussian_process import GaussianProcessClassifier\nfrom sklearn.gaussian_process.kernels import RBF\nfrom sklearn.inspection import DecisionBoundaryDisplay\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.neighbors import KNeighborsClassifier\nfrom sklearn.neural_network import MLPClassifier\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.svm import SVC\nfrom sklearn.tree import DecisionTreeClassifier\n\nnames = [\n \"Nearest Neighbors\",\n \"Linear SVM\",\n \"RBF SVM\",\n \"Gaussian Process\",\n \"Decision Tree\",\n \"Random Forest\",\n \"Neural Net\",\n \"AdaBoost\",\n \"Naive Bayes\",\n \"QDA\",\n]\n\nclassifiers = [\n KNeighborsClassifier(3),\n SVC(kernel=\"linear\", C=0.025, random_state=42),\n SVC(gamma=2, C=1, random_state=42),\n GaussianProcessClassifier(1.0 * RBF(1.0), random_state=42),\n DecisionTreeClassifier(max_depth=5, random_state=42),\n RandomForestClassifier(\n max_depth=5, n_estimators=10, max_features=1, random_state=42\n ),\n MLPClassifier(alpha=1, max_iter=1000, random_state=42),\n AdaBoostClassifier(algorithm=\"SAMME\", random_state=42),\n GaussianNB(),\n QuadraticDiscriminantAnalysis(),\n]\n\nX, y = make_classification(\n n_features=2, n_redundant=0, n_informative=2, random_state=1, n_clusters_per_class=1\n)\nrng = np.random.RandomState(2)\nX += 2 * rng.uniform(size=X.shape)\nlinearly_separable = (X, y)\n\ndatasets = [\n make_moons(noise=0.3, random_state=0),\n make_circles(noise=0.2, factor=0.5, random_state=1),\n linearly_separable,\n]\n\nfigure = plt.figure(figsize=(27, 9))\ni = 1\n# iterate over datasets\nfor ds_cnt, ds in enumerate(datasets):\n # preprocess dataset, split into training and test part\n X, y = ds\n X_train, X_test, y_train, y_test = train_test_split(\n X, y, test_size=0.4, random_state=42\n )\n\n x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5\n y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5\n\n # just plot the dataset first\n cm = plt.cm.RdBu\n cm_bright = ListedColormap([\"#FF0000\", \"#0000FF\"])\n ax = plt.subplot(len(datasets), len(classifiers) + 1, i)\n if ds_cnt == 0:\n ax.set_title(\"Input data\")\n # Plot the training points\n ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright, edgecolors=\"k\")\n # Plot the testing points\n ax.scatter(\n X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6, edgecolors=\"k\"\n )\n ax.set_xlim(x_min, x_max)\n ax.set_ylim(y_min, y_max)\n ax.set_xticks(())\n ax.set_yticks(())\n i += 1\n\n # iterate over classifiers\n for name, clf in zip(names, classifiers):\n ax = plt.subplot(len(datasets), len(classifiers) + 1, i)\n\n clf = make_pipeline(StandardScaler(), clf)\n clf.fit(X_train, y_train)\n score = clf.score(X_test, y_test)\n DecisionBoundaryDisplay.from_estimator(\n clf, X, cmap=cm, alpha=0.8, ax=ax, eps=0.5\n )\n\n # Plot the training points\n ax.scatter(\n X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright, edgecolors=\"k\"\n )\n # Plot the testing points\n ax.scatter(\n X_test[:, 0],\n X_test[:, 1],\n c=y_test,\n cmap=cm_bright,\n edgecolors=\"k\",\n alpha=0.6,\n )\n\n ax.set_xlim(x_min, x_max)\n ax.set_ylim(y_min, y_max)\n ax.set_xticks(())\n ax.set_yticks(())\n if ds_cnt == 0:\n ax.set_title(name)\n ax.text(\n x_max - 0.3,\n y_min + 0.3,\n (\"%.2f\" % score).lstrip(\"0\"),\n size=15,\n horizontalalignment=\"right\",\n )\n i += 1\n\nplt.tight_layout()\nplt.show()"
    ]
   }
  ],
