
Commit 04e8b8a

Pushing the docs to dev/ for branch: master, commit dd3b705f7b30388b1595601a40c7212589ec0fb3
1 parent 6d6b262 commit 04e8b8a

1,068 files changed (+3,285 / -3,294 lines)

Two binary files changed (-151 bytes and -148 bytes); binary content not shown.

dev/_downloads/plot_birch_vs_minibatchkmeans.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-
"# Authors: Manoj Kumar <[email protected]\n# Alexandre Gramfort <[email protected]>\n# License: BSD 3 clause\n\nprint(__doc__)\n\nfrom itertools import cycle\nfrom time import time\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport matplotlib.colors as colors\n\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.cluster import Birch, MiniBatchKMeans\nfrom sklearn.datasets.samples_generator import make_blobs\n\n\n# Generate centers for the blobs so that it forms a 10 X 10 grid.\nxx = np.linspace(-22, 22, 10)\nyy = np.linspace(-22, 22, 10)\nxx, yy = np.meshgrid(xx, yy)\nn_centres = np.hstack((np.ravel(xx)[:, np.newaxis],\n np.ravel(yy)[:, np.newaxis]))\n\n# Generate blobs to do a comparison between MiniBatchKMeans and Birch.\nX, y = make_blobs(n_samples=100000, centers=n_centres, random_state=0)\n\n# Use all colors that matplotlib provides by default.\ncolors_ = cycle(colors.cnames.keys())\n\nfig = plt.figure(figsize=(12, 4))\nfig.subplots_adjust(left=0.04, right=0.98, bottom=0.1, top=0.9)\n\n# Compute clustering with Birch with and without the final clustering step\n# and plot.\nbirch_models = [Birch(threshold=1.7, n_clusters=None),\n Birch(threshold=1.7, n_clusters=100)]\nfinal_step = ['without global clustering', 'with global clustering']\n\nfor ind, (birch_model, info) in enumerate(zip(birch_models, final_step)):\n t = time()\n birch_model.fit(X)\n time_ = time() - t\n print(\"Birch %s as the final step took %0.2f seconds\" % (\n info, (time() - t)))\n\n # Plot result\n labels = birch_model.labels_\n centroids = birch_model.subcluster_centers_\n n_clusters = np.unique(labels).size\n print(\"n_clusters : %d\" % n_clusters)\n\n ax = fig.add_subplot(1, 3, ind + 1)\n for this_centroid, k, col in zip(centroids, range(n_clusters), colors_):\n mask = labels == k\n ax.scatter(X[mask, 0], X[mask, 1],\n c='w', edgecolor=col, marker='.', alpha=0.5)\n if birch_model.n_clusters is None:\n ax.scatter(this_centroid[0], this_centroid[1], marker='+',\n c='k', s=25)\n ax.set_ylim([-25, 25])\n ax.set_xlim([-25, 25])\n ax.set_autoscaley_on(False)\n ax.set_title('Birch %s' % info)\n\n# Compute clustering with MiniBatchKMeans.\nmbk = MiniBatchKMeans(init='k-means++', n_clusters=100, batch_size=100,\n n_init=10, max_no_improvement=10, verbose=0,\n random_state=0)\nt0 = time()\nmbk.fit(X)\nt_mini_batch = time() - t0\nprint(\"Time taken to run MiniBatchKMeans %0.2f seconds\" % t_mini_batch)\nmbk_means_labels_unique = np.unique(mbk.labels_)\n\nax = fig.add_subplot(1, 3, 3)\nfor this_centroid, k, col in zip(mbk.cluster_centers_,\n range(n_clusters), colors_):\n mask = mbk.labels_ == k\n ax.scatter(X[mask, 0], X[mask, 1], marker='.',\n c='w', edgecolor=col, alpha=0.5)\n ax.scatter(this_centroid[0], this_centroid[1], marker='+',\n c='k', s=25)\nax.set_xlim([-25, 25])\nax.set_ylim([-25, 25])\nax.set_title(\"MiniBatchKMeans\")\nax.set_autoscaley_on(False)\nplt.show()"
+
"# Authors: Manoj Kumar <[email protected]\n# Alexandre Gramfort <[email protected]>\n# License: BSD 3 clause\n\nprint(__doc__)\n\nfrom itertools import cycle\nfrom time import time\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport matplotlib.colors as colors\n\nfrom sklearn.cluster import Birch, MiniBatchKMeans\nfrom sklearn.datasets.samples_generator import make_blobs\n\n\n# Generate centers for the blobs so that it forms a 10 X 10 grid.\nxx = np.linspace(-22, 22, 10)\nyy = np.linspace(-22, 22, 10)\nxx, yy = np.meshgrid(xx, yy)\nn_centres = np.hstack((np.ravel(xx)[:, np.newaxis],\n np.ravel(yy)[:, np.newaxis]))\n\n# Generate blobs to do a comparison between MiniBatchKMeans and Birch.\nX, y = make_blobs(n_samples=100000, centers=n_centres, random_state=0)\n\n# Use all colors that matplotlib provides by default.\ncolors_ = cycle(colors.cnames.keys())\n\nfig = plt.figure(figsize=(12, 4))\nfig.subplots_adjust(left=0.04, right=0.98, bottom=0.1, top=0.9)\n\n# Compute clustering with Birch with and without the final clustering step\n# and plot.\nbirch_models = [Birch(threshold=1.7, n_clusters=None),\n Birch(threshold=1.7, n_clusters=100)]\nfinal_step = ['without global clustering', 'with global clustering']\n\nfor ind, (birch_model, info) in enumerate(zip(birch_models, final_step)):\n t = time()\n birch_model.fit(X)\n time_ = time() - t\n print(\"Birch %s as the final step took %0.2f seconds\" % (\n info, (time() - t)))\n\n # Plot result\n labels = birch_model.labels_\n centroids = birch_model.subcluster_centers_\n n_clusters = np.unique(labels).size\n print(\"n_clusters : %d\" % n_clusters)\n\n ax = fig.add_subplot(1, 3, ind + 1)\n for this_centroid, k, col in zip(centroids, range(n_clusters), colors_):\n mask = labels == k\n ax.scatter(X[mask, 0], X[mask, 1],\n c='w', edgecolor=col, marker='.', alpha=0.5)\n if birch_model.n_clusters is None:\n ax.scatter(this_centroid[0], this_centroid[1], marker='+',\n c='k', s=25)\n ax.set_ylim([-25, 25])\n ax.set_xlim([-25, 25])\n ax.set_autoscaley_on(False)\n ax.set_title('Birch %s' % info)\n\n# Compute clustering with MiniBatchKMeans.\nmbk = MiniBatchKMeans(init='k-means++', n_clusters=100, batch_size=100,\n n_init=10, max_no_improvement=10, verbose=0,\n random_state=0)\nt0 = time()\nmbk.fit(X)\nt_mini_batch = time() - t0\nprint(\"Time taken to run MiniBatchKMeans %0.2f seconds\" % t_mini_batch)\nmbk_means_labels_unique = np.unique(mbk.labels_)\n\nax = fig.add_subplot(1, 3, 3)\nfor this_centroid, k, col in zip(mbk.cluster_centers_,\n range(n_clusters), colors_):\n mask = mbk.labels_ == k\n ax.scatter(X[mask, 0], X[mask, 1], marker='.',\n c='w', edgecolor=col, alpha=0.5)\n ax.scatter(this_centroid[0], this_centroid[1], marker='+',\n c='k', s=25)\nax.set_xlim([-25, 25])\nax.set_ylim([-25, 25])\nax.set_title(\"MiniBatchKMeans\")\nax.set_autoscaley_on(False)\nplt.show()"
 ]
 }
 ],

dev/_downloads/plot_birch_vs_minibatchkmeans.py

Lines changed: 0 additions & 1 deletion
@@ -25,7 +25,6 @@
 import matplotlib.pyplot as plt
 import matplotlib.colors as colors

-from sklearn.preprocessing import StandardScaler
 from sklearn.cluster import Birch, MiniBatchKMeans
 from sklearn.datasets.samples_generator import make_blobs

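This change simply drops an import the example never used: the Birch vs. MiniBatchKMeans comparison runs directly on the raw make_blobs data, so StandardScaler is not needed. A minimal sketch of that comparison using only the remaining imports follows; the sample count and clustering parameters are illustrative, and make_blobs is imported from sklearn.datasets because the samples_generator path used in the example is deprecated in newer scikit-learn releases.

# Minimal sketch (illustrative parameters): cluster the same blob data with
# Birch and MiniBatchKMeans using only the imports the example keeps.
import numpy as np
from sklearn.cluster import Birch, MiniBatchKMeans
from sklearn.datasets import make_blobs  # modern import path for make_blobs

# Smaller data than the example's 100,000 samples, for a quick run.
X, _ = make_blobs(n_samples=10_000, centers=100, random_state=0)

birch = Birch(threshold=1.7, n_clusters=100).fit(X)
mbk = MiniBatchKMeans(n_clusters=100, batch_size=100, random_state=0).fit(X)

print("Birch clusters found:", np.unique(birch.labels_).size)
print("MiniBatchKMeans clusters:", np.unique(mbk.labels_).size)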
dev/_downloads/plot_ensemble_oob.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-
"import matplotlib.pyplot as plt\n\nfrom collections import OrderedDict\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\n\n# Author: Kian Ho <[email protected]>\n# Gilles Louppe <[email protected]>\n# Andreas Mueller <[email protected]>\n#\n# License: BSD 3 Clause\n\nprint(__doc__)\n\nRANDOM_STATE = 123\n\n# Generate a binary classification dataset.\nX, y = make_classification(n_samples=500, n_features=25,\n n_clusters_per_class=1, n_informative=15,\n random_state=RANDOM_STATE)\n\n# NOTE: Setting the `warm_start` construction parameter to `True` disables\n# support for parallelized ensembles but is necessary for tracking the OOB\n# error trajectory during training.\nensemble_clfs = [\n (\"RandomForestClassifier, max_features='sqrt'\",\n RandomForestClassifier(n_estimators=100,\n warm_start=True, oob_score=True,\n max_features=\"sqrt\",\n random_state=RANDOM_STATE)),\n (\"RandomForestClassifier, max_features='log2'\",\n RandomForestClassifier(n_estimators=100,\n warm_start=True, max_features='log2',\n oob_score=True,\n random_state=RANDOM_STATE)),\n (\"RandomForestClassifier, max_features=None\",\n RandomForestClassifier(n_estimators=100,\n warm_start=True, max_features=None,\n oob_score=True,\n random_state=RANDOM_STATE))\n]\n\n# Map a classifier name to a list of (<n_estimators>, <error rate>) pairs.\nerror_rate = OrderedDict((label, []) for label, _ in ensemble_clfs)\n\n# Range of `n_estimators` values to explore.\nmin_estimators = 15\nmax_estimators = 175\n\nfor label, clf in ensemble_clfs:\n for i in range(min_estimators, max_estimators + 1):\n clf.set_params(n_estimators=i)\n clf.fit(X, y)\n\n # Record the OOB error for each `n_estimators=i` setting.\n oob_error = 1 - clf.oob_score_\n error_rate[label].append((i, oob_error))\n\n# Generate the \"OOB error rate\" vs. \"n_estimators\" plot.\nfor label, clf_err in error_rate.items():\n xs, ys = zip(*clf_err)\n plt.plot(xs, ys, label=label)\n\nplt.xlim(min_estimators, max_estimators)\nplt.xlabel(\"n_estimators\")\nplt.ylabel(\"OOB error rate\")\nplt.legend(loc=\"upper right\")\nplt.show()"
+
"import matplotlib.pyplot as plt\n\nfrom collections import OrderedDict\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import RandomForestClassifier\n\n# Author: Kian Ho <[email protected]>\n# Gilles Louppe <[email protected]>\n# Andreas Mueller <[email protected]>\n#\n# License: BSD 3 Clause\n\nprint(__doc__)\n\nRANDOM_STATE = 123\n\n# Generate a binary classification dataset.\nX, y = make_classification(n_samples=500, n_features=25,\n n_clusters_per_class=1, n_informative=15,\n random_state=RANDOM_STATE)\n\n# NOTE: Setting the `warm_start` construction parameter to `True` disables\n# support for parallelized ensembles but is necessary for tracking the OOB\n# error trajectory during training.\nensemble_clfs = [\n (\"RandomForestClassifier, max_features='sqrt'\",\n RandomForestClassifier(n_estimators=100,\n warm_start=True, oob_score=True,\n max_features=\"sqrt\",\n random_state=RANDOM_STATE)),\n (\"RandomForestClassifier, max_features='log2'\",\n RandomForestClassifier(n_estimators=100,\n warm_start=True, max_features='log2',\n oob_score=True,\n random_state=RANDOM_STATE)),\n (\"RandomForestClassifier, max_features=None\",\n RandomForestClassifier(n_estimators=100,\n warm_start=True, max_features=None,\n oob_score=True,\n random_state=RANDOM_STATE))\n]\n\n# Map a classifier name to a list of (<n_estimators>, <error rate>) pairs.\nerror_rate = OrderedDict((label, []) for label, _ in ensemble_clfs)\n\n# Range of `n_estimators` values to explore.\nmin_estimators = 15\nmax_estimators = 175\n\nfor label, clf in ensemble_clfs:\n for i in range(min_estimators, max_estimators + 1):\n clf.set_params(n_estimators=i)\n clf.fit(X, y)\n\n # Record the OOB error for each `n_estimators=i` setting.\n oob_error = 1 - clf.oob_score_\n error_rate[label].append((i, oob_error))\n\n# Generate the \"OOB error rate\" vs. \"n_estimators\" plot.\nfor label, clf_err in error_rate.items():\n xs, ys = zip(*clf_err)\n plt.plot(xs, ys, label=label)\n\nplt.xlim(min_estimators, max_estimators)\nplt.xlabel(\"n_estimators\")\nplt.ylabel(\"OOB error rate\")\nplt.legend(loc=\"upper right\")\nplt.show()"
 ]
 }
 ],

dev/_downloads/plot_ensemble_oob.py

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@

 from collections import OrderedDict
 from sklearn.datasets import make_classification
-from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
+from sklearn.ensemble import RandomForestClassifier

 # Author: Kian Ho <[email protected]>
 # Gilles Louppe <[email protected]>

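Here the only change is dropping ExtraTreesClassifier from the import, since the example builds all three of its ensembles with RandomForestClassifier. The pattern the example relies on is unchanged: with warm_start=True and oob_score=True, refitting after raising n_estimators reuses the trees already grown and updates the out-of-bag score. A minimal sketch of that pattern follows; the dataset shape and estimator range are illustrative.

# Minimal sketch (illustrative sizes): track OOB error while growing a
# warm-started random forest, as the example does for three variants.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(n_samples=500, n_features=25,
                           n_informative=15, n_clusters_per_class=1,
                           random_state=123)

clf = RandomForestClassifier(warm_start=True, oob_score=True,
                             max_features="sqrt", random_state=123)

oob_errors = []
for n_estimators in range(15, 60, 5):
    clf.set_params(n_estimators=n_estimators)
    clf.fit(X, y)  # warm_start keeps previously grown trees
    oob_errors.append((n_estimators, 1 - clf.oob_score_))

for n_estimators, err in oob_errors:
    print(n_estimators, "trees -> OOB error", round(err, 3))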
dev/_downloads/plot_multilabel.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-
"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.datasets import make_multilabel_classification\nfrom sklearn.multiclass import OneVsRestClassifier\nfrom sklearn.svm import SVC\nfrom sklearn.preprocessing import LabelBinarizer\nfrom sklearn.decomposition import PCA\nfrom sklearn.cross_decomposition import CCA\n\n\ndef plot_hyperplane(clf, min_x, max_x, linestyle, label):\n # get the separating hyperplane\n w = clf.coef_[0]\n a = -w[0] / w[1]\n xx = np.linspace(min_x - 5, max_x + 5) # make sure the line is long enough\n yy = a * xx - (clf.intercept_[0]) / w[1]\n plt.plot(xx, yy, linestyle, label=label)\n\n\ndef plot_subfigure(X, Y, subplot, title, transform):\n if transform == \"pca\":\n X = PCA(n_components=2).fit_transform(X)\n elif transform == \"cca\":\n X = CCA(n_components=2).fit(X, Y).transform(X)\n else:\n raise ValueError\n\n min_x = np.min(X[:, 0])\n max_x = np.max(X[:, 0])\n\n min_y = np.min(X[:, 1])\n max_y = np.max(X[:, 1])\n\n classif = OneVsRestClassifier(SVC(kernel='linear'))\n classif.fit(X, Y)\n\n plt.subplot(2, 2, subplot)\n plt.title(title)\n\n zero_class = np.where(Y[:, 0])\n one_class = np.where(Y[:, 1])\n plt.scatter(X[:, 0], X[:, 1], s=40, c='gray', edgecolors=(0, 0, 0))\n plt.scatter(X[zero_class, 0], X[zero_class, 1], s=160, edgecolors='b',\n facecolors='none', linewidths=2, label='Class 1')\n plt.scatter(X[one_class, 0], X[one_class, 1], s=80, edgecolors='orange',\n facecolors='none', linewidths=2, label='Class 2')\n\n plot_hyperplane(classif.estimators_[0], min_x, max_x, 'k--',\n 'Boundary\\nfor class 1')\n plot_hyperplane(classif.estimators_[1], min_x, max_x, 'k-.',\n 'Boundary\\nfor class 2')\n plt.xticks(())\n plt.yticks(())\n\n plt.xlim(min_x - .5 * max_x, max_x + .5 * max_x)\n plt.ylim(min_y - .5 * max_y, max_y + .5 * max_y)\n if subplot == 2:\n plt.xlabel('First principal component')\n plt.ylabel('Second principal component')\n plt.legend(loc=\"upper left\")\n\n\nplt.figure(figsize=(8, 6))\n\nX, Y = make_multilabel_classification(n_classes=2, n_labels=1,\n allow_unlabeled=True,\n random_state=1)\n\nplot_subfigure(X, Y, 1, \"With unlabeled samples + CCA\", \"cca\")\nplot_subfigure(X, Y, 2, \"With unlabeled samples + PCA\", \"pca\")\n\nX, Y = make_multilabel_classification(n_classes=2, n_labels=1,\n allow_unlabeled=False,\n random_state=1)\n\nplot_subfigure(X, Y, 3, \"Without unlabeled samples + CCA\", \"cca\")\nplot_subfigure(X, Y, 4, \"Without unlabeled samples + PCA\", \"pca\")\n\nplt.subplots_adjust(.04, .02, .97, .94, .09, .2)\nplt.show()"
+
"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.datasets import make_multilabel_classification\nfrom sklearn.multiclass import OneVsRestClassifier\nfrom sklearn.svm import SVC\nfrom sklearn.decomposition import PCA\nfrom sklearn.cross_decomposition import CCA\n\n\ndef plot_hyperplane(clf, min_x, max_x, linestyle, label):\n # get the separating hyperplane\n w = clf.coef_[0]\n a = -w[0] / w[1]\n xx = np.linspace(min_x - 5, max_x + 5) # make sure the line is long enough\n yy = a * xx - (clf.intercept_[0]) / w[1]\n plt.plot(xx, yy, linestyle, label=label)\n\n\ndef plot_subfigure(X, Y, subplot, title, transform):\n if transform == \"pca\":\n X = PCA(n_components=2).fit_transform(X)\n elif transform == \"cca\":\n X = CCA(n_components=2).fit(X, Y).transform(X)\n else:\n raise ValueError\n\n min_x = np.min(X[:, 0])\n max_x = np.max(X[:, 0])\n\n min_y = np.min(X[:, 1])\n max_y = np.max(X[:, 1])\n\n classif = OneVsRestClassifier(SVC(kernel='linear'))\n classif.fit(X, Y)\n\n plt.subplot(2, 2, subplot)\n plt.title(title)\n\n zero_class = np.where(Y[:, 0])\n one_class = np.where(Y[:, 1])\n plt.scatter(X[:, 0], X[:, 1], s=40, c='gray', edgecolors=(0, 0, 0))\n plt.scatter(X[zero_class, 0], X[zero_class, 1], s=160, edgecolors='b',\n facecolors='none', linewidths=2, label='Class 1')\n plt.scatter(X[one_class, 0], X[one_class, 1], s=80, edgecolors='orange',\n facecolors='none', linewidths=2, label='Class 2')\n\n plot_hyperplane(classif.estimators_[0], min_x, max_x, 'k--',\n 'Boundary\\nfor class 1')\n plot_hyperplane(classif.estimators_[1], min_x, max_x, 'k-.',\n 'Boundary\\nfor class 2')\n plt.xticks(())\n plt.yticks(())\n\n plt.xlim(min_x - .5 * max_x, max_x + .5 * max_x)\n plt.ylim(min_y - .5 * max_y, max_y + .5 * max_y)\n if subplot == 2:\n plt.xlabel('First principal component')\n plt.ylabel('Second principal component')\n plt.legend(loc=\"upper left\")\n\n\nplt.figure(figsize=(8, 6))\n\nX, Y = make_multilabel_classification(n_classes=2, n_labels=1,\n allow_unlabeled=True,\n random_state=1)\n\nplot_subfigure(X, Y, 1, \"With unlabeled samples + CCA\", \"cca\")\nplot_subfigure(X, Y, 2, \"With unlabeled samples + PCA\", \"pca\")\n\nX, Y = make_multilabel_classification(n_classes=2, n_labels=1,\n allow_unlabeled=False,\n random_state=1)\n\nplot_subfigure(X, Y, 3, \"Without unlabeled samples + CCA\", \"cca\")\nplot_subfigure(X, Y, 4, \"Without unlabeled samples + PCA\", \"pca\")\n\nplt.subplots_adjust(.04, .02, .97, .94, .09, .2)\nplt.show()"
 ]
 }
 ],

dev/_downloads/plot_multilabel.py

Lines changed: 0 additions & 1 deletion
@@ -37,7 +37,6 @@
 from sklearn.datasets import make_multilabel_classification
 from sklearn.multiclass import OneVsRestClassifier
 from sklearn.svm import SVC
-from sklearn.preprocessing import LabelBinarizer
 from sklearn.decomposition import PCA
 from sklearn.cross_decomposition import CCA

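The LabelBinarizer import removed here was never used: make_multilabel_classification already returns Y as a binary indicator matrix, which OneVsRestClassifier accepts directly. A minimal sketch of that path, with illustrative parameters, is shown below.

# Minimal sketch (illustrative parameters): Y comes back as a 0/1 indicator
# matrix, so no LabelBinarizer step is needed before OneVsRestClassifier.
from sklearn.datasets import make_multilabel_classification
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC

X, Y = make_multilabel_classification(n_classes=2, n_labels=1,
                                      allow_unlabeled=True, random_state=1)
print(Y[:5])  # each row is a binary indicator vector over the two labels

clf = OneVsRestClassifier(SVC(kernel="linear")).fit(X, Y)
print(clf.predict(X[:5]))  # one linear SVC fitted per label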