
Commit e9be350

Pushing the docs for revision for branch: master, commit 687615d334f23edce467f80c099eeb4ee953c845
1 parent a75ee78 commit e9be350

File tree: 898 files changed (+2581 −2581 lines)

Two binary file entries (70 bytes each); contents not shown.

dev/_downloads/plot_random_forest_embedding.ipynb

Lines changed: 2 additions & 2 deletions
@@ -15,7 +15,7 @@
 },
 {
 "source": [
-"\n# Hashing feature transformation using Totally Random Trees\n\n\nRandomTreesEmbedding provides a way to map data to a\nvery high-dimensional, sparse representation, which might\nbe beneficial for classification.\nThe mapping is completely unsupervised and very efficient.\n\nThis example visualizes the partitions given by several\ntrees and shows how the transformation can also be used for\nnon-linear dimensionality reduction or non-linear classification.\n\nPoints that are neighboring often share the same leaf of a tree and therefore\nshare large parts of their hashed representation. This allows to\nseparate two concentric circles simply based on the principal components of the\ntransformed data.\n\nIn high-dimensional spaces, linear classifiers often achieve\nexcellent accuracy. For sparse binary data, BernoulliNB\nis particularly well-suited. The bottom row compares the\ndecision boundary obtained by BernoulliNB in the transformed\nspace with an ExtraTreesClassifier forests learned on the\noriginal data.\n\n"
+"\n# Hashing feature transformation using Totally Random Trees\n\n\nRandomTreesEmbedding provides a way to map data to a\nvery high-dimensional, sparse representation, which might\nbe beneficial for classification.\nThe mapping is completely unsupervised and very efficient.\n\nThis example visualizes the partitions given by several\ntrees and shows how the transformation can also be used for\nnon-linear dimensionality reduction or non-linear classification.\n\nPoints that are neighboring often share the same leaf of a tree and therefore\nshare large parts of their hashed representation. This allows to\nseparate two concentric circles simply based on the principal components of the\ntransformed data with truncated SVD.\n\nIn high-dimensional spaces, linear classifiers often achieve\nexcellent accuracy. For sparse binary data, BernoulliNB\nis particularly well-suited. The bottom row compares the\ndecision boundary obtained by BernoulliNB in the transformed\nspace with an ExtraTreesClassifier forests learned on the\noriginal data.\n\n"
 ],
 "cell_type": "markdown",
 "metadata": {}
@@ -24,7 +24,7 @@
 "execution_count": null,
 "cell_type": "code",
 "source": [
-"import numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.datasets import make_circles\nfrom sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier\nfrom sklearn.decomposition import TruncatedSVD\nfrom sklearn.naive_bayes import BernoulliNB\n\n# make a synthetic dataset\nX, y = make_circles(factor=0.5, random_state=0, noise=0.05)\n\n# use RandomTreesEmbedding to transform data\nhasher = RandomTreesEmbedding(n_estimators=10, random_state=0, max_depth=3)\nX_transformed = hasher.fit_transform(X)\n\n# Visualize result using PCA\npca = TruncatedSVD(n_components=2)\nX_reduced = pca.fit_transform(X_transformed)\n\n# Learn a Naive Bayes classifier on the transformed data\nnb = BernoulliNB()\nnb.fit(X_transformed, y)\n\n\n# Learn an ExtraTreesClassifier for comparison\ntrees = ExtraTreesClassifier(max_depth=3, n_estimators=10, random_state=0)\ntrees.fit(X, y)\n\n\n# scatter plot of original and reduced data\nfig = plt.figure(figsize=(9, 8))\n\nax = plt.subplot(221)\nax.scatter(X[:, 0], X[:, 1], c=y, s=50)\nax.set_title(\"Original Data (2d)\")\nax.set_xticks(())\nax.set_yticks(())\n\nax = plt.subplot(222)\nax.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y, s=50)\nax.set_title(\"PCA reduction (2d) of transformed data (%dd)\" %\n X_transformed.shape[1])\nax.set_xticks(())\nax.set_yticks(())\n\n# Plot the decision in original space. For that, we will assign a color to each\n# point in the mesh [x_min, x_max]x[y_min, y_max].\nh = .01\nx_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\ny_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\nxx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n\n# transform grid using RandomTreesEmbedding\ntransformed_grid = hasher.transform(np.c_[xx.ravel(), yy.ravel()])\ny_grid_pred = nb.predict_proba(transformed_grid)[:, 1]\n\nax = plt.subplot(223)\nax.set_title(\"Naive Bayes on Transformed data\")\nax.pcolormesh(xx, yy, y_grid_pred.reshape(xx.shape))\nax.scatter(X[:, 0], X[:, 1], c=y, s=50)\nax.set_ylim(-1.4, 1.4)\nax.set_xlim(-1.4, 1.4)\nax.set_xticks(())\nax.set_yticks(())\n\n# transform grid using ExtraTreesClassifier\ny_grid_pred = trees.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]\n\nax = plt.subplot(224)\nax.set_title(\"ExtraTrees predictions\")\nax.pcolormesh(xx, yy, y_grid_pred.reshape(xx.shape))\nax.scatter(X[:, 0], X[:, 1], c=y, s=50)\nax.set_ylim(-1.4, 1.4)\nax.set_xlim(-1.4, 1.4)\nax.set_xticks(())\nax.set_yticks(())\n\nplt.tight_layout()\nplt.show()"
+"import numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.datasets import make_circles\nfrom sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier\nfrom sklearn.decomposition import TruncatedSVD\nfrom sklearn.naive_bayes import BernoulliNB\n\n# make a synthetic dataset\nX, y = make_circles(factor=0.5, random_state=0, noise=0.05)\n\n# use RandomTreesEmbedding to transform data\nhasher = RandomTreesEmbedding(n_estimators=10, random_state=0, max_depth=3)\nX_transformed = hasher.fit_transform(X)\n\n# Visualize result after dimensionality reduction using truncated SVD\nsvd = TruncatedSVD(n_components=2)\nX_reduced = svd.fit_transform(X_transformed)\n\n# Learn a Naive Bayes classifier on the transformed data\nnb = BernoulliNB()\nnb.fit(X_transformed, y)\n\n\n# Learn an ExtraTreesClassifier for comparison\ntrees = ExtraTreesClassifier(max_depth=3, n_estimators=10, random_state=0)\ntrees.fit(X, y)\n\n\n# scatter plot of original and reduced data\nfig = plt.figure(figsize=(9, 8))\n\nax = plt.subplot(221)\nax.scatter(X[:, 0], X[:, 1], c=y, s=50)\nax.set_title(\"Original Data (2d)\")\nax.set_xticks(())\nax.set_yticks(())\n\nax = plt.subplot(222)\nax.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y, s=50)\nax.set_title(\"Truncated SVD reduction (2d) of transformed data (%dd)\" %\n X_transformed.shape[1])\nax.set_xticks(())\nax.set_yticks(())\n\n# Plot the decision in original space. For that, we will assign a color to each\n# point in the mesh [x_min, x_max]x[y_min, y_max].\nh = .01\nx_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\ny_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\nxx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n\n# transform grid using RandomTreesEmbedding\ntransformed_grid = hasher.transform(np.c_[xx.ravel(), yy.ravel()])\ny_grid_pred = nb.predict_proba(transformed_grid)[:, 1]\n\nax = plt.subplot(223)\nax.set_title(\"Naive Bayes on Transformed data\")\nax.pcolormesh(xx, yy, y_grid_pred.reshape(xx.shape))\nax.scatter(X[:, 0], X[:, 1], c=y, s=50)\nax.set_ylim(-1.4, 1.4)\nax.set_xlim(-1.4, 1.4)\nax.set_xticks(())\nax.set_yticks(())\n\n# transform grid using ExtraTreesClassifier\ny_grid_pred = trees.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]\n\nax = plt.subplot(224)\nax.set_title(\"ExtraTrees predictions\")\nax.pcolormesh(xx, yy, y_grid_pred.reshape(xx.shape))\nax.scatter(X[:, 0], X[:, 1], c=y, s=50)\nax.set_ylim(-1.4, 1.4)\nax.set_xlim(-1.4, 1.4)\nax.set_xticks(())\nax.set_yticks(())\n\nplt.tight_layout()\nplt.show()"
 ],
 "outputs": [],
 "metadata": {

dev/_downloads/plot_random_forest_embedding.py

Lines changed: 5 additions & 5 deletions
@@ -15,7 +15,7 @@
 Points that are neighboring often share the same leaf of a tree and therefore
 share large parts of their hashed representation. This allows to
 separate two concentric circles simply based on the principal components of the
-transformed data.
+transformed data with truncated SVD.
 
 In high-dimensional spaces, linear classifiers often achieve
 excellent accuracy. For sparse binary data, BernoulliNB
@@ -39,9 +39,9 @@
 hasher = RandomTreesEmbedding(n_estimators=10, random_state=0, max_depth=3)
 X_transformed = hasher.fit_transform(X)
 
-# Visualize result using PCA
-pca = TruncatedSVD(n_components=2)
-X_reduced = pca.fit_transform(X_transformed)
+# Visualize result after dimensionality reduction using truncated SVD
+svd = TruncatedSVD(n_components=2)
+X_reduced = svd.fit_transform(X_transformed)
 
 # Learn a Naive Bayes classifier on the transformed data
 nb = BernoulliNB()
@@ -64,7 +64,7 @@
 
 ax = plt.subplot(222)
 ax.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y, s=50)
-ax.set_title("PCA reduction (2d) of transformed data (%dd)" %
+ax.set_title("Truncated SVD reduction (2d) of transformed data (%dd)" %
              X_transformed.shape[1])
 ax.set_xticks(())
 ax.set_yticks(())
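
The docstring sentence this hunk leaves intact ("Points that are neighboring often share the same leaf of a tree...") is easy to verify: each row of the embedding has exactly one active indicator per tree, so the dot product of two rows counts the trees in which the two points land in the same leaf. A hedged sketch, where shared_leaves is a hypothetical helper not present in the example:

    import numpy as np
    from sklearn.datasets import make_circles
    from sklearn.ensemble import RandomTreesEmbedding

    X, y = make_circles(factor=0.5, random_state=0, noise=0.05)
    hasher = RandomTreesEmbedding(n_estimators=10, random_state=0, max_depth=3)
    emb = hasher.fit_transform(X)

    def shared_leaves(i, j):
        # Rows are binary with one 1 per tree, so the elementwise product
        # counts the trees where samples i and j fall into the same leaf.
        return int(emb[i].multiply(emb[j]).sum())

    # Nearest and farthest neighbours of sample 0 in the original 2-d space.
    dist = np.linalg.norm(X - X[0], axis=1)
    near, far = np.argsort(dist)[1], np.argsort(dist)[-1]
    print(shared_leaves(0, near))  # close to 10 (= n_estimators): hashes overlap
    print(shared_leaves(0, far))   # typically far fewer shared leaves

That overlap in leaf membership is what makes the hashed representations of the two circles nearly linearly separable, and why BernoulliNB, which models exactly this kind of sparse binary feature, works well in the transformed space.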
