
Commit e9be350

Pushing the docs for revision for branch: master, commit 687615d334f23edce467f80c099eeb4ee953c845
1 parent a75ee78 commit e9be350

File tree: 898 files changed (+2581 −2581 lines)

Two binary file entries (70 bytes each); contents not shown.

dev/_downloads/plot_random_forest_embedding.ipynb

Lines changed: 2 additions & 2 deletions
@@ -15,7 +15,7 @@
 },
 {
 "source": [
-"\n# Hashing feature transformation using Totally Random Trees\n\n\nRandomTreesEmbedding provides a way to map data to a\nvery high-dimensional, sparse representation, which might\nbe beneficial for classification.\nThe mapping is completely unsupervised and very efficient.\n\nThis example visualizes the partitions given by several\ntrees and shows how the transformation can also be used for\nnon-linear dimensionality reduction or non-linear classification.\n\nPoints that are neighboring often share the same leaf of a tree and therefore\nshare large parts of their hashed representation. This allows to\nseparate two concentric circles simply based on the principal components of the\ntransformed data.\n\nIn high-dimensional spaces, linear classifiers often achieve\nexcellent accuracy. For sparse binary data, BernoulliNB\nis particularly well-suited. The bottom row compares the\ndecision boundary obtained by BernoulliNB in the transformed\nspace with an ExtraTreesClassifier forests learned on the\noriginal data.\n\n"
+"\n# Hashing feature transformation using Totally Random Trees\n\n\nRandomTreesEmbedding provides a way to map data to a\nvery high-dimensional, sparse representation, which might\nbe beneficial for classification.\nThe mapping is completely unsupervised and very efficient.\n\nThis example visualizes the partitions given by several\ntrees and shows how the transformation can also be used for\nnon-linear dimensionality reduction or non-linear classification.\n\nPoints that are neighboring often share the same leaf of a tree and therefore\nshare large parts of their hashed representation. This allows to\nseparate two concentric circles simply based on the principal components of the\ntransformed data with truncated SVD.\n\nIn high-dimensional spaces, linear classifiers often achieve\nexcellent accuracy. For sparse binary data, BernoulliNB\nis particularly well-suited. The bottom row compares the\ndecision boundary obtained by BernoulliNB in the transformed\nspace with an ExtraTreesClassifier forests learned on the\noriginal data.\n\n"
 ],
 "cell_type": "markdown",
 "metadata": {}
@@ -24,7 +24,7 @@
 "execution_count": null,
 "cell_type": "code",
 "source": [
-"import numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.datasets import make_circles\nfrom sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier\nfrom sklearn.decomposition import TruncatedSVD\nfrom sklearn.naive_bayes import BernoulliNB\n\n# make a synthetic dataset\nX, y = make_circles(factor=0.5, random_state=0, noise=0.05)\n\n# use RandomTreesEmbedding to transform data\nhasher = RandomTreesEmbedding(n_estimators=10, random_state=0, max_depth=3)\nX_transformed = hasher.fit_transform(X)\n\n# Visualize result using PCA\npca = TruncatedSVD(n_components=2)\nX_reduced = pca.fit_transform(X_transformed)\n\n# Learn a Naive Bayes classifier on the transformed data\nnb = BernoulliNB()\nnb.fit(X_transformed, y)\n\n\n# Learn an ExtraTreesClassifier for comparison\ntrees = ExtraTreesClassifier(max_depth=3, n_estimators=10, random_state=0)\ntrees.fit(X, y)\n\n\n# scatter plot of original and reduced data\nfig = plt.figure(figsize=(9, 8))\n\nax = plt.subplot(221)\nax.scatter(X[:, 0], X[:, 1], c=y, s=50)\nax.set_title(\"Original Data (2d)\")\nax.set_xticks(())\nax.set_yticks(())\n\nax = plt.subplot(222)\nax.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y, s=50)\nax.set_title(\"PCA reduction (2d) of transformed data (%dd)\" %\n X_transformed.shape[1])\nax.set_xticks(())\nax.set_yticks(())\n\n# Plot the decision in original space. For that, we will assign a color to each\n# point in the mesh [x_min, x_max]x[y_min, y_max].\nh = .01\nx_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\ny_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\nxx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n\n# transform grid using RandomTreesEmbedding\ntransformed_grid = hasher.transform(np.c_[xx.ravel(), yy.ravel()])\ny_grid_pred = nb.predict_proba(transformed_grid)[:, 1]\n\nax = plt.subplot(223)\nax.set_title(\"Naive Bayes on Transformed data\")\nax.pcolormesh(xx, yy, y_grid_pred.reshape(xx.shape))\nax.scatter(X[:, 0], X[:, 1], c=y, s=50)\nax.set_ylim(-1.4, 1.4)\nax.set_xlim(-1.4, 1.4)\nax.set_xticks(())\nax.set_yticks(())\n\n# transform grid using ExtraTreesClassifier\ny_grid_pred = trees.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]\n\nax = plt.subplot(224)\nax.set_title(\"ExtraTrees predictions\")\nax.pcolormesh(xx, yy, y_grid_pred.reshape(xx.shape))\nax.scatter(X[:, 0], X[:, 1], c=y, s=50)\nax.set_ylim(-1.4, 1.4)\nax.set_xlim(-1.4, 1.4)\nax.set_xticks(())\nax.set_yticks(())\n\nplt.tight_layout()\nplt.show()"
+"import numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.datasets import make_circles\nfrom sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier\nfrom sklearn.decomposition import TruncatedSVD\nfrom sklearn.naive_bayes import BernoulliNB\n\n# make a synthetic dataset\nX, y = make_circles(factor=0.5, random_state=0, noise=0.05)\n\n# use RandomTreesEmbedding to transform data\nhasher = RandomTreesEmbedding(n_estimators=10, random_state=0, max_depth=3)\nX_transformed = hasher.fit_transform(X)\n\n# Visualize result after dimensionality reduction using truncated SVD\nsvd = TruncatedSVD(n_components=2)\nX_reduced = svd.fit_transform(X_transformed)\n\n# Learn a Naive Bayes classifier on the transformed data\nnb = BernoulliNB()\nnb.fit(X_transformed, y)\n\n\n# Learn an ExtraTreesClassifier for comparison\ntrees = ExtraTreesClassifier(max_depth=3, n_estimators=10, random_state=0)\ntrees.fit(X, y)\n\n\n# scatter plot of original and reduced data\nfig = plt.figure(figsize=(9, 8))\n\nax = plt.subplot(221)\nax.scatter(X[:, 0], X[:, 1], c=y, s=50)\nax.set_title(\"Original Data (2d)\")\nax.set_xticks(())\nax.set_yticks(())\n\nax = plt.subplot(222)\nax.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y, s=50)\nax.set_title(\"Truncated SVD reduction (2d) of transformed data (%dd)\" %\n X_transformed.shape[1])\nax.set_xticks(())\nax.set_yticks(())\n\n# Plot the decision in original space. For that, we will assign a color to each\n# point in the mesh [x_min, x_max]x[y_min, y_max].\nh = .01\nx_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\ny_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\nxx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n\n# transform grid using RandomTreesEmbedding\ntransformed_grid = hasher.transform(np.c_[xx.ravel(), yy.ravel()])\ny_grid_pred = nb.predict_proba(transformed_grid)[:, 1]\n\nax = plt.subplot(223)\nax.set_title(\"Naive Bayes on Transformed data\")\nax.pcolormesh(xx, yy, y_grid_pred.reshape(xx.shape))\nax.scatter(X[:, 0], X[:, 1], c=y, s=50)\nax.set_ylim(-1.4, 1.4)\nax.set_xlim(-1.4, 1.4)\nax.set_xticks(())\nax.set_yticks(())\n\n# transform grid using ExtraTreesClassifier\ny_grid_pred = trees.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]\n\nax = plt.subplot(224)\nax.set_title(\"ExtraTrees predictions\")\nax.pcolormesh(xx, yy, y_grid_pred.reshape(xx.shape))\nax.scatter(X[:, 0], X[:, 1], c=y, s=50)\nax.set_ylim(-1.4, 1.4)\nax.set_xlim(-1.4, 1.4)\nax.set_xticks(())\nax.set_yticks(())\n\nplt.tight_layout()\nplt.show()"
 ],
 "outputs": [],
 "metadata": {

dev/_downloads/plot_random_forest_embedding.py

Lines changed: 5 additions & 5 deletions
@@ -15,7 +15,7 @@
 Points that are neighboring often share the same leaf of a tree and therefore
 share large parts of their hashed representation. This allows to
 separate two concentric circles simply based on the principal components of the
-transformed data.
+transformed data with truncated SVD.
 
 In high-dimensional spaces, linear classifiers often achieve
 excellent accuracy. For sparse binary data, BernoulliNB
@@ -39,9 +39,9 @@
 hasher = RandomTreesEmbedding(n_estimators=10, random_state=0, max_depth=3)
 X_transformed = hasher.fit_transform(X)
 
-# Visualize result using PCA
-pca = TruncatedSVD(n_components=2)
-X_reduced = pca.fit_transform(X_transformed)
+# Visualize result after dimensionality reduction using truncated SVD
+svd = TruncatedSVD(n_components=2)
+X_reduced = svd.fit_transform(X_transformed)
 
 # Learn a Naive Bayes classifier on the transformed data
 nb = BernoulliNB()
@@ -64,7 +64,7 @@
 
 ax = plt.subplot(222)
 ax.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y, s=50)
-ax.set_title("PCA reduction (2d) of transformed data (%dd)" %
+ax.set_title("Truncated SVD reduction (2d) of transformed data (%dd)" %
              X_transformed.shape[1])
 ax.set_xticks(())
 ax.set_yticks(())
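
The docstring sentence this hunk leaves intact ("Points that are neighboring often share the same leaf of a tree...") is easy to verify: each row of the embedding has exactly one active indicator per tree, so the dot product of two rows counts the trees in which the two points land in the same leaf. A hedged sketch, where shared_leaves is a hypothetical helper not present in the example:

    import numpy as np
    from sklearn.datasets import make_circles
    from sklearn.ensemble import RandomTreesEmbedding

    X, y = make_circles(factor=0.5, random_state=0, noise=0.05)
    hasher = RandomTreesEmbedding(n_estimators=10, random_state=0, max_depth=3)
    emb = hasher.fit_transform(X)

    def shared_leaves(i, j):
        # Rows are binary with one 1 per tree, so the elementwise product
        # counts the trees where samples i and j fall into the same leaf.
        return int(emb[i].multiply(emb[j]).sum())

    # Nearest and farthest neighbours of sample 0 in the original 2-d space.
    dist = np.linalg.norm(X - X[0], axis=1)
    near, far = np.argsort(dist)[1], np.argsort(dist)[-1]
    print(shared_leaves(0, near))  # close to 10 (= n_estimators): hashes overlap
    print(shared_leaves(0, far))   # typically far fewer shared leaves

That overlap in leaf membership is what makes the hashed representations of the two circles nearly linearly separable, and why BernoulliNB, which models exactly this kind of sparse binary feature, works well in the transformed space.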
