
Commit 4758431

Pushing the docs to dev/ for branch: master, commit 9548e8f4cdf11c5dfded57ee6661e7bcff0c374b
1 parent 8f35b11 commit 4758431

File tree

1,128 files changed (+5,183 / −3,871 lines)

Two changed binary files (5.17 KB and 4.17 KB) are not shown.

dev/_downloads/plot_cluster_comparison.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@

The notebook's single code cell stores the entire example script as one JSON-escaped string, so this 1-addition/1-deletion diff replaces the string wholesale. Unescaped, the change is the same as in dev/_downloads/plot_cluster_comparison.py below: OPTICS defaults ('min_samples': 20, 'xi': 0.05, 'min_cluster_size': 0.1) are added to default_base, per-dataset overrides are added for noisy_circles, varied, and aniso, a cluster.OPTICS estimator is created alongside DBSCAN, and ('OPTICS', optics) is inserted into clustering_algorithms between DBSCAN and Birch.

dev/_downloads/plot_cluster_comparison.py

Lines changed: 14 additions & 4 deletions
@@ -74,14 +74,20 @@
                 'damping': .9,
                 'preference': -200,
                 'n_neighbors': 10,
-                'n_clusters': 3}
+                'n_clusters': 3,
+                'min_samples': 20,
+                'xi': 0.05,
+                'min_cluster_size': 0.1}
 
 datasets = [
     (noisy_circles, {'damping': .77, 'preference': -240,
-                     'quantile': .2, 'n_clusters': 2}),
+                     'quantile': .2, 'n_clusters': 2,
+                     'min_samples': 20, 'xi': 0.25}),
     (noisy_moons, {'damping': .75, 'preference': -220, 'n_clusters': 2}),
-    (varied, {'eps': .18, 'n_neighbors': 2}),
-    (aniso, {'eps': .15, 'n_neighbors': 2}),
+    (varied, {'eps': .18, 'n_neighbors': 2,
+              'min_samples': 5, 'xi': 0.035, 'min_cluster_size': .2}),
+    (aniso, {'eps': .15, 'n_neighbors': 2,
+             'min_samples': 20, 'xi': 0.1, 'min_cluster_size': .2}),
     (blobs, {}),
     (no_structure, {})]

@@ -116,6 +122,9 @@
         n_clusters=params['n_clusters'], eigen_solver='arpack',
         affinity="nearest_neighbors")
     dbscan = cluster.DBSCAN(eps=params['eps'])
+    optics = cluster.OPTICS(min_samples=params['min_samples'],
+                            xi=params['xi'],
+                            min_cluster_size=params['min_cluster_size'])
     affinity_propagation = cluster.AffinityPropagation(
         damping=params['damping'], preference=params['preference'])
     average_linkage = cluster.AgglomerativeClustering(

@@ -133,6 +142,7 @@
         ('Ward', ward),
         ('AgglomerativeClustering', average_linkage),
         ('DBSCAN', dbscan),
+        ('OPTICS', optics),
         ('Birch', birch),
         ('GaussianMixture', gmm)
     )
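
For readers trying the new estimator on its own, here is a minimal sketch of what the three added parameters control. The toy data and parameter values are illustrative, not taken from the commit; only the OPTICS API itself is from scikit-learn:

import numpy as np
from sklearn.cluster import OPTICS
from sklearn.datasets import make_blobs

# Toy data: three blobs with different spreads (illustrative only).
X, _ = make_blobs(n_samples=300, cluster_std=[0.5, 1.0, 2.0], random_state=0)

optics = OPTICS(
    min_samples=20,        # neighborhood size for a point to count as a core point
    xi=0.05,               # minimum steepness on the reachability plot marking a cluster boundary
    min_cluster_size=0.1)  # smallest allowed cluster, as a fraction of the dataset
optics.fit(X)

# As with DBSCAN, points labeled -1 are treated as noise.
print(np.unique(optics.labels_))

As the per-dataset overrides in the diff suggest, these values typically need tuning: min_samples sets how much local density counts as "core", while xi trades off how aggressively the reachability plot is split into clusters.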

dev/_downloads/plot_optics.ipynb

Lines changed: 54 additions & 0 deletions
This is a new file; all 54 lines are additions. The notebook contains three cells: a %matplotlib inline code cell, a markdown cell describing the example, and a single code cell holding the full script.

Markdown cell:

Demo of OPTICS clustering algorithm

Finds core samples of high density and expands clusters from them. This example uses data that is generated so that the clusters have different densities. :class:`sklearn.cluster.OPTICS` is first used with its Xi cluster detection method, and then specific thresholds are set on the reachability, which corresponds to :class:`sklearn.cluster.DBSCAN`. The different clusters found by OPTICS's Xi method can be recovered with different choices of thresholds in DBSCAN.
Code cell (unescaped from the notebook JSON):

# Authors: Shane Grigsby <[email protected]>
#          Adrin Jalali <[email protected]>
# License: BSD 3 clause


from sklearn.cluster import OPTICS, cluster_optics_dbscan
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np

# Generate sample data

np.random.seed(0)
n_points_per_cluster = 250

C1 = [-5, -2] + .8 * np.random.randn(n_points_per_cluster, 2)
C2 = [4, -1] + .1 * np.random.randn(n_points_per_cluster, 2)
C3 = [1, -2] + .2 * np.random.randn(n_points_per_cluster, 2)
C4 = [-2, 3] + .3 * np.random.randn(n_points_per_cluster, 2)
C5 = [3, -2] + 1.6 * np.random.randn(n_points_per_cluster, 2)
C6 = [5, 6] + 2 * np.random.randn(n_points_per_cluster, 2)
X = np.vstack((C1, C2, C3, C4, C5, C6))

clust = OPTICS(min_samples=50, xi=.05, min_cluster_size=.05)

# Run the fit
clust.fit(X)

labels_050 = cluster_optics_dbscan(reachability=clust.reachability_,
                                   core_distances=clust.core_distances_,
                                   ordering=clust.ordering_, eps=0.5)
labels_200 = cluster_optics_dbscan(reachability=clust.reachability_,
                                   core_distances=clust.core_distances_,
                                   ordering=clust.ordering_, eps=2)

space = np.arange(len(X))
reachability = clust.reachability_[clust.ordering_]
labels = clust.labels_[clust.ordering_]

plt.figure(figsize=(10, 7))
G = gridspec.GridSpec(2, 3)
ax1 = plt.subplot(G[0, :])
ax2 = plt.subplot(G[1, 0])
ax3 = plt.subplot(G[1, 1])
ax4 = plt.subplot(G[1, 2])

# Reachability plot
colors = ['g.', 'r.', 'b.', 'y.', 'c.']
for klass, color in zip(range(0, 5), colors):
    Xk = space[labels == klass]
    Rk = reachability[labels == klass]
    ax1.plot(Xk, Rk, color, alpha=0.3)
ax1.plot(space[labels == -1], reachability[labels == -1], 'k.', alpha=0.3)
ax1.plot(space, np.full_like(space, 2., dtype=float), 'k-', alpha=0.5)
ax1.plot(space, np.full_like(space, 0.5, dtype=float), 'k-.', alpha=0.5)
ax1.set_ylabel('Reachability (epsilon distance)')
ax1.set_title('Reachability Plot')

# OPTICS
colors = ['g.', 'r.', 'b.', 'y.', 'c.']
for klass, color in zip(range(0, 5), colors):
    Xk = X[clust.labels_ == klass]
    ax2.plot(Xk[:, 0], Xk[:, 1], color, alpha=0.3)
ax2.plot(X[clust.labels_ == -1, 0], X[clust.labels_ == -1, 1], 'k+', alpha=0.1)
ax2.set_title('Automatic Clustering\nOPTICS')

# DBSCAN at 0.5
colors = ['g', 'greenyellow', 'olive', 'r', 'b', 'c']
for klass, color in zip(range(0, 6), colors):
    Xk = X[labels_050 == klass]
    ax3.plot(Xk[:, 0], Xk[:, 1], color, alpha=0.3, marker='.')
ax3.plot(X[labels_050 == -1, 0], X[labels_050 == -1, 1], 'k+', alpha=0.1)
ax3.set_title('Clustering at 0.5 epsilon cut\nDBSCAN')

# DBSCAN at 2.
colors = ['g.', 'm.', 'y.', 'c.']
for klass, color in zip(range(0, 4), colors):
    Xk = X[labels_200 == klass]
    ax4.plot(Xk[:, 0], Xk[:, 1], color, alpha=0.3)
ax4.plot(X[labels_200 == -1, 0], X[labels_200 == -1, 1], 'k+', alpha=0.1)
ax4.set_title('Clustering at 2.0 epsilon cut\nDBSCAN')

plt.tight_layout()
plt.show()
The cells are followed by standard notebook metadata: kernelspec (display_name "Python 3", language "python", name "python3"), language_info (IPython codemirror mode version 3, file_extension ".py", mimetype "text/x-python", nbconvert_exporter "python", pygments_lexer "ipython3", version "3.6.8"), nbformat 4, nbformat_minor 0.
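
The markdown cell's claim that thresholding the reachability "corresponds to" DBSCAN can be sanity-checked outside the example. A minimal sketch, assuming scikit-learn >= 0.21 (where OPTICS and cluster_optics_dbscan were introduced); the data and eps value are illustrative, and agreement is expected to be close rather than bit-for-bit identical:

import numpy as np
from sklearn.cluster import DBSCAN, OPTICS, cluster_optics_dbscan

np.random.seed(0)
# Two clusters of very different density, as in the example.
X = np.vstack(([-5, -2] + .8 * np.random.randn(250, 2),
               [4, -1] + .1 * np.random.randn(250, 2)))

clust = OPTICS(min_samples=50).fit(X)

# DBSCAN-style labels extracted from the precomputed reachability at eps=0.5 ...
labels_extracted = cluster_optics_dbscan(reachability=clust.reachability_,
                                         core_distances=clust.core_distances_,
                                         ordering=clust.ordering_, eps=0.5)
# ... versus actually running DBSCAN with matching settings.
labels_dbscan = DBSCAN(eps=0.5, min_samples=50).fit(X).labels_

# Fraction of points with identical labels (label numbering usually,
# but not always, lines up between the two methods).
print(np.mean(labels_extracted == labels_dbscan))

Extracting from a single OPTICS fit is the cheaper route when several eps cuts are needed, which is exactly what the example does with eps=0.5 and eps=2.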
