
Commit ab682f7

Pushing the docs to dev/ for branch: master, commit e1dd0d85c4a19795523668403bb066c6d0b9592b
1 parent d146f14 commit ab682f7

File tree

1,102 files changed: +3554 / -4213 lines

-158 Bytes (binary file not shown)
-160 Bytes (binary file not shown)

dev/_downloads/plot_discretization_classification.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-
"# Code source: Tom Dupr\u00e9 la Tour\n# Adapted from plot_classifier_comparison by Ga\u00ebl Varoquaux and Andreas M\u00fcller\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib.colors import ListedColormap\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.datasets import make_moons, make_circles, make_classification\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import KBinsDiscretizer\nfrom sklearn.svm import SVC, LinearSVC\nfrom sklearn.ensemble import GradientBoostingClassifier\nfrom sklearn.utils.testing import ignore_warnings\nfrom sklearn.exceptions import ConvergenceWarning\n\nprint(__doc__)\n\nh = .02 # step size in the mesh\n\n\ndef get_name(estimator):\n name = estimator.__class__.__name__\n if name == 'Pipeline':\n name = [get_name(est[1]) for est in estimator.steps]\n name = ' + '.join(name)\n return name\n\n\n# list of (estimator, param_grid), where param_grid is used in GridSearchCV\nclassifiers = [\n (LogisticRegression(solver='lbfgs', random_state=0), {\n 'C': np.logspace(-2, 7, 10)\n }),\n (LinearSVC(random_state=0), {\n 'C': np.logspace(-2, 7, 10)\n }),\n (make_pipeline(\n KBinsDiscretizer(encode='onehot'),\n LogisticRegression(solver='lbfgs', random_state=0)), {\n 'kbinsdiscretizer__n_bins': np.arange(2, 10),\n 'logisticregression__C': np.logspace(-2, 7, 10),\n }),\n (make_pipeline(\n KBinsDiscretizer(encode='onehot'), LinearSVC(random_state=0)), {\n 'kbinsdiscretizer__n_bins': np.arange(2, 10),\n 'linearsvc__C': np.logspace(-2, 7, 10),\n }),\n (GradientBoostingClassifier(n_estimators=50, random_state=0), {\n 'learning_rate': np.logspace(-4, 0, 10)\n }),\n (SVC(random_state=0, gamma='scale'), {\n 'C': np.logspace(-2, 7, 10)\n }),\n]\n\nnames = [get_name(e) for e, g in classifiers]\n\nn_samples = 100\ndatasets = [\n make_moons(n_samples=n_samples, noise=0.2, random_state=0),\n make_circles(n_samples=n_samples, noise=0.2, factor=0.5, random_state=1),\n make_classification(n_samples=n_samples, n_features=2, n_redundant=0,\n n_informative=2, random_state=2,\n n_clusters_per_class=1)\n]\n\nfigure = plt.figure(figsize=(21, 9))\ncm = plt.cm.PiYG\ncm_bright = ListedColormap(['#b30065', '#178000'])\ni = 1\n# iterate over datasets\nfor ds_cnt, (X, y) in enumerate(datasets):\n print('\\ndataset %d\\n---------' % ds_cnt)\n\n # preprocess dataset, split into training and test part\n X = StandardScaler().fit_transform(X)\n X_train, X_test, y_train, y_test = train_test_split(\n X, y, test_size=.5, random_state=42)\n\n # create the grid for background colors\n x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\n y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\n xx, yy = np.meshgrid(\n np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n\n # plot the dataset first\n ax = plt.subplot(len(datasets), len(classifiers) + 1, i)\n if ds_cnt == 0:\n ax.set_title(\"Input data\")\n # plot the training points\n ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,\n edgecolors='k')\n # and testing points\n ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6,\n edgecolors='k')\n ax.set_xlim(xx.min(), xx.max())\n ax.set_ylim(yy.min(), yy.max())\n ax.set_xticks(())\n ax.set_yticks(())\n i += 1\n\n # iterate over classifiers\n for name, (estimator, param_grid) in zip(names, classifiers):\n ax = 
plt.subplot(len(datasets), len(classifiers) + 1, i)\n\n clf = GridSearchCV(estimator=estimator, param_grid=param_grid, cv=5,\n iid=False)\n with ignore_warnings(category=ConvergenceWarning):\n clf.fit(X_train, y_train)\n score = clf.score(X_test, y_test)\n print('%s: %.2f' % (name, score))\n\n # plot the decision boundary. For that, we will assign a color to each\n # point in the mesh [x_min, x_max]*[y_min, y_max].\n if hasattr(clf, \"decision_function\"):\n Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n else:\n Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]\n\n # put the result into a color plot\n Z = Z.reshape(xx.shape)\n ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)\n\n # plot the training points\n ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,\n edgecolors='k')\n # and testing points\n ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,\n edgecolors='k', alpha=0.6)\n ax.set_xlim(xx.min(), xx.max())\n ax.set_ylim(yy.min(), yy.max())\n ax.set_xticks(())\n ax.set_yticks(())\n\n if ds_cnt == 0:\n ax.set_title(name.replace(' + ', '\\n'))\n ax.text(0.95, 0.06, ('%.2f' % score).lstrip('0'), size=15,\n bbox=dict(boxstyle='round', alpha=0.8, facecolor='white'),\n transform=ax.transAxes, horizontalalignment='right')\n\n i += 1\n\nplt.tight_layout()\n\n# Add suptitles above the figure\nplt.subplots_adjust(top=0.90)\nsuptitles = [\n 'Linear classifiers',\n 'Feature discretization and linear classifiers',\n 'Non-linear classifiers',\n]\nfor i, suptitle in zip([2, 4, 6], suptitles):\n ax = plt.subplot(len(datasets), len(classifiers) + 1, i)\n ax.text(1.05, 1.25, suptitle, transform=ax.transAxes,\n horizontalalignment='center', size='x-large')\nplt.show()"
+
"# Code source: Tom Dupr\u00e9 la Tour\n# Adapted from plot_classifier_comparison by Ga\u00ebl Varoquaux and Andreas M\u00fcller\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib.colors import ListedColormap\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.datasets import make_moons, make_circles, make_classification\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import KBinsDiscretizer\nfrom sklearn.svm import SVC, LinearSVC\nfrom sklearn.ensemble import GradientBoostingClassifier\nfrom sklearn.utils.testing import ignore_warnings\nfrom sklearn.exceptions import ConvergenceWarning\n\nprint(__doc__)\n\nh = .02 # step size in the mesh\n\n\ndef get_name(estimator):\n name = estimator.__class__.__name__\n if name == 'Pipeline':\n name = [get_name(est[1]) for est in estimator.steps]\n name = ' + '.join(name)\n return name\n\n\n# list of (estimator, param_grid), where param_grid is used in GridSearchCV\nclassifiers = [\n (LogisticRegression(solver='lbfgs', random_state=0), {\n 'C': np.logspace(-2, 7, 10)\n }),\n (LinearSVC(random_state=0), {\n 'C': np.logspace(-2, 7, 10)\n }),\n (make_pipeline(\n KBinsDiscretizer(encode='onehot'),\n LogisticRegression(solver='lbfgs', random_state=0)), {\n 'kbinsdiscretizer__n_bins': np.arange(2, 10),\n 'logisticregression__C': np.logspace(-2, 7, 10),\n }),\n (make_pipeline(\n KBinsDiscretizer(encode='onehot'), LinearSVC(random_state=0)), {\n 'kbinsdiscretizer__n_bins': np.arange(2, 10),\n 'linearsvc__C': np.logspace(-2, 7, 10),\n }),\n (GradientBoostingClassifier(n_estimators=50, random_state=0), {\n 'learning_rate': np.logspace(-4, 0, 10)\n }),\n (SVC(random_state=0, gamma='scale'), {\n 'C': np.logspace(-2, 7, 10)\n }),\n]\n\nnames = [get_name(e) for e, g in classifiers]\n\nn_samples = 100\ndatasets = [\n make_moons(n_samples=n_samples, noise=0.2, random_state=0),\n make_circles(n_samples=n_samples, noise=0.2, factor=0.5, random_state=1),\n make_classification(n_samples=n_samples, n_features=2, n_redundant=0,\n n_informative=2, random_state=2,\n n_clusters_per_class=1)\n]\n\nfig, axes = plt.subplots(nrows=len(datasets), ncols=len(classifiers) + 1,\n figsize=(21, 9))\n\ncm = plt.cm.PiYG\ncm_bright = ListedColormap(['#b30065', '#178000'])\n\n# iterate over datasets\nfor ds_cnt, (X, y) in enumerate(datasets):\n print('\\ndataset %d\\n---------' % ds_cnt)\n\n # preprocess dataset, split into training and test part\n X = StandardScaler().fit_transform(X)\n X_train, X_test, y_train, y_test = train_test_split(\n X, y, test_size=.5, random_state=42)\n\n # create the grid for background colors\n x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\n y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\n xx, yy = np.meshgrid(\n np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n\n # plot the dataset first\n ax = axes[ds_cnt, 0]\n if ds_cnt == 0:\n ax.set_title(\"Input data\")\n # plot the training points\n ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,\n edgecolors='k')\n # and testing points\n ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6,\n edgecolors='k')\n ax.set_xlim(xx.min(), xx.max())\n ax.set_ylim(yy.min(), yy.max())\n ax.set_xticks(())\n ax.set_yticks(())\n\n # iterate over classifiers\n for est_idx, (name, (estimator, param_grid)) in \\\n enumerate(zip(names, 
classifiers)):\n ax = axes[ds_cnt, est_idx + 1]\n\n clf = GridSearchCV(estimator=estimator, param_grid=param_grid, cv=5,\n iid=False)\n with ignore_warnings(category=ConvergenceWarning):\n clf.fit(X_train, y_train)\n score = clf.score(X_test, y_test)\n print('%s: %.2f' % (name, score))\n\n # plot the decision boundary. For that, we will assign a color to each\n # point in the mesh [x_min, x_max]*[y_min, y_max].\n if hasattr(clf, \"decision_function\"):\n Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n else:\n Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]\n\n # put the result into a color plot\n Z = Z.reshape(xx.shape)\n ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)\n\n # plot the training points\n ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,\n edgecolors='k')\n # and testing points\n ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,\n edgecolors='k', alpha=0.6)\n ax.set_xlim(xx.min(), xx.max())\n ax.set_ylim(yy.min(), yy.max())\n ax.set_xticks(())\n ax.set_yticks(())\n\n if ds_cnt == 0:\n ax.set_title(name.replace(' + ', '\\n'))\n ax.text(0.95, 0.06, ('%.2f' % score).lstrip('0'), size=15,\n bbox=dict(boxstyle='round', alpha=0.8, facecolor='white'),\n transform=ax.transAxes, horizontalalignment='right')\n\n\nplt.tight_layout()\n\n# Add suptitles above the figure\nplt.subplots_adjust(top=0.90)\nsuptitles = [\n 'Linear classifiers',\n 'Feature discretization and linear classifiers',\n 'Non-linear classifiers',\n]\nfor i, suptitle in zip([1, 3, 5], suptitles):\n ax = axes[0, i]\n ax.text(1.05, 1.25, suptitle, transform=ax.transAxes,\n horizontalalignment='center', size='x-large')\nplt.show()"
 ]
 }
 ],
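
The cell above tunes each estimator with GridSearchCV, and for the discretized variants it names hyperparameters through make_pipeline's stepname__parameter convention. Below is a minimal, self-contained sketch of that pattern; the toy data and the smaller grid values are illustrative, not the ones used by the example.

# A minimal sketch (not the example itself) of tuning a KBinsDiscretizer +
# LogisticRegression pipeline with GridSearchCV. Grid values are illustrative.
import numpy as np

from sklearn.datasets import make_moons
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import KBinsDiscretizer

X, y = make_moons(n_samples=100, noise=0.2, random_state=0)

# make_pipeline names each step after its lowercased class name, so the grid
# keys below follow the 'stepname__parameter' convention.
pipe = make_pipeline(KBinsDiscretizer(encode='onehot'),
                     LogisticRegression(solver='lbfgs', random_state=0))
param_grid = {
    'kbinsdiscretizer__n_bins': np.arange(2, 6),
    'logisticregression__C': np.logspace(-2, 2, 5),
}
search = GridSearchCV(pipe, param_grid, cv=5)
search.fit(X, y)
print(search.best_params_, 'CV accuracy: %.2f' % search.best_score_)

Because make_pipeline derives step names from the lowercased class names, the example's grid keys read kbinsdiscretizer__n_bins and logisticregression__C without any manual step naming.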

dev/_downloads/plot_discretization_classification.py

Lines changed: 10 additions & 9 deletions
@@ -99,10 +99,12 @@ def get_name(estimator):
                         n_clusters_per_class=1)
 ]
 
-figure = plt.figure(figsize=(21, 9))
+fig, axes = plt.subplots(nrows=len(datasets), ncols=len(classifiers) + 1,
+                         figsize=(21, 9))
+
 cm = plt.cm.PiYG
 cm_bright = ListedColormap(['#b30065', '#178000'])
-i = 1
+
 # iterate over datasets
 for ds_cnt, (X, y) in enumerate(datasets):
     print('\ndataset %d\n---------' % ds_cnt)
@@ -119,7 +121,7 @@ def get_name(estimator):
         np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
 
     # plot the dataset first
-    ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
+    ax = axes[ds_cnt, 0]
     if ds_cnt == 0:
         ax.set_title("Input data")
     # plot the training points
@@ -132,11 +134,11 @@ def get_name(estimator):
     ax.set_ylim(yy.min(), yy.max())
     ax.set_xticks(())
     ax.set_yticks(())
-    i += 1
 
     # iterate over classifiers
-    for name, (estimator, param_grid) in zip(names, classifiers):
-        ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
+    for est_idx, (name, (estimator, param_grid)) in \
+            enumerate(zip(names, classifiers)):
+        ax = axes[ds_cnt, est_idx + 1]
 
         clf = GridSearchCV(estimator=estimator, param_grid=param_grid, cv=5,
                            iid=False)
@@ -173,7 +175,6 @@ def get_name(estimator):
                 bbox=dict(boxstyle='round', alpha=0.8, facecolor='white'),
                 transform=ax.transAxes, horizontalalignment='right')
 
-        i += 1
 
 plt.tight_layout()
 
@@ -184,8 +185,8 @@ def get_name(estimator):
     'Feature discretization and linear classifiers',
     'Non-linear classifiers',
 ]
-for i, suptitle in zip([2, 4, 6], suptitles):
-    ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
+for i, suptitle in zip([1, 3, 5], suptitles):
+    ax = axes[0, i]
     ax.text(1.05, 1.25, suptitle, transform=ax.transAxes,
             horizontalalignment='center', size='x-large')
 plt.show()
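
The diff swaps a manually incremented subplot index for a grid of axes created once with plt.subplots and indexed by [row, column]. Below is a minimal sketch of that pattern; the grid shape and plotted data are made up for illustration.

# A minimal sketch of the plt.subplots pattern adopted by this change: build
# the whole grid of axes up front and index it as axes[row, col], instead of
# advancing a plt.subplot(..., i) counter.
import numpy as np
import matplotlib.pyplot as plt

n_rows, n_cols = 3, 7  # e.g. len(datasets) rows, len(classifiers) + 1 columns
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(21, 9))

rng = np.random.RandomState(0)
for row in range(n_rows):
    for col in range(n_cols):
        ax = axes[row, col]        # direct 2-D indexing, no running counter
        ax.plot(rng.randn(10))
        ax.set_xticks(())
        ax.set_yticks(())

plt.tight_layout()
plt.show()

Dropping the running counter i removes the bookkeeping the old plt.subplot(..., i) calls required, and it makes a specific panel easy to address, which is exactly what the suptitle loop at the end of the example now does with axes[0, i].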

dev/_downloads/plot_ensemble_oob.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-
"import matplotlib.pyplot as plt\n\nfrom collections import OrderedDict\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\n\n# Author: Kian Ho <[email protected]>\n# Gilles Louppe <[email protected]>\n# Andreas Mueller <[email protected]>\n#\n# License: BSD 3 Clause\n\nprint(__doc__)\n\nRANDOM_STATE = 123\n\n# Generate a binary classification dataset.\nX, y = make_classification(n_samples=500, n_features=25,\n n_clusters_per_class=1, n_informative=15,\n random_state=RANDOM_STATE)\n\n# NOTE: Setting the `warm_start` construction parameter to `True` disables\n# support for parallelized ensembles but is necessary for tracking the OOB\n# error trajectory during training.\nensemble_clfs = [\n (\"RandomForestClassifier, max_features='sqrt'\",\n RandomForestClassifier(warm_start=True, oob_score=True,\n max_features=\"sqrt\",\n random_state=RANDOM_STATE)),\n (\"RandomForestClassifier, max_features='log2'\",\n RandomForestClassifier(warm_start=True, max_features='log2',\n oob_score=True,\n random_state=RANDOM_STATE)),\n (\"RandomForestClassifier, max_features=None\",\n RandomForestClassifier(warm_start=True, max_features=None,\n oob_score=True,\n random_state=RANDOM_STATE))\n]\n\n# Map a classifier name to a list of (<n_estimators>, <error rate>) pairs.\nerror_rate = OrderedDict((label, []) for label, _ in ensemble_clfs)\n\n# Range of `n_estimators` values to explore.\nmin_estimators = 15\nmax_estimators = 175\n\nfor label, clf in ensemble_clfs:\n for i in range(min_estimators, max_estimators + 1):\n clf.set_params(n_estimators=i)\n clf.fit(X, y)\n\n # Record the OOB error for each `n_estimators=i` setting.\n oob_error = 1 - clf.oob_score_\n error_rate[label].append((i, oob_error))\n\n# Generate the \"OOB error rate\" vs. \"n_estimators\" plot.\nfor label, clf_err in error_rate.items():\n xs, ys = zip(*clf_err)\n plt.plot(xs, ys, label=label)\n\nplt.xlim(min_estimators, max_estimators)\nplt.xlabel(\"n_estimators\")\nplt.ylabel(\"OOB error rate\")\nplt.legend(loc=\"upper right\")\nplt.show()"
+
"import matplotlib.pyplot as plt\n\nfrom collections import OrderedDict\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\n\n# Author: Kian Ho <[email protected]>\n# Gilles Louppe <[email protected]>\n# Andreas Mueller <[email protected]>\n#\n# License: BSD 3 Clause\n\nprint(__doc__)\n\nRANDOM_STATE = 123\n\n# Generate a binary classification dataset.\nX, y = make_classification(n_samples=500, n_features=25,\n n_clusters_per_class=1, n_informative=15,\n random_state=RANDOM_STATE)\n\n# NOTE: Setting the `warm_start` construction parameter to `True` disables\n# support for parallelized ensembles but is necessary for tracking the OOB\n# error trajectory during training.\nensemble_clfs = [\n (\"RandomForestClassifier, max_features='sqrt'\",\n RandomForestClassifier(n_estimators=100,\n warm_start=True, oob_score=True,\n max_features=\"sqrt\",\n random_state=RANDOM_STATE)),\n (\"RandomForestClassifier, max_features='log2'\",\n RandomForestClassifier(n_estimators=100,\n warm_start=True, max_features='log2',\n oob_score=True,\n random_state=RANDOM_STATE)),\n (\"RandomForestClassifier, max_features=None\",\n RandomForestClassifier(n_estimators=100,\n warm_start=True, max_features=None,\n oob_score=True,\n random_state=RANDOM_STATE))\n]\n\n# Map a classifier name to a list of (<n_estimators>, <error rate>) pairs.\nerror_rate = OrderedDict((label, []) for label, _ in ensemble_clfs)\n\n# Range of `n_estimators` values to explore.\nmin_estimators = 15\nmax_estimators = 175\n\nfor label, clf in ensemble_clfs:\n for i in range(min_estimators, max_estimators + 1):\n clf.set_params(n_estimators=i)\n clf.fit(X, y)\n\n # Record the OOB error for each `n_estimators=i` setting.\n oob_error = 1 - clf.oob_score_\n error_rate[label].append((i, oob_error))\n\n# Generate the \"OOB error rate\" vs. \"n_estimators\" plot.\nfor label, clf_err in error_rate.items():\n xs, ys = zip(*clf_err)\n plt.plot(xs, ys, label=label)\n\nplt.xlim(min_estimators, max_estimators)\nplt.xlabel(\"n_estimators\")\nplt.ylabel(\"OOB error rate\")\nplt.legend(loc=\"upper right\")\nplt.show()"
 ]
 }
 ],
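
The cell grows each forest in place by enabling warm_start and calling set_params/fit repeatedly, reading oob_score_ after each refit. Below is a minimal sketch of that loop, shortened to a coarser n_estimators range so it runs quickly; the range and step here are illustrative, not the example's 15 to 175.

# A minimal sketch of OOB-error tracking with warm_start, as in the cell
# above, but over a shorter, coarser n_estimators range for brevity.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(n_samples=500, n_features=25, n_informative=15,
                           n_clusters_per_class=1, random_state=123)

clf = RandomForestClassifier(warm_start=True, oob_score=True,
                             max_features="sqrt", random_state=123)

oob_errors = []
for n in range(15, 51, 5):
    clf.set_params(n_estimators=n)  # warm_start keeps the trees already grown
    clf.fit(X, y)                   # so only the new trees are fitted here
    oob_errors.append((n, 1 - clf.oob_score_))

for n, err in oob_errors:
    print('n_estimators=%3d  OOB error=%.3f' % (n, err))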

dev/_downloads/plot_ensemble_oob.py

Lines changed: 6 additions & 3 deletions
@@ -45,15 +45,18 @@
 # error trajectory during training.
 ensemble_clfs = [
     ("RandomForestClassifier, max_features='sqrt'",
-        RandomForestClassifier(warm_start=True, oob_score=True,
+        RandomForestClassifier(n_estimators=100,
+                               warm_start=True, oob_score=True,
                                max_features="sqrt",
                                random_state=RANDOM_STATE)),
     ("RandomForestClassifier, max_features='log2'",
-        RandomForestClassifier(warm_start=True, max_features='log2',
+        RandomForestClassifier(n_estimators=100,
+                               warm_start=True, max_features='log2',
                                oob_score=True,
                                random_state=RANDOM_STATE)),
     ("RandomForestClassifier, max_features=None",
-        RandomForestClassifier(warm_start=True, max_features=None,
+        RandomForestClassifier(n_estimators=100,
+                               warm_start=True, max_features=None,
                                oob_score=True,
                                random_state=RANDOM_STATE))
 ]
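
The only change in this file is passing n_estimators=100 explicitly, which pins the ensemble size instead of relying on the constructor default (the default moved from 10 to 100 in scikit-learn 0.22, and the releases in between warn when it is left unset). Below is a minimal sketch of that version-proof construction; the toy data and remaining parameters are illustrative.

# A minimal sketch of pinning n_estimators explicitly rather than relying on
# the constructor default; data and parameters here are illustrative only.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(n_samples=200, n_features=10, random_state=0)

clf = RandomForestClassifier(n_estimators=100,     # explicit, so behaviour
                             oob_score=True,       # stays the same across
                             max_features="sqrt",  # scikit-learn releases
                             random_state=0)
clf.fit(X, y)
print("OOB score: %.3f" % clf.oob_score_)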
