Commit e31df3b

Pushing the docs to dev/ for branch: master, commit bc63f49555ae1a8fdc27b3dc65d7faef336d0fee
1 parent 7d498a5 commit e31df3b

955 files changed, +2823 -2832 lines changed

-86 Bytes (binary file not shown)
-81 Bytes (binary file not shown)

dev/_downloads/plot_forest_iris.ipynb

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@
 "execution_count": null,
 "cell_type": "code",
 "source": [
-"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import clone\nfrom sklearn.datasets import load_iris\nfrom sklearn.ensemble import (RandomForestClassifier, ExtraTreesClassifier,\n AdaBoostClassifier)\nfrom sklearn.externals.six.moves import xrange\nfrom sklearn.tree import DecisionTreeClassifier\n\n# Parameters\nn_classes = 3\nn_estimators = 30\nplot_colors = \"ryb\"\ncmap = plt.cm.RdYlBu\nplot_step = 0.02 # fine step width for decision surface contours\nplot_step_coarser = 0.5 # step widths for coarse classifier guesses\nRANDOM_SEED = 13 # fix the seed on each iteration\n\n# Load data\niris = load_iris()\n\nplot_idx = 1\n\nmodels = [DecisionTreeClassifier(max_depth=None),\n RandomForestClassifier(n_estimators=n_estimators),\n ExtraTreesClassifier(n_estimators=n_estimators),\n AdaBoostClassifier(DecisionTreeClassifier(max_depth=3),\n n_estimators=n_estimators)]\n\nfor pair in ([0, 1], [0, 2], [2, 3]):\n for model in models:\n # We only take the two corresponding features\n X = iris.data[:, pair]\n y = iris.target\n\n # Shuffle\n idx = np.arange(X.shape[0])\n np.random.seed(RANDOM_SEED)\n np.random.shuffle(idx)\n X = X[idx]\n y = y[idx]\n\n # Standardize\n mean = X.mean(axis=0)\n std = X.std(axis=0)\n X = (X - mean) / std\n\n # Train\n clf = clone(model)\n clf = model.fit(X, y)\n\n scores = clf.score(X, y)\n # Create a title for each column and the console by using str() and\n # slicing away useless parts of the string\n model_title = str(type(model)).split(\".\")[-1][:-2][:-len(\"Classifier\")]\n model_details = model_title\n if hasattr(model, \"estimators_\"):\n model_details += \" with {} estimators\".format(len(model.estimators_))\n print( model_details + \" with features\", pair, \"has a score of\", scores )\n\n plt.subplot(3, 4, plot_idx)\n if plot_idx <= len(models):\n # Add a title at the top of each column\n plt.title(model_title)\n\n # Now plot the decision boundary using a fine mesh as input to a\n # filled contour plot\n x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),\n np.arange(y_min, y_max, plot_step))\n\n # Plot either a single DecisionTreeClassifier or alpha blend the\n # decision surfaces of the ensemble of classifiers\n if isinstance(model, DecisionTreeClassifier):\n Z = model.predict(np.c_[xx.ravel(), yy.ravel()])\n Z = Z.reshape(xx.shape)\n cs = plt.contourf(xx, yy, Z, cmap=cmap)\n else:\n # Choose alpha blend level with respect to the number of estimators\n # that are in use (noting that AdaBoost can use fewer estimators\n # than its maximum if it achieves a good enough fit early on)\n estimator_alpha = 1.0 / len(model.estimators_)\n for tree in model.estimators_:\n Z = tree.predict(np.c_[xx.ravel(), yy.ravel()])\n Z = Z.reshape(xx.shape)\n cs = plt.contourf(xx, yy, Z, alpha=estimator_alpha, cmap=cmap)\n\n # Build a coarser grid to plot a set of ensemble classifications\n # to show how these are different to what we see in the decision\n # surfaces. These points are regularly space and do not have a black outline\n xx_coarser, yy_coarser = np.meshgrid(np.arange(x_min, x_max, plot_step_coarser),\n np.arange(y_min, y_max, plot_step_coarser))\n Z_points_coarser = model.predict(np.c_[xx_coarser.ravel(), yy_coarser.ravel()]).reshape(xx_coarser.shape)\n cs_points = plt.scatter(xx_coarser, yy_coarser, s=15, c=Z_points_coarser, cmap=cmap, edgecolors=\"none\")\n\n # Plot the training points, these are clustered together and have a\n # black outline\n for i, c in zip(xrange(n_classes), plot_colors):\n idx = np.where(y == i)\n plt.scatter(X[idx, 0], X[idx, 1], c=c, label=iris.target_names[i],\n cmap=cmap)\n\n plot_idx += 1 # move on to the next plot in sequence\n\nplt.suptitle(\"Classifiers on feature subsets of the Iris dataset\")\nplt.axis(\"tight\")\n\nplt.show()"
+"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib.colors import ListedColormap\n\nfrom sklearn import clone\nfrom sklearn.datasets import load_iris\nfrom sklearn.ensemble import (RandomForestClassifier, ExtraTreesClassifier,\n AdaBoostClassifier)\nfrom sklearn.externals.six.moves import xrange\nfrom sklearn.tree import DecisionTreeClassifier\n\n# Parameters\nn_classes = 3\nn_estimators = 30\ncmap = plt.cm.RdYlBu\nplot_step = 0.02 # fine step width for decision surface contours\nplot_step_coarser = 0.5 # step widths for coarse classifier guesses\nRANDOM_SEED = 13 # fix the seed on each iteration\n\n# Load data\niris = load_iris()\n\nplot_idx = 1\n\nmodels = [DecisionTreeClassifier(max_depth=None),\n RandomForestClassifier(n_estimators=n_estimators),\n ExtraTreesClassifier(n_estimators=n_estimators),\n AdaBoostClassifier(DecisionTreeClassifier(max_depth=3),\n n_estimators=n_estimators)]\n\nfor pair in ([0, 1], [0, 2], [2, 3]):\n for model in models:\n # We only take the two corresponding features\n X = iris.data[:, pair]\n y = iris.target\n\n # Shuffle\n idx = np.arange(X.shape[0])\n np.random.seed(RANDOM_SEED)\n np.random.shuffle(idx)\n X = X[idx]\n y = y[idx]\n\n # Standardize\n mean = X.mean(axis=0)\n std = X.std(axis=0)\n X = (X - mean) / std\n\n # Train\n clf = clone(model)\n clf = model.fit(X, y)\n\n scores = clf.score(X, y)\n # Create a title for each column and the console by using str() and\n # slicing away useless parts of the string\n model_title = str(type(model)).split(\".\")[-1][:-2][:-len(\"Classifier\")]\n model_details = model_title\n if hasattr(model, \"estimators_\"):\n model_details += \" with {} estimators\".format(len(model.estimators_))\n print( model_details + \" with features\", pair, \"has a score of\", scores )\n\n plt.subplot(3, 4, plot_idx)\n if plot_idx <= len(models):\n # Add a title at the top of each column\n plt.title(model_title)\n\n # Now plot the decision boundary using a fine mesh as input to a\n # filled contour plot\n x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),\n np.arange(y_min, y_max, plot_step))\n\n # Plot either a single DecisionTreeClassifier or alpha blend the\n # decision surfaces of the ensemble of classifiers\n if isinstance(model, DecisionTreeClassifier):\n Z = model.predict(np.c_[xx.ravel(), yy.ravel()])\n Z = Z.reshape(xx.shape)\n cs = plt.contourf(xx, yy, Z, cmap=cmap)\n else:\n # Choose alpha blend level with respect to the number of estimators\n # that are in use (noting that AdaBoost can use fewer estimators\n # than its maximum if it achieves a good enough fit early on)\n estimator_alpha = 1.0 / len(model.estimators_)\n for tree in model.estimators_:\n Z = tree.predict(np.c_[xx.ravel(), yy.ravel()])\n Z = Z.reshape(xx.shape)\n cs = plt.contourf(xx, yy, Z, alpha=estimator_alpha, cmap=cmap)\n\n # Build a coarser grid to plot a set of ensemble classifications\n # to show how these are different to what we see in the decision\n # surfaces. These points are regularly space and do not have a black outline\n xx_coarser, yy_coarser = np.meshgrid(np.arange(x_min, x_max, plot_step_coarser),\n np.arange(y_min, y_max, plot_step_coarser))\n Z_points_coarser = model.predict(np.c_[xx_coarser.ravel(), yy_coarser.ravel()]).reshape(xx_coarser.shape)\n cs_points = plt.scatter(xx_coarser, yy_coarser, s=15, c=Z_points_coarser, cmap=cmap, edgecolors=\"none\")\n\n # Plot the training points, these are clustered together and have a\n # black outline\n plt.scatter(X[:, 0], X[:, 1], c=y,\n cmap=ListedColormap(['r', 'y', 'b']))\n plot_idx += 1 # move on to the next plot in sequence\n\nplt.suptitle(\"Classifiers on feature subsets of the Iris dataset\")\nplt.axis(\"tight\")\n\nplt.show()"
 ],
 "outputs": [],
 "metadata": {

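For context, the notebook cell changed above draws its ensemble panels by alpha-blending one filled contour per fitted estimator (estimator_alpha = 1.0 / len(model.estimators_)), so regions where the trees agree come out saturated and disputed regions look washed out. A rough, self-contained sketch of that blending idea, using illustrative synthetic blobs and a RandomForestClassifier rather than the Iris setup from the cell:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.ensemble import RandomForestClassifier

# Illustrative 2-D, 3-class data (not the Iris data used in the example).
X, y = make_blobs(n_samples=150, centers=3, random_state=13)
clf = RandomForestClassifier(n_estimators=30, random_state=13).fit(X, y)

# Mesh over the feature space (coarser here than the example's plot_step = 0.02).
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

# One translucent contour plot per tree; overlaps build up where the trees agree.
estimator_alpha = 1.0 / len(clf.estimators_)
for tree in clf.estimators_:
    Z = tree.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    plt.contourf(xx, yy, Z, alpha=estimator_alpha, cmap=plt.cm.RdYlBu)

plt.show()
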
dev/_downloads/plot_forest_iris.py

Lines changed: 3 additions & 6 deletions
@@ -42,6 +42,7 @@
 
 import numpy as np
 import matplotlib.pyplot as plt
+from matplotlib.colors import ListedColormap
 
 from sklearn import clone
 from sklearn.datasets import load_iris
@@ -53,7 +54,6 @@
 # Parameters
 n_classes = 3
 n_estimators = 30
-plot_colors = "ryb"
 cmap = plt.cm.RdYlBu
 plot_step = 0.02 # fine step width for decision surface contours
 plot_step_coarser = 0.5 # step widths for coarse classifier guesses
@@ -139,11 +139,8 @@
 
         # Plot the training points, these are clustered together and have a
         # black outline
-        for i, c in zip(xrange(n_classes), plot_colors):
-            idx = np.where(y == i)
-            plt.scatter(X[idx, 0], X[idx, 1], c=c, label=iris.target_names[i],
-                        cmap=cmap)
-
+        plt.scatter(X[:, 0], X[:, 1], c=y,
+                    cmap=ListedColormap(['r', 'y', 'b']))
         plot_idx += 1 # move on to the next plot in sequence
 
 plt.suptitle("Classifiers on feature subsets of the Iris dataset")
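The change above swaps the per-class scatter loop built on zip(xrange(n_classes), plot_colors) for a single plt.scatter call that maps the integer labels in y through a ListedColormap, so classes 0, 1 and 2 pick up 'r', 'y' and 'b' directly. A minimal sketch of that plotting pattern, with illustrative data standing in for the standardized Iris features used by the example:

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Illustrative 2-D points with three integer class labels (0, 1, 2).
rng = np.random.RandomState(13)
X = rng.randn(90, 2) + np.repeat([[0, 0], [3, 0], [0, 3]], 30, axis=0)
y = np.repeat([0, 1, 2], 30)

# One scatter call: the labels index into the listed colors, so
# class 0 -> 'r', class 1 -> 'y', class 2 -> 'b'.
plt.scatter(X[:, 0], X[:, 1], c=y, s=20,
            cmap=ListedColormap(['r', 'y', 'b']))
plt.show()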

dev/_downloads/scikit-learn-docs.pdf

23.9 KB
Binary file not shown.
