
Commit cf5b93a

Pushing the docs to dev/ for branch: master, commit 7c8af444aeb29c143b041f6baa42df4e171eab07
1 parent 92d5e72 · commit cf5b93a

1,195 files changed (+4064, -4035 lines)


dev/_downloads/3bd5b40c045ee4efd6b519876b8553cb/plot_forest_importances.py

Lines changed: 5 additions & 4 deletions
@@ -4,8 +4,9 @@
 =========================================
 
 This examples shows the use of forests of trees to evaluate the importance of
-features on an artificial classification task. The red bars are the feature
-importances of the forest, along with their inter-trees variability.
+features on an artificial classification task. The red bars are
+the impurity-based feature importances of the forest,
+along with their inter-trees variability.
 
 As expected, the plot suggests that 3 features are informative, while the
 remaining are not.
@@ -28,7 +29,7 @@
                            random_state=0,
                            shuffle=False)
 
-# Build a forest and compute the feature importances
+# Build a forest and compute the impurity-based feature importances
 forest = ExtraTreesClassifier(n_estimators=250,
                               random_state=0)
 
@@ -44,7 +45,7 @@
 for f in range(X.shape[1]):
     print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))
 
-# Plot the feature importances of the forest
+# Plot the impurity-based feature importances of the forest
 plt.figure()
 plt.title("Feature importances")
 plt.bar(range(X.shape[1]), importances[indices],
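
For reference, the hunks above amount to a small, self-contained computation. A condensed, runnable sketch (a condensation of the example's own code, not part of the commit; assumes scikit-learn and NumPy are installed):

# Condensed sketch of the example: impurity-based importances from
# ExtraTreesClassifier.feature_importances_, plus the per-tree spread
# that the red error bars visualize.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import ExtraTreesClassifier

X, y = make_classification(n_samples=1000, n_features=10, n_informative=3,
                           n_redundant=0, n_repeated=0, n_classes=2,
                           random_state=0, shuffle=False)

forest = ExtraTreesClassifier(n_estimators=250, random_state=0).fit(X, y)

# feature_importances_ averages the impurity decreases over all trees;
# the per-tree values give the inter-trees variability.
importances = forest.feature_importances_
std = np.std([tree.feature_importances_ for tree in forest.estimators_],
             axis=0)
indices = np.argsort(importances)[::-1]
print(indices[:3])  # the 3 informative features should rank first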

dev/_downloads/5428c181b42d701a038ad0aae4b60b98/plot_forest_importances_faces.py

Lines changed: 3 additions & 3 deletions
@@ -3,9 +3,9 @@
 Pixel importances with a parallel forest of trees
 =================================================
 
-This example shows the use of forests of trees to evaluate the importance
-of the pixels in an image classification task (faces). The hotter the pixel,
-the more important.
+This example shows the use of forests of trees to evaluate the impurity-based
+importance of the pixels in an image classification task (faces).
+The hotter the pixel, the more important.
 
 The code below also illustrates how the construction and the computation
 of the predictions can be parallelized within multiple jobs.
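
Since this diff touches only the docstring, here is a minimal sketch of the idea the example implements (an illustrative sketch, not the shipped file; it assumes the Olivetti faces dataset from sklearn.datasets):

# Sketch: treat each pixel as a feature, fit a parallel forest, and view
# the impurity-based importances as a heat map over the image.
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_olivetti_faces
from sklearn.ensemble import ExtraTreesClassifier

faces = fetch_olivetti_faces()     # 400 grayscale 64x64 face images
X, y = faces.data, faces.target    # X is (400, 4096), one column per pixel

# n_jobs=-1 parallelizes tree construction across all available cores.
forest = ExtraTreesClassifier(n_estimators=500, n_jobs=-1, random_state=0)
forest.fit(X, y)

# Reshape the flat importance vector back into image coordinates.
importances = forest.feature_importances_.reshape(faces.images[0].shape)
plt.matshow(importances, cmap=plt.cm.hot)
plt.title("Pixel importances (impurity-based)")
plt.show()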

dev/_downloads/679566501b743cb339497968edb9d62f/plot_gradient_boosting_regression.py

Lines changed: 1 addition & 1 deletion
@@ -62,7 +62,7 @@
 plt.ylabel('Deviance')
 
 # #############################################################################
-# Plot feature importance
+# Plot impurity-based feature importance
 feature_importance = clf.feature_importances_
 # make importances relative to max importance
 feature_importance = 100.0 * (feature_importance / feature_importance.max())
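
The "impurity-based" qualifier added throughout this commit distinguishes feature_importances_ (computed from impurity decreases seen during training) from permutation importance, the model-agnostic alternative in sklearn.inspection. A minimal sketch of the contrast, assuming scikit-learn >= 0.22 and synthetic stand-in data:

# Contrast between the two notions of importance (illustrative data only).
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=500, n_features=8, n_informative=3,
                       random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
reg = GradientBoostingRegressor(random_state=0).fit(X_train, y_train)

# Impurity-based: derived from split statistics on the training set.
print(reg.feature_importances_)

# Permutation-based: score drop on held-out data when a feature is shuffled.
result = permutation_importance(reg, X_test, y_test, n_repeats=10,
                                random_state=0)
print(result.importances_mean)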

dev/_downloads/778f5eb3754efb6327ad5d3aaa4bdc19/plot_forest_importances_faces.ipynb

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"\n# Pixel importances with a parallel forest of trees\n\n\nThis example shows the use of forests of trees to evaluate the importance\nof the pixels in an image classification task (faces). The hotter the pixel,\nthe more important.\n\nThe code below also illustrates how the construction and the computation\nof the predictions can be parallelized within multiple jobs.\n"
+"\n# Pixel importances with a parallel forest of trees\n\n\nThis example shows the use of forests of trees to evaluate the impurity-based\nimportance of the pixels in an image classification task (faces).\nThe hotter the pixel, the more important.\n\nThe code below also illustrates how the construction and the computation\nof the predictions can be parallelized within multiple jobs.\n"
 ]
 },
 {

dev/_downloads/9fbbe00328ea0a237498701b1e8827fa/plot_forest_importances.ipynb

Lines changed: 2 additions & 2 deletions
@@ -15,7 +15,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"\n# Feature importances with forests of trees\n\n\nThis examples shows the use of forests of trees to evaluate the importance of\nfeatures on an artificial classification task. The red bars are the feature\nimportances of the forest, along with their inter-trees variability.\n\nAs expected, the plot suggests that 3 features are informative, while the\nremaining are not.\n"
+"\n# Feature importances with forests of trees\n\n\nThis examples shows the use of forests of trees to evaluate the importance of\nfeatures on an artificial classification task. The red bars are\nthe impurity-based feature importances of the forest,\nalong with their inter-trees variability.\n\nAs expected, the plot suggests that 3 features are informative, while the\nremaining are not.\n"
 ]
 },
 {
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import ExtraTreesClassifier\n\n# Build a classification task using 3 informative features\nX, y = make_classification(n_samples=1000,\n                           n_features=10,\n                           n_informative=3,\n                           n_redundant=0,\n                           n_repeated=0,\n                           n_classes=2,\n                           random_state=0,\n                           shuffle=False)\n\n# Build a forest and compute the feature importances\nforest = ExtraTreesClassifier(n_estimators=250,\n                              random_state=0)\n\nforest.fit(X, y)\nimportances = forest.feature_importances_\nstd = np.std([tree.feature_importances_ for tree in forest.estimators_],\n             axis=0)\nindices = np.argsort(importances)[::-1]\n\n# Print the feature ranking\nprint(\"Feature ranking:\")\n\nfor f in range(X.shape[1]):\n    print(\"%d. feature %d (%f)\" % (f + 1, indices[f], importances[indices[f]]))\n\n# Plot the feature importances of the forest\nplt.figure()\nplt.title(\"Feature importances\")\nplt.bar(range(X.shape[1]), importances[indices],\n        color=\"r\", yerr=std[indices], align=\"center\")\nplt.xticks(range(X.shape[1]), indices)\nplt.xlim([-1, X.shape[1]])\nplt.show()"
+"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import ExtraTreesClassifier\n\n# Build a classification task using 3 informative features\nX, y = make_classification(n_samples=1000,\n                           n_features=10,\n                           n_informative=3,\n                           n_redundant=0,\n                           n_repeated=0,\n                           n_classes=2,\n                           random_state=0,\n                           shuffle=False)\n\n# Build a forest and compute the impurity-based feature importances\nforest = ExtraTreesClassifier(n_estimators=250,\n                              random_state=0)\n\nforest.fit(X, y)\nimportances = forest.feature_importances_\nstd = np.std([tree.feature_importances_ for tree in forest.estimators_],\n             axis=0)\nindices = np.argsort(importances)[::-1]\n\n# Print the feature ranking\nprint(\"Feature ranking:\")\n\nfor f in range(X.shape[1]):\n    print(\"%d. feature %d (%f)\" % (f + 1, indices[f], importances[indices[f]]))\n\n# Plot the impurity-based feature importances of the forest\nplt.figure()\nplt.title(\"Feature importances\")\nplt.bar(range(X.shape[1]), importances[indices],\n        color=\"r\", yerr=std[indices], align=\"center\")\nplt.xticks(range(X.shape[1]), indices)\nplt.xlim([-1, X.shape[1]])\nplt.show()"
 ]
 }
],
dev/_downloads/cdc6134a701824f26cc08df7bd1e479a/plot_gradient_boosting_regression.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"print(__doc__)\n\n# Author: Peter Prettenhofer <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import ensemble\nfrom sklearn import datasets\nfrom sklearn.utils import shuffle\nfrom sklearn.metrics import mean_squared_error\n\n# #############################################################################\n# Load data\nboston = datasets.load_boston()\nX, y = shuffle(boston.data, boston.target, random_state=13)\nX = X.astype(np.float32)\noffset = int(X.shape[0] * 0.9)\nX_train, y_train = X[:offset], y[:offset]\nX_test, y_test = X[offset:], y[offset:]\n\n# #############################################################################\n# Fit regression model\nparams = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2,\n          'learning_rate': 0.01, 'loss': 'ls'}\nclf = ensemble.GradientBoostingRegressor(**params)\n\nclf.fit(X_train, y_train)\nmse = mean_squared_error(y_test, clf.predict(X_test))\nprint(\"MSE: %.4f\" % mse)\n\n# #############################################################################\n# Plot training deviance\n\n# compute test set deviance\ntest_score = np.zeros((params['n_estimators'],), dtype=np.float64)\n\nfor i, y_pred in enumerate(clf.staged_predict(X_test)):\n    test_score[i] = clf.loss_(y_test, y_pred)\n\nplt.figure(figsize=(12, 6))\nplt.subplot(1, 2, 1)\nplt.title('Deviance')\nplt.plot(np.arange(params['n_estimators']) + 1, clf.train_score_, 'b-',\n         label='Training Set Deviance')\nplt.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',\n         label='Test Set Deviance')\nplt.legend(loc='upper right')\nplt.xlabel('Boosting Iterations')\nplt.ylabel('Deviance')\n\n# #############################################################################\n# Plot feature importance\nfeature_importance = clf.feature_importances_\n# make importances relative to max importance\nfeature_importance = 100.0 * (feature_importance / feature_importance.max())\nsorted_idx = np.argsort(feature_importance)\npos = np.arange(sorted_idx.shape[0]) + .5\nplt.subplot(1, 2, 2)\nplt.barh(pos, feature_importance[sorted_idx], align='center')\nplt.yticks(pos, boston.feature_names[sorted_idx])\nplt.xlabel('Relative Importance')\nplt.title('Variable Importance')\nplt.show()"
+"print(__doc__)\n\n# Author: Peter Prettenhofer <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import ensemble\nfrom sklearn import datasets\nfrom sklearn.utils import shuffle\nfrom sklearn.metrics import mean_squared_error\n\n# #############################################################################\n# Load data\nboston = datasets.load_boston()\nX, y = shuffle(boston.data, boston.target, random_state=13)\nX = X.astype(np.float32)\noffset = int(X.shape[0] * 0.9)\nX_train, y_train = X[:offset], y[:offset]\nX_test, y_test = X[offset:], y[offset:]\n\n# #############################################################################\n# Fit regression model\nparams = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2,\n          'learning_rate': 0.01, 'loss': 'ls'}\nclf = ensemble.GradientBoostingRegressor(**params)\n\nclf.fit(X_train, y_train)\nmse = mean_squared_error(y_test, clf.predict(X_test))\nprint(\"MSE: %.4f\" % mse)\n\n# #############################################################################\n# Plot training deviance\n\n# compute test set deviance\ntest_score = np.zeros((params['n_estimators'],), dtype=np.float64)\n\nfor i, y_pred in enumerate(clf.staged_predict(X_test)):\n    test_score[i] = clf.loss_(y_test, y_pred)\n\nplt.figure(figsize=(12, 6))\nplt.subplot(1, 2, 1)\nplt.title('Deviance')\nplt.plot(np.arange(params['n_estimators']) + 1, clf.train_score_, 'b-',\n         label='Training Set Deviance')\nplt.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',\n         label='Test Set Deviance')\nplt.legend(loc='upper right')\nplt.xlabel('Boosting Iterations')\nplt.ylabel('Deviance')\n\n# #############################################################################\n# Plot impurity-based feature importance\nfeature_importance = clf.feature_importances_\n# make importances relative to max importance\nfeature_importance = 100.0 * (feature_importance / feature_importance.max())\nsorted_idx = np.argsort(feature_importance)\npos = np.arange(sorted_idx.shape[0]) + .5\nplt.subplot(1, 2, 2)\nplt.barh(pos, feature_importance[sorted_idx], align='center')\nplt.yticks(pos, boston.feature_names[sorted_idx])\nplt.xlabel('Relative Importance')\nplt.title('Variable Importance')\nplt.show()"
 ]
 }
],
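
That cell also contains the deviance-tracking pattern that the importance plot sits next to. A condensed sketch of just that pattern (a sketch with stand-in data, not the shipped notebook; clf.loss_ and loss='ls' match the scikit-learn version this example targets and were deprecated in later releases):

# Track held-out deviance per boosting iteration via staged_predict,
# as the cell above does, without refitting the model.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=500, n_features=8, random_state=13)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=13)

params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2,
          'learning_rate': 0.01, 'loss': 'ls'}
clf = GradientBoostingRegressor(**params).fit(X_train, y_train)

# staged_predict yields predictions after each boosting stage.
test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
for i, y_pred in enumerate(clf.staged_predict(X_test)):
    test_score[i] = clf.loss_(y_test, y_pred)

print(test_score[-1])  # final test-set deviance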
dev/_downloads/scikit-learn-docs.pdf

11.2 KB (binary file not shown)

dev/_images/iris.png

0 Bytes (binary file not shown)
