
Commit 6b92b29

Pushing the docs to dev/ for branch: master, commit 54c3a1fbe7ef0f6814ae6406fbc0d52804303370
1 parent 92a6b80 commit 6b92b29

1,213 files changed: +3,992 / -3,915 lines


dev/_downloads/3bd5b40c045ee4efd6b519876b8553cb/plot_forest_importances.py

Lines changed: 6 additions & 1 deletion
@@ -10,6 +10,11 @@

 As expected, the plot suggests that 3 features are informative, while the
 remaining are not.
+
+Warning: impurity-based feature importances can be misleading for high
+cardinality features (many unique values). See
+:func:`sklearn.inspection.permutation_importance` as an alternative.
+
 """
 print(__doc__)

@@ -49,7 +54,7 @@
 plt.figure()
 plt.title("Feature importances")
 plt.bar(range(X.shape[1]), importances[indices],
-       color="r", yerr=std[indices], align="center")
+        color="r", yerr=std[indices], align="center")
 plt.xticks(range(X.shape[1]), indices)
 plt.xlim([-1, X.shape[1]])
 plt.show()
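The note added above points readers to permutation importance instead. As a rough, hypothetical sketch (not part of this commit), computing it for this example could look like the following, assuming the fitted forest and the X, y arrays from plot_forest_importances.py are in scope:

    from sklearn.inspection import permutation_importance

    # Permutation importance shuffles one feature at a time and measures the
    # drop in score, so it is not biased toward high-cardinality features the
    # way impurity-based importances can be.
    result = permutation_importance(forest, X, y, n_repeats=10, random_state=0)

    for idx in result.importances_mean.argsort()[::-1]:
        print("feature %d: %.3f +/- %.3f"
              % (idx, result.importances_mean[idx], result.importances_std[idx]))

Running it on a held-out split rather than the training data is generally preferable, since training-set scores can still reward features the model has overfit.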

dev/_downloads/679566501b743cb339497968edb9d62f/plot_gradient_boosting_regression.py

Lines changed: 5 additions & 0 deletions
@@ -63,6 +63,11 @@

 # #############################################################################
 # Plot impurity-based feature importance
+#
+# Warning: impurity-based feature importances can be misleading for
+# high cardinality features (many unique values). See
+# :func:`sklearn.inspection.permutation_importance` as an alternative.
+
 feature_importance = clf.feature_importances_
 # make importances relative to max importance
 feature_importance = 100.0 * (feature_importance / feature_importance.max())
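Here as well, a hedged sketch of the suggested alternative (not part of this commit): permutation importance evaluated on the test split, assuming the fitted clf, X_test, y_test and boston.feature_names from plot_gradient_boosting_regression.py are available:

    import matplotlib.pyplot as plt
    from sklearn.inspection import permutation_importance

    # Evaluate on the held-out split so the ranking reflects generalization
    # rather than training-set impurity decreases.
    result = permutation_importance(clf, X_test, y_test, n_repeats=10,
                                    random_state=42)
    sorted_idx = result.importances_mean.argsort()

    # One box per feature, built from the n_repeats shuffles.
    plt.boxplot(result.importances[sorted_idx].T, vert=False,
                labels=boston.feature_names[sorted_idx])
    plt.title("Permutation Importance (test set)")
    plt.show()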

dev/_downloads/9fbbe00328ea0a237498701b1e8827fa/plot_forest_importances.ipynb

Lines changed: 2 additions & 2 deletions
@@ -15,7 +15,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
- old:
"\n# Feature importances with forests of trees\n\n\nThis examples shows the use of forests of trees to evaluate the importance of\nfeatures on an artificial classification task. The red bars are\nthe impurity-based feature importances of the forest,\nalong with their inter-trees variability.\n\nAs expected, the plot suggests that 3 features are informative, while the\nremaining are not.\n"
+ new:
"\n# Feature importances with forests of trees\n\n\nThis examples shows the use of forests of trees to evaluate the importance of\nfeatures on an artificial classification task. The red bars are\nthe impurity-based feature importances of the forest,\nalong with their inter-trees variability.\n\nAs expected, the plot suggests that 3 features are informative, while the\nremaining are not.\n\nWarning: impurity-based feature importances can be misleading for high\ncardinality features (many unique values). See\n:func:`sklearn.inspection.permutation_importance` as an alternative.\n"
 ]
 },
 {
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
- old:
"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import ExtraTreesClassifier\n\n# Build a classification task using 3 informative features\nX, y = make_classification(n_samples=1000,\n n_features=10,\n n_informative=3,\n n_redundant=0,\n n_repeated=0,\n n_classes=2,\n random_state=0,\n shuffle=False)\n\n# Build a forest and compute the impurity-based feature importances\nforest = ExtraTreesClassifier(n_estimators=250,\n random_state=0)\n\nforest.fit(X, y)\nimportances = forest.feature_importances_\nstd = np.std([tree.feature_importances_ for tree in forest.estimators_],\n axis=0)\nindices = np.argsort(importances)[::-1]\n\n# Print the feature ranking\nprint(\"Feature ranking:\")\n\nfor f in range(X.shape[1]):\n print(\"%d. feature %d (%f)\" % (f + 1, indices[f], importances[indices[f]]))\n\n# Plot the impurity-based feature importances of the forest\nplt.figure()\nplt.title(\"Feature importances\")\nplt.bar(range(X.shape[1]), importances[indices],\n color=\"r\", yerr=std[indices], align=\"center\")\nplt.xticks(range(X.shape[1]), indices)\nplt.xlim([-1, X.shape[1]])\nplt.show()"
+ new:
"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import ExtraTreesClassifier\n\n# Build a classification task using 3 informative features\nX, y = make_classification(n_samples=1000,\n n_features=10,\n n_informative=3,\n n_redundant=0,\n n_repeated=0,\n n_classes=2,\n random_state=0,\n shuffle=False)\n\n# Build a forest and compute the impurity-based feature importances\nforest = ExtraTreesClassifier(n_estimators=250,\n random_state=0)\n\nforest.fit(X, y)\nimportances = forest.feature_importances_\nstd = np.std([tree.feature_importances_ for tree in forest.estimators_],\n axis=0)\nindices = np.argsort(importances)[::-1]\n\n# Print the feature ranking\nprint(\"Feature ranking:\")\n\nfor f in range(X.shape[1]):\n print(\"%d. feature %d (%f)\" % (f + 1, indices[f], importances[indices[f]]))\n\n# Plot the impurity-based feature importances of the forest\nplt.figure()\nplt.title(\"Feature importances\")\nplt.bar(range(X.shape[1]), importances[indices],\n color=\"r\", yerr=std[indices], align=\"center\")\nplt.xticks(range(X.shape[1]), indices)\nplt.xlim([-1, X.shape[1]])\nplt.show()"
 ]
 }
 ],

dev/_downloads/cdc6134a701824f26cc08df7bd1e479a/plot_gradient_boosting_regression.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
- old:
"print(__doc__)\n\n# Author: Peter Prettenhofer <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import ensemble\nfrom sklearn import datasets\nfrom sklearn.utils import shuffle\nfrom sklearn.metrics import mean_squared_error\n\n# #############################################################################\n# Load data\nboston = datasets.load_boston()\nX, y = shuffle(boston.data, boston.target, random_state=13)\nX = X.astype(np.float32)\noffset = int(X.shape[0] * 0.9)\nX_train, y_train = X[:offset], y[:offset]\nX_test, y_test = X[offset:], y[offset:]\n\n# #############################################################################\n# Fit regression model\nparams = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2,\n 'learning_rate': 0.01, 'loss': 'ls'}\nclf = ensemble.GradientBoostingRegressor(**params)\n\nclf.fit(X_train, y_train)\nmse = mean_squared_error(y_test, clf.predict(X_test))\nprint(\"MSE: %.4f\" % mse)\n\n# #############################################################################\n# Plot training deviance\n\n# compute test set deviance\ntest_score = np.zeros((params['n_estimators'],), dtype=np.float64)\n\nfor i, y_pred in enumerate(clf.staged_predict(X_test)):\n test_score[i] = clf.loss_(y_test, y_pred)\n\nplt.figure(figsize=(12, 6))\nplt.subplot(1, 2, 1)\nplt.title('Deviance')\nplt.plot(np.arange(params['n_estimators']) + 1, clf.train_score_, 'b-',\n label='Training Set Deviance')\nplt.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',\n label='Test Set Deviance')\nplt.legend(loc='upper right')\nplt.xlabel('Boosting Iterations')\nplt.ylabel('Deviance')\n\n# #############################################################################\n# Plot impurity-based feature importance\nfeature_importance = clf.feature_importances_\n# make importances relative to max importance\nfeature_importance = 100.0 * (feature_importance / feature_importance.max())\nsorted_idx = np.argsort(feature_importance)\npos = np.arange(sorted_idx.shape[0]) + .5\nplt.subplot(1, 2, 2)\nplt.barh(pos, feature_importance[sorted_idx], align='center')\nplt.yticks(pos, boston.feature_names[sorted_idx])\nplt.xlabel('Relative Importance')\nplt.title('Variable Importance')\nplt.show()"
+ new:
"print(__doc__)\n\n# Author: Peter Prettenhofer <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import ensemble\nfrom sklearn import datasets\nfrom sklearn.utils import shuffle\nfrom sklearn.metrics import mean_squared_error\n\n# #############################################################################\n# Load data\nboston = datasets.load_boston()\nX, y = shuffle(boston.data, boston.target, random_state=13)\nX = X.astype(np.float32)\noffset = int(X.shape[0] * 0.9)\nX_train, y_train = X[:offset], y[:offset]\nX_test, y_test = X[offset:], y[offset:]\n\n# #############################################################################\n# Fit regression model\nparams = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2,\n 'learning_rate': 0.01, 'loss': 'ls'}\nclf = ensemble.GradientBoostingRegressor(**params)\n\nclf.fit(X_train, y_train)\nmse = mean_squared_error(y_test, clf.predict(X_test))\nprint(\"MSE: %.4f\" % mse)\n\n# #############################################################################\n# Plot training deviance\n\n# compute test set deviance\ntest_score = np.zeros((params['n_estimators'],), dtype=np.float64)\n\nfor i, y_pred in enumerate(clf.staged_predict(X_test)):\n test_score[i] = clf.loss_(y_test, y_pred)\n\nplt.figure(figsize=(12, 6))\nplt.subplot(1, 2, 1)\nplt.title('Deviance')\nplt.plot(np.arange(params['n_estimators']) + 1, clf.train_score_, 'b-',\n label='Training Set Deviance')\nplt.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',\n label='Test Set Deviance')\nplt.legend(loc='upper right')\nplt.xlabel('Boosting Iterations')\nplt.ylabel('Deviance')\n\n# #############################################################################\n# Plot impurity-based feature importance\n#\n# Warning: impurity-based feature importances can be misleading for\n# high cardinality features (many unique values). See\n# :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nfeature_importance = clf.feature_importances_\n# make importances relative to max importance\nfeature_importance = 100.0 * (feature_importance / feature_importance.max())\nsorted_idx = np.argsort(feature_importance)\npos = np.arange(sorted_idx.shape[0]) + .5\nplt.subplot(1, 2, 2)\nplt.barh(pos, feature_importance[sorted_idx], align='center')\nplt.yticks(pos, boston.feature_names[sorted_idx])\nplt.xlabel('Relative Importance')\nplt.title('Variable Importance')\nplt.show()"
 ]
 }
 ],
dev/_downloads/scikit-learn-docs.pdf

Binary file not shown.

dev/_images/iris.png

Binary file not shown.
