
Commit cf5b93a

Pushing the docs to dev/ for branch: master, commit 7c8af444aeb29c143b041f6baa42df4e171eab07
1 parent 92d5e72 · commit cf5b93a

1,195 files changed (+4064, -4035 lines)


dev/_downloads/3bd5b40c045ee4efd6b519876b8553cb/plot_forest_importances.py

Lines changed: 5 additions & 4 deletions
@@ -4,8 +4,9 @@
 =========================================
 
 This examples shows the use of forests of trees to evaluate the importance of
-features on an artificial classification task. The red bars are the feature
-importances of the forest, along with their inter-trees variability.
+features on an artificial classification task. The red bars are
+the impurity-based feature importances of the forest,
+along with their inter-trees variability.
 
 As expected, the plot suggests that 3 features are informative, while the
 remaining are not.
@@ -28,7 +29,7 @@
                            random_state=0,
                            shuffle=False)
 
-# Build a forest and compute the feature importances
+# Build a forest and compute the impurity-based feature importances
 forest = ExtraTreesClassifier(n_estimators=250,
                               random_state=0)
 
@@ -44,7 +45,7 @@
 for f in range(X.shape[1]):
     print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))
 
-# Plot the feature importances of the forest
+# Plot the impurity-based feature importances of the forest
 plt.figure()
 plt.title("Feature importances")
 plt.bar(range(X.shape[1]), importances[indices],
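
For reference, the hunks above amount to a small, self-contained computation. A condensed, runnable sketch (a condensation of the example's own code, not part of the commit; assumes scikit-learn and NumPy are installed):

# Condensed sketch of the example: impurity-based importances from
# ExtraTreesClassifier.feature_importances_, plus the per-tree spread
# that the red error bars visualize.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import ExtraTreesClassifier

X, y = make_classification(n_samples=1000, n_features=10, n_informative=3,
                           n_redundant=0, n_repeated=0, n_classes=2,
                           random_state=0, shuffle=False)

forest = ExtraTreesClassifier(n_estimators=250, random_state=0).fit(X, y)

# feature_importances_ averages the impurity decreases over all trees;
# the per-tree values give the inter-trees variability.
importances = forest.feature_importances_
std = np.std([tree.feature_importances_ for tree in forest.estimators_],
             axis=0)
indices = np.argsort(importances)[::-1]
print(indices[:3])  # the 3 informative features should rank first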

dev/_downloads/5428c181b42d701a038ad0aae4b60b98/plot_forest_importances_faces.py

Lines changed: 3 additions & 3 deletions
@@ -3,9 +3,9 @@
 Pixel importances with a parallel forest of trees
 =================================================
 
-This example shows the use of forests of trees to evaluate the importance
-of the pixels in an image classification task (faces). The hotter the pixel,
-the more important.
+This example shows the use of forests of trees to evaluate the impurity-based
+importance of the pixels in an image classification task (faces).
+The hotter the pixel, the more important.
 
 The code below also illustrates how the construction and the computation
 of the predictions can be parallelized within multiple jobs.
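
Since this diff touches only the docstring, here is a minimal sketch of the idea the example implements (an illustrative sketch, not the shipped file; it assumes the Olivetti faces dataset from sklearn.datasets):

# Sketch: treat each pixel as a feature, fit a parallel forest, and view
# the impurity-based importances as a heat map over the image.
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_olivetti_faces
from sklearn.ensemble import ExtraTreesClassifier

faces = fetch_olivetti_faces()     # 400 grayscale 64x64 face images
X, y = faces.data, faces.target    # X is (400, 4096), one column per pixel

# n_jobs=-1 parallelizes tree construction across all available cores.
forest = ExtraTreesClassifier(n_estimators=500, n_jobs=-1, random_state=0)
forest.fit(X, y)

# Reshape the flat importance vector back into image coordinates.
importances = forest.feature_importances_.reshape(faces.images[0].shape)
plt.matshow(importances, cmap=plt.cm.hot)
plt.title("Pixel importances (impurity-based)")
plt.show()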

dev/_downloads/679566501b743cb339497968edb9d62f/plot_gradient_boosting_regression.py

Lines changed: 1 addition & 1 deletion
@@ -62,7 +62,7 @@
 plt.ylabel('Deviance')
 
 # #############################################################################
-# Plot feature importance
+# Plot impurity-based feature importance
 feature_importance = clf.feature_importances_
 # make importances relative to max importance
 feature_importance = 100.0 * (feature_importance / feature_importance.max())
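
The "impurity-based" qualifier added throughout this commit distinguishes feature_importances_ (computed from impurity decreases seen during training) from permutation importance, the model-agnostic alternative in sklearn.inspection. A minimal sketch of the contrast, assuming scikit-learn >= 0.22 and synthetic stand-in data:

# Contrast between the two notions of importance (illustrative data only).
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=500, n_features=8, n_informative=3,
                       random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
reg = GradientBoostingRegressor(random_state=0).fit(X_train, y_train)

# Impurity-based: derived from split statistics on the training set.
print(reg.feature_importances_)

# Permutation-based: score drop on held-out data when a feature is shuffled.
result = permutation_importance(reg, X_test, y_test, n_repeats=10,
                                random_state=0)
print(result.importances_mean)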

dev/_downloads/778f5eb3754efb6327ad5d3aaa4bdc19/plot_forest_importances_faces.ipynb

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"\n# Pixel importances with a parallel forest of trees\n\n\nThis example shows the use of forests of trees to evaluate the importance\nof the pixels in an image classification task (faces). The hotter the pixel,\nthe more important.\n\nThe code below also illustrates how the construction and the computation\nof the predictions can be parallelized within multiple jobs.\n"
+"\n# Pixel importances with a parallel forest of trees\n\n\nThis example shows the use of forests of trees to evaluate the impurity-based\nimportance of the pixels in an image classification task (faces).\nThe hotter the pixel, the more important.\n\nThe code below also illustrates how the construction and the computation\nof the predictions can be parallelized within multiple jobs.\n"
 ]
 },
 {

dev/_downloads/9fbbe00328ea0a237498701b1e8827fa/plot_forest_importances.ipynb

Lines changed: 2 additions & 2 deletions
@@ -15,7 +15,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"\n# Feature importances with forests of trees\n\n\nThis examples shows the use of forests of trees to evaluate the importance of\nfeatures on an artificial classification task. The red bars are the feature\nimportances of the forest, along with their inter-trees variability.\n\nAs expected, the plot suggests that 3 features are informative, while the\nremaining are not.\n"
+"\n# Feature importances with forests of trees\n\n\nThis examples shows the use of forests of trees to evaluate the importance of\nfeatures on an artificial classification task. The red bars are\nthe impurity-based feature importances of the forest,\nalong with their inter-trees variability.\n\nAs expected, the plot suggests that 3 features are informative, while the\nremaining are not.\n"
 ]
 },
 {
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import ExtraTreesClassifier\n\n# Build a classification task using 3 informative features\nX, y = make_classification(n_samples=1000,\n                           n_features=10,\n                           n_informative=3,\n                           n_redundant=0,\n                           n_repeated=0,\n                           n_classes=2,\n                           random_state=0,\n                           shuffle=False)\n\n# Build a forest and compute the feature importances\nforest = ExtraTreesClassifier(n_estimators=250,\n                              random_state=0)\n\nforest.fit(X, y)\nimportances = forest.feature_importances_\nstd = np.std([tree.feature_importances_ for tree in forest.estimators_],\n             axis=0)\nindices = np.argsort(importances)[::-1]\n\n# Print the feature ranking\nprint(\"Feature ranking:\")\n\nfor f in range(X.shape[1]):\n    print(\"%d. feature %d (%f)\" % (f + 1, indices[f], importances[indices[f]]))\n\n# Plot the feature importances of the forest\nplt.figure()\nplt.title(\"Feature importances\")\nplt.bar(range(X.shape[1]), importances[indices],\n        color=\"r\", yerr=std[indices], align=\"center\")\nplt.xticks(range(X.shape[1]), indices)\nplt.xlim([-1, X.shape[1]])\nplt.show()"
+"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import ExtraTreesClassifier\n\n# Build a classification task using 3 informative features\nX, y = make_classification(n_samples=1000,\n                           n_features=10,\n                           n_informative=3,\n                           n_redundant=0,\n                           n_repeated=0,\n                           n_classes=2,\n                           random_state=0,\n                           shuffle=False)\n\n# Build a forest and compute the impurity-based feature importances\nforest = ExtraTreesClassifier(n_estimators=250,\n                              random_state=0)\n\nforest.fit(X, y)\nimportances = forest.feature_importances_\nstd = np.std([tree.feature_importances_ for tree in forest.estimators_],\n             axis=0)\nindices = np.argsort(importances)[::-1]\n\n# Print the feature ranking\nprint(\"Feature ranking:\")\n\nfor f in range(X.shape[1]):\n    print(\"%d. feature %d (%f)\" % (f + 1, indices[f], importances[indices[f]]))\n\n# Plot the impurity-based feature importances of the forest\nplt.figure()\nplt.title(\"Feature importances\")\nplt.bar(range(X.shape[1]), importances[indices],\n        color=\"r\", yerr=std[indices], align=\"center\")\nplt.xticks(range(X.shape[1]), indices)\nplt.xlim([-1, X.shape[1]])\nplt.show()"
 ]
 }
],
dev/_downloads/cdc6134a701824f26cc08df7bd1e479a/plot_gradient_boosting_regression.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"print(__doc__)\n\n# Author: Peter Prettenhofer <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import ensemble\nfrom sklearn import datasets\nfrom sklearn.utils import shuffle\nfrom sklearn.metrics import mean_squared_error\n\n# #############################################################################\n# Load data\nboston = datasets.load_boston()\nX, y = shuffle(boston.data, boston.target, random_state=13)\nX = X.astype(np.float32)\noffset = int(X.shape[0] * 0.9)\nX_train, y_train = X[:offset], y[:offset]\nX_test, y_test = X[offset:], y[offset:]\n\n# #############################################################################\n# Fit regression model\nparams = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2,\n          'learning_rate': 0.01, 'loss': 'ls'}\nclf = ensemble.GradientBoostingRegressor(**params)\n\nclf.fit(X_train, y_train)\nmse = mean_squared_error(y_test, clf.predict(X_test))\nprint(\"MSE: %.4f\" % mse)\n\n# #############################################################################\n# Plot training deviance\n\n# compute test set deviance\ntest_score = np.zeros((params['n_estimators'],), dtype=np.float64)\n\nfor i, y_pred in enumerate(clf.staged_predict(X_test)):\n    test_score[i] = clf.loss_(y_test, y_pred)\n\nplt.figure(figsize=(12, 6))\nplt.subplot(1, 2, 1)\nplt.title('Deviance')\nplt.plot(np.arange(params['n_estimators']) + 1, clf.train_score_, 'b-',\n         label='Training Set Deviance')\nplt.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',\n         label='Test Set Deviance')\nplt.legend(loc='upper right')\nplt.xlabel('Boosting Iterations')\nplt.ylabel('Deviance')\n\n# #############################################################################\n# Plot feature importance\nfeature_importance = clf.feature_importances_\n# make importances relative to max importance\nfeature_importance = 100.0 * (feature_importance / feature_importance.max())\nsorted_idx = np.argsort(feature_importance)\npos = np.arange(sorted_idx.shape[0]) + .5\nplt.subplot(1, 2, 2)\nplt.barh(pos, feature_importance[sorted_idx], align='center')\nplt.yticks(pos, boston.feature_names[sorted_idx])\nplt.xlabel('Relative Importance')\nplt.title('Variable Importance')\nplt.show()"
+"print(__doc__)\n\n# Author: Peter Prettenhofer <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import ensemble\nfrom sklearn import datasets\nfrom sklearn.utils import shuffle\nfrom sklearn.metrics import mean_squared_error\n\n# #############################################################################\n# Load data\nboston = datasets.load_boston()\nX, y = shuffle(boston.data, boston.target, random_state=13)\nX = X.astype(np.float32)\noffset = int(X.shape[0] * 0.9)\nX_train, y_train = X[:offset], y[:offset]\nX_test, y_test = X[offset:], y[offset:]\n\n# #############################################################################\n# Fit regression model\nparams = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2,\n          'learning_rate': 0.01, 'loss': 'ls'}\nclf = ensemble.GradientBoostingRegressor(**params)\n\nclf.fit(X_train, y_train)\nmse = mean_squared_error(y_test, clf.predict(X_test))\nprint(\"MSE: %.4f\" % mse)\n\n# #############################################################################\n# Plot training deviance\n\n# compute test set deviance\ntest_score = np.zeros((params['n_estimators'],), dtype=np.float64)\n\nfor i, y_pred in enumerate(clf.staged_predict(X_test)):\n    test_score[i] = clf.loss_(y_test, y_pred)\n\nplt.figure(figsize=(12, 6))\nplt.subplot(1, 2, 1)\nplt.title('Deviance')\nplt.plot(np.arange(params['n_estimators']) + 1, clf.train_score_, 'b-',\n         label='Training Set Deviance')\nplt.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',\n         label='Test Set Deviance')\nplt.legend(loc='upper right')\nplt.xlabel('Boosting Iterations')\nplt.ylabel('Deviance')\n\n# #############################################################################\n# Plot impurity-based feature importance\nfeature_importance = clf.feature_importances_\n# make importances relative to max importance\nfeature_importance = 100.0 * (feature_importance / feature_importance.max())\nsorted_idx = np.argsort(feature_importance)\npos = np.arange(sorted_idx.shape[0]) + .5\nplt.subplot(1, 2, 2)\nplt.barh(pos, feature_importance[sorted_idx], align='center')\nplt.yticks(pos, boston.feature_names[sorted_idx])\nplt.xlabel('Relative Importance')\nplt.title('Variable Importance')\nplt.show()"
 ]
 }
],
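
That cell also contains the deviance-tracking pattern that the importance plot sits next to. A condensed sketch of just that pattern (a sketch with stand-in data, not the shipped notebook; clf.loss_ and loss='ls' match the scikit-learn version this example targets and were deprecated in later releases):

# Track held-out deviance per boosting iteration via staged_predict,
# as the cell above does, without refitting the model.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=500, n_features=8, random_state=13)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=13)

params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2,
          'learning_rate': 0.01, 'loss': 'ls'}
clf = GradientBoostingRegressor(**params).fit(X_train, y_train)

# staged_predict yields predictions after each boosting stage.
test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
for i, y_pred in enumerate(clf.staged_predict(X_test)):
    test_score[i] = clf.loss_(y_test, y_pred)

print(test_score[-1])  # final test-set deviance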
dev/_downloads/scikit-learn-docs.pdf

11.2 KB (binary file not shown)

dev/_images/iris.png

0 Bytes (binary file not shown)
