
Commit 939bfe1

Rebuild dev docs at master=d9f3277
1 parent c878c17


242 files changed (+1567, −1476 lines)


dev/_downloads/plot_gradient_boosting_regression.py

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@
 
 ###############################################################################
 # Fit regression model
-params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 1,
+params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2,
           'learning_rate': 0.01, 'loss': 'ls'}
 clf = ensemble.GradientBoostingRegressor(**params)
 
[Binary files changed: two regenerated example images (1.16 KB and 2.11 KB); previews not shown]
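The only source change in this rebuild is the ``min_samples_split`` value above. As a sanity check, here is a minimal, self-contained sketch of the fixed call, with synthetic data standing in for the example's real dataset (the printed MSE will therefore differ from the script output quoted in the next file). Note that ``loss='ls'`` matches the scikit-learn release documented here; newer releases spell it ``'squared_error'``:

    import numpy as np
    from sklearn import ensemble
    from sklearn.metrics import mean_squared_error

    # Synthetic stand-in for the example's dataset (an assumption, not the
    # original data); 400 samples for training, 100 held out.
    rng = np.random.RandomState(0)
    X = rng.rand(500, 4)
    y = X[:, 0] + 2 * X[:, 1] ** 2 + 0.05 * rng.randn(500)

    # Same parameters as the fixed snippet; min_samples_split=2 is the
    # smallest legal value, since splitting a node needs two samples.
    params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2,
              'learning_rate': 0.01, 'loss': 'ls'}
    clf = ensemble.GradientBoostingRegressor(**params)
    clf.fit(X[:400], y[:400])
    print("MSE: %.4f" % mean_squared_error(y[400:], clf.predict(X[400:])))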

dev/_sources/auto_examples/ensemble/plot_gradient_boosting_regression.txt

Lines changed: 3 additions & 3 deletions
@@ -20,7 +20,7 @@ This example fits a Gradient Boosting model with least squares loss and
 
 **Script output**::
 
-  MSE: 6.7710
+  MSE: 6.5747
 
 
 
@@ -29,6 +29,6 @@ This example fits a Gradient Boosting model with least squares loss and
 .. literalinclude:: plot_gradient_boosting_regression.py
     :lines: 11-
 
-**Total running time of the example:** 1.16 seconds
-( 0 minutes 1.16 seconds)
+**Total running time of the example:** 1.13 seconds
+( 0 minutes 1.13 seconds)
 
dev/_sources/modules/ensemble.txt

Lines changed: 3 additions & 3 deletions
@@ -165,20 +165,20 @@ in bias::
 >>> X, y = make_blobs(n_samples=10000, n_features=10, centers=100,
 ...     random_state=0)
 
->>> clf = DecisionTreeClassifier(max_depth=None, min_samples_split=1,
+>>> clf = DecisionTreeClassifier(max_depth=None, min_samples_split=2,
 ...     random_state=0)
 >>> scores = cross_val_score(clf, X, y)
 >>> scores.mean() # doctest: +ELLIPSIS
 0.97...
 
 >>> clf = RandomForestClassifier(n_estimators=10, max_depth=None,
-...     min_samples_split=1, random_state=0)
+...     min_samples_split=2, random_state=0)
 >>> scores = cross_val_score(clf, X, y)
 >>> scores.mean() # doctest: +ELLIPSIS
 0.999...
 
 >>> clf = ExtraTreesClassifier(n_estimators=10, max_depth=None,
-...     min_samples_split=1, random_state=0)
+...     min_samples_split=2, random_state=0)
 >>> scores = cross_val_score(clf, X, y)
 >>> scores.mean() > 0.999
 True
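These doctests illustrate why the rebuild touches so many files: ``min_samples_split=1`` could never actually produce a split (a split needs two samples), so every documented value moves to 2. A quick sketch of the new behaviour, assuming the validation introduced by this change raises ``ValueError`` at fit time:

    from sklearn.datasets import make_blobs
    from sklearn.tree import DecisionTreeClassifier

    X, y = make_blobs(n_samples=100, centers=3, random_state=0)
    try:
        # Older releases silently accepted min_samples_split=1; after the
        # change documented here it is rejected when fitting.
        DecisionTreeClassifier(min_samples_split=1).fit(X, y)
    except ValueError as exc:
        print("rejected:", exc)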

dev/_sources/modules/tree.txt

Lines changed: 2 additions & 1 deletion
@@ -343,7 +343,8 @@ Tips on practical use
 * Use ``min_samples_split`` or ``min_samples_leaf`` to control the number of
   samples at a leaf node. A very small number will usually mean the tree
   will overfit, whereas a large number will prevent the tree from learning
-  the data. Try ``min_samples_leaf=5`` as an initial value.
+  the data. Try ``min_samples_leaf=5`` as an initial value. If the sample size
+  varies greatly, a float number can be used as percentage in these two parameters.
   The main difference between the two is that ``min_samples_leaf`` guarantees
   a minimum number of samples in a leaf, while ``min_samples_split`` can
   create arbitrary small leaves, though ``min_samples_split`` is more common
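A short sketch of the float form added in the sentence above, assuming the fraction-of-training-samples interpretation described in the what's-new entry further down:

    import numpy as np
    from sklearn.tree import DecisionTreeRegressor

    rng = np.random.RandomState(0)
    X, y = rng.rand(200, 3), rng.rand(200)

    # Floats are read as fractions of n_samples, so the constraint scales
    # with dataset size instead of being a fixed count.
    reg = DecisionTreeRegressor(min_samples_leaf=0.05,   # each leaf keeps >= 5% of samples
                                min_samples_split=0.10)  # a node needs >= 10% to split
    reg.fit(X, y)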

dev/_sources/whats_new.txt

Lines changed: 19 additions & 4 deletions
@@ -21,8 +21,8 @@ New features
   implementation supports kernel engineering, gradient-based hyperparameter optimization or
   sampling of functions from GP prior and GP posterior. Extensive documentation and
   examples are provided. By `Jan Hendrik Metzen`_.
-
-- Added the :class:`ensemble.IsolationForest` class for anomaly detection based on
+
+- Added the :class:`ensemble.IsolationForest` class for anomaly detection based on
   random forests. By `Nicolas Goix`_.
 
 Enhancements
@@ -39,8 +39,18 @@ Enhancements
   method ``decision_path`` which returns the decision path of samples in
   the tree. By `Arnaud Joly`_
 
-- A new example has been added unveling the decision tree structure.
-  By `Arnaud Joly`_
+
+- The random forest, extra tree and decision tree estimators now has a
+  method ``decision_path`` which returns the decision path of samples in
+  the tree. By `Arnaud Joly`_
+
+- A new example has been added unveling the decision tree structure.
+  By `Arnaud Joly`_
+
+- Random forest, extra trees, decision trees and gradient boosting estimator
+  accept the parameter ``min_samples_split`` and ``min_samples_leaf``
+  provided as a percentage of the training samples. By
+  `yelite`_ and `Arnaud Joly`_
 
 Bug fixes
 .........
@@ -65,6 +75,10 @@ Bug fixes
 :class:`decomposition.KernelPCA`, :class:`manifold.LocallyLinearEmbedding`,
 and :class:`manifold.SpectralEmbedding`. By `Peter Fischer`_.
 
+- Random forest, extra trees, decision trees and gradient boosting
+  won't accept anymore ``min_samples_split=1`` as at least 2 samples
+  are required to split a decision tree node. By `Arnaud Joly`_
+
 API changes summary
 -------------------
 
@@ -3854,3 +3868,4 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.
 .. _Graham Clenaghan: https://github.com/gclenaghan
 .. _Giorgio Patrini: https://github.com/giorgiop
 .. _Elvis Dohmatob: https://github.com/dohmatob
+.. _yelite https://github.com/yelite
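The ``decision_path`` enhancement listed above can be exercised as follows; a sketch for a single tree, assuming the method returns a CSR indicator matrix with one row per sample and one column per tree node:

    from sklearn.datasets import make_classification
    from sklearn.tree import DecisionTreeClassifier

    X, y = make_classification(n_samples=100, random_state=0)
    clf = DecisionTreeClassifier(random_state=0).fit(X, y)

    # Entry (i, j) of the indicator is nonzero when sample i passes
    # through node j on its way to a leaf.
    indicator = clf.decision_path(X)
    print(indicator.shape)        # (n_samples, n_nodes)
    print(indicator[0].indices)   # node ids visited by the first sample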

dev/auto_examples/ensemble/plot_gradient_boosting_regression.html

Lines changed: 4 additions & 4 deletions
@@ -172,7 +172,7 @@
 500 regression trees of depth 4.</p>
 <img alt="../../_images/plot_gradient_boosting_regression_001.png" class="align-center" src="../../_images/plot_gradient_boosting_regression_001.png" />
 <p><strong>Script output</strong>:</p>
-<div class="highlight-python"><div class="highlight"><pre>MSE: 6.7710
+<div class="highlight-python"><div class="highlight"><pre>MSE: 6.5747
 </pre></div>
 </div>
 <p><strong>Python source code:</strong> <a class="reference download internal" href="../../_downloads/plot_gradient_boosting_regression.py"><tt class="xref download docutils literal"><span class="pre">plot_gradient_boosting_regression.py</span></tt></a></p>
@@ -201,7 +201,7 @@
 
 <span class="c">###############################################################################</span>
 <span class="c"># Fit regression model</span>
-<span class="n">params</span> <span class="o">=</span> <span class="p">{</span><span class="s">&#39;n_estimators&#39;</span><span class="p">:</span> <span class="mi">500</span><span class="p">,</span> <span class="s">&#39;max_depth&#39;</span><span class="p">:</span> <span class="mi">4</span><span class="p">,</span> <span class="s">&#39;min_samples_split&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span>
+<span class="n">params</span> <span class="o">=</span> <span class="p">{</span><span class="s">&#39;n_estimators&#39;</span><span class="p">:</span> <span class="mi">500</span><span class="p">,</span> <span class="s">&#39;max_depth&#39;</span><span class="p">:</span> <span class="mi">4</span><span class="p">,</span> <span class="s">&#39;min_samples_split&#39;</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span>
 <span class="s">&#39;learning_rate&#39;</span><span class="p">:</span> <span class="mf">0.01</span><span class="p">,</span> <span class="s">&#39;loss&#39;</span><span class="p">:</span> <span class="s">&#39;ls&#39;</span><span class="p">}</span>
 <span class="n">clf</span> <span class="o">=</span> <span class="n">ensemble</span><span class="o">.</span><span class="n">GradientBoostingRegressor</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span>
 
@@ -244,8 +244,8 @@
 <a href="http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.show"><span class="n">plt</span><span class="o">.</span><span class="n">show</span></a><span class="p">()</span>
 </pre></div>
 </div>
-<p><strong>Total running time of the example:</strong> 1.16 seconds
-( 0 minutes 1.16 seconds)</p>
+<p><strong>Total running time of the example:</strong> 1.13 seconds
+( 0 minutes 1.13 seconds)</p>
 </div>
 
 