
Commit 6077282

Pushing the docs to dev/ for branch: master, commit 2fe58e517c7813539d3e3fc283b595a9a712c891
1 parent eaebbbb commit 6077282

File tree: 1,098 files changed, +3785 / -3675 lines

Two binary files changed (-221 bytes, -214 bytes); binary contents not shown.

dev/_downloads/plot_adaboost_hastie_10_2.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
},
"outputs": [],
"source": [

Removed (line 29):
"print(__doc__)\n\n# Author: Peter Prettenhofer <[email protected]>,\n# Noel Dawe <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import datasets\nfrom sklearn.tree import DecisionTreeClassifier\nfrom sklearn.metrics import zero_one_loss\nfrom sklearn.ensemble import AdaBoostClassifier\n\n\nn_estimators = 400\n# A learning rate of 1. may not be optimal for both SAMME and SAMME.R\nlearning_rate = 1.\n\nX, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)\n\nX_test, y_test = X[2000:], y[2000:]\nX_train, y_train = X[:2000], y[:2000]\n\ndt_stump = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1)\ndt_stump.fit(X_train, y_train)\ndt_stump_err = 1.0 - dt_stump.score(X_test, y_test)\n\ndt = DecisionTreeClassifier(max_depth=9, min_samples_leaf=1)\ndt.fit(X_train, y_train)\ndt_err = 1.0 - dt.score(X_test, y_test)\n\nada_discrete = AdaBoostClassifier(\n base_estimator=dt_stump,\n learning_rate=learning_rate,\n n_estimators=n_estimators,\n algorithm=\"SAMME\")\nada_discrete.fit(X_train, y_train)\n\nada_real = AdaBoostClassifier(\n base_estimator=dt_stump,\n learning_rate=learning_rate,\n n_estimators=n_estimators,\n algorithm=\"SAMME.R\")\nada_real.fit(X_train, y_train)\n\nfig = plt.figure()\nax = fig.add_subplot(111)\n\nax.plot([1, n_estimators], [dt_stump_err] * 2, 'k-',\n label='Decision Stump Error')\nax.plot([1, n_estimators], [dt_err] * 2, 'k--',\n label='Decision Tree Error')\n\nada_discrete_err = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_discrete.staged_predict(X_test)):\n ada_discrete_err[i] = zero_one_loss(y_pred, y_test)\n\nada_discrete_err_train = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_discrete.staged_predict(X_train)):\n ada_discrete_err_train[i] = zero_one_loss(y_pred, y_train)\n\nada_real_err = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_real.staged_predict(X_test)):\n ada_real_err[i] = zero_one_loss(y_pred, y_test)\n\nada_real_err_train = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_real.staged_predict(X_train)):\n ada_real_err_train[i] = zero_one_loss(y_pred, y_train)\n\nax.plot(np.arange(n_estimators) + 1, ada_discrete_err,\n label='Discrete AdaBoost Test Error',\n color='red')\nax.plot(np.arange(n_estimators) + 1, ada_discrete_err_train,\n label='Discrete AdaBoost Train Error',\n color='blue')\nax.plot(np.arange(n_estimators) + 1, ada_real_err,\n label='Real AdaBoost Test Error',\n color='orange')\nax.plot(np.arange(n_estimators) + 1, ada_real_err_train,\n label='Real AdaBoost Train Error',\n color='green')\n\nax.set_ylim((0.0, 0.5))\nax.set_xlabel('n_estimators')\nax.set_ylabel('error rate')\n\nleg = ax.legend(loc='upper right', fancybox=True)\nleg.get_frame().set_alpha(0.7)\n\nplt.show()"

Added (line 29):
"print(__doc__)\n\n# Author: Peter Prettenhofer <[email protected]>,\n# Noel Dawe <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import datasets\nfrom sklearn.tree import DecisionTreeClassifier\nfrom sklearn.metrics import zero_one_loss\nfrom sklearn.ensemble import AdaBoostClassifier\n\n\nn_estimators = 400\n# A learning rate of 1. may not be optimal for both SAMME and SAMME.R\nlearning_rate = 1.\n\nX, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)\n\nX_test, y_test = X[2000:], y[2000:]\nX_train, y_train = X[:2000], y[:2000]\n\ndt_stump = DecisionTreeClassifier(max_depth=1)\ndt_stump.fit(X_train, y_train)\ndt_stump_err = 1.0 - dt_stump.score(X_test, y_test)\n\ndt = DecisionTreeClassifier(max_depth=9)\ndt.fit(X_train, y_train)\ndt_err = 1.0 - dt.score(X_test, y_test)\n\nada_discrete = AdaBoostClassifier(\n base_estimator=dt_stump,\n learning_rate=learning_rate,\n n_estimators=n_estimators,\n algorithm=\"SAMME\")\nada_discrete.fit(X_train, y_train)\n\nada_real = AdaBoostClassifier(\n base_estimator=dt_stump,\n learning_rate=learning_rate,\n n_estimators=n_estimators,\n algorithm=\"SAMME.R\")\nada_real.fit(X_train, y_train)\n\nfig = plt.figure()\nax = fig.add_subplot(111)\n\nax.plot([1, n_estimators], [dt_stump_err] * 2, 'k-',\n label='Decision Stump Error')\nax.plot([1, n_estimators], [dt_err] * 2, 'k--',\n label='Decision Tree Error')\n\nada_discrete_err = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_discrete.staged_predict(X_test)):\n ada_discrete_err[i] = zero_one_loss(y_pred, y_test)\n\nada_discrete_err_train = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_discrete.staged_predict(X_train)):\n ada_discrete_err_train[i] = zero_one_loss(y_pred, y_train)\n\nada_real_err = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_real.staged_predict(X_test)):\n ada_real_err[i] = zero_one_loss(y_pred, y_test)\n\nada_real_err_train = np.zeros((n_estimators,))\nfor i, y_pred in enumerate(ada_real.staged_predict(X_train)):\n ada_real_err_train[i] = zero_one_loss(y_pred, y_train)\n\nax.plot(np.arange(n_estimators) + 1, ada_discrete_err,\n label='Discrete AdaBoost Test Error',\n color='red')\nax.plot(np.arange(n_estimators) + 1, ada_discrete_err_train,\n label='Discrete AdaBoost Train Error',\n color='blue')\nax.plot(np.arange(n_estimators) + 1, ada_real_err,\n label='Real AdaBoost Test Error',\n color='orange')\nax.plot(np.arange(n_estimators) + 1, ada_real_err_train,\n label='Real AdaBoost Train Error',\n color='green')\n\nax.set_ylim((0.0, 0.5))\nax.set_xlabel('n_estimators')\nax.set_ylabel('error rate')\n\nleg = ax.legend(loc='upper right', fancybox=True)\nleg.get_frame().set_alpha(0.7)\n\nplt.show()"

]
}
],

dev/_downloads/plot_adaboost_hastie_10_2.py

Lines changed: 2 additions & 2 deletions
@@ -43,11 +43,11 @@
 X_test, y_test = X[2000:], y[2000:]
 X_train, y_train = X[:2000], y[:2000]

-dt_stump = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1)
+dt_stump = DecisionTreeClassifier(max_depth=1)
 dt_stump.fit(X_train, y_train)
 dt_stump_err = 1.0 - dt_stump.score(X_test, y_test)

-dt = DecisionTreeClassifier(max_depth=9, min_samples_leaf=1)
+dt = DecisionTreeClassifier(max_depth=9)
 dt.fit(X_train, y_train)
 dt_err = 1.0 - dt.score(X_test, y_test)
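
Note: the removed min_samples_leaf=1 is the scikit-learn default for DecisionTreeClassifier, so this edit only trims a redundant argument; the rendered example should train the same stump and depth-9 tree as before. A minimal sanity check under that assumption (not part of the commit; the fixed random_state is added here only so the two fits are directly comparable):

# Hypothetical check: dropping the explicit min_samples_leaf=1 should not
# change the fitted tree, since 1 is the documented default.
from sklearn.datasets import make_hastie_10_2
from sklearn.tree import DecisionTreeClassifier

X, y = make_hastie_10_2(n_samples=2000, random_state=1)

explicit = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1,
                                  random_state=0).fit(X, y)
implicit = DecisionTreeClassifier(max_depth=1, random_state=0).fit(X, y)

# Same hyperparameters -> same root split and same training accuracy.
assert explicit.tree_.threshold[0] == implicit.tree_.threshold[0]
assert explicit.score(X, y) == implicit.score(X, y)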

dev/_downloads/plot_gradient_boosting_oob.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
},
"outputs": [],
"source": [

Removed (line 29):
"print(__doc__)\n\n# Author: Peter Prettenhofer <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import ensemble\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import train_test_split\n\n\n# Generate data (adapted from G. Ridgeway's gbm example)\nn_samples = 1000\nrandom_state = np.random.RandomState(13)\nx1 = random_state.uniform(size=n_samples)\nx2 = random_state.uniform(size=n_samples)\nx3 = random_state.randint(0, 4, size=n_samples)\n\np = 1 / (1.0 + np.exp(-(np.sin(3 * x1) - 4 * x2 + x3)))\ny = random_state.binomial(1, p, size=n_samples)\n\nX = np.c_[x1, x2, x3]\n\nX = X.astype(np.float32)\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5,\n random_state=9)\n\n# Fit classifier with out-of-bag estimates\nparams = {'n_estimators': 1200, 'max_depth': 3, 'subsample': 0.5,\n 'learning_rate': 0.01, 'min_samples_leaf': 1, 'random_state': 3}\nclf = ensemble.GradientBoostingClassifier(**params)\n\nclf.fit(X_train, y_train)\nacc = clf.score(X_test, y_test)\nprint(\"Accuracy: {:.4f}\".format(acc))\n\nn_estimators = params['n_estimators']\nx = np.arange(n_estimators) + 1\n\n\ndef heldout_score(clf, X_test, y_test):\n \"\"\"compute deviance scores on ``X_test`` and ``y_test``. \"\"\"\n score = np.zeros((n_estimators,), dtype=np.float64)\n for i, y_pred in enumerate(clf.staged_decision_function(X_test)):\n score[i] = clf.loss_(y_test, y_pred)\n return score\n\n\ndef cv_estimate(n_splits=None):\n cv = KFold(n_splits=n_splits)\n cv_clf = ensemble.GradientBoostingClassifier(**params)\n val_scores = np.zeros((n_estimators,), dtype=np.float64)\n for train, test in cv.split(X_train, y_train):\n cv_clf.fit(X_train[train], y_train[train])\n val_scores += heldout_score(cv_clf, X_train[test], y_train[test])\n val_scores /= n_splits\n return val_scores\n\n\n# Estimate best n_estimator using cross-validation\ncv_score = cv_estimate(3)\n\n# Compute best n_estimator for test data\ntest_score = heldout_score(clf, X_test, y_test)\n\n# negative cumulative sum of oob improvements\ncumsum = -np.cumsum(clf.oob_improvement_)\n\n# min loss according to OOB\noob_best_iter = x[np.argmin(cumsum)]\n\n# min loss according to test (normalize such that first loss is 0)\ntest_score -= test_score[0]\ntest_best_iter = x[np.argmin(test_score)]\n\n# min loss according to cv (normalize such that first loss is 0)\ncv_score -= cv_score[0]\ncv_best_iter = x[np.argmin(cv_score)]\n\n# color brew for the three curves\noob_color = list(map(lambda x: x / 256.0, (190, 174, 212)))\ntest_color = list(map(lambda x: x / 256.0, (127, 201, 127)))\ncv_color = list(map(lambda x: x / 256.0, (253, 192, 134)))\n\n# plot curves and vertical lines for best iterations\nplt.plot(x, cumsum, label='OOB loss', color=oob_color)\nplt.plot(x, test_score, label='Test loss', color=test_color)\nplt.plot(x, cv_score, label='CV loss', color=cv_color)\nplt.axvline(x=oob_best_iter, color=oob_color)\nplt.axvline(x=test_best_iter, color=test_color)\nplt.axvline(x=cv_best_iter, color=cv_color)\n\n# add three vertical lines to xticks\nxticks = plt.xticks()\nxticks_pos = np.array(xticks[0].tolist() +\n [oob_best_iter, cv_best_iter, test_best_iter])\nxticks_label = np.array(list(map(lambda t: int(t), xticks[0])) +\n ['OOB', 'CV', 'Test'])\nind = np.argsort(xticks_pos)\nxticks_pos = xticks_pos[ind]\nxticks_label = xticks_label[ind]\nplt.xticks(xticks_pos, xticks_label)\n\nplt.legend(loc='upper right')\nplt.ylabel('normalized loss')\nplt.xlabel('number of 
iterations')\n\nplt.show()"

Added (line 29):
"print(__doc__)\n\n# Author: Peter Prettenhofer <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import ensemble\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import train_test_split\n\n\n# Generate data (adapted from G. Ridgeway's gbm example)\nn_samples = 1000\nrandom_state = np.random.RandomState(13)\nx1 = random_state.uniform(size=n_samples)\nx2 = random_state.uniform(size=n_samples)\nx3 = random_state.randint(0, 4, size=n_samples)\n\np = 1 / (1.0 + np.exp(-(np.sin(3 * x1) - 4 * x2 + x3)))\ny = random_state.binomial(1, p, size=n_samples)\n\nX = np.c_[x1, x2, x3]\n\nX = X.astype(np.float32)\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5,\n random_state=9)\n\n# Fit classifier with out-of-bag estimates\nparams = {'n_estimators': 1200, 'max_depth': 3, 'subsample': 0.5,\n 'learning_rate': 0.01, 'random_state': 3}\nclf = ensemble.GradientBoostingClassifier(**params)\n\nclf.fit(X_train, y_train)\nacc = clf.score(X_test, y_test)\nprint(\"Accuracy: {:.4f}\".format(acc))\n\nn_estimators = params['n_estimators']\nx = np.arange(n_estimators) + 1\n\n\ndef heldout_score(clf, X_test, y_test):\n \"\"\"compute deviance scores on ``X_test`` and ``y_test``. \"\"\"\n score = np.zeros((n_estimators,), dtype=np.float64)\n for i, y_pred in enumerate(clf.staged_decision_function(X_test)):\n score[i] = clf.loss_(y_test, y_pred)\n return score\n\n\ndef cv_estimate(n_splits=None):\n cv = KFold(n_splits=n_splits)\n cv_clf = ensemble.GradientBoostingClassifier(**params)\n val_scores = np.zeros((n_estimators,), dtype=np.float64)\n for train, test in cv.split(X_train, y_train):\n cv_clf.fit(X_train[train], y_train[train])\n val_scores += heldout_score(cv_clf, X_train[test], y_train[test])\n val_scores /= n_splits\n return val_scores\n\n\n# Estimate best n_estimator using cross-validation\ncv_score = cv_estimate(3)\n\n# Compute best n_estimator for test data\ntest_score = heldout_score(clf, X_test, y_test)\n\n# negative cumulative sum of oob improvements\ncumsum = -np.cumsum(clf.oob_improvement_)\n\n# min loss according to OOB\noob_best_iter = x[np.argmin(cumsum)]\n\n# min loss according to test (normalize such that first loss is 0)\ntest_score -= test_score[0]\ntest_best_iter = x[np.argmin(test_score)]\n\n# min loss according to cv (normalize such that first loss is 0)\ncv_score -= cv_score[0]\ncv_best_iter = x[np.argmin(cv_score)]\n\n# color brew for the three curves\noob_color = list(map(lambda x: x / 256.0, (190, 174, 212)))\ntest_color = list(map(lambda x: x / 256.0, (127, 201, 127)))\ncv_color = list(map(lambda x: x / 256.0, (253, 192, 134)))\n\n# plot curves and vertical lines for best iterations\nplt.plot(x, cumsum, label='OOB loss', color=oob_color)\nplt.plot(x, test_score, label='Test loss', color=test_color)\nplt.plot(x, cv_score, label='CV loss', color=cv_color)\nplt.axvline(x=oob_best_iter, color=oob_color)\nplt.axvline(x=test_best_iter, color=test_color)\nplt.axvline(x=cv_best_iter, color=cv_color)\n\n# add three vertical lines to xticks\nxticks = plt.xticks()\nxticks_pos = np.array(xticks[0].tolist() +\n [oob_best_iter, cv_best_iter, test_best_iter])\nxticks_label = np.array(list(map(lambda t: int(t), xticks[0])) +\n ['OOB', 'CV', 'Test'])\nind = np.argsort(xticks_pos)\nxticks_pos = xticks_pos[ind]\nxticks_label = xticks_label[ind]\nplt.xticks(xticks_pos, xticks_label)\n\nplt.legend(loc='upper right')\nplt.ylabel('normalized loss')\nplt.xlabel('number of iterations')\n\nplt.show()"

]
}
],

dev/_downloads/plot_gradient_boosting_oob.py

Lines changed: 1 addition & 1 deletion
@@ -55,7 +55,7 @@

 # Fit classifier with out-of-bag estimates
 params = {'n_estimators': 1200, 'max_depth': 3, 'subsample': 0.5,
-          'learning_rate': 0.01, 'min_samples_leaf': 1, 'random_state': 3}
+          'learning_rate': 0.01, 'random_state': 3}
 clf = ensemble.GradientBoostingClassifier(**params)

 clf.fit(X_train, y_train)
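
Note: same idea as the AdaBoost example above. min_samples_leaf defaults to 1 in GradientBoostingClassifier, so deleting it from params should leave the fitted ensemble and the OOB curves unchanged. A quick way to confirm the default, sketched here rather than taken from the commit:

# Sketch: the value removed from `params` is just the estimator's default.
from sklearn.ensemble import GradientBoostingClassifier

default_leaf = GradientBoostingClassifier().get_params()['min_samples_leaf']
print(default_leaf)  # expected: 1, matching the removed 'min_samples_leaf': 1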

dev/_downloads/plot_gradient_boosting_quantile.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
},
"outputs": [],
"source": [

Removed (line 29):
"import numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.ensemble import GradientBoostingRegressor\n\nnp.random.seed(1)\n\n\ndef f(x):\n \"\"\"The function to predict.\"\"\"\n return x * np.sin(x)\n\n#----------------------------------------------------------------------\n# First the noiseless case\nX = np.atleast_2d(np.random.uniform(0, 10.0, size=100)).T\nX = X.astype(np.float32)\n\n# Observations\ny = f(X).ravel()\n\ndy = 1.5 + 1.0 * np.random.random(y.shape)\nnoise = np.random.normal(0, dy)\ny += noise\ny = y.astype(np.float32)\n\n# Mesh the input space for evaluations of the real function, the prediction and\n# its MSE\nxx = np.atleast_2d(np.linspace(0, 10, 1000)).T\nxx = xx.astype(np.float32)\n\nalpha = 0.95\n\nclf = GradientBoostingRegressor(loss='quantile', alpha=alpha,\n n_estimators=250, max_depth=3,\n learning_rate=.1, min_samples_leaf=9,\n min_samples_split=9)\n\nclf.fit(X, y)\n\n# Make the prediction on the meshed x-axis\ny_upper = clf.predict(xx)\n\nclf.set_params(alpha=1.0 - alpha)\nclf.fit(X, y)\n\n# Make the prediction on the meshed x-axis\ny_lower = clf.predict(xx)\n\nclf.set_params(loss='ls')\nclf.fit(X, y)\n\n# Make the prediction on the meshed x-axis\ny_pred = clf.predict(xx)\n\n# Plot the function, the prediction and the 90% confidence interval based on\n# the MSE\nfig = plt.figure()\nplt.plot(xx, f(xx), 'g:', label=u'$f(x) = x\\,\\sin(x)$')\nplt.plot(X, y, 'b.', markersize=10, label=u'Observations')\nplt.plot(xx, y_pred, 'r-', label=u'Prediction')\nplt.plot(xx, y_upper, 'k-')\nplt.plot(xx, y_lower, 'k-')\nplt.fill(np.concatenate([xx, xx[::-1]]),\n np.concatenate([y_upper, y_lower[::-1]]),\n alpha=.5, fc='b', ec='None', label='90% prediction interval')\nplt.xlabel('$x$')\nplt.ylabel('$f(x)$')\nplt.ylim(-10, 20)\nplt.legend(loc='upper left')\nplt.show()"

Added (line 29):
"import numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.ensemble import GradientBoostingRegressor\n\nnp.random.seed(1)\n\n\ndef f(x):\n \"\"\"The function to predict.\"\"\"\n return x * np.sin(x)\n\n#----------------------------------------------------------------------\n# First the noiseless case\nX = np.atleast_2d(np.random.uniform(0, 10.0, size=100)).T\nX = X.astype(np.float32)\n\n# Observations\ny = f(X).ravel()\n\ndy = 1.5 + 1.0 * np.random.random(y.shape)\nnoise = np.random.normal(0, dy)\ny += noise\ny = y.astype(np.float32)\n\n# Mesh the input space for evaluations of the real function, the prediction and\n# its MSE\nxx = np.atleast_2d(np.linspace(0, 10, 1000)).T\nxx = xx.astype(np.float32)\n\nalpha = 0.95\n\nclf = GradientBoostingRegressor(loss='quantile', alpha=alpha,\n n_estimators=250, max_depth=3,\n learning_rate=.1, min_samples_split=9)\n\nclf.fit(X, y)\n\n# Make the prediction on the meshed x-axis\ny_upper = clf.predict(xx)\n\nclf.set_params(alpha=1.0 - alpha)\nclf.fit(X, y)\n\n# Make the prediction on the meshed x-axis\ny_lower = clf.predict(xx)\n\nclf.set_params(loss='ls')\nclf.fit(X, y)\n\n# Make the prediction on the meshed x-axis\ny_pred = clf.predict(xx)\n\n# Plot the function, the prediction and the 90% confidence interval based on\n# the MSE\nfig = plt.figure()\nplt.plot(xx, f(xx), 'g:', label=u'$f(x) = x\\,\\sin(x)$')\nplt.plot(X, y, 'b.', markersize=10, label=u'Observations')\nplt.plot(xx, y_pred, 'r-', label=u'Prediction')\nplt.plot(xx, y_upper, 'k-')\nplt.plot(xx, y_lower, 'k-')\nplt.fill(np.concatenate([xx, xx[::-1]]),\n np.concatenate([y_upper, y_lower[::-1]]),\n alpha=.5, fc='b', ec='None', label='90% prediction interval')\nplt.xlabel('$x$')\nplt.ylabel('$f(x)$')\nplt.ylim(-10, 20)\nplt.legend(loc='upper left')\nplt.show()"

]
}
],

dev/_downloads/plot_gradient_boosting_quantile.py

Lines changed: 1 addition & 2 deletions
@@ -41,8 +41,7 @@ def f(x):

 clf = GradientBoostingRegressor(loss='quantile', alpha=alpha,
                                 n_estimators=250, max_depth=3,
-                                learning_rate=.1, min_samples_leaf=9,
-                                min_samples_split=9)
+                                learning_rate=.1, min_samples_split=9)

 clf.fit(X, y)
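
Note: unlike the two edits above, this one drops a non-default setting. Without min_samples_leaf=9 the regressor falls back to the default leaf size of 1 while min_samples_split=9 still constrains splits, so the plotted quantile curves may shift slightly. A sketch of how one could compare the two configurations on data shaped like the example's (hypothetical check, not part of the commit; random_state=0 is added only to make the comparison deterministic):

# Sketch: fit the quantile regressor with and without the removed constraint
# and measure how far apart the predictions end up.
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

rng = np.random.RandomState(1)
X = np.sort(rng.uniform(0, 10.0, size=(100, 1)), axis=0)
y = (X * np.sin(X)).ravel() + rng.normal(0, 1.5, size=100)

common = dict(loss='quantile', alpha=0.95, n_estimators=250, max_depth=3,
              learning_rate=.1, min_samples_split=9, random_state=0)

before = GradientBoostingRegressor(min_samples_leaf=9, **common).fit(X, y)
after = GradientBoostingRegressor(**common).fit(X, y)  # leaf size back to default 1

xx = np.linspace(0, 10, 200).reshape(-1, 1)
print(np.abs(before.predict(xx) - after.predict(xx)).max())  # typically > 0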
