Skip to content

Commit 0722f08

Browse files
committed
Pushing the docs to dev/ for branch: master, commit e0697b01a78af2bae00dfb68e402b0ce6369e3ca
1 parent e960abb commit 0722f08

File tree

1,048 files changed

+3244
-3253
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,048 files changed

+3244
-3253
lines changed
-4 Bytes
Binary file not shown.
-1 Bytes
Binary file not shown.

dev/_downloads/plot_gradient_boosting_oob.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"print(__doc__)\n\n# Author: Peter Prettenhofer <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import ensemble\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import train_test_split\n\n\n# Generate data (adapted from G. Ridgeway's gbm example)\nn_samples = 1000\nrandom_state = np.random.RandomState(13)\nx1 = random_state.uniform(size=n_samples)\nx2 = random_state.uniform(size=n_samples)\nx3 = random_state.randint(0, 4, size=n_samples)\n\np = 1 / (1.0 + np.exp(-(np.sin(3 * x1) - 4 * x2 + x3)))\ny = random_state.binomial(1, p, size=n_samples)\n\nX = np.c_[x1, x2, x3]\n\nX = X.astype(np.float32)\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5,\n random_state=9)\n\n# Fit classifier with out-of-bag estimates\nparams = {'n_estimators': 1200, 'max_depth': 3, 'subsample': 0.5,\n 'learning_rate': 0.01, 'min_samples_leaf': 1, 'random_state': 3}\nclf = ensemble.GradientBoostingClassifier(**params)\n\nclf.fit(X_train, y_train)\nacc = clf.score(X_test, y_test)\nprint(\"Accuracy: {:.4f}\".format(acc))\n\nn_estimators = params['n_estimators']\nx = np.arange(n_estimators) + 1\n\n\ndef heldout_score(clf, X_test, y_test):\n \"\"\"compute deviance scores on ``X_test`` and ``y_test``. \"\"\"\n score = np.zeros((n_estimators,), dtype=np.float64)\n for i, y_pred in enumerate(clf.staged_decision_function(X_test)):\n score[i] = clf.loss_(y_test, y_pred)\n return score\n\n\ndef cv_estimate(n_splits=None):\n cv = KFold(n_splits=n_splits)\n cv_clf = ensemble.GradientBoostingClassifier(**params)\n val_scores = np.zeros((n_estimators,), dtype=np.float64)\n for train, test in cv.split(X_train, y_train):\n cv_clf.fit(X_train[train], y_train[train])\n val_scores += heldout_score(cv_clf, X_train[test], y_train[test])\n val_scores /= n_splits\n return val_scores\n\n\n# Estimate best n_estimator using cross-validation\ncv_score = cv_estimate(3)\n\n# Compute best n_estimator for test data\ntest_score = heldout_score(clf, X_test, y_test)\n\n# negative cumulative sum of oob improvements\ncumsum = -np.cumsum(clf.oob_improvement_)\n\n# min loss according to OOB\noob_best_iter = x[np.argmin(cumsum)]\n\n# min loss according to test (normalize such that first loss is 0)\ntest_score -= test_score[0]\ntest_best_iter = x[np.argmin(test_score)]\n\n# min loss according to cv (normalize such that first loss is 0)\ncv_score -= cv_score[0]\ncv_best_iter = x[np.argmin(cv_score)]\n\n# color brew for the three curves\noob_color = list(map(lambda x: x / 256.0, (190, 174, 212)))\ntest_color = list(map(lambda x: x / 256.0, (127, 201, 127)))\ncv_color = list(map(lambda x: x / 256.0, (253, 192, 134)))\n\n# plot curves and vertical lines for best iterations\nplt.plot(x, cumsum, label='OOB loss', color=oob_color)\nplt.plot(x, test_score, label='Test loss', color=test_color)\nplt.plot(x, cv_score, label='CV loss', color=cv_color)\nplt.axvline(x=oob_best_iter, color=oob_color)\nplt.axvline(x=test_best_iter, color=test_color)\nplt.axvline(x=cv_best_iter, color=cv_color)\n\n# add three vertical lines to xticks\nxticks = plt.xticks()\nxticks_pos = np.array(xticks[0].tolist() +\n [oob_best_iter, cv_best_iter, test_best_iter])\nxticks_label = np.array(list(map(lambda t: int(t), xticks[0])) +\n ['OOB', 'CV', 'Test'])\nind = np.argsort(xticks_pos)\nxticks_pos = xticks_pos[ind]\nxticks_label = xticks_label[ind]\nplt.xticks(xticks_pos, xticks_label)\n\nplt.legend(loc='upper right')\nplt.ylabel('normalized loss')\nplt.xlabel('number of iterations')\n\nplt.show()"
29+
"print(__doc__)\n\n# Author: Peter Prettenhofer <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import ensemble\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import train_test_split\n\nfrom scipy.special import expit\n\n# Generate data (adapted from G. Ridgeway's gbm example)\nn_samples = 1000\nrandom_state = np.random.RandomState(13)\nx1 = random_state.uniform(size=n_samples)\nx2 = random_state.uniform(size=n_samples)\nx3 = random_state.randint(0, 4, size=n_samples)\n\np = expit(np.sin(3 * x1) - 4 * x2 + x3)\ny = random_state.binomial(1, p, size=n_samples)\n\nX = np.c_[x1, x2, x3]\n\nX = X.astype(np.float32)\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5,\n random_state=9)\n\n# Fit classifier with out-of-bag estimates\nparams = {'n_estimators': 1200, 'max_depth': 3, 'subsample': 0.5,\n 'learning_rate': 0.01, 'min_samples_leaf': 1, 'random_state': 3}\nclf = ensemble.GradientBoostingClassifier(**params)\n\nclf.fit(X_train, y_train)\nacc = clf.score(X_test, y_test)\nprint(\"Accuracy: {:.4f}\".format(acc))\n\nn_estimators = params['n_estimators']\nx = np.arange(n_estimators) + 1\n\n\ndef heldout_score(clf, X_test, y_test):\n \"\"\"compute deviance scores on ``X_test`` and ``y_test``. \"\"\"\n score = np.zeros((n_estimators,), dtype=np.float64)\n for i, y_pred in enumerate(clf.staged_decision_function(X_test)):\n score[i] = clf.loss_(y_test, y_pred)\n return score\n\n\ndef cv_estimate(n_splits=None):\n cv = KFold(n_splits=n_splits)\n cv_clf = ensemble.GradientBoostingClassifier(**params)\n val_scores = np.zeros((n_estimators,), dtype=np.float64)\n for train, test in cv.split(X_train, y_train):\n cv_clf.fit(X_train[train], y_train[train])\n val_scores += heldout_score(cv_clf, X_train[test], y_train[test])\n val_scores /= n_splits\n return val_scores\n\n\n# Estimate best n_estimator using cross-validation\ncv_score = cv_estimate(3)\n\n# Compute best n_estimator for test data\ntest_score = heldout_score(clf, X_test, y_test)\n\n# negative cumulative sum of oob improvements\ncumsum = -np.cumsum(clf.oob_improvement_)\n\n# min loss according to OOB\noob_best_iter = x[np.argmin(cumsum)]\n\n# min loss according to test (normalize such that first loss is 0)\ntest_score -= test_score[0]\ntest_best_iter = x[np.argmin(test_score)]\n\n# min loss according to cv (normalize such that first loss is 0)\ncv_score -= cv_score[0]\ncv_best_iter = x[np.argmin(cv_score)]\n\n# color brew for the three curves\noob_color = list(map(lambda x: x / 256.0, (190, 174, 212)))\ntest_color = list(map(lambda x: x / 256.0, (127, 201, 127)))\ncv_color = list(map(lambda x: x / 256.0, (253, 192, 134)))\n\n# plot curves and vertical lines for best iterations\nplt.plot(x, cumsum, label='OOB loss', color=oob_color)\nplt.plot(x, test_score, label='Test loss', color=test_color)\nplt.plot(x, cv_score, label='CV loss', color=cv_color)\nplt.axvline(x=oob_best_iter, color=oob_color)\nplt.axvline(x=test_best_iter, color=test_color)\nplt.axvline(x=cv_best_iter, color=cv_color)\n\n# add three vertical lines to xticks\nxticks = plt.xticks()\nxticks_pos = np.array(xticks[0].tolist() +\n [oob_best_iter, cv_best_iter, test_best_iter])\nxticks_label = np.array(list(map(lambda t: int(t), xticks[0])) +\n ['OOB', 'CV', 'Test'])\nind = np.argsort(xticks_pos)\nxticks_pos = xticks_pos[ind]\nxticks_label = xticks_label[ind]\nplt.xticks(xticks_pos, xticks_label)\n\nplt.legend(loc='upper right')\nplt.ylabel('normalized loss')\nplt.xlabel('number of iterations')\n\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/plot_gradient_boosting_oob.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from sklearn.model_selection import KFold
3737
from sklearn.model_selection import train_test_split
3838

39+
from scipy.special import expit
3940

4041
# Generate data (adapted from G. Ridgeway's gbm example)
4142
n_samples = 1000
@@ -44,7 +45,7 @@
4445
x2 = random_state.uniform(size=n_samples)
4546
x3 = random_state.randint(0, 4, size=n_samples)
4647

47-
p = 1 / (1.0 + np.exp(-(np.sin(3 * x1) - 4 * x2 + x3)))
48+
p = expit(np.sin(3 * x1) - 4 * x2 + x3)
4849
y = random_state.binomial(1, p, size=n_samples)
4950

5051
X = np.c_[x1, x2, x3]

dev/_downloads/plot_logistic.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"print(__doc__)\n\n\n# Code source: Gael Varoquaux\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import linear_model\n\n# General a toy dataset:s it's just a straight line with some Gaussian noise:\nxmin, xmax = -5, 5\nn_samples = 100\nnp.random.seed(0)\nX = np.random.normal(size=n_samples)\ny = (X > 0).astype(np.float)\nX[X > 0] *= 4\nX += .3 * np.random.normal(size=n_samples)\n\nX = X[:, np.newaxis]\n\n# Fit the classifier\nclf = linear_model.LogisticRegression(C=1e5, solver='lbfgs')\nclf.fit(X, y)\n\n# and plot the result\nplt.figure(1, figsize=(4, 3))\nplt.clf()\nplt.scatter(X.ravel(), y, color='black', zorder=20)\nX_test = np.linspace(-5, 10, 300)\n\n\ndef model(x):\n return 1 / (1 + np.exp(-x))\n\n\nloss = model(X_test * clf.coef_ + clf.intercept_).ravel()\nplt.plot(X_test, loss, color='red', linewidth=3)\n\nols = linear_model.LinearRegression()\nols.fit(X, y)\nplt.plot(X_test, ols.coef_ * X_test + ols.intercept_, linewidth=1)\nplt.axhline(.5, color='.5')\n\nplt.ylabel('y')\nplt.xlabel('X')\nplt.xticks(range(-5, 10))\nplt.yticks([0, 0.5, 1])\nplt.ylim(-.25, 1.25)\nplt.xlim(-4, 10)\nplt.legend(('Logistic Regression Model', 'Linear Regression Model'),\n loc=\"lower right\", fontsize='small')\nplt.tight_layout()\nplt.show()"
29+
"print(__doc__)\n\n\n# Code source: Gael Varoquaux\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import linear_model\nfrom scipy.special import expit\n\n# General a toy dataset:s it's just a straight line with some Gaussian noise:\nxmin, xmax = -5, 5\nn_samples = 100\nnp.random.seed(0)\nX = np.random.normal(size=n_samples)\ny = (X > 0).astype(np.float)\nX[X > 0] *= 4\nX += .3 * np.random.normal(size=n_samples)\n\nX = X[:, np.newaxis]\n\n# Fit the classifier\nclf = linear_model.LogisticRegression(C=1e5, solver='lbfgs')\nclf.fit(X, y)\n\n# and plot the result\nplt.figure(1, figsize=(4, 3))\nplt.clf()\nplt.scatter(X.ravel(), y, color='black', zorder=20)\nX_test = np.linspace(-5, 10, 300)\n\nloss = expit(X_test * clf.coef_ + clf.intercept_).ravel()\nplt.plot(X_test, loss, color='red', linewidth=3)\n\nols = linear_model.LinearRegression()\nols.fit(X, y)\nplt.plot(X_test, ols.coef_ * X_test + ols.intercept_, linewidth=1)\nplt.axhline(.5, color='.5')\n\nplt.ylabel('y')\nplt.xlabel('X')\nplt.xticks(range(-5, 10))\nplt.yticks([0, 0.5, 1])\nplt.ylim(-.25, 1.25)\nplt.xlim(-4, 10)\nplt.legend(('Logistic Regression Model', 'Linear Regression Model'),\n loc=\"lower right\", fontsize='small')\nplt.tight_layout()\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/plot_logistic.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import matplotlib.pyplot as plt
2323

2424
from sklearn import linear_model
25+
from scipy.special import expit
2526

2627
# General a toy dataset:s it's just a straight line with some Gaussian noise:
2728
xmin, xmax = -5, 5
@@ -44,12 +45,7 @@
4445
plt.scatter(X.ravel(), y, color='black', zorder=20)
4546
X_test = np.linspace(-5, 10, 300)
4647

47-
48-
def model(x):
49-
return 1 / (1 + np.exp(-x))
50-
51-
52-
loss = model(X_test * clf.coef_ + clf.intercept_).ravel()
48+
loss = expit(X_test * clf.coef_ + clf.intercept_).ravel()
5349
plt.plot(X_test, loss, color='red', linewidth=3)
5450

5551
ols = linear_model.LinearRegression()

dev/_downloads/scikit-learn-docs.pdf

-4.09 KB
Binary file not shown.

dev/_images/iris.png

0 Bytes

0 commit comments

Comments
 (0)