
Commit ee0da2f

Pushing the docs to dev/ for branch: master, commit 932c60648d49dcda3784047d6df595421e8c32c8
1 parent f28426c commit ee0da2f

File tree: 1,208 files changed (+4,169 / -4,169 lines)


dev/_downloads/2ae02325ffd71a3699f433ae3baecd85/plot_cv_predict.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
- line 29 (removed):
"from sklearn import datasets\nfrom sklearn.model_selection import cross_val_predict\nfrom sklearn import linear_model\nimport matplotlib.pyplot as plt\n\nlr = linear_model.LinearRegression()\nX, y = datasets.load_boston(return_X_y=True)\n\n# cross_val_predict returns an array of the same size as `y` where each entry\n# is a prediction obtained by cross validation:\npredicted = cross_val_predict(lr, X, y, cv=10)\n\nfig, ax = plt.subplots()\nax.scatter(y, predicted, edgecolors=(0, 0, 0))\nax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)\nax.set_xlabel('Measured')\nax.set_ylabel('Predicted')\nplt.show()"
+ line 29 (added):
"from sklearn import datasets\nfrom sklearn.model_selection import cross_val_predict\nfrom sklearn import linear_model\nimport matplotlib.pyplot as plt\n\nlr = linear_model.LinearRegression()\nX, y = datasets.load_diabetes(return_X_y=True)\n\n# cross_val_predict returns an array of the same size as `y` where each entry\n# is a prediction obtained by cross validation:\npredicted = cross_val_predict(lr, X, y, cv=10)\n\nfig, ax = plt.subplots()\nax.scatter(y, predicted, edgecolors=(0, 0, 0))\nax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)\nax.set_xlabel('Measured')\nax.set_ylabel('Predicted')\nplt.show()"
 ]
 }
],

dev/_downloads/336608b7fc391cd88b2587817f48ffdd/plot_cv_predict.py

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@
 import matplotlib.pyplot as plt
 
 lr = linear_model.LinearRegression()
-X, y = datasets.load_boston(return_X_y=True)
+X, y = datasets.load_diabetes(return_X_y=True)
 
 # cross_val_predict returns an array of the same size as `y` where each entry
 # is a prediction obtained by cross validation:
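
Both the notebook and the script version of this example swap the Boston housing data for the diabetes regression data. A minimal sketch of the updated loading pattern (not part of the commit; shown only to illustrate the change):

    # Hedged sketch: load_diabetes exposes the same return_X_y interface as the
    # load_boston call it replaces, so the rest of the example is unchanged.
    from sklearn import datasets, linear_model
    from sklearn.model_selection import cross_val_predict

    X, y = datasets.load_diabetes(return_X_y=True)  # was: datasets.load_boston(return_X_y=True)
    lr = linear_model.LinearRegression()
    predicted = cross_val_predict(lr, X, y, cv=10)  # one cross-validated prediction per entry of y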

dev/_downloads/5d93da33b794785877d0c01122dd0716/plot_iterative_imputer_variants_comparison.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
- line 29 (removed):
"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport pandas as pd\n\n# To use this experimental feature, we need to explicitly ask for it:\nfrom sklearn.experimental import enable_iterative_imputer # noqa\nfrom sklearn.datasets import fetch_california_housing\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.impute import IterativeImputer\nfrom sklearn.linear_model import BayesianRidge\nfrom sklearn.tree import DecisionTreeRegressor\nfrom sklearn.ensemble import ExtraTreesRegressor\nfrom sklearn.neighbors import KNeighborsRegressor\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import cross_val_score\n\nN_SPLITS = 5\n\nrng = np.random.RandomState(0)\n\nX_full, y_full = fetch_california_housing(return_X_y=True)\n# ~2k samples is enough for the purpose of the example.\n# Remove the following two lines for a slower run with different error bars.\nX_full = X_full[::10]\ny_full = y_full[::10]\nn_samples, n_features = X_full.shape\n\n# Estimate the score on the entire dataset, with no missing values\nbr_estimator = BayesianRidge()\nscore_full_data = pd.DataFrame(\n cross_val_score(\n br_estimator, X_full, y_full, scoring='neg_mean_squared_error',\n cv=N_SPLITS\n ),\n columns=['Full Data']\n)\n\n# Add a single missing value to each row\nX_missing = X_full.copy()\ny_missing = y_full\nmissing_samples = np.arange(n_samples)\nmissing_features = rng.choice(n_features, n_samples, replace=True)\nX_missing[missing_samples, missing_features] = np.nan\n\n# Estimate the score after imputation (mean and median strategies)\nscore_simple_imputer = pd.DataFrame()\nfor strategy in ('mean', 'median'):\n estimator = make_pipeline(\n SimpleImputer(missing_values=np.nan, strategy=strategy),\n br_estimator\n )\n score_simple_imputer[strategy] = cross_val_score(\n estimator, X_missing, y_missing, scoring='neg_mean_squared_error',\n cv=N_SPLITS\n )\n\n# Estimate the score after iterative imputation of the missing values\n# with different estimators\nestimators = [\n BayesianRidge(),\n DecisionTreeRegressor(max_features='sqrt', random_state=0),\n ExtraTreesRegressor(n_estimators=10, random_state=0),\n KNeighborsRegressor(n_neighbors=15)\n]\nscore_iterative_imputer = pd.DataFrame()\nfor impute_estimator in estimators:\n estimator = make_pipeline(\n IterativeImputer(random_state=0, estimator=impute_estimator),\n br_estimator\n )\n score_iterative_imputer[impute_estimator.__class__.__name__] = \\\n cross_val_score(\n estimator, X_missing, y_missing, scoring='neg_mean_squared_error',\n cv=N_SPLITS\n )\n\nscores = pd.concat(\n [score_full_data, score_simple_imputer, score_iterative_imputer],\n keys=['Original', 'SimpleImputer', 'IterativeImputer'], axis=1\n)\n\n# plot boston results\nfig, ax = plt.subplots(figsize=(13, 6))\nmeans = -scores.mean()\nerrors = scores.std()\nmeans.plot.barh(xerr=errors, ax=ax)\nax.set_title('California Housing Regression with Different Imputation Methods')\nax.set_xlabel('MSE (smaller is better)')\nax.set_yticks(np.arange(means.shape[0]))\nax.set_yticklabels([\" w/ \".join(label) for label in means.index.get_values()])\nplt.tight_layout(pad=1)\nplt.show()"
+ line 29 (added):
"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport pandas as pd\n\n# To use this experimental feature, we need to explicitly ask for it:\nfrom sklearn.experimental import enable_iterative_imputer # noqa\nfrom sklearn.datasets import fetch_california_housing\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.impute import IterativeImputer\nfrom sklearn.linear_model import BayesianRidge\nfrom sklearn.tree import DecisionTreeRegressor\nfrom sklearn.ensemble import ExtraTreesRegressor\nfrom sklearn.neighbors import KNeighborsRegressor\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import cross_val_score\n\nN_SPLITS = 5\n\nrng = np.random.RandomState(0)\n\nX_full, y_full = fetch_california_housing(return_X_y=True)\n# ~2k samples is enough for the purpose of the example.\n# Remove the following two lines for a slower run with different error bars.\nX_full = X_full[::10]\ny_full = y_full[::10]\nn_samples, n_features = X_full.shape\n\n# Estimate the score on the entire dataset, with no missing values\nbr_estimator = BayesianRidge()\nscore_full_data = pd.DataFrame(\n cross_val_score(\n br_estimator, X_full, y_full, scoring='neg_mean_squared_error',\n cv=N_SPLITS\n ),\n columns=['Full Data']\n)\n\n# Add a single missing value to each row\nX_missing = X_full.copy()\ny_missing = y_full\nmissing_samples = np.arange(n_samples)\nmissing_features = rng.choice(n_features, n_samples, replace=True)\nX_missing[missing_samples, missing_features] = np.nan\n\n# Estimate the score after imputation (mean and median strategies)\nscore_simple_imputer = pd.DataFrame()\nfor strategy in ('mean', 'median'):\n estimator = make_pipeline(\n SimpleImputer(missing_values=np.nan, strategy=strategy),\n br_estimator\n )\n score_simple_imputer[strategy] = cross_val_score(\n estimator, X_missing, y_missing, scoring='neg_mean_squared_error',\n cv=N_SPLITS\n )\n\n# Estimate the score after iterative imputation of the missing values\n# with different estimators\nestimators = [\n BayesianRidge(),\n DecisionTreeRegressor(max_features='sqrt', random_state=0),\n ExtraTreesRegressor(n_estimators=10, random_state=0),\n KNeighborsRegressor(n_neighbors=15)\n]\nscore_iterative_imputer = pd.DataFrame()\nfor impute_estimator in estimators:\n estimator = make_pipeline(\n IterativeImputer(random_state=0, estimator=impute_estimator),\n br_estimator\n )\n score_iterative_imputer[impute_estimator.__class__.__name__] = \\\n cross_val_score(\n estimator, X_missing, y_missing, scoring='neg_mean_squared_error',\n cv=N_SPLITS\n )\n\nscores = pd.concat(\n [score_full_data, score_simple_imputer, score_iterative_imputer],\n keys=['Original', 'SimpleImputer', 'IterativeImputer'], axis=1\n)\n\n# plot boston results\nfig, ax = plt.subplots(figsize=(13, 6))\nmeans = -scores.mean()\nerrors = scores.std()\nmeans.plot.barh(xerr=errors, ax=ax)\nax.set_title('California Housing Regression with Different Imputation Methods')\nax.set_xlabel('MSE (smaller is better)')\nax.set_yticks(np.arange(means.shape[0]))\nax.set_yticklabels([\" w/ \".join(label) for label in means.index.tolist()])\nplt.tight_layout(pad=1)\nplt.show()"
 ]
 }
],

dev/_downloads/8191b75beb1a0a40ef8cc8560c5ace7a/plot_iterative_imputer_variants_comparison.py

Lines changed: 1 addition & 1 deletion
@@ -127,6 +127,6 @@
 ax.set_title('California Housing Regression with Different Imputation Methods')
 ax.set_xlabel('MSE (smaller is better)')
 ax.set_yticks(np.arange(means.shape[0]))
-ax.set_yticklabels([" w/ ".join(label) for label in means.index.get_values()])
+ax.set_yticklabels([" w/ ".join(label) for label in means.index.tolist()])
 plt.tight_layout(pad=1)
 plt.show()
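
In both the notebook and the script, the y-tick labels switch from the deprecated pandas Index.get_values() to Index.tolist(). A minimal, self-contained sketch (not from the commit; the toy scores are made up) of why iterating the index still yields the (imputer, estimator) tuples the labels need:

    # Hedged sketch: pd.concat(..., keys=..., axis=1) builds MultiIndex columns,
    # so the mean-score Series is indexed by (key, column) tuples; .tolist()
    # iterates those tuples just like the deprecated .get_values() did.
    import pandas as pd

    scores = pd.concat(
        [pd.DataFrame({"Full Data": [-0.5]}), pd.DataFrame({"mean": [-0.6]})],
        keys=["Original", "SimpleImputer"], axis=1,
    )
    means = -scores.mean()
    print([" w/ ".join(label) for label in means.index.tolist()])
    # ['Original w/ Full Data', 'SimpleImputer w/ mean']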

dev/_downloads/89dae10a7736fbdd8790add4e83db2d6/plot_mlp_alpha.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
- line 29 (removed):
"print(__doc__)\n\n\n# Author: Issam H. Laradji\n# License: BSD 3 clause\n\nimport numpy as np\nfrom matplotlib import pyplot as plt\nfrom matplotlib.colors import ListedColormap\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.datasets import make_moons, make_circles, make_classification\nfrom sklearn.neural_network import MLPClassifier\n\nh = .02 # step size in the mesh\n\nalphas = np.logspace(-5, 3, 5)\nnames = ['alpha ' + str(i) for i in alphas]\n\nclassifiers = []\nfor i in alphas:\n classifiers.append(MLPClassifier(solver='lbfgs', alpha=i, random_state=1,\n hidden_layer_sizes=[100, 100]))\n\nX, y = make_classification(n_features=2, n_redundant=0, n_informative=2,\n random_state=0, n_clusters_per_class=1)\nrng = np.random.RandomState(2)\nX += 2 * rng.uniform(size=X.shape)\nlinearly_separable = (X, y)\n\ndatasets = [make_moons(noise=0.3, random_state=0),\n make_circles(noise=0.2, factor=0.5, random_state=1),\n linearly_separable]\n\nfigure = plt.figure(figsize=(17, 9))\ni = 1\n# iterate over datasets\nfor X, y in datasets:\n # preprocess dataset, split into training and test part\n X = StandardScaler().fit_transform(X)\n X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)\n\n x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\n y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\n xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n np.arange(y_min, y_max, h))\n\n # just plot the dataset first\n cm = plt.cm.RdBu\n cm_bright = ListedColormap(['#FF0000', '#0000FF'])\n ax = plt.subplot(len(datasets), len(classifiers) + 1, i)\n # Plot the training points\n ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)\n # and testing points\n ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)\n ax.set_xlim(xx.min(), xx.max())\n ax.set_ylim(yy.min(), yy.max())\n ax.set_xticks(())\n ax.set_yticks(())\n i += 1\n\n # iterate over classifiers\n for name, clf in zip(names, classifiers):\n ax = plt.subplot(len(datasets), len(classifiers) + 1, i)\n clf.fit(X_train, y_train)\n score = clf.score(X_test, y_test)\n\n # Plot the decision boundary. For that, we will assign a color to each\n # point in the mesh [x_min, x_max]x[y_min, y_max].\n if hasattr(clf, \"decision_function\"):\n Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n else:\n Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]\n\n # Put the result into a color plot\n Z = Z.reshape(xx.shape)\n ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)\n\n # Plot also the training points\n ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,\n edgecolors='black', s=25)\n # and testing points\n ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,\n alpha=0.6, edgecolors='black', s=25)\n\n ax.set_xlim(xx.min(), xx.max())\n ax.set_ylim(yy.min(), yy.max())\n ax.set_xticks(())\n ax.set_yticks(())\n ax.set_title(name)\n ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),\n size=15, horizontalalignment='right')\n i += 1\n\nfigure.subplots_adjust(left=.02, right=.98)\nplt.show()"
+ line 29 (added):
"print(__doc__)\n\n\n# Author: Issam H. Laradji\n# License: BSD 3 clause\n\nimport numpy as np\nfrom matplotlib import pyplot as plt\nfrom matplotlib.colors import ListedColormap\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.datasets import make_moons, make_circles, make_classification\nfrom sklearn.neural_network import MLPClassifier\nfrom sklearn.pipeline import make_pipeline\n\nh = .02 # step size in the mesh\n\nalphas = np.logspace(-5, 3, 5)\nnames = ['alpha ' + str(i) for i in alphas]\n\nclassifiers = []\nfor i in alphas:\n classifiers.append(make_pipeline(\n StandardScaler(),\n MLPClassifier(solver='lbfgs', alpha=i,\n random_state=1, max_iter=2000,\n early_stopping=True,\n hidden_layer_sizes=[100, 100])\n ))\n\nX, y = make_classification(n_features=2, n_redundant=0, n_informative=2,\n random_state=0, n_clusters_per_class=1)\nrng = np.random.RandomState(2)\nX += 2 * rng.uniform(size=X.shape)\nlinearly_separable = (X, y)\n\ndatasets = [make_moons(noise=0.3, random_state=0),\n make_circles(noise=0.2, factor=0.5, random_state=1),\n linearly_separable]\n\nfigure = plt.figure(figsize=(17, 9))\ni = 1\n# iterate over datasets\nfor X, y in datasets:\n # preprocess dataset, split into training and test part\n X = StandardScaler().fit_transform(X)\n X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)\n\n x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\n y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\n xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n np.arange(y_min, y_max, h))\n\n # just plot the dataset first\n cm = plt.cm.RdBu\n cm_bright = ListedColormap(['#FF0000', '#0000FF'])\n ax = plt.subplot(len(datasets), len(classifiers) + 1, i)\n # Plot the training points\n ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)\n # and testing points\n ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)\n ax.set_xlim(xx.min(), xx.max())\n ax.set_ylim(yy.min(), yy.max())\n ax.set_xticks(())\n ax.set_yticks(())\n i += 1\n\n # iterate over classifiers\n for name, clf in zip(names, classifiers):\n ax = plt.subplot(len(datasets), len(classifiers) + 1, i)\n clf.fit(X_train, y_train)\n score = clf.score(X_test, y_test)\n\n # Plot the decision boundary. For that, we will assign a color to each\n # point in the mesh [x_min, x_max]x[y_min, y_max].\n if hasattr(clf, \"decision_function\"):\n Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n else:\n Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]\n\n # Put the result into a color plot\n Z = Z.reshape(xx.shape)\n ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)\n\n # Plot also the training points\n ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,\n edgecolors='black', s=25)\n # and testing points\n ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,\n alpha=0.6, edgecolors='black', s=25)\n\n ax.set_xlim(xx.min(), xx.max())\n ax.set_ylim(yy.min(), yy.max())\n ax.set_xticks(())\n ax.set_yticks(())\n ax.set_title(name)\n ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),\n size=15, horizontalalignment='right')\n i += 1\n\nfigure.subplots_adjust(left=.02, right=.98)\nplt.show()"
 ]
 }
],

dev/_downloads/a0f005cb0d364f1e3b20dfe10804fbe9/plot_mlp_alpha.py

Lines changed: 8 additions & 2 deletions
@@ -28,6 +28,7 @@
 from sklearn.preprocessing import StandardScaler
 from sklearn.datasets import make_moons, make_circles, make_classification
 from sklearn.neural_network import MLPClassifier
+from sklearn.pipeline import make_pipeline
 
 h = .02  # step size in the mesh
 
@@ -36,8 +37,13 @@
 
 classifiers = []
 for i in alphas:
-    classifiers.append(MLPClassifier(solver='lbfgs', alpha=i, random_state=1,
-                                     hidden_layer_sizes=[100, 100]))
+    classifiers.append(make_pipeline(
+        StandardScaler(),
+        MLPClassifier(solver='lbfgs', alpha=i,
+                      random_state=1, max_iter=2000,
+                      early_stopping=True,
+                      hidden_layer_sizes=[100, 100])
+    ))
 
 X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                            random_state=0, n_clusters_per_class=1)
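
Both versions of this example now build each classifier as a scaler-plus-MLP pipeline instead of a bare MLPClassifier. A minimal sketch of the new construction (not part of the commit; the alpha value is illustrative):

    # Hedged sketch: StandardScaler and MLPClassifier are chained so the scaling
    # is re-fitted inside clf.fit(); max_iter=2000 gives the lbfgs solver more
    # iterations to converge (early_stopping is also passed, though per the
    # scikit-learn docs it only takes effect for the 'sgd'/'adam' solvers).
    from sklearn.neural_network import MLPClassifier
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    clf = make_pipeline(
        StandardScaler(),
        MLPClassifier(solver='lbfgs', alpha=1e-3, random_state=1,
                      max_iter=2000, early_stopping=True,
                      hidden_layer_sizes=[100, 100]),
    )
    # clf.fit(X_train, y_train); clf.score(X_test, y_test)  # used per dataset in the example loop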

dev/_downloads/scikit-learn-docs.pdf (31.5 KB, binary file not shown)

dev/_images/iris.png (0 Bytes)
