
Commit ee0da2f

Pushing the docs to dev/ for branch: master, commit 932c60648d49dcda3784047d6df595421e8c32c8
1 parent f28426c commit ee0da2f

File tree: 1,208 files changed (+4,169 / -4,169 lines)


dev/_downloads/2ae02325ffd71a3699f433ae3baecd85/plot_cv_predict.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
- line 29 (removed):
"from sklearn import datasets\nfrom sklearn.model_selection import cross_val_predict\nfrom sklearn import linear_model\nimport matplotlib.pyplot as plt\n\nlr = linear_model.LinearRegression()\nX, y = datasets.load_boston(return_X_y=True)\n\n# cross_val_predict returns an array of the same size as `y` where each entry\n# is a prediction obtained by cross validation:\npredicted = cross_val_predict(lr, X, y, cv=10)\n\nfig, ax = plt.subplots()\nax.scatter(y, predicted, edgecolors=(0, 0, 0))\nax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)\nax.set_xlabel('Measured')\nax.set_ylabel('Predicted')\nplt.show()"
+ line 29 (added):
"from sklearn import datasets\nfrom sklearn.model_selection import cross_val_predict\nfrom sklearn import linear_model\nimport matplotlib.pyplot as plt\n\nlr = linear_model.LinearRegression()\nX, y = datasets.load_diabetes(return_X_y=True)\n\n# cross_val_predict returns an array of the same size as `y` where each entry\n# is a prediction obtained by cross validation:\npredicted = cross_val_predict(lr, X, y, cv=10)\n\nfig, ax = plt.subplots()\nax.scatter(y, predicted, edgecolors=(0, 0, 0))\nax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)\nax.set_xlabel('Measured')\nax.set_ylabel('Predicted')\nplt.show()"
 ]
 }
],

dev/_downloads/336608b7fc391cd88b2587817f48ffdd/plot_cv_predict.py

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@
 import matplotlib.pyplot as plt
 
 lr = linear_model.LinearRegression()
-X, y = datasets.load_boston(return_X_y=True)
+X, y = datasets.load_diabetes(return_X_y=True)
 
 # cross_val_predict returns an array of the same size as `y` where each entry
 # is a prediction obtained by cross validation:
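
Both the notebook and the script version of this example swap the Boston housing data for the diabetes regression data. A minimal sketch of the updated loading pattern (not part of the commit; shown only to illustrate the change):

    # Hedged sketch: load_diabetes exposes the same return_X_y interface as the
    # load_boston call it replaces, so the rest of the example is unchanged.
    from sklearn import datasets, linear_model
    from sklearn.model_selection import cross_val_predict

    X, y = datasets.load_diabetes(return_X_y=True)  # was: datasets.load_boston(return_X_y=True)
    lr = linear_model.LinearRegression()
    predicted = cross_val_predict(lr, X, y, cv=10)  # one cross-validated prediction per entry of y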

dev/_downloads/5d93da33b794785877d0c01122dd0716/plot_iterative_imputer_variants_comparison.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
- line 29 (removed):
"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport pandas as pd\n\n# To use this experimental feature, we need to explicitly ask for it:\nfrom sklearn.experimental import enable_iterative_imputer # noqa\nfrom sklearn.datasets import fetch_california_housing\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.impute import IterativeImputer\nfrom sklearn.linear_model import BayesianRidge\nfrom sklearn.tree import DecisionTreeRegressor\nfrom sklearn.ensemble import ExtraTreesRegressor\nfrom sklearn.neighbors import KNeighborsRegressor\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import cross_val_score\n\nN_SPLITS = 5\n\nrng = np.random.RandomState(0)\n\nX_full, y_full = fetch_california_housing(return_X_y=True)\n# ~2k samples is enough for the purpose of the example.\n# Remove the following two lines for a slower run with different error bars.\nX_full = X_full[::10]\ny_full = y_full[::10]\nn_samples, n_features = X_full.shape\n\n# Estimate the score on the entire dataset, with no missing values\nbr_estimator = BayesianRidge()\nscore_full_data = pd.DataFrame(\n cross_val_score(\n br_estimator, X_full, y_full, scoring='neg_mean_squared_error',\n cv=N_SPLITS\n ),\n columns=['Full Data']\n)\n\n# Add a single missing value to each row\nX_missing = X_full.copy()\ny_missing = y_full\nmissing_samples = np.arange(n_samples)\nmissing_features = rng.choice(n_features, n_samples, replace=True)\nX_missing[missing_samples, missing_features] = np.nan\n\n# Estimate the score after imputation (mean and median strategies)\nscore_simple_imputer = pd.DataFrame()\nfor strategy in ('mean', 'median'):\n estimator = make_pipeline(\n SimpleImputer(missing_values=np.nan, strategy=strategy),\n br_estimator\n )\n score_simple_imputer[strategy] = cross_val_score(\n estimator, X_missing, y_missing, scoring='neg_mean_squared_error',\n cv=N_SPLITS\n )\n\n# Estimate the score after iterative imputation of the missing values\n# with different estimators\nestimators = [\n BayesianRidge(),\n DecisionTreeRegressor(max_features='sqrt', random_state=0),\n ExtraTreesRegressor(n_estimators=10, random_state=0),\n KNeighborsRegressor(n_neighbors=15)\n]\nscore_iterative_imputer = pd.DataFrame()\nfor impute_estimator in estimators:\n estimator = make_pipeline(\n IterativeImputer(random_state=0, estimator=impute_estimator),\n br_estimator\n )\n score_iterative_imputer[impute_estimator.__class__.__name__] = \\\n cross_val_score(\n estimator, X_missing, y_missing, scoring='neg_mean_squared_error',\n cv=N_SPLITS\n )\n\nscores = pd.concat(\n [score_full_data, score_simple_imputer, score_iterative_imputer],\n keys=['Original', 'SimpleImputer', 'IterativeImputer'], axis=1\n)\n\n# plot boston results\nfig, ax = plt.subplots(figsize=(13, 6))\nmeans = -scores.mean()\nerrors = scores.std()\nmeans.plot.barh(xerr=errors, ax=ax)\nax.set_title('California Housing Regression with Different Imputation Methods')\nax.set_xlabel('MSE (smaller is better)')\nax.set_yticks(np.arange(means.shape[0]))\nax.set_yticklabels([\" w/ \".join(label) for label in means.index.get_values()])\nplt.tight_layout(pad=1)\nplt.show()"
+ line 29 (added):
"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport pandas as pd\n\n# To use this experimental feature, we need to explicitly ask for it:\nfrom sklearn.experimental import enable_iterative_imputer # noqa\nfrom sklearn.datasets import fetch_california_housing\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.impute import IterativeImputer\nfrom sklearn.linear_model import BayesianRidge\nfrom sklearn.tree import DecisionTreeRegressor\nfrom sklearn.ensemble import ExtraTreesRegressor\nfrom sklearn.neighbors import KNeighborsRegressor\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import cross_val_score\n\nN_SPLITS = 5\n\nrng = np.random.RandomState(0)\n\nX_full, y_full = fetch_california_housing(return_X_y=True)\n# ~2k samples is enough for the purpose of the example.\n# Remove the following two lines for a slower run with different error bars.\nX_full = X_full[::10]\ny_full = y_full[::10]\nn_samples, n_features = X_full.shape\n\n# Estimate the score on the entire dataset, with no missing values\nbr_estimator = BayesianRidge()\nscore_full_data = pd.DataFrame(\n cross_val_score(\n br_estimator, X_full, y_full, scoring='neg_mean_squared_error',\n cv=N_SPLITS\n ),\n columns=['Full Data']\n)\n\n# Add a single missing value to each row\nX_missing = X_full.copy()\ny_missing = y_full\nmissing_samples = np.arange(n_samples)\nmissing_features = rng.choice(n_features, n_samples, replace=True)\nX_missing[missing_samples, missing_features] = np.nan\n\n# Estimate the score after imputation (mean and median strategies)\nscore_simple_imputer = pd.DataFrame()\nfor strategy in ('mean', 'median'):\n estimator = make_pipeline(\n SimpleImputer(missing_values=np.nan, strategy=strategy),\n br_estimator\n )\n score_simple_imputer[strategy] = cross_val_score(\n estimator, X_missing, y_missing, scoring='neg_mean_squared_error',\n cv=N_SPLITS\n )\n\n# Estimate the score after iterative imputation of the missing values\n# with different estimators\nestimators = [\n BayesianRidge(),\n DecisionTreeRegressor(max_features='sqrt', random_state=0),\n ExtraTreesRegressor(n_estimators=10, random_state=0),\n KNeighborsRegressor(n_neighbors=15)\n]\nscore_iterative_imputer = pd.DataFrame()\nfor impute_estimator in estimators:\n estimator = make_pipeline(\n IterativeImputer(random_state=0, estimator=impute_estimator),\n br_estimator\n )\n score_iterative_imputer[impute_estimator.__class__.__name__] = \\\n cross_val_score(\n estimator, X_missing, y_missing, scoring='neg_mean_squared_error',\n cv=N_SPLITS\n )\n\nscores = pd.concat(\n [score_full_data, score_simple_imputer, score_iterative_imputer],\n keys=['Original', 'SimpleImputer', 'IterativeImputer'], axis=1\n)\n\n# plot boston results\nfig, ax = plt.subplots(figsize=(13, 6))\nmeans = -scores.mean()\nerrors = scores.std()\nmeans.plot.barh(xerr=errors, ax=ax)\nax.set_title('California Housing Regression with Different Imputation Methods')\nax.set_xlabel('MSE (smaller is better)')\nax.set_yticks(np.arange(means.shape[0]))\nax.set_yticklabels([\" w/ \".join(label) for label in means.index.tolist()])\nplt.tight_layout(pad=1)\nplt.show()"
 ]
 }
],

dev/_downloads/8191b75beb1a0a40ef8cc8560c5ace7a/plot_iterative_imputer_variants_comparison.py

Lines changed: 1 addition & 1 deletion
@@ -127,6 +127,6 @@
 ax.set_title('California Housing Regression with Different Imputation Methods')
 ax.set_xlabel('MSE (smaller is better)')
 ax.set_yticks(np.arange(means.shape[0]))
-ax.set_yticklabels([" w/ ".join(label) for label in means.index.get_values()])
+ax.set_yticklabels([" w/ ".join(label) for label in means.index.tolist()])
 plt.tight_layout(pad=1)
 plt.show()
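
In both the notebook and the script, the y-tick labels switch from the deprecated pandas Index.get_values() to Index.tolist(). A minimal, self-contained sketch (not from the commit; the toy scores are made up) of why iterating the index still yields the (imputer, estimator) tuples the labels need:

    # Hedged sketch: pd.concat(..., keys=..., axis=1) builds MultiIndex columns,
    # so the mean-score Series is indexed by (key, column) tuples; .tolist()
    # iterates those tuples just like the deprecated .get_values() did.
    import pandas as pd

    scores = pd.concat(
        [pd.DataFrame({"Full Data": [-0.5]}), pd.DataFrame({"mean": [-0.6]})],
        keys=["Original", "SimpleImputer"], axis=1,
    )
    means = -scores.mean()
    print([" w/ ".join(label) for label in means.index.tolist()])
    # ['Original w/ Full Data', 'SimpleImputer w/ mean']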

dev/_downloads/89dae10a7736fbdd8790add4e83db2d6/plot_mlp_alpha.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
- line 29 (removed):
"print(__doc__)\n\n\n# Author: Issam H. Laradji\n# License: BSD 3 clause\n\nimport numpy as np\nfrom matplotlib import pyplot as plt\nfrom matplotlib.colors import ListedColormap\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.datasets import make_moons, make_circles, make_classification\nfrom sklearn.neural_network import MLPClassifier\n\nh = .02 # step size in the mesh\n\nalphas = np.logspace(-5, 3, 5)\nnames = ['alpha ' + str(i) for i in alphas]\n\nclassifiers = []\nfor i in alphas:\n classifiers.append(MLPClassifier(solver='lbfgs', alpha=i, random_state=1,\n hidden_layer_sizes=[100, 100]))\n\nX, y = make_classification(n_features=2, n_redundant=0, n_informative=2,\n random_state=0, n_clusters_per_class=1)\nrng = np.random.RandomState(2)\nX += 2 * rng.uniform(size=X.shape)\nlinearly_separable = (X, y)\n\ndatasets = [make_moons(noise=0.3, random_state=0),\n make_circles(noise=0.2, factor=0.5, random_state=1),\n linearly_separable]\n\nfigure = plt.figure(figsize=(17, 9))\ni = 1\n# iterate over datasets\nfor X, y in datasets:\n # preprocess dataset, split into training and test part\n X = StandardScaler().fit_transform(X)\n X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)\n\n x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\n y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\n xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n np.arange(y_min, y_max, h))\n\n # just plot the dataset first\n cm = plt.cm.RdBu\n cm_bright = ListedColormap(['#FF0000', '#0000FF'])\n ax = plt.subplot(len(datasets), len(classifiers) + 1, i)\n # Plot the training points\n ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)\n # and testing points\n ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)\n ax.set_xlim(xx.min(), xx.max())\n ax.set_ylim(yy.min(), yy.max())\n ax.set_xticks(())\n ax.set_yticks(())\n i += 1\n\n # iterate over classifiers\n for name, clf in zip(names, classifiers):\n ax = plt.subplot(len(datasets), len(classifiers) + 1, i)\n clf.fit(X_train, y_train)\n score = clf.score(X_test, y_test)\n\n # Plot the decision boundary. For that, we will assign a color to each\n # point in the mesh [x_min, x_max]x[y_min, y_max].\n if hasattr(clf, \"decision_function\"):\n Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n else:\n Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]\n\n # Put the result into a color plot\n Z = Z.reshape(xx.shape)\n ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)\n\n # Plot also the training points\n ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,\n edgecolors='black', s=25)\n # and testing points\n ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,\n alpha=0.6, edgecolors='black', s=25)\n\n ax.set_xlim(xx.min(), xx.max())\n ax.set_ylim(yy.min(), yy.max())\n ax.set_xticks(())\n ax.set_yticks(())\n ax.set_title(name)\n ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),\n size=15, horizontalalignment='right')\n i += 1\n\nfigure.subplots_adjust(left=.02, right=.98)\nplt.show()"
+ line 29 (added):
"print(__doc__)\n\n\n# Author: Issam H. Laradji\n# License: BSD 3 clause\n\nimport numpy as np\nfrom matplotlib import pyplot as plt\nfrom matplotlib.colors import ListedColormap\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.datasets import make_moons, make_circles, make_classification\nfrom sklearn.neural_network import MLPClassifier\nfrom sklearn.pipeline import make_pipeline\n\nh = .02 # step size in the mesh\n\nalphas = np.logspace(-5, 3, 5)\nnames = ['alpha ' + str(i) for i in alphas]\n\nclassifiers = []\nfor i in alphas:\n classifiers.append(make_pipeline(\n StandardScaler(),\n MLPClassifier(solver='lbfgs', alpha=i,\n random_state=1, max_iter=2000,\n early_stopping=True,\n hidden_layer_sizes=[100, 100])\n ))\n\nX, y = make_classification(n_features=2, n_redundant=0, n_informative=2,\n random_state=0, n_clusters_per_class=1)\nrng = np.random.RandomState(2)\nX += 2 * rng.uniform(size=X.shape)\nlinearly_separable = (X, y)\n\ndatasets = [make_moons(noise=0.3, random_state=0),\n make_circles(noise=0.2, factor=0.5, random_state=1),\n linearly_separable]\n\nfigure = plt.figure(figsize=(17, 9))\ni = 1\n# iterate over datasets\nfor X, y in datasets:\n # preprocess dataset, split into training and test part\n X = StandardScaler().fit_transform(X)\n X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)\n\n x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\n y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\n xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n np.arange(y_min, y_max, h))\n\n # just plot the dataset first\n cm = plt.cm.RdBu\n cm_bright = ListedColormap(['#FF0000', '#0000FF'])\n ax = plt.subplot(len(datasets), len(classifiers) + 1, i)\n # Plot the training points\n ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)\n # and testing points\n ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)\n ax.set_xlim(xx.min(), xx.max())\n ax.set_ylim(yy.min(), yy.max())\n ax.set_xticks(())\n ax.set_yticks(())\n i += 1\n\n # iterate over classifiers\n for name, clf in zip(names, classifiers):\n ax = plt.subplot(len(datasets), len(classifiers) + 1, i)\n clf.fit(X_train, y_train)\n score = clf.score(X_test, y_test)\n\n # Plot the decision boundary. For that, we will assign a color to each\n # point in the mesh [x_min, x_max]x[y_min, y_max].\n if hasattr(clf, \"decision_function\"):\n Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n else:\n Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]\n\n # Put the result into a color plot\n Z = Z.reshape(xx.shape)\n ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)\n\n # Plot also the training points\n ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,\n edgecolors='black', s=25)\n # and testing points\n ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,\n alpha=0.6, edgecolors='black', s=25)\n\n ax.set_xlim(xx.min(), xx.max())\n ax.set_ylim(yy.min(), yy.max())\n ax.set_xticks(())\n ax.set_yticks(())\n ax.set_title(name)\n ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),\n size=15, horizontalalignment='right')\n i += 1\n\nfigure.subplots_adjust(left=.02, right=.98)\nplt.show()"
 ]
 }
],

dev/_downloads/a0f005cb0d364f1e3b20dfe10804fbe9/plot_mlp_alpha.py

Lines changed: 8 additions & 2 deletions
@@ -28,6 +28,7 @@
 from sklearn.preprocessing import StandardScaler
 from sklearn.datasets import make_moons, make_circles, make_classification
 from sklearn.neural_network import MLPClassifier
+from sklearn.pipeline import make_pipeline
 
 h = .02  # step size in the mesh
 
@@ -36,8 +37,13 @@
 
 classifiers = []
 for i in alphas:
-    classifiers.append(MLPClassifier(solver='lbfgs', alpha=i, random_state=1,
-                                     hidden_layer_sizes=[100, 100]))
+    classifiers.append(make_pipeline(
+        StandardScaler(),
+        MLPClassifier(solver='lbfgs', alpha=i,
+                      random_state=1, max_iter=2000,
+                      early_stopping=True,
+                      hidden_layer_sizes=[100, 100])
+    ))
 
 X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                            random_state=0, n_clusters_per_class=1)
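
Both versions of this example now build each classifier as a scaler-plus-MLP pipeline instead of a bare MLPClassifier. A minimal sketch of the new construction (not part of the commit; the alpha value is illustrative):

    # Hedged sketch: StandardScaler and MLPClassifier are chained so the scaling
    # is re-fitted inside clf.fit(); max_iter=2000 gives the lbfgs solver more
    # iterations to converge (early_stopping is also passed, though per the
    # scikit-learn docs it only takes effect for the 'sgd'/'adam' solvers).
    from sklearn.neural_network import MLPClassifier
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    clf = make_pipeline(
        StandardScaler(),
        MLPClassifier(solver='lbfgs', alpha=1e-3, random_state=1,
                      max_iter=2000, early_stopping=True,
                      hidden_layer_sizes=[100, 100]),
    )
    # clf.fit(X_train, y_train); clf.score(X_test, y_test)  # used per dataset in the example loop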

dev/_downloads/scikit-learn-docs.pdf (31.5 KB, binary file not shown)

dev/_images/iris.png (0 Bytes)
