Commit c82e28d

Pushing the docs to dev/ for branch: main, commit edc2730c0aa64561ff284b344c77e31a3fb0b7b1
1 parent 642d62e commit c82e28d

1,235 files changed: +4419 −4419 lines


dev/_downloads/389fb4950ddfe12a741e6ac5b7d79193/plot_sgd_early_stopping.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"# Authors: Tom Dupre la Tour\n#\n# License: BSD 3 clause\n\nimport time\nimport sys\n\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import linear_model\nfrom sklearn.datasets import fetch_openml\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.utils._testing import ignore_warnings\nfrom sklearn.exceptions import ConvergenceWarning\nfrom sklearn.utils import shuffle\n\n\ndef load_mnist(n_samples=None, class_0=\"0\", class_1=\"8\"):\n \"\"\"Load MNIST, select two classes, shuffle and return only n_samples.\"\"\"\n # Load data from http://openml.org/d/554\n mnist = fetch_openml(\"mnist_784\", version=1)\n\n # take only two classes for binary classification\n mask = np.logical_or(mnist.target == class_0, mnist.target == class_1)\n\n X, y = shuffle(mnist.data[mask], mnist.target[mask], random_state=42)\n if n_samples is not None:\n X, y = X[:n_samples], y[:n_samples]\n return X, y\n\n\n@ignore_warnings(category=ConvergenceWarning)\ndef fit_and_score(estimator, max_iter, X_train, X_test, y_train, y_test):\n \"\"\"Fit the estimator on the train set and score it on both sets\"\"\"\n estimator.set_params(max_iter=max_iter)\n estimator.set_params(random_state=0)\n\n start = time.time()\n estimator.fit(X_train, y_train)\n\n fit_time = time.time() - start\n n_iter = estimator.n_iter_\n train_score = estimator.score(X_train, y_train)\n test_score = estimator.score(X_test, y_test)\n\n return fit_time, n_iter, train_score, test_score\n\n\n# Define the estimators to compare\nestimator_dict = {\n \"No stopping criterion\": linear_model.SGDClassifier(n_iter_no_change=3),\n \"Training loss\": linear_model.SGDClassifier(\n early_stopping=False, n_iter_no_change=3, tol=0.1\n ),\n \"Validation score\": linear_model.SGDClassifier(\n early_stopping=True, n_iter_no_change=3, tol=0.0001, validation_fraction=0.2\n ),\n}\n\n# Load the dataset\nX, y = load_mnist(n_samples=10000)\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)\n\nresults = []\nfor estimator_name, estimator in estimator_dict.items():\n print(estimator_name + \": \", end=\"\")\n for max_iter in range(1, 50):\n print(\".\", end=\"\")\n sys.stdout.flush()\n\n fit_time, n_iter, train_score, test_score = fit_and_score(\n estimator, max_iter, X_train, X_test, y_train, y_test\n )\n\n results.append(\n (estimator_name, max_iter, fit_time, n_iter, train_score, test_score)\n )\n print(\"\")\n\n# Transform the results in a pandas dataframe for easy plotting\ncolumns = [\n \"Stopping criterion\",\n \"max_iter\",\n \"Fit time (sec)\",\n \"n_iter_\",\n \"Train score\",\n \"Test score\",\n]\nresults_df = pd.DataFrame(results, columns=columns)\n\n# Define what to plot (x_axis, y_axis)\nlines = \"Stopping criterion\"\nplot_list = [\n (\"max_iter\", \"Train score\"),\n (\"max_iter\", \"Test score\"),\n (\"max_iter\", \"n_iter_\"),\n (\"max_iter\", \"Fit time (sec)\"),\n]\n\nnrows = 2\nncols = int(np.ceil(len(plot_list) / 2.0))\nfig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(6 * ncols, 4 * nrows))\naxes[0, 0].get_shared_y_axes().join(axes[0, 0], axes[0, 1])\n\nfor ax, (x_axis, y_axis) in zip(axes.ravel(), plot_list):\n for criterion, group_df in results_df.groupby(lines):\n group_df.plot(x=x_axis, y=y_axis, label=criterion, ax=ax)\n ax.set_title(y_axis)\n ax.legend(title=lines)\n\nfig.tight_layout()\nplt.show()"
+"# Authors: Tom Dupre la Tour\n#\n# License: BSD 3 clause\n\nimport time\nimport sys\n\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import linear_model\nfrom sklearn.datasets import fetch_openml\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.utils._testing import ignore_warnings\nfrom sklearn.exceptions import ConvergenceWarning\nfrom sklearn.utils import shuffle\n\n\ndef load_mnist(n_samples=None, class_0=\"0\", class_1=\"8\"):\n \"\"\"Load MNIST, select two classes, shuffle and return only n_samples.\"\"\"\n # Load data from http://openml.org/d/554\n mnist = fetch_openml(\"mnist_784\", version=1, as_frame=False)\n\n # take only two classes for binary classification\n mask = np.logical_or(mnist.target == class_0, mnist.target == class_1)\n\n X, y = shuffle(mnist.data[mask], mnist.target[mask], random_state=42)\n if n_samples is not None:\n X, y = X[:n_samples], y[:n_samples]\n return X, y\n\n\n@ignore_warnings(category=ConvergenceWarning)\ndef fit_and_score(estimator, max_iter, X_train, X_test, y_train, y_test):\n \"\"\"Fit the estimator on the train set and score it on both sets\"\"\"\n estimator.set_params(max_iter=max_iter)\n estimator.set_params(random_state=0)\n\n start = time.time()\n estimator.fit(X_train, y_train)\n\n fit_time = time.time() - start\n n_iter = estimator.n_iter_\n train_score = estimator.score(X_train, y_train)\n test_score = estimator.score(X_test, y_test)\n\n return fit_time, n_iter, train_score, test_score\n\n\n# Define the estimators to compare\nestimator_dict = {\n \"No stopping criterion\": linear_model.SGDClassifier(n_iter_no_change=3),\n \"Training loss\": linear_model.SGDClassifier(\n early_stopping=False, n_iter_no_change=3, tol=0.1\n ),\n \"Validation score\": linear_model.SGDClassifier(\n early_stopping=True, n_iter_no_change=3, tol=0.0001, validation_fraction=0.2\n ),\n}\n\n# Load the dataset\nX, y = load_mnist(n_samples=10000)\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)\n\nresults = []\nfor estimator_name, estimator in estimator_dict.items():\n print(estimator_name + \": \", end=\"\")\n for max_iter in range(1, 50):\n print(\".\", end=\"\")\n sys.stdout.flush()\n\n fit_time, n_iter, train_score, test_score = fit_and_score(\n estimator, max_iter, X_train, X_test, y_train, y_test\n )\n\n results.append(\n (estimator_name, max_iter, fit_time, n_iter, train_score, test_score)\n )\n print(\"\")\n\n# Transform the results in a pandas dataframe for easy plotting\ncolumns = [\n \"Stopping criterion\",\n \"max_iter\",\n \"Fit time (sec)\",\n \"n_iter_\",\n \"Train score\",\n \"Test score\",\n]\nresults_df = pd.DataFrame(results, columns=columns)\n\n# Define what to plot (x_axis, y_axis)\nlines = \"Stopping criterion\"\nplot_list = [\n (\"max_iter\", \"Train score\"),\n (\"max_iter\", \"Test score\"),\n (\"max_iter\", \"n_iter_\"),\n (\"max_iter\", \"Fit time (sec)\"),\n]\n\nnrows = 2\nncols = int(np.ceil(len(plot_list) / 2.0))\nfig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(6 * ncols, 4 * nrows))\naxes[0, 0].get_shared_y_axes().join(axes[0, 0], axes[0, 1])\n\nfor ax, (x_axis, y_axis) in zip(axes.ravel(), plot_list):\n for criterion, group_df in results_df.groupby(lines):\n group_df.plot(x=x_axis, y=y_axis, label=criterion, ax=ax)\n ax.set_title(y_axis)\n ax.legend(title=lines)\n\nfig.tight_layout()\nplt.show()"
 ]
 }
 ],
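The only substantive change in this cell (and in every file below) is the new `as_frame=False` argument. As a hedged sketch of what that flag controls, assuming a scikit-learn release where `as_frame` defaults to `'auto'` (which returns a pandas DataFrame for an all-numeric dataset such as `mnist_784`); the download is large, so treat this as illustrative:

```python
from sklearn.datasets import fetch_openml

# With as_frame=False the returned Bunch holds plain NumPy arrays, which is
# what the boolean masking and positional slicing in this example expect.
mnist = fetch_openml("mnist_784", version=1, as_frame=False)
print(type(mnist.data))    # expected: <class 'numpy.ndarray'>, shape (70000, 784)
print(type(mnist.target))  # expected: <class 'numpy.ndarray'> of string labels
```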

dev/_downloads/6522aa1dd16bb328d88cb09cbc08eded/plot_mnist_filters.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"import warnings\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import fetch_openml\nfrom sklearn.exceptions import ConvergenceWarning\nfrom sklearn.neural_network import MLPClassifier\nfrom sklearn.model_selection import train_test_split\n\n# Load data from https://www.openml.org/d/554\nX, y = fetch_openml(\"mnist_784\", version=1, return_X_y=True)\nX = X / 255.0\n\n# Split data into train partition and test partition\nX_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.7)\n\nmlp = MLPClassifier(\n hidden_layer_sizes=(40,),\n max_iter=8,\n alpha=1e-4,\n solver=\"sgd\",\n verbose=10,\n random_state=1,\n learning_rate_init=0.2,\n)\n\n# this example won't converge because of resource usage constraints on\n# our Continuous Integration infrastructure, so we catch the warning and\n# ignore it here\nwith warnings.catch_warnings():\n warnings.filterwarnings(\"ignore\", category=ConvergenceWarning, module=\"sklearn\")\n mlp.fit(X_train, y_train)\n\nprint(\"Training set score: %f\" % mlp.score(X_train, y_train))\nprint(\"Test set score: %f\" % mlp.score(X_test, y_test))\n\nfig, axes = plt.subplots(4, 4)\n# use global min / max to ensure all weights are shown on the same scale\nvmin, vmax = mlp.coefs_[0].min(), mlp.coefs_[0].max()\nfor coef, ax in zip(mlp.coefs_[0].T, axes.ravel()):\n ax.matshow(coef.reshape(28, 28), cmap=plt.cm.gray, vmin=0.5 * vmin, vmax=0.5 * vmax)\n ax.set_xticks(())\n ax.set_yticks(())\n\nplt.show()"
+"import warnings\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import fetch_openml\nfrom sklearn.exceptions import ConvergenceWarning\nfrom sklearn.neural_network import MLPClassifier\nfrom sklearn.model_selection import train_test_split\n\n# Load data from https://www.openml.org/d/554\nX, y = fetch_openml(\"mnist_784\", version=1, return_X_y=True, as_frame=False)\nX = X / 255.0\n\n# Split data into train partition and test partition\nX_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.7)\n\nmlp = MLPClassifier(\n hidden_layer_sizes=(40,),\n max_iter=8,\n alpha=1e-4,\n solver=\"sgd\",\n verbose=10,\n random_state=1,\n learning_rate_init=0.2,\n)\n\n# this example won't converge because of resource usage constraints on\n# our Continuous Integration infrastructure, so we catch the warning and\n# ignore it here\nwith warnings.catch_warnings():\n warnings.filterwarnings(\"ignore\", category=ConvergenceWarning, module=\"sklearn\")\n mlp.fit(X_train, y_train)\n\nprint(\"Training set score: %f\" % mlp.score(X_train, y_train))\nprint(\"Test set score: %f\" % mlp.score(X_test, y_test))\n\nfig, axes = plt.subplots(4, 4)\n# use global min / max to ensure all weights are shown on the same scale\nvmin, vmax = mlp.coefs_[0].min(), mlp.coefs_[0].max()\nfor coef, ax in zip(mlp.coefs_[0].T, axes.ravel()):\n ax.matshow(coef.reshape(28, 28), cmap=plt.cm.gray, vmin=0.5 * vmin, vmax=0.5 * vmax)\n ax.set_xticks(())\n ax.set_yticks(())\n\nplt.show()"
 ]
 }
 ],

dev/_downloads/7534058b2748ca58f7594203b7723a0e/plot_mnist_filters.py

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@
 from sklearn.model_selection import train_test_split

 # Load data from https://www.openml.org/d/554
-X, y = fetch_openml("mnist_784", version=1, return_X_y=True)
+X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)
 X = X / 255.0

 # Split data into train partition and test partition
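For context, `return_X_y=True` unpacks the result into `(X, y)` instead of a Bunch, and adding `as_frame=False` keeps both as NumPy arrays, so the `X / 255.0` rescaling stays a plain ndarray operation. A hedged sketch of the updated call (the printed shapes are what one would expect for `mnist_784`, not output captured from this build):

```python
from sklearn.datasets import fetch_openml

# return_X_y=True returns (data, target) directly instead of a Bunch;
# as_frame=False keeps both as NumPy arrays rather than pandas objects.
X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)
X = X / 255.0  # rescale pixel values from 0..255 to 0..1
print(X.shape, y.shape)  # expected: (70000, 784) (70000,)
```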

dev/_downloads/b4d6bfda6769cc5cc1cf25427dec34d6/plot_sgd_early_stopping.py

Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@
 def load_mnist(n_samples=None, class_0="0", class_1="8"):
     """Load MNIST, select two classes, shuffle and return only n_samples."""
     # Load data from http://openml.org/d/554
-    mnist = fetch_openml("mnist_784", version=1)
+    mnist = fetch_openml("mnist_784", version=1, as_frame=False)

     # take only two classes for binary classification
     mask = np.logical_or(mnist.target == class_0, mnist.target == class_1)
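`load_mnist` depends on NumPy-style boolean masking, which `as_frame=False` guarantees by making `mnist.data` and `mnist.target` ndarrays. A toy stand-in (hypothetical five-row arrays, not MNIST) of the same selection logic:

```python
import numpy as np

# Hypothetical stand-ins for mnist.data / mnist.target with as_frame=False.
data = np.arange(10).reshape(5, 2)
target = np.array(["0", "8", "3", "8", "0"])

# Same selection as load_mnist: keep only the two classes of interest.
mask = np.logical_or(target == "0", target == "8")
print(data[mask])    # the four rows labelled "0" or "8"
print(target[mask])  # ['0' '8' '8' '0']
```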

dev/_downloads/scikit-learn-docs.zip

-12.1 KB
Binary file not shown.
