
Commit 8fc589c

Pushing the docs to dev/ for branch: master, commit 0a7adef0058ef28c7a146734f38161f7c7c581af
1 parent 9db30c1 commit 8fc589c

File tree: 1,217 files changed (+3708, −3713 lines)


dev/_downloads/05af92ebbf361ad6b838e9d4e34901da/plot_sgd_penalties.ipynb

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-  "\n==============\nSGD: Penalties\n==============\n\nContours of where the penalty is equal to 1\nfor the three penalties L1, L2 and elastic-net.\n\nAll of the above are supported by\n:class:`sklearn.linear_model.stochastic_gradient`.\n"
+  "\n==============\nSGD: Penalties\n==============\n\nContours of where the penalty is equal to 1\nfor the three penalties L1, L2 and elastic-net.\n\nAll of the above are supported by :class:`~sklearn.linear_model.SGDClassifier`\nand :class:`~sklearn.linear_model.SGDRegressor`.\n"
   ]
  },
  {
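
For context (not part of this commit): the three penalties named in the example docstring map onto the `penalty` parameter of the public estimators the text now links to. A minimal sketch, assuming a recent scikit-learn:

    from sklearn.linear_model import SGDClassifier

    # Each penalty contour in the example corresponds to one configuration.
    clf_l2 = SGDClassifier(penalty='l2')                        # default, ridge-style shrinkage
    clf_l1 = SGDClassifier(penalty='l1')                        # encourages sparse coef_
    clf_en = SGDClassifier(penalty='elasticnet', l1_ratio=0.5)  # convex mix of L1 and L2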

dev/_downloads/09342387020f5abd8190ad409affdd7b/plot_model_complexity_influence.py

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@
 from sklearn.metrics import mean_squared_error
 from sklearn.svm import NuSVR
 from sklearn.ensemble import GradientBoostingRegressor
-from sklearn.linear_model.stochastic_gradient import SGDClassifier
+from sklearn.linear_model import SGDClassifier
 from sklearn.metrics import hamming_loss

 # #############################################################################
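
Aside (not from the commit itself): only the import line changes here; the class is the same, imported from scikit-learn's public namespace instead of the private submodule. A minimal before/after sketch:

    # Before: private path, tied to an internal module layout that can change.
    # from sklearn.linear_model.stochastic_gradient import SGDClassifier
    # After: public, stable path used by the updated example.
    from sklearn.linear_model import SGDClassifier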

dev/_downloads/d949605a0ca662f531f8f556f98b4a54/plot_sgd_penalties.py

Lines changed: 2 additions & 2 deletions
@@ -6,8 +6,8 @@
 Contours of where the penalty is equal to 1
 for the three penalties L1, L2 and elastic-net.

-All of the above are supported by
-:class:`sklearn.linear_model.stochastic_gradient`.
+All of the above are supported by :class:`~sklearn.linear_model.SGDClassifier`
+and :class:`~sklearn.linear_model.SGDRegressor`.

 """
 print(__doc__)

dev/_downloads/e70e2de8d5690c98b4cb502019310eca/plot_model_complexity_influence.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"print(__doc__)\n\n# Author: Eustache Diemert <[email protected]>\n# License: BSD 3 clause\n\nimport time\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom mpl_toolkits.axes_grid1.parasite_axes import host_subplot\nfrom mpl_toolkits.axisartist.axislines import Axes\nfrom scipy.sparse.csr import csr_matrix\n\nfrom sklearn import datasets\nfrom sklearn.utils import shuffle\nfrom sklearn.metrics import mean_squared_error\nfrom sklearn.svm import NuSVR\nfrom sklearn.ensemble import GradientBoostingRegressor\nfrom sklearn.linear_model.stochastic_gradient import SGDClassifier\nfrom sklearn.metrics import hamming_loss\n\n# #############################################################################\n# Routines\n\n\n# Initialize random generator\nnp.random.seed(0)\n\n\ndef generate_data(case, sparse=False):\n \"\"\"Generate regression/classification data.\"\"\"\n if case == 'regression':\n X, y = datasets.load_boston(return_X_y=True)\n elif case == 'classification':\n X, y = datasets.fetch_20newsgroups_vectorized(subset='all',\n return_X_y=True)\n X, y = shuffle(X, y)\n offset = int(X.shape[0] * 0.8)\n X_train, y_train = X[:offset], y[:offset]\n X_test, y_test = X[offset:], y[offset:]\n if sparse:\n X_train = csr_matrix(X_train)\n X_test = csr_matrix(X_test)\n else:\n X_train = np.array(X_train)\n X_test = np.array(X_test)\n y_test = np.array(y_test)\n y_train = np.array(y_train)\n data = {'X_train': X_train, 'X_test': X_test, 'y_train': y_train,\n 'y_test': y_test}\n return data\n\n\ndef benchmark_influence(conf):\n \"\"\"\n Benchmark influence of :changing_param: on both MSE and latency.\n \"\"\"\n prediction_times = []\n prediction_powers = []\n complexities = []\n for param_value in conf['changing_param_values']:\n conf['tuned_params'][conf['changing_param']] = param_value\n estimator = conf['estimator'](**conf['tuned_params'])\n print(\"Benchmarking %s\" % estimator)\n estimator.fit(conf['data']['X_train'], conf['data']['y_train'])\n conf['postfit_hook'](estimator)\n complexity = conf['complexity_computer'](estimator)\n complexities.append(complexity)\n start_time = time.time()\n for _ in range(conf['n_samples']):\n y_pred = estimator.predict(conf['data']['X_test'])\n elapsed_time = (time.time() - start_time) / float(conf['n_samples'])\n prediction_times.append(elapsed_time)\n pred_score = conf['prediction_performance_computer'](\n conf['data']['y_test'], y_pred)\n prediction_powers.append(pred_score)\n print(\"Complexity: %d | %s: %.4f | Pred. Time: %fs\\n\" % (\n complexity, conf['prediction_performance_label'], pred_score,\n elapsed_time))\n return prediction_powers, prediction_times, complexities\n\n\ndef plot_influence(conf, mse_values, prediction_times, complexities):\n \"\"\"\n Plot influence of model complexity on both accuracy and latency.\n \"\"\"\n plt.figure(figsize=(12, 6))\n host = host_subplot(111, axes_class=Axes)\n plt.subplots_adjust(right=0.75)\n par1 = host.twinx()\n host.set_xlabel('Model Complexity (%s)' % conf['complexity_label'])\n y1_label = conf['prediction_performance_label']\n y2_label = \"Time (s)\"\n host.set_ylabel(y1_label)\n par1.set_ylabel(y2_label)\n p1, = host.plot(complexities, mse_values, 'b-', label=\"prediction error\")\n p2, = par1.plot(complexities, prediction_times, 'r-',\n label=\"latency\")\n host.legend(loc='upper right')\n host.axis[\"left\"].label.set_color(p1.get_color())\n par1.axis[\"right\"].label.set_color(p2.get_color())\n plt.title('Influence of Model Complexity - %s' % conf['estimator'].__name__)\n plt.show()\n\n\ndef _count_nonzero_coefficients(estimator):\n a = estimator.coef_.toarray()\n return np.count_nonzero(a)\n\n# #############################################################################\n# Main code\nregression_data = generate_data('regression')\nclassification_data = generate_data('classification', sparse=True)\nconfigurations = [\n {'estimator': SGDClassifier,\n 'tuned_params': {'penalty': 'elasticnet', 'alpha': 0.001, 'loss':\n 'modified_huber', 'fit_intercept': True, 'tol': 1e-3},\n 'changing_param': 'l1_ratio',\n 'changing_param_values': [0.25, 0.5, 0.75, 0.9],\n 'complexity_label': 'non_zero coefficients',\n 'complexity_computer': _count_nonzero_coefficients,\n 'prediction_performance_computer': hamming_loss,\n 'prediction_performance_label': 'Hamming Loss (Misclassification Ratio)',\n 'postfit_hook': lambda x: x.sparsify(),\n 'data': classification_data,\n 'n_samples': 30},\n {'estimator': NuSVR,\n 'tuned_params': {'C': 1e3, 'gamma': 2 ** -15},\n 'changing_param': 'nu',\n 'changing_param_values': [0.1, 0.25, 0.5, 0.75, 0.9],\n 'complexity_label': 'n_support_vectors',\n 'complexity_computer': lambda x: len(x.support_vectors_),\n 'data': regression_data,\n 'postfit_hook': lambda x: x,\n 'prediction_performance_computer': mean_squared_error,\n 'prediction_performance_label': 'MSE',\n 'n_samples': 30},\n {'estimator': GradientBoostingRegressor,\n 'tuned_params': {'loss': 'ls'},\n 'changing_param': 'n_estimators',\n 'changing_param_values': [10, 50, 100, 200, 500],\n 'complexity_label': 'n_trees',\n 'complexity_computer': lambda x: x.n_estimators,\n 'data': regression_data,\n 'postfit_hook': lambda x: x,\n 'prediction_performance_computer': mean_squared_error,\n 'prediction_performance_label': 'MSE',\n 'n_samples': 30},\n]\nfor conf in configurations:\n prediction_performances, prediction_times, complexities = \\\n benchmark_influence(conf)\n plot_influence(conf, prediction_performances, prediction_times,\n complexities)"
+"print(__doc__)\n\n# Author: Eustache Diemert <[email protected]>\n# License: BSD 3 clause\n\nimport time\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom mpl_toolkits.axes_grid1.parasite_axes import host_subplot\nfrom mpl_toolkits.axisartist.axislines import Axes\nfrom scipy.sparse.csr import csr_matrix\n\nfrom sklearn import datasets\nfrom sklearn.utils import shuffle\nfrom sklearn.metrics import mean_squared_error\nfrom sklearn.svm import NuSVR\nfrom sklearn.ensemble import GradientBoostingRegressor\nfrom sklearn.linear_model import SGDClassifier\nfrom sklearn.metrics import hamming_loss\n\n# #############################################################################\n# Routines\n\n\n# Initialize random generator\nnp.random.seed(0)\n\n\ndef generate_data(case, sparse=False):\n \"\"\"Generate regression/classification data.\"\"\"\n if case == 'regression':\n X, y = datasets.load_boston(return_X_y=True)\n elif case == 'classification':\n X, y = datasets.fetch_20newsgroups_vectorized(subset='all',\n return_X_y=True)\n X, y = shuffle(X, y)\n offset = int(X.shape[0] * 0.8)\n X_train, y_train = X[:offset], y[:offset]\n X_test, y_test = X[offset:], y[offset:]\n if sparse:\n X_train = csr_matrix(X_train)\n X_test = csr_matrix(X_test)\n else:\n X_train = np.array(X_train)\n X_test = np.array(X_test)\n y_test = np.array(y_test)\n y_train = np.array(y_train)\n data = {'X_train': X_train, 'X_test': X_test, 'y_train': y_train,\n 'y_test': y_test}\n return data\n\n\ndef benchmark_influence(conf):\n \"\"\"\n Benchmark influence of :changing_param: on both MSE and latency.\n \"\"\"\n prediction_times = []\n prediction_powers = []\n complexities = []\n for param_value in conf['changing_param_values']:\n conf['tuned_params'][conf['changing_param']] = param_value\n estimator = conf['estimator'](**conf['tuned_params'])\n print(\"Benchmarking %s\" % estimator)\n estimator.fit(conf['data']['X_train'], conf['data']['y_train'])\n conf['postfit_hook'](estimator)\n complexity = conf['complexity_computer'](estimator)\n complexities.append(complexity)\n start_time = time.time()\n for _ in range(conf['n_samples']):\n y_pred = estimator.predict(conf['data']['X_test'])\n elapsed_time = (time.time() - start_time) / float(conf['n_samples'])\n prediction_times.append(elapsed_time)\n pred_score = conf['prediction_performance_computer'](\n conf['data']['y_test'], y_pred)\n prediction_powers.append(pred_score)\n print(\"Complexity: %d | %s: %.4f | Pred. Time: %fs\\n\" % (\n complexity, conf['prediction_performance_label'], pred_score,\n elapsed_time))\n return prediction_powers, prediction_times, complexities\n\n\ndef plot_influence(conf, mse_values, prediction_times, complexities):\n \"\"\"\n Plot influence of model complexity on both accuracy and latency.\n \"\"\"\n plt.figure(figsize=(12, 6))\n host = host_subplot(111, axes_class=Axes)\n plt.subplots_adjust(right=0.75)\n par1 = host.twinx()\n host.set_xlabel('Model Complexity (%s)' % conf['complexity_label'])\n y1_label = conf['prediction_performance_label']\n y2_label = \"Time (s)\"\n host.set_ylabel(y1_label)\n par1.set_ylabel(y2_label)\n p1, = host.plot(complexities, mse_values, 'b-', label=\"prediction error\")\n p2, = par1.plot(complexities, prediction_times, 'r-',\n label=\"latency\")\n host.legend(loc='upper right')\n host.axis[\"left\"].label.set_color(p1.get_color())\n par1.axis[\"right\"].label.set_color(p2.get_color())\n plt.title('Influence of Model Complexity - %s' % conf['estimator'].__name__)\n plt.show()\n\n\ndef _count_nonzero_coefficients(estimator):\n a = estimator.coef_.toarray()\n return np.count_nonzero(a)\n\n# #############################################################################\n# Main code\nregression_data = generate_data('regression')\nclassification_data = generate_data('classification', sparse=True)\nconfigurations = [\n {'estimator': SGDClassifier,\n 'tuned_params': {'penalty': 'elasticnet', 'alpha': 0.001, 'loss':\n 'modified_huber', 'fit_intercept': True, 'tol': 1e-3},\n 'changing_param': 'l1_ratio',\n 'changing_param_values': [0.25, 0.5, 0.75, 0.9],\n 'complexity_label': 'non_zero coefficients',\n 'complexity_computer': _count_nonzero_coefficients,\n 'prediction_performance_computer': hamming_loss,\n 'prediction_performance_label': 'Hamming Loss (Misclassification Ratio)',\n 'postfit_hook': lambda x: x.sparsify(),\n 'data': classification_data,\n 'n_samples': 30},\n {'estimator': NuSVR,\n 'tuned_params': {'C': 1e3, 'gamma': 2 ** -15},\n 'changing_param': 'nu',\n 'changing_param_values': [0.1, 0.25, 0.5, 0.75, 0.9],\n 'complexity_label': 'n_support_vectors',\n 'complexity_computer': lambda x: len(x.support_vectors_),\n 'data': regression_data,\n 'postfit_hook': lambda x: x,\n 'prediction_performance_computer': mean_squared_error,\n 'prediction_performance_label': 'MSE',\n 'n_samples': 30},\n {'estimator': GradientBoostingRegressor,\n 'tuned_params': {'loss': 'ls'},\n 'changing_param': 'n_estimators',\n 'changing_param_values': [10, 50, 100, 200, 500],\n 'complexity_label': 'n_trees',\n 'complexity_computer': lambda x: x.n_estimators,\n 'data': regression_data,\n 'postfit_hook': lambda x: x,\n 'prediction_performance_computer': mean_squared_error,\n 'prediction_performance_label': 'MSE',\n 'n_samples': 30},\n]\nfor conf in configurations:\n prediction_performances, prediction_times, complexities = \\\n benchmark_influence(conf)\n plot_influence(conf, prediction_performances, prediction_times,\n complexities)"
 ]
 }
],

dev/_downloads/scikit-learn-docs.pdf

-25.7 KB
Binary file not shown.

dev/_images/iris.png

0 Bytes → 586 Bytes (binary image not shown)
