
Commit 8c1571e

Pushing the docs to dev/ for branch: main, commit fda5b16b10fbdcf23f998d3d6f9596688343e65d
1 parent 0d1f13f commit 8c1571e


1,234 files changed (+4598 −4579 lines)


dev/_downloads/3ed102fa8211c8d36f2331f0c5e1dcef/plot_model_complexity_influence.ipynb

Lines changed: 4 additions & 4 deletions
@@ -26,7 +26,7 @@
},
"outputs": [],
"source": [
-
"# Authors: Eustache Diemert <[email protected]>\n# Maria Telenczuk <https://github.com/maikia>\n# Guillaume Lemaitre <[email protected]>\n# License: BSD 3 clause\n\nimport time\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import datasets\nfrom sklearn.utils import shuffle\nfrom sklearn.metrics import mean_squared_error\nfrom sklearn.svm import NuSVR\nfrom sklearn.ensemble import GradientBoostingRegressor\nfrom sklearn.linear_model import SGDClassifier\nfrom sklearn.metrics import hamming_loss\n\n\n# Initialize random generator\nnp.random.seed(0)"
+
"# Authors: Eustache Diemert <[email protected]>\n# Maria Telenczuk <https://github.com/maikia>\n# Guillaume Lemaitre <[email protected]>\n# License: BSD 3 clause\n\nimport time\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import datasets\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import mean_squared_error\nfrom sklearn.svm import NuSVR\nfrom sklearn.ensemble import GradientBoostingRegressor\nfrom sklearn.linear_model import SGDClassifier\nfrom sklearn.metrics import hamming_loss\n\n# Initialize random generator\nnp.random.seed(0)"
]
},
{
@@ -44,7 +44,7 @@
},
"outputs": [],
"source": [
-
"def generate_data(case):\n \"\"\"Generate regression/classification data.\"\"\"\n if case == \"regression\":\n X, y = datasets.load_diabetes(return_X_y=True)\n elif case == \"classification\":\n X, y = datasets.fetch_20newsgroups_vectorized(subset=\"all\", return_X_y=True)\n X, y = shuffle(X, y)\n offset = int(X.shape[0] * 0.8)\n X_train, y_train = X[:offset], y[:offset]\n X_test, y_test = X[offset:], y[offset:]\n\n data = {\"X_train\": X_train, \"X_test\": X_test, \"y_train\": y_train, \"y_test\": y_test}\n return data\n\n\nregression_data = generate_data(\"regression\")\nclassification_data = generate_data(\"classification\")"
+
"def generate_data(case):\n \"\"\"Generate regression/classification data.\"\"\"\n if case == \"regression\":\n X, y = datasets.load_diabetes(return_X_y=True)\n train_size = 0.8\n elif case == \"classification\":\n X, y = datasets.fetch_20newsgroups_vectorized(subset=\"all\", return_X_y=True)\n train_size = 0.4 # to make the example run faster\n\n X_train, X_test, y_train, y_test = train_test_split(\n X, y, train_size=train_size, random_state=0\n )\n\n data = {\"X_train\": X_train, \"X_test\": X_test, \"y_train\": y_train, \"y_test\": y_test}\n return data\n\n\nregression_data = generate_data(\"regression\")\nclassification_data = generate_data(\"classification\")"
]
},
{
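The hunk above replaces the manual shuffle-and-slice split with train_test_split, and uses a smaller training fraction for the 20 newsgroups data so the example runs faster. A minimal sketch of the same split pattern on the diabetes regression data (standalone and illustrative, not taken verbatim from the notebook):

from sklearn import datasets
from sklearn.model_selection import train_test_split

# Load the regression dataset used by the example
X, y = datasets.load_diabetes(return_X_y=True)

# train_test_split shuffles and splits in one step; train_size sets the
# training fraction and random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=0
)
print(X_train.shape, X_test.shape)  # roughly an 80/20 split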
@@ -80,7 +80,7 @@
},
"outputs": [],
"source": [
-
"def _count_nonzero_coefficients(estimator):\n a = estimator.coef_.toarray()\n return np.count_nonzero(a)\n\n\nconfigurations = [\n {\n \"estimator\": SGDClassifier,\n \"tuned_params\": {\n \"penalty\": \"elasticnet\",\n \"alpha\": 0.001,\n \"loss\": \"modified_huber\",\n \"fit_intercept\": True,\n \"tol\": 1e-3,\n },\n \"changing_param\": \"l1_ratio\",\n \"changing_param_values\": [0.25, 0.5, 0.75, 0.9],\n \"complexity_label\": \"non_zero coefficients\",\n \"complexity_computer\": _count_nonzero_coefficients,\n \"prediction_performance_computer\": hamming_loss,\n \"prediction_performance_label\": \"Hamming Loss (Misclassification Ratio)\",\n \"postfit_hook\": lambda x: x.sparsify(),\n \"data\": classification_data,\n \"n_samples\": 30,\n },\n {\n \"estimator\": NuSVR,\n \"tuned_params\": {\"C\": 1e3, \"gamma\": 2 ** -15},\n \"changing_param\": \"nu\",\n \"changing_param_values\": [0.1, 0.25, 0.5, 0.75, 0.9],\n \"complexity_label\": \"n_support_vectors\",\n \"complexity_computer\": lambda x: len(x.support_vectors_),\n \"data\": regression_data,\n \"postfit_hook\": lambda x: x,\n \"prediction_performance_computer\": mean_squared_error,\n \"prediction_performance_label\": \"MSE\",\n \"n_samples\": 30,\n },\n {\n \"estimator\": GradientBoostingRegressor,\n \"tuned_params\": {\"loss\": \"squared_error\"},\n \"changing_param\": \"n_estimators\",\n \"changing_param_values\": [10, 50, 100, 200, 500],\n \"complexity_label\": \"n_trees\",\n \"complexity_computer\": lambda x: x.n_estimators,\n \"data\": regression_data,\n \"postfit_hook\": lambda x: x,\n \"prediction_performance_computer\": mean_squared_error,\n \"prediction_performance_label\": \"MSE\",\n \"n_samples\": 30,\n },\n]"
+
"def _count_nonzero_coefficients(estimator):\n a = estimator.coef_.toarray()\n return np.count_nonzero(a)\n\n\nconfigurations = [\n {\n \"estimator\": SGDClassifier,\n \"tuned_params\": {\n \"penalty\": \"elasticnet\",\n \"alpha\": 0.001,\n \"loss\": \"modified_huber\",\n \"fit_intercept\": True,\n \"tol\": 1e-3,\n },\n \"changing_param\": \"l1_ratio\",\n \"changing_param_values\": [0.25, 0.5, 0.75, 0.9],\n \"complexity_label\": \"non_zero coefficients\",\n \"complexity_computer\": _count_nonzero_coefficients,\n \"prediction_performance_computer\": hamming_loss,\n \"prediction_performance_label\": \"Hamming Loss (Misclassification Ratio)\",\n \"postfit_hook\": lambda x: x.sparsify(),\n \"data\": classification_data,\n \"n_samples\": 5,\n },\n {\n \"estimator\": NuSVR,\n \"tuned_params\": {\"C\": 1e3, \"gamma\": 2 ** -15},\n \"changing_param\": \"nu\",\n \"changing_param_values\": [0.05, 0.1, 0.2, 0.35, 0.5],\n \"complexity_label\": \"n_support_vectors\",\n \"complexity_computer\": lambda x: len(x.support_vectors_),\n \"data\": regression_data,\n \"postfit_hook\": lambda x: x,\n \"prediction_performance_computer\": mean_squared_error,\n \"prediction_performance_label\": \"MSE\",\n \"n_samples\": 15,\n },\n {\n \"estimator\": GradientBoostingRegressor,\n \"tuned_params\": {\n \"loss\": \"squared_error\",\n \"learning_rate\": 0.05,\n \"max_depth\": 2,\n },\n \"changing_param\": \"n_estimators\",\n \"changing_param_values\": [10, 25, 50, 75, 100],\n \"complexity_label\": \"n_trees\",\n \"complexity_computer\": lambda x: x.n_estimators,\n \"data\": regression_data,\n \"postfit_hook\": lambda x: x,\n \"prediction_performance_computer\": mean_squared_error,\n \"prediction_performance_label\": \"MSE\",\n \"n_samples\": 15,\n },\n]"
]
},
{
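The configurations above are trimmed so the benchmark finishes sooner: fewer timing repetitions (n_samples), smaller nu values for NuSVR, and a lighter GradientBoostingRegressor (shallow trees, small learning rate, at most 100 estimators). A rough, self-contained sketch of fitting that lighter gradient-boosting setting on the same regression data (split parameters mirror the cell above; results are illustrative):

from sklearn import datasets
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Recreate the regression split used by the benchmark (diabetes, 80/20)
X, y = datasets.load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=0
)

# Lighter settings from the updated configuration: shallow trees and a
# small learning rate keep each added estimator cheap to fit and predict.
model = GradientBoostingRegressor(
    loss="squared_error", learning_rate=0.05, max_depth=2, n_estimators=100
)
model.fit(X_train, y_train)

print("MSE:", mean_squared_error(y_test, model.predict(X_test)))
print("n_trees (complexity measure):", model.n_estimators)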
@@ -98,7 +98,7 @@
},
"outputs": [],
"source": [
-
"def plot_influence(conf, mse_values, prediction_times, complexities):\n \"\"\"\n Plot influence of model complexity on both accuracy and latency.\n \"\"\"\n\n fig = plt.figure()\n fig.subplots_adjust(right=0.75)\n\n # first axes (prediction error)\n ax1 = fig.add_subplot(111)\n line1 = ax1.plot(complexities, mse_values, c=\"tab:blue\", ls=\"-\")[0]\n ax1.set_xlabel(\"Model Complexity (%s)\" % conf[\"complexity_label\"])\n y1_label = conf[\"prediction_performance_label\"]\n ax1.set_ylabel(y1_label)\n\n ax1.spines[\"left\"].set_color(line1.get_color())\n ax1.yaxis.label.set_color(line1.get_color())\n ax1.tick_params(axis=\"y\", colors=line1.get_color())\n\n # second axes (latency)\n ax2 = fig.add_subplot(111, sharex=ax1, frameon=False)\n line2 = ax2.plot(complexities, prediction_times, c=\"tab:orange\", ls=\"-\")[0]\n ax2.yaxis.tick_right()\n ax2.yaxis.set_label_position(\"right\")\n y2_label = \"Time (s)\"\n ax2.set_ylabel(y2_label)\n ax1.spines[\"right\"].set_color(line2.get_color())\n ax2.yaxis.label.set_color(line2.get_color())\n ax2.tick_params(axis=\"y\", colors=line2.get_color())\n\n plt.legend((line1, line2), (\"prediction error\", \"latency\"), loc=\"upper right\")\n\n plt.title(\n \"Influence of varying '%s' on %s\"\n % (conf[\"changing_param\"], conf[\"estimator\"].__name__)\n )\n\n\nfor conf in configurations:\n prediction_performances, prediction_times, complexities = benchmark_influence(conf)\n plot_influence(conf, prediction_performances, prediction_times, complexities)\nplt.show()"
+
"def plot_influence(conf, mse_values, prediction_times, complexities):\n \"\"\"\n Plot influence of model complexity on both accuracy and latency.\n \"\"\"\n\n fig = plt.figure()\n fig.subplots_adjust(right=0.75)\n\n # first axes (prediction error)\n ax1 = fig.add_subplot(111)\n line1 = ax1.plot(complexities, mse_values, c=\"tab:blue\", ls=\"-\")[0]\n ax1.set_xlabel(\"Model Complexity (%s)\" % conf[\"complexity_label\"])\n y1_label = conf[\"prediction_performance_label\"]\n ax1.set_ylabel(y1_label)\n\n ax1.spines[\"left\"].set_color(line1.get_color())\n ax1.yaxis.label.set_color(line1.get_color())\n ax1.tick_params(axis=\"y\", colors=line1.get_color())\n\n # second axes (latency)\n ax2 = fig.add_subplot(111, sharex=ax1, frameon=False)\n line2 = ax2.plot(complexities, prediction_times, c=\"tab:orange\", ls=\"-\")[0]\n ax2.yaxis.tick_right()\n ax2.yaxis.set_label_position(\"right\")\n y2_label = \"Time (s)\"\n ax2.set_ylabel(y2_label)\n ax1.spines[\"right\"].set_color(line2.get_color())\n ax2.yaxis.label.set_color(line2.get_color())\n ax2.tick_params(axis=\"y\", colors=line2.get_color())\n\n plt.legend(\n (line1, line2), (\"prediction error\", \"prediction latency\"), loc=\"upper right\"\n )\n\n plt.title(\n \"Influence of varying '%s' on %s\"\n % (conf[\"changing_param\"], conf[\"estimator\"].__name__)\n )\n\n\nfor conf in configurations:\n prediction_performances, prediction_times, complexities = benchmark_influence(conf)\n plot_influence(conf, prediction_performances, prediction_times, complexities)\nplt.show()"
]
},
{
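plot_influence itself is unchanged apart from the legend label: it overlays two Axes so prediction error and prediction latency can share the complexity axis while keeping separate y scales. A stripped-down sketch of that two-axis pattern with made-up data (all values illustrative):

import numpy as np
import matplotlib.pyplot as plt

# Illustrative curves: error falls and latency grows with model complexity.
complexities = np.array([10, 25, 50, 75, 100])
errors = 1.0 / complexities
latencies = 0.001 * complexities

fig = plt.figure()

# First axes carries the prediction-error curve and the left y axis.
ax1 = fig.add_subplot(111)
(line1,) = ax1.plot(complexities, errors, c="tab:blue")
ax1.set_xlabel("Model Complexity")
ax1.set_ylabel("prediction error")

# Second axes is drawn over the first with a transparent frame, sharing x
# but keeping its own y scale on the right for the latency curve.
ax2 = fig.add_subplot(111, sharex=ax1, frameon=False)
(line2,) = ax2.plot(complexities, latencies, c="tab:orange")
ax2.yaxis.tick_right()
ax2.yaxis.set_label_position("right")
ax2.set_ylabel("Time (s)")

plt.legend((line1, line2), ("prediction error", "prediction latency"), loc="upper right")
plt.show()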

dev/_downloads/54c7ea3b3671861fbfb2161a6f0ab6d0/plot_nca_classification.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
},
"outputs": [],
"source": [
-
"# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib.colors import ListedColormap\nfrom sklearn import datasets\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis\nfrom sklearn.pipeline import Pipeline\n\n\nn_neighbors = 1\n\ndataset = datasets.load_iris()\nX, y = dataset.data, dataset.target\n\n# we only take two features. We could avoid this ugly\n# slicing by using a two-dim dataset\nX = X[:, [0, 2]]\n\nX_train, X_test, y_train, y_test = train_test_split(\n X, y, stratify=y, test_size=0.7, random_state=42\n)\n\nh = 0.01 # step size in the mesh\n\n# Create color maps\ncmap_light = ListedColormap([\"#FFAAAA\", \"#AAFFAA\", \"#AAAAFF\"])\ncmap_bold = ListedColormap([\"#FF0000\", \"#00FF00\", \"#0000FF\"])\n\nnames = [\"KNN\", \"NCA, KNN\"]\n\nclassifiers = [\n Pipeline(\n [\n (\"scaler\", StandardScaler()),\n (\"knn\", KNeighborsClassifier(n_neighbors=n_neighbors)),\n ]\n ),\n Pipeline(\n [\n (\"scaler\", StandardScaler()),\n (\"nca\", NeighborhoodComponentsAnalysis()),\n (\"knn\", KNeighborsClassifier(n_neighbors=n_neighbors)),\n ]\n ),\n]\n\nx_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\ny_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\nxx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n\nfor name, clf in zip(names, classifiers):\n\n clf.fit(X_train, y_train)\n score = clf.score(X_test, y_test)\n\n # Plot the decision boundary. For that, we will assign a color to each\n # point in the mesh [x_min, x_max]x[y_min, y_max].\n Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n\n # Put the result into a color plot\n Z = Z.reshape(xx.shape)\n plt.figure()\n plt.pcolormesh(xx, yy, Z, cmap=cmap_light, alpha=0.8)\n\n # Plot also the training and testing points\n plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold, edgecolor=\"k\", s=20)\n plt.xlim(xx.min(), xx.max())\n plt.ylim(yy.min(), yy.max())\n plt.title(\"{} (k = {})\".format(name, n_neighbors))\n plt.text(\n 0.9,\n 0.1,\n \"{:.2f}\".format(score),\n size=15,\n ha=\"center\",\n va=\"center\",\n transform=plt.gca().transAxes,\n )\n\nplt.show()"
+
"# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib.colors import ListedColormap\nfrom sklearn import datasets\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis\nfrom sklearn.pipeline import Pipeline\n\n\nn_neighbors = 1\n\ndataset = datasets.load_iris()\nX, y = dataset.data, dataset.target\n\n# we only take two features. We could avoid this ugly\n# slicing by using a two-dim dataset\nX = X[:, [0, 2]]\n\nX_train, X_test, y_train, y_test = train_test_split(\n X, y, stratify=y, test_size=0.7, random_state=42\n)\n\nh = 0.05 # step size in the mesh\n\n# Create color maps\ncmap_light = ListedColormap([\"#FFAAAA\", \"#AAFFAA\", \"#AAAAFF\"])\ncmap_bold = ListedColormap([\"#FF0000\", \"#00FF00\", \"#0000FF\"])\n\nnames = [\"KNN\", \"NCA, KNN\"]\n\nclassifiers = [\n Pipeline(\n [\n (\"scaler\", StandardScaler()),\n (\"knn\", KNeighborsClassifier(n_neighbors=n_neighbors)),\n ]\n ),\n Pipeline(\n [\n (\"scaler\", StandardScaler()),\n (\"nca\", NeighborhoodComponentsAnalysis()),\n (\"knn\", KNeighborsClassifier(n_neighbors=n_neighbors)),\n ]\n ),\n]\n\nx_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\ny_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\nxx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n\nfor name, clf in zip(names, classifiers):\n\n clf.fit(X_train, y_train)\n score = clf.score(X_test, y_test)\n\n # Plot the decision boundary. For that, we will assign a color to each\n # point in the mesh [x_min, x_max]x[y_min, y_max].\n Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n\n # Put the result into a color plot\n Z = Z.reshape(xx.shape)\n plt.figure()\n plt.pcolormesh(xx, yy, Z, cmap=cmap_light, alpha=0.8)\n\n # Plot also the training and testing points\n plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold, edgecolor=\"k\", s=20)\n plt.xlim(xx.min(), xx.max())\n plt.ylim(yy.min(), yy.max())\n plt.title(\"{} (k = {})\".format(name, n_neighbors))\n plt.text(\n 0.9,\n 0.1,\n \"{:.2f}\".format(score),\n size=15,\n ha=\"center\",\n va=\"center\",\n transform=plt.gca().transAxes,\n )\n\nplt.show()"
]
}
],
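The only edit to this example is a coarser mesh step (h goes from 0.01 to 0.05), which shrinks the grid of points each classifier must label when the decision boundary is drawn. A back-of-the-envelope sketch of the effect, using bounds that only roughly match the two iris features with their +/-1 margin (values illustrative):

import numpy as np

# The mesh size grows as 1/h**2, so going from h=0.01 to h=0.05 cuts the
# number of points passed to clf.predict by roughly a factor of 25.
x_min, x_max, y_min, y_max = 3.0, 9.0, 0.0, 8.0  # rough feature bounds

for h in (0.01, 0.05):
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    print(f"h={h}: {xx.size} points to classify")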
