Skip to content

Commit 651a150

Browse files
committed
Pushing the docs to dev/ for branch: main, commit 7e809dbc3200f63e21a7da3c1aa75c519c7069db
1 parent 47a4b11 commit 651a150

File tree

1,263 files changed

+4529
-4529
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,263 files changed

+4529
-4529
lines changed
Binary file not shown.

dev/_downloads/24475810034a0d0d190a9de0f87d72b5/plot_all_scaling.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@
6969

7070
feature_mapping = {
7171
"MedInc": "Median income in block",
72-
"HousAge": "Median house age in block",
72+
"HouseAge": "Median house age in block",
7373
"AveRooms": "Average number of rooms",
7474
"AveBedrms": "Average number of bedrooms",
7575
"Population": "Block population",
Binary file not shown.

dev/_downloads/e60e99adef360baabc49b925646a39d9/plot_all_scaling.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"# Author: Raghav RV <[email protected]>\n# Guillaume Lemaitre <[email protected]>\n# Thomas Unterthiner\n# License: BSD 3 clause\n\nimport numpy as np\n\nimport matplotlib as mpl\nfrom matplotlib import pyplot as plt\nfrom matplotlib import cm\n\nfrom sklearn.preprocessing import MinMaxScaler\nfrom sklearn.preprocessing import minmax_scale\nfrom sklearn.preprocessing import MaxAbsScaler\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.preprocessing import RobustScaler\nfrom sklearn.preprocessing import Normalizer\nfrom sklearn.preprocessing import QuantileTransformer\nfrom sklearn.preprocessing import PowerTransformer\n\nfrom sklearn.datasets import fetch_california_housing\n\ndataset = fetch_california_housing()\nX_full, y_full = dataset.data, dataset.target\nfeature_names = dataset.feature_names\n\nfeature_mapping = {\n \"MedInc\": \"Median income in block\",\n \"HousAge\": \"Median house age in block\",\n \"AveRooms\": \"Average number of rooms\",\n \"AveBedrms\": \"Average number of bedrooms\",\n \"Population\": \"Block population\",\n \"AveOccup\": \"Average house occupancy\",\n \"Latitude\": \"House block latitude\",\n \"Longitude\": \"House block longitude\",\n}\n\n# Take only 2 features to make visualization easier\n# Feature MedInc has a long tail distribution.\n# Feature AveOccup has a few but very large outliers.\nfeatures = [\"MedInc\", \"AveOccup\"]\nfeatures_idx = [feature_names.index(feature) for feature in features]\nX = X_full[:, features_idx]\ndistributions = [\n (\"Unscaled data\", X),\n (\"Data after standard scaling\", StandardScaler().fit_transform(X)),\n (\"Data after min-max scaling\", MinMaxScaler().fit_transform(X)),\n (\"Data after max-abs scaling\", MaxAbsScaler().fit_transform(X)),\n (\n \"Data after robust scaling\",\n RobustScaler(quantile_range=(25, 75)).fit_transform(X),\n ),\n (\n \"Data after power transformation (Yeo-Johnson)\",\n PowerTransformer(method=\"yeo-johnson\").fit_transform(X),\n ),\n (\n \"Data after power transformation (Box-Cox)\",\n PowerTransformer(method=\"box-cox\").fit_transform(X),\n ),\n (\n \"Data after quantile transformation (uniform pdf)\",\n QuantileTransformer(output_distribution=\"uniform\").fit_transform(X),\n ),\n (\n \"Data after quantile transformation (gaussian pdf)\",\n QuantileTransformer(output_distribution=\"normal\").fit_transform(X),\n ),\n (\"Data after sample-wise L2 normalizing\", Normalizer().fit_transform(X)),\n]\n\n# scale the output between 0 and 1 for the colorbar\ny = minmax_scale(y_full)\n\n# plasma does not exist in matplotlib < 1.5\ncmap = getattr(cm, \"plasma_r\", cm.hot_r)\n\n\ndef create_axes(title, figsize=(16, 6)):\n fig = plt.figure(figsize=figsize)\n fig.suptitle(title)\n\n # define the axis for the first plot\n left, width = 0.1, 0.22\n bottom, height = 0.1, 0.7\n bottom_h = height + 0.15\n left_h = left + width + 0.02\n\n rect_scatter = [left, bottom, width, height]\n rect_histx = [left, bottom_h, width, 0.1]\n rect_histy = [left_h, bottom, 0.05, height]\n\n ax_scatter = plt.axes(rect_scatter)\n ax_histx = plt.axes(rect_histx)\n ax_histy = plt.axes(rect_histy)\n\n # define the axis for the zoomed-in plot\n left = width + left + 0.2\n left_h = left + width + 0.02\n\n rect_scatter = [left, bottom, width, height]\n rect_histx = [left, bottom_h, width, 0.1]\n rect_histy = [left_h, bottom, 0.05, height]\n\n ax_scatter_zoom = plt.axes(rect_scatter)\n ax_histx_zoom = plt.axes(rect_histx)\n ax_histy_zoom = plt.axes(rect_histy)\n\n # define the axis for the colorbar\n left, width = width + left + 0.13, 0.01\n\n rect_colorbar = [left, bottom, width, height]\n ax_colorbar = plt.axes(rect_colorbar)\n\n return (\n (ax_scatter, ax_histy, ax_histx),\n (ax_scatter_zoom, ax_histy_zoom, ax_histx_zoom),\n ax_colorbar,\n )\n\n\ndef plot_distribution(axes, X, y, hist_nbins=50, title=\"\", x0_label=\"\", x1_label=\"\"):\n ax, hist_X1, hist_X0 = axes\n\n ax.set_title(title)\n ax.set_xlabel(x0_label)\n ax.set_ylabel(x1_label)\n\n # The scatter plot\n colors = cmap(y)\n ax.scatter(X[:, 0], X[:, 1], alpha=0.5, marker=\"o\", s=5, lw=0, c=colors)\n\n # Removing the top and the right spine for aesthetics\n # make nice axis layout\n ax.spines[\"top\"].set_visible(False)\n ax.spines[\"right\"].set_visible(False)\n ax.get_xaxis().tick_bottom()\n ax.get_yaxis().tick_left()\n ax.spines[\"left\"].set_position((\"outward\", 10))\n ax.spines[\"bottom\"].set_position((\"outward\", 10))\n\n # Histogram for axis X1 (feature 5)\n hist_X1.set_ylim(ax.get_ylim())\n hist_X1.hist(\n X[:, 1], bins=hist_nbins, orientation=\"horizontal\", color=\"grey\", ec=\"grey\"\n )\n hist_X1.axis(\"off\")\n\n # Histogram for axis X0 (feature 0)\n hist_X0.set_xlim(ax.get_xlim())\n hist_X0.hist(\n X[:, 0], bins=hist_nbins, orientation=\"vertical\", color=\"grey\", ec=\"grey\"\n )\n hist_X0.axis(\"off\")"
29+
"# Author: Raghav RV <[email protected]>\n# Guillaume Lemaitre <[email protected]>\n# Thomas Unterthiner\n# License: BSD 3 clause\n\nimport numpy as np\n\nimport matplotlib as mpl\nfrom matplotlib import pyplot as plt\nfrom matplotlib import cm\n\nfrom sklearn.preprocessing import MinMaxScaler\nfrom sklearn.preprocessing import minmax_scale\nfrom sklearn.preprocessing import MaxAbsScaler\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.preprocessing import RobustScaler\nfrom sklearn.preprocessing import Normalizer\nfrom sklearn.preprocessing import QuantileTransformer\nfrom sklearn.preprocessing import PowerTransformer\n\nfrom sklearn.datasets import fetch_california_housing\n\ndataset = fetch_california_housing()\nX_full, y_full = dataset.data, dataset.target\nfeature_names = dataset.feature_names\n\nfeature_mapping = {\n \"MedInc\": \"Median income in block\",\n \"HouseAge\": \"Median house age in block\",\n \"AveRooms\": \"Average number of rooms\",\n \"AveBedrms\": \"Average number of bedrooms\",\n \"Population\": \"Block population\",\n \"AveOccup\": \"Average house occupancy\",\n \"Latitude\": \"House block latitude\",\n \"Longitude\": \"House block longitude\",\n}\n\n# Take only 2 features to make visualization easier\n# Feature MedInc has a long tail distribution.\n# Feature AveOccup has a few but very large outliers.\nfeatures = [\"MedInc\", \"AveOccup\"]\nfeatures_idx = [feature_names.index(feature) for feature in features]\nX = X_full[:, features_idx]\ndistributions = [\n (\"Unscaled data\", X),\n (\"Data after standard scaling\", StandardScaler().fit_transform(X)),\n (\"Data after min-max scaling\", MinMaxScaler().fit_transform(X)),\n (\"Data after max-abs scaling\", MaxAbsScaler().fit_transform(X)),\n (\n \"Data after robust scaling\",\n RobustScaler(quantile_range=(25, 75)).fit_transform(X),\n ),\n (\n \"Data after power transformation (Yeo-Johnson)\",\n PowerTransformer(method=\"yeo-johnson\").fit_transform(X),\n ),\n (\n \"Data after power transformation (Box-Cox)\",\n PowerTransformer(method=\"box-cox\").fit_transform(X),\n ),\n (\n \"Data after quantile transformation (uniform pdf)\",\n QuantileTransformer(output_distribution=\"uniform\").fit_transform(X),\n ),\n (\n \"Data after quantile transformation (gaussian pdf)\",\n QuantileTransformer(output_distribution=\"normal\").fit_transform(X),\n ),\n (\"Data after sample-wise L2 normalizing\", Normalizer().fit_transform(X)),\n]\n\n# scale the output between 0 and 1 for the colorbar\ny = minmax_scale(y_full)\n\n# plasma does not exist in matplotlib < 1.5\ncmap = getattr(cm, \"plasma_r\", cm.hot_r)\n\n\ndef create_axes(title, figsize=(16, 6)):\n fig = plt.figure(figsize=figsize)\n fig.suptitle(title)\n\n # define the axis for the first plot\n left, width = 0.1, 0.22\n bottom, height = 0.1, 0.7\n bottom_h = height + 0.15\n left_h = left + width + 0.02\n\n rect_scatter = [left, bottom, width, height]\n rect_histx = [left, bottom_h, width, 0.1]\n rect_histy = [left_h, bottom, 0.05, height]\n\n ax_scatter = plt.axes(rect_scatter)\n ax_histx = plt.axes(rect_histx)\n ax_histy = plt.axes(rect_histy)\n\n # define the axis for the zoomed-in plot\n left = width + left + 0.2\n left_h = left + width + 0.02\n\n rect_scatter = [left, bottom, width, height]\n rect_histx = [left, bottom_h, width, 0.1]\n rect_histy = [left_h, bottom, 0.05, height]\n\n ax_scatter_zoom = plt.axes(rect_scatter)\n ax_histx_zoom = plt.axes(rect_histx)\n ax_histy_zoom = plt.axes(rect_histy)\n\n # define the axis for the colorbar\n left, width = width + left + 0.13, 0.01\n\n rect_colorbar = [left, bottom, width, height]\n ax_colorbar = plt.axes(rect_colorbar)\n\n return (\n (ax_scatter, ax_histy, ax_histx),\n (ax_scatter_zoom, ax_histy_zoom, ax_histx_zoom),\n ax_colorbar,\n )\n\n\ndef plot_distribution(axes, X, y, hist_nbins=50, title=\"\", x0_label=\"\", x1_label=\"\"):\n ax, hist_X1, hist_X0 = axes\n\n ax.set_title(title)\n ax.set_xlabel(x0_label)\n ax.set_ylabel(x1_label)\n\n # The scatter plot\n colors = cmap(y)\n ax.scatter(X[:, 0], X[:, 1], alpha=0.5, marker=\"o\", s=5, lw=0, c=colors)\n\n # Removing the top and the right spine for aesthetics\n # make nice axis layout\n ax.spines[\"top\"].set_visible(False)\n ax.spines[\"right\"].set_visible(False)\n ax.get_xaxis().tick_bottom()\n ax.get_yaxis().tick_left()\n ax.spines[\"left\"].set_position((\"outward\", 10))\n ax.spines[\"bottom\"].set_position((\"outward\", 10))\n\n # Histogram for axis X1 (feature 5)\n hist_X1.set_ylim(ax.get_ylim())\n hist_X1.hist(\n X[:, 1], bins=hist_nbins, orientation=\"horizontal\", color=\"grey\", ec=\"grey\"\n )\n hist_X1.axis(\"off\")\n\n # Histogram for axis X0 (feature 0)\n hist_X0.set_xlim(ax.get_xlim())\n hist_X0.hist(\n X[:, 0], bins=hist_nbins, orientation=\"vertical\", color=\"grey\", ec=\"grey\"\n )\n hist_X0.axis(\"off\")"
3030
]
3131
},
3232
{

dev/_downloads/scikit-learn-docs.zip

5.59 KB
Binary file not shown.
23 Bytes
-37 Bytes
121 Bytes
148 Bytes
10 Bytes

0 commit comments

Comments
 (0)