
Commit 7c55ccd

Pushing the docs to dev/ for branch: master, commit 4a95e33e6344e15a197f33ddc2f8f786a0a45df0
1 parent f3aee0a commit 7c55ccd

File tree

1,241 files changed (+4023 / -3843 lines)


dev/_downloads/0256e8c73ba2281b77ba23f17e7d1549/plot_species_kde.py

Lines changed: 28 additions & 1 deletion
@@ -42,7 +42,6 @@
 import numpy as np
 import matplotlib.pyplot as plt
 from sklearn.datasets import fetch_species_distributions
-from sklearn.datasets.species_distributions import construct_grids
 from sklearn.neighbors import KernelDensity
 
 # if basemap is available, we'll use it.
@@ -53,6 +52,34 @@
 except ImportError:
     basemap = False
 
+
+def construct_grids(batch):
+    """Construct the map grid from the batch object
+
+    Parameters
+    ----------
+    batch : Batch object
+        The object returned by :func:`fetch_species_distributions`
+
+    Returns
+    -------
+    (xgrid, ygrid) : 1-D arrays
+        The grid corresponding to the values in batch.coverages
+    """
+    # x,y coordinates for corner cells
+    xmin = batch.x_left_lower_corner + batch.grid_size
+    xmax = xmin + (batch.Nx * batch.grid_size)
+    ymin = batch.y_left_lower_corner + batch.grid_size
+    ymax = ymin + (batch.Ny * batch.grid_size)
+
+    # x coordinates of the grid cells
+    xgrid = np.arange(xmin, xmax, batch.grid_size)
+    # y coordinates of the grid cells
+    ygrid = np.arange(ymin, ymax, batch.grid_size)
+
+    return (xgrid, ygrid)
+
+
 # Get matrices/arrays of species IDs and locations
 data = fetch_species_distributions()
 species_names = ['Bradypus Variegatus', 'Microryzomys Minutus']
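Note: with the private module sklearn.datasets.species_distributions no longer imported, the example defines construct_grids locally. A minimal sketch of how the helper behaves, using a hypothetical stand-in (a SimpleNamespace with made-up grid attributes) instead of the real downloaded Bunch:

import numpy as np
from types import SimpleNamespace

def construct_grids(batch):
    # same arithmetic as the helper inlined in the diff above
    xmin = batch.x_left_lower_corner + batch.grid_size
    xmax = xmin + (batch.Nx * batch.grid_size)
    ymin = batch.y_left_lower_corner + batch.grid_size
    ymax = ymin + (batch.Ny * batch.grid_size)
    return (np.arange(xmin, xmax, batch.grid_size),
            np.arange(ymin, ymax, batch.grid_size))

# hypothetical values; only the attributes the helper reads
batch = SimpleNamespace(x_left_lower_corner=-94.8, y_left_lower_corner=-56.05,
                        grid_size=0.05, Nx=1212, Ny=1592)
xgrid, ygrid = construct_grids(batch)
print(xgrid.size, ygrid.size)  # approximately Nx and Ny points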

dev/_downloads/06d1bf4510bd82b665c44c2bd2c364ae/plot_feature_selection_pipeline.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"from sklearn import svm\nfrom sklearn.datasets import samples_generator\nfrom sklearn.feature_selection import SelectKBest, f_regression\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import classification_report\n\nprint(__doc__)\n\n# import some data to play with\nX, y = samples_generator.make_classification(\n n_features=20, n_informative=3, n_redundant=0, n_classes=4,\n n_clusters_per_class=2)\n\nX_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)\n\n# ANOVA SVM-C\n# 1) anova filter, take 3 best ranked features\nanova_filter = SelectKBest(f_regression, k=3)\n# 2) svm\nclf = svm.LinearSVC()\n\nanova_svm = make_pipeline(anova_filter, clf)\nanova_svm.fit(X_train, y_train)\ny_pred = anova_svm.predict(X_test)\nprint(classification_report(y_test, y_pred))\n\ncoef = anova_svm[:-1].inverse_transform(anova_svm['linearsvc'].coef_)\nprint(coef)"
+"from sklearn import svm\nfrom sklearn.datasets import make_classification\nfrom sklearn.feature_selection import SelectKBest, f_regression\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import classification_report\n\nprint(__doc__)\n\n# import some data to play with\nX, y = make_classification(\n n_features=20, n_informative=3, n_redundant=0, n_classes=4,\n n_clusters_per_class=2)\n\nX_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)\n\n# ANOVA SVM-C\n# 1) anova filter, take 3 best ranked features\nanova_filter = SelectKBest(f_regression, k=3)\n# 2) svm\nclf = svm.LinearSVC()\n\nanova_svm = make_pipeline(anova_filter, clf)\nanova_svm.fit(X_train, y_train)\ny_pred = anova_svm.predict(X_test)\nprint(classification_report(y_test, y_pred))\n\ncoef = anova_svm[:-1].inverse_transform(anova_svm['linearsvc'].coef_)\nprint(coef)"
 ]
 }
],
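Note: the substantive change is the import path. The generators that used to live in sklearn.datasets.samples_generator are importable from sklearn.datasets directly, which the cell now uses. A minimal sketch of the updated call (random_state added here only to make the toy output reproducible):

from sklearn.datasets import make_classification  # was: from sklearn.datasets import samples_generator

X, y = make_classification(n_features=20, n_informative=3, n_redundant=0,
                           n_classes=4, n_clusters_per_class=2, random_state=0)
print(X.shape, y.shape)  # (100, 20) (100,) with the default n_samples=100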

dev/_downloads/0855ac0efd714397341de370a68cf6f3/plot_mean_shift.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"print(__doc__)\n\nimport numpy as np\nfrom sklearn.cluster import MeanShift, estimate_bandwidth\nfrom sklearn.datasets.samples_generator import make_blobs\n\n# #############################################################################\n# Generate sample data\ncenters = [[1, 1], [-1, -1], [1, -1]]\nX, _ = make_blobs(n_samples=10000, centers=centers, cluster_std=0.6)\n\n# #############################################################################\n# Compute clustering with MeanShift\n\n# The following bandwidth can be automatically detected using\nbandwidth = estimate_bandwidth(X, quantile=0.2, n_samples=500)\n\nms = MeanShift(bandwidth=bandwidth, bin_seeding=True)\nms.fit(X)\nlabels = ms.labels_\ncluster_centers = ms.cluster_centers_\n\nlabels_unique = np.unique(labels)\nn_clusters_ = len(labels_unique)\n\nprint(\"number of estimated clusters : %d\" % n_clusters_)\n\n# #############################################################################\n# Plot result\nimport matplotlib.pyplot as plt\nfrom itertools import cycle\n\nplt.figure(1)\nplt.clf()\n\ncolors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')\nfor k, col in zip(range(n_clusters_), colors):\n my_members = labels == k\n cluster_center = cluster_centers[k]\n plt.plot(X[my_members, 0], X[my_members, 1], col + '.')\n plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,\n markeredgecolor='k', markersize=14)\nplt.title('Estimated number of clusters: %d' % n_clusters_)\nplt.show()"
+"print(__doc__)\n\nimport numpy as np\nfrom sklearn.cluster import MeanShift, estimate_bandwidth\nfrom sklearn.datasets import make_blobs\n\n# #############################################################################\n# Generate sample data\ncenters = [[1, 1], [-1, -1], [1, -1]]\nX, _ = make_blobs(n_samples=10000, centers=centers, cluster_std=0.6)\n\n# #############################################################################\n# Compute clustering with MeanShift\n\n# The following bandwidth can be automatically detected using\nbandwidth = estimate_bandwidth(X, quantile=0.2, n_samples=500)\n\nms = MeanShift(bandwidth=bandwidth, bin_seeding=True)\nms.fit(X)\nlabels = ms.labels_\ncluster_centers = ms.cluster_centers_\n\nlabels_unique = np.unique(labels)\nn_clusters_ = len(labels_unique)\n\nprint(\"number of estimated clusters : %d\" % n_clusters_)\n\n# #############################################################################\n# Plot result\nimport matplotlib.pyplot as plt\nfrom itertools import cycle\n\nplt.figure(1)\nplt.clf()\n\ncolors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')\nfor k, col in zip(range(n_clusters_), colors):\n my_members = labels == k\n cluster_center = cluster_centers[k]\n plt.plot(X[my_members, 0], X[my_members, 1], col + '.')\n plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,\n markeredgecolor='k', markersize=14)\nplt.title('Estimated number of clusters: %d' % n_clusters_)\nplt.show()"
 ]
 }
],
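Note: the same import migration, here for make_blobs. A condensed, runnable sketch of what the cell computes (fewer samples and a fixed random_state, chosen here for speed and reproducibility):

import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth
from sklearn.datasets import make_blobs  # was: sklearn.datasets.samples_generator

X, _ = make_blobs(n_samples=1000, centers=[[1, 1], [-1, -1], [1, -1]],
                  cluster_std=0.6, random_state=0)
bandwidth = estimate_bandwidth(X, quantile=0.2, n_samples=500)  # from a subsample
ms = MeanShift(bandwidth=bandwidth, bin_seeding=True).fit(X)
print("estimated clusters:", len(np.unique(ms.labels_)))  # expect 3 for this layout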

dev/_downloads/18e2721d4cbbd390f886d71f471ce223/plot_species_distribution_modeling.py

Lines changed: 28 additions & 2 deletions
@@ -45,9 +45,8 @@
 import numpy as np
 import matplotlib.pyplot as plt
 
-from sklearn.datasets.base import Bunch
+from sklearn.utils import Bunch
 from sklearn.datasets import fetch_species_distributions
-from sklearn.datasets.species_distributions import construct_grids
 from sklearn import svm, metrics
 
 # if basemap is available, we'll use it.
@@ -61,6 +60,33 @@
 print(__doc__)
 
 
+def construct_grids(batch):
+    """Construct the map grid from the batch object
+
+    Parameters
+    ----------
+    batch : Batch object
+        The object returned by :func:`fetch_species_distributions`
+
+    Returns
+    -------
+    (xgrid, ygrid) : 1-D arrays
+        The grid corresponding to the values in batch.coverages
+    """
+    # x,y coordinates for corner cells
+    xmin = batch.x_left_lower_corner + batch.grid_size
+    xmax = xmin + (batch.Nx * batch.grid_size)
+    ymin = batch.y_left_lower_corner + batch.grid_size
+    ymax = ymin + (batch.Ny * batch.grid_size)
+
+    # x coordinates of the grid cells
+    xgrid = np.arange(xmin, xmax, batch.grid_size)
+    # y coordinates of the grid cells
+    ygrid = np.arange(ymin, ymax, batch.grid_size)
+
+    return (xgrid, ygrid)
+
+
 def create_species_bunch(species_name, train, test, coverages, xgrid, ygrid):
     """Create a bunch with information about a particular organism
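Note: besides inlining construct_grids, this file moves Bunch to its public location in sklearn.utils. A quick sketch of what Bunch provides (the field names here are illustrative, not taken from the example):

from sklearn.utils import Bunch  # was: from sklearn.datasets.base import Bunch

b = Bunch(species="bradypus_variegatus_0", n_points=116)
print(b.species)      # attribute-style access
print(b["n_points"])  # dict-style access to the same data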

dev/_downloads/27375e56cec22d4d2650b24abbf811d2/plot_lasso_dense_vs_sparse_data.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
 from scipy import sparse
 from scipy import linalg
 
-from sklearn.datasets.samples_generator import make_regression
+from sklearn.datasets import make_regression
 from sklearn.linear_model import Lasso
 
 
dev/_downloads/275c1a8902428a3a52b079bb6f13591a/plot_sgd_separating_hyperplane.py

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@
 import numpy as np
 import matplotlib.pyplot as plt
 from sklearn.linear_model import SGDClassifier
-from sklearn.datasets.samples_generator import make_blobs
+from sklearn.datasets import make_blobs
 
 # we create 50 separable points
 X, Y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60)
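Note: only the make_blobs import changes. For context, a minimal sketch of the fit this example plots; the SGDClassifier settings here are illustrative, not necessarily those of the example:

from sklearn.datasets import make_blobs  # was: sklearn.datasets.samples_generator
from sklearn.linear_model import SGDClassifier

X, Y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60)
clf = SGDClassifier(loss="hinge", alpha=0.01, max_iter=200).fit(X, Y)
print(clf.coef_, clf.intercept_)  # parameters of the separating hyperplane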

dev/_downloads/2c18712ac7ff7f791c5400c9935d733d/plot_species_kde.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"# Author: Jake Vanderplas <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import fetch_species_distributions\nfrom sklearn.datasets.species_distributions import construct_grids\nfrom sklearn.neighbors import KernelDensity\n\n# if basemap is available, we'll use it.\n# otherwise, we'll improvise later...\ntry:\n from mpl_toolkits.basemap import Basemap\n basemap = True\nexcept ImportError:\n basemap = False\n\n# Get matrices/arrays of species IDs and locations\ndata = fetch_species_distributions()\nspecies_names = ['Bradypus Variegatus', 'Microryzomys Minutus']\n\nXtrain = np.vstack([data['train']['dd lat'],\n data['train']['dd long']]).T\nytrain = np.array([d.decode('ascii').startswith('micro')\n for d in data['train']['species']], dtype='int')\nXtrain *= np.pi / 180. # Convert lat/long to radians\n\n# Set up the data grid for the contour plot\nxgrid, ygrid = construct_grids(data)\nX, Y = np.meshgrid(xgrid[::5], ygrid[::5][::-1])\nland_reference = data.coverages[6][::5, ::5]\nland_mask = (land_reference > -9999).ravel()\n\nxy = np.vstack([Y.ravel(), X.ravel()]).T\nxy = xy[land_mask]\nxy *= np.pi / 180.\n\n# Plot map of South America with distributions of each species\nfig = plt.figure()\nfig.subplots_adjust(left=0.05, right=0.95, wspace=0.05)\n\nfor i in range(2):\n plt.subplot(1, 2, i + 1)\n\n # construct a kernel density estimate of the distribution\n print(\" - computing KDE in spherical coordinates\")\n kde = KernelDensity(bandwidth=0.04, metric='haversine',\n kernel='gaussian', algorithm='ball_tree')\n kde.fit(Xtrain[ytrain == i])\n\n # evaluate only on the land: -9999 indicates ocean\n Z = np.full(land_mask.shape[0], -9999, dtype='int')\n Z[land_mask] = np.exp(kde.score_samples(xy))\n Z = Z.reshape(X.shape)\n\n # plot contours of the density\n levels = np.linspace(0, Z.max(), 25)\n plt.contourf(X, Y, Z, levels=levels, cmap=plt.cm.Reds)\n\n if basemap:\n print(\" - plot coastlines using basemap\")\n m = Basemap(projection='cyl', llcrnrlat=Y.min(),\n urcrnrlat=Y.max(), llcrnrlon=X.min(),\n urcrnrlon=X.max(), resolution='c')\n m.drawcoastlines()\n m.drawcountries()\n else:\n print(\" - plot coastlines from coverage\")\n plt.contour(X, Y, land_reference,\n levels=[-9998], colors=\"k\",\n linestyles=\"solid\")\n plt.xticks([])\n plt.yticks([])\n\n plt.title(species_names[i])\n\nplt.show()"
+"# Author: Jake Vanderplas <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import fetch_species_distributions\nfrom sklearn.neighbors import KernelDensity\n\n# if basemap is available, we'll use it.\n# otherwise, we'll improvise later...\ntry:\n from mpl_toolkits.basemap import Basemap\n basemap = True\nexcept ImportError:\n basemap = False\n\n\ndef construct_grids(batch):\n \"\"\"Construct the map grid from the batch object\n\n Parameters\n ----------\n batch : Batch object\n The object returned by :func:`fetch_species_distributions`\n\n Returns\n -------\n (xgrid, ygrid) : 1-D arrays\n The grid corresponding to the values in batch.coverages\n \"\"\"\n # x,y coordinates for corner cells\n xmin = batch.x_left_lower_corner + batch.grid_size\n xmax = xmin + (batch.Nx * batch.grid_size)\n ymin = batch.y_left_lower_corner + batch.grid_size\n ymax = ymin + (batch.Ny * batch.grid_size)\n\n # x coordinates of the grid cells\n xgrid = np.arange(xmin, xmax, batch.grid_size)\n # y coordinates of the grid cells\n ygrid = np.arange(ymin, ymax, batch.grid_size)\n\n return (xgrid, ygrid)\n\n\n# Get matrices/arrays of species IDs and locations\ndata = fetch_species_distributions()\nspecies_names = ['Bradypus Variegatus', 'Microryzomys Minutus']\n\nXtrain = np.vstack([data['train']['dd lat'],\n data['train']['dd long']]).T\nytrain = np.array([d.decode('ascii').startswith('micro')\n for d in data['train']['species']], dtype='int')\nXtrain *= np.pi / 180. # Convert lat/long to radians\n\n# Set up the data grid for the contour plot\nxgrid, ygrid = construct_grids(data)\nX, Y = np.meshgrid(xgrid[::5], ygrid[::5][::-1])\nland_reference = data.coverages[6][::5, ::5]\nland_mask = (land_reference > -9999).ravel()\n\nxy = np.vstack([Y.ravel(), X.ravel()]).T\nxy = xy[land_mask]\nxy *= np.pi / 180.\n\n# Plot map of South America with distributions of each species\nfig = plt.figure()\nfig.subplots_adjust(left=0.05, right=0.95, wspace=0.05)\n\nfor i in range(2):\n plt.subplot(1, 2, i + 1)\n\n # construct a kernel density estimate of the distribution\n print(\" - computing KDE in spherical coordinates\")\n kde = KernelDensity(bandwidth=0.04, metric='haversine',\n kernel='gaussian', algorithm='ball_tree')\n kde.fit(Xtrain[ytrain == i])\n\n # evaluate only on the land: -9999 indicates ocean\n Z = np.full(land_mask.shape[0], -9999, dtype='int')\n Z[land_mask] = np.exp(kde.score_samples(xy))\n Z = Z.reshape(X.shape)\n\n # plot contours of the density\n levels = np.linspace(0, Z.max(), 25)\n plt.contourf(X, Y, Z, levels=levels, cmap=plt.cm.Reds)\n\n if basemap:\n print(\" - plot coastlines using basemap\")\n m = Basemap(projection='cyl', llcrnrlat=Y.min(),\n urcrnrlat=Y.max(), llcrnrlon=X.min(),\n urcrnrlon=X.max(), resolution='c')\n m.drawcoastlines()\n m.drawcountries()\n else:\n print(\" - plot coastlines from coverage\")\n plt.contour(X, Y, land_reference,\n levels=[-9998], colors=\"k\",\n linestyles=\"solid\")\n plt.xticks([])\n plt.yticks([])\n\n plt.title(species_names[i])\n\nplt.show()"
 ]
 }
],

dev/_downloads/31ed7d76091fdf7cbba173b644810790/plot_spectral_coclustering.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"print(__doc__)\n\n# Author: Kemal Eren <[email protected]>\n# License: BSD 3 clause\n\nimport numpy as np\nfrom matplotlib import pyplot as plt\n\nfrom sklearn.datasets import make_biclusters\nfrom sklearn.datasets import samples_generator as sg\nfrom sklearn.cluster import SpectralCoclustering\nfrom sklearn.metrics import consensus_score\n\ndata, rows, columns = make_biclusters(\n shape=(300, 300), n_clusters=5, noise=5,\n shuffle=False, random_state=0)\n\nplt.matshow(data, cmap=plt.cm.Blues)\nplt.title(\"Original dataset\")\n\ndata, row_idx, col_idx = sg._shuffle(data, random_state=0)\nplt.matshow(data, cmap=plt.cm.Blues)\nplt.title(\"Shuffled dataset\")\n\nmodel = SpectralCoclustering(n_clusters=5, random_state=0)\nmodel.fit(data)\nscore = consensus_score(model.biclusters_,\n (rows[:, row_idx], columns[:, col_idx]))\n\nprint(\"consensus score: {:.3f}\".format(score))\n\nfit_data = data[np.argsort(model.row_labels_)]\nfit_data = fit_data[:, np.argsort(model.column_labels_)]\n\nplt.matshow(fit_data, cmap=plt.cm.Blues)\nplt.title(\"After biclustering; rearranged to show biclusters\")\n\nplt.show()"
+"print(__doc__)\n\n# Author: Kemal Eren <[email protected]>\n# License: BSD 3 clause\n\nimport numpy as np\nfrom matplotlib import pyplot as plt\n\nfrom sklearn.datasets import make_biclusters\nfrom sklearn.cluster import SpectralCoclustering\nfrom sklearn.metrics import consensus_score\n\ndata, rows, columns = make_biclusters(\n shape=(300, 300), n_clusters=5, noise=5,\n shuffle=False, random_state=0)\n\nplt.matshow(data, cmap=plt.cm.Blues)\nplt.title(\"Original dataset\")\n\n# shuffle clusters\nrng = np.random.RandomState(0)\nrow_idx = rng.permutation(data.shape[0])\ncol_idx = rng.permutation(data.shape[1])\ndata = data[row_idx][:, col_idx]\n\nplt.matshow(data, cmap=plt.cm.Blues)\nplt.title(\"Shuffled dataset\")\n\nmodel = SpectralCoclustering(n_clusters=5, random_state=0)\nmodel.fit(data)\nscore = consensus_score(model.biclusters_,\n (rows[:, row_idx], columns[:, col_idx]))\n\nprint(\"consensus score: {:.3f}\".format(score))\n\nfit_data = data[np.argsort(model.row_labels_)]\nfit_data = fit_data[:, np.argsort(model.column_labels_)]\n\nplt.matshow(fit_data, cmap=plt.cm.Blues)\nplt.title(\"After biclustering; rearranged to show biclusters\")\n\nplt.show()"
 ]
 }
],
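Note: the interesting edit here: the private sg._shuffle helper is replaced with an explicit, seeded row/column permutation, which also exposes row_idx and col_idx for the later consensus_score call. A tiny standalone sketch of the shuffle pattern:

import numpy as np

rng = np.random.RandomState(0)
data = np.arange(12).reshape(3, 4)
row_idx = rng.permutation(data.shape[0])  # shuffled row order
col_idx = rng.permutation(data.shape[1])  # shuffled column order
shuffled = data[row_idx][:, col_idx]      # apply both permutations
print(shuffled)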

dev/_downloads/3f03e7551c80c7ecdd383bb1773446cc/plot_lasso_dense_vs_sparse_data.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"print(__doc__)\n\nfrom time import time\nfrom scipy import sparse\nfrom scipy import linalg\n\nfrom sklearn.datasets.samples_generator import make_regression\nfrom sklearn.linear_model import Lasso\n\n\n# #############################################################################\n# The two Lasso implementations on Dense data\nprint(\"--- Dense matrices\")\n\nX, y = make_regression(n_samples=200, n_features=5000, random_state=0)\nX_sp = sparse.coo_matrix(X)\n\nalpha = 1\nsparse_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)\ndense_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)\n\nt0 = time()\nsparse_lasso.fit(X_sp, y)\nprint(\"Sparse Lasso done in %fs\" % (time() - t0))\n\nt0 = time()\ndense_lasso.fit(X, y)\nprint(\"Dense Lasso done in %fs\" % (time() - t0))\n\nprint(\"Distance between coefficients : %s\"\n % linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_))\n\n# #############################################################################\n# The two Lasso implementations on Sparse data\nprint(\"--- Sparse matrices\")\n\nXs = X.copy()\nXs[Xs < 2.5] = 0.0\nXs = sparse.coo_matrix(Xs)\nXs = Xs.tocsc()\n\nprint(\"Matrix density : %s %%\" % (Xs.nnz / float(X.size) * 100))\n\nalpha = 0.1\nsparse_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=10000)\ndense_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=10000)\n\nt0 = time()\nsparse_lasso.fit(Xs, y)\nprint(\"Sparse Lasso done in %fs\" % (time() - t0))\n\nt0 = time()\ndense_lasso.fit(Xs.toarray(), y)\nprint(\"Dense Lasso done in %fs\" % (time() - t0))\n\nprint(\"Distance between coefficients : %s\"\n % linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_))"
+"print(__doc__)\n\nfrom time import time\nfrom scipy import sparse\nfrom scipy import linalg\n\nfrom sklearn.datasets import make_regression\nfrom sklearn.linear_model import Lasso\n\n\n# #############################################################################\n# The two Lasso implementations on Dense data\nprint(\"--- Dense matrices\")\n\nX, y = make_regression(n_samples=200, n_features=5000, random_state=0)\nX_sp = sparse.coo_matrix(X)\n\nalpha = 1\nsparse_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)\ndense_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)\n\nt0 = time()\nsparse_lasso.fit(X_sp, y)\nprint(\"Sparse Lasso done in %fs\" % (time() - t0))\n\nt0 = time()\ndense_lasso.fit(X, y)\nprint(\"Dense Lasso done in %fs\" % (time() - t0))\n\nprint(\"Distance between coefficients : %s\"\n % linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_))\n\n# #############################################################################\n# The two Lasso implementations on Sparse data\nprint(\"--- Sparse matrices\")\n\nXs = X.copy()\nXs[Xs < 2.5] = 0.0\nXs = sparse.coo_matrix(Xs)\nXs = Xs.tocsc()\n\nprint(\"Matrix density : %s %%\" % (Xs.nnz / float(X.size) * 100))\n\nalpha = 0.1\nsparse_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=10000)\ndense_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=10000)\n\nt0 = time()\nsparse_lasso.fit(Xs, y)\nprint(\"Sparse Lasso done in %fs\" % (time() - t0))\n\nt0 = time()\ndense_lasso.fit(Xs.toarray(), y)\nprint(\"Dense Lasso done in %fs\" % (time() - t0))\n\nprint(\"Distance between coefficients : %s\"\n % linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_))"
 ]
 }
],
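Note: again only the make_regression import changes; the example itself benchmarks Lasso on dense versus sparse input. A shortened sketch of that comparison under the new import (a smaller problem than the example's, chosen here to run quickly):

from scipy import sparse
from sklearn.datasets import make_regression  # was: sklearn.datasets.samples_generator
from sklearn.linear_model import Lasso

X, y = make_regression(n_samples=100, n_features=500, random_state=0)
dense = Lasso(alpha=1.0, fit_intercept=False, max_iter=1000).fit(X, y)
sp = Lasso(alpha=1.0, fit_intercept=False, max_iter=1000).fit(sparse.csc_matrix(X), y)
print(abs(dense.coef_ - sp.coef_).max())  # the two fits should agree closely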
