"""
==============================================================
Plot Ridge coefficients as a function of the L2 regularization
==============================================================

.. currentmodule:: sklearn.linear_model

:class:`Ridge` regression is the estimator used in this example.
Each color in the left plot represents a different dimension of the
coefficient vector, displayed as a function of the regularization
parameter. This example illustrates how Ridge regression finds a
well-defined solution and how regularization affects the magnitude of
the coefficients. The plot on the right shows how far the estimated
coefficients are from the true ones as a function of regularization.

In this example the target y is generated as a linear function of the
input features: y = X*w + c. The coefficient vector w is randomly
sampled from a normal distribution, whereas the bias term c is set to
a constant.

As alpha tends toward zero, the coefficients found by Ridge regression
stabilize towards the randomly sampled vector w. For large alpha
(strong regularization) the coefficients shrink towards zero, leading
to a simpler but more biased solution. These effects can be observed
in the left plot.

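Concretely, for a given alpha, :class:`Ridge` estimates the coefficients
by minimizing the penalized least-squares objective (the intercept c is
fit but not penalized):

.. math:: \|y - Xw\|_2^2 + \alpha \|w\|_2^2

For small alpha the penalty is negligible and the data-fit term
dominates; for large alpha the penalty dominates, which is why the
weights shrink towards zero.
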
The right plot shows the mean squared error between the coefficients
found by the model and the true vector w. Less regularized models
recover the exact coefficients (the error is close to 0), while
stronger regularization increases the error.

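In terms of the quantity plotted on the right, the error at a given
alpha is

.. math:: \frac{1}{p} \sum_{j=1}^{p} (\hat{w}_j - w_j)^2

where p is the number of features (10 here) and :math:`\hat{w}` is the
coefficient vector estimated for that alpha.
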
Please note that in this example the data is noise-free, hence it is
possible to recover the exact coefficients.
"""

# Author: Kornel Kielczewski -- <[email protected]>

print(__doc__)

import matplotlib.pyplot as plt
import numpy as np

from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

clf = Ridge()

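# Create a noise-free regression problem; coef=True makes
# make_regression also return the true coefficient vector w.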
X, y, w = make_regression(n_samples=10, n_features=10, coef=True,
                          random_state=1, bias=3.5)

coefs = []
errors = []

alphas = np.logspace(-6, 6, 200)

# Train the model with different regularization strengths
for a in alphas:
    clf.set_params(alpha=a)
    clf.fit(X, y)
    coefs.append(clf.coef_)
    errors.append(mean_squared_error(clf.coef_, w))

# Display results
plt.figure(figsize=(20, 6))

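# Left plot: coefficient values as a function of alpha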
plt.subplot(121)
ax = plt.gca()
ax.plot(alphas, coefs)
ax.set_xscale('log')
plt.xlabel('alpha')
plt.ylabel('weights')
plt.title('Ridge coefficients as a function of the regularization')
plt.axis('tight')

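# Right plot: mean squared error between the learned and true coefficients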
plt.subplot(122)
ax = plt.gca()
ax.plot(alphas, errors)
ax.set_xscale('log')
plt.xlabel('alpha')
plt.ylabel('error')
plt.title('Coefficient error as a function of the regularization')
plt.axis('tight')

plt.show()