"""
===================================
Polynomial and Spline interpolation
===================================

This example demonstrates how to approximate a function with polynomials up to
degree ``degree`` by using ridge regression. We show two different ways given
``n_samples`` of 1d points ``x_i``:

- :class:`~sklearn.preprocessing.PolynomialFeatures` generates all monomials
  up to ``degree``. This gives us the so-called Vandermonde matrix with
  ``n_samples`` rows and ``degree + 1`` columns::

    [[1, x_0, x_0 ** 2, x_0 ** 3, ..., x_0 ** degree],
     [1, x_1, x_1 ** 2, x_1 ** 3, ..., x_1 ** degree],
     ...]

  Intuitively, this matrix can be interpreted as a matrix of pseudo features
  (the points raised to some power). The matrix is akin to (but different
  from) the matrix induced by a polynomial kernel.

- :class:`~sklearn.preprocessing.SplineTransformer` generates B-spline basis
  functions. A basis function of a B-spline is a piece-wise polynomial
  function of degree ``degree`` that is non-zero only between ``degree + 1``
  consecutive knots. Given ``n_knots`` knots, this results in a matrix of
  ``n_samples`` rows and ``n_knots + degree - 1`` columns::

    [[basis_1(x_0), basis_2(x_0), ...],
     [basis_1(x_1), basis_2(x_1), ...],
     ...]

This example shows that these two transformers are well suited to model
non-linear effects with a linear model, using a pipeline to add non-linear
features. Kernel methods extend this idea and can induce very high (even
infinite) dimensional feature spaces.
"""
print(__doc__)

# Author: Mathieu Blondel
#         Jake Vanderplas
#         Christian Lorentzen
# License: BSD 3 clause

import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures, SplineTransformer
from sklearn.pipeline import make_pipeline
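
# %%
# As a quick sketch of the Vandermonde matrix described in the docstring
# above, we can expand a tiny, made-up input (chosen here purely for
# illustration) with :class:`~sklearn.preprocessing.PolynomialFeatures`:

# each row of the toy 2D input becomes [1, x, x**2, x**3]
print(PolynomialFeatures(degree=3).fit_transform([[2.0], [3.0]]))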
# %%
# We start by defining a function that we intend to approximate and prepare
# plotting it.

def f(x):
    """Function to be approximated by polynomial interpolation."""
    return x * np.sin(x)


# whole range we want to plot
x_plot = np.linspace(-1, 11, 100)

# %%
# To make it interesting, we only give a small subset of points to train on.
x_train = np.linspace(0, 10, 100)
rng = np.random.RandomState(0)
x_train = np.sort(rng.choice(x_train, size=20, replace=False))
y_train = f(x_train)

# create 2D-array versions of these arrays to feed to transformers
X_train = x_train[:, np.newaxis]
X_plot = x_plot[:, np.newaxis]
# %%
# Now we are ready to create polynomial features and splines, fit on the
# training points and show how well they interpolate.

# plot function
lw = 2
fig, ax = plt.subplots()
ax.set_prop_cycle(color=[
    "black", "teal", "yellowgreen", "gold", "darkorange", "tomato"
])
ax.plot(x_plot, f(x_plot), linewidth=lw, label="ground truth")

# plot training points
ax.scatter(x_train, y_train, label="training points")

# polynomial features
for degree in [3, 4, 5]:
    model = make_pipeline(PolynomialFeatures(degree), Ridge(alpha=1e-3))
    model.fit(X_train, y_train)
    y_plot = model.predict(X_plot)
    ax.plot(x_plot, y_plot, label=f"degree {degree}")

# B-spline with 4 + 3 - 1 = 6 basis functions
model = make_pipeline(SplineTransformer(n_knots=4, degree=3),
                      Ridge(alpha=1e-3))
model.fit(X_train, y_train)

y_plot = model.predict(X_plot)
ax.plot(x_plot, y_plot, label="B-spline")
ax.legend(loc='lower center')
ax.set_ylim(-20, 10)
plt.show()
# %%
# This shows nicely that higher degree polynomials can fit the data better.
# But at the same time, too high powers can show unwanted oscillatory
# behaviour and are particularly dangerous for extrapolation beyond the range
# of the fitted data. This is an advantage of B-splines. They usually fit the
# data as well as polynomials and show very nice and smooth behaviour. They
# also have good options to control the extrapolation, which defaults to
# continuing with a constant. Note that most often, you would rather increase
# the number of knots but keep ``degree=3``, for instance as sketched below.
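
# %%
# As a small illustrative sketch (``n_knots=10`` is just one possible choice,
# not a recommendation from this example), a denser spline basis with the same
# degree could be built as follows:

model_dense = make_pipeline(SplineTransformer(n_knots=10, degree=3),
                            Ridge(alpha=1e-3))
model_dense.fit(X_train, y_train)
# by the formula above, this basis has n_knots + degree - 1 = 12 columns
print(model_dense[0].transform(X_train).shape)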
# %%
# In order to give more insight into the generated feature bases, we plot all
# columns of both transformers separately.
fig, axes = plt.subplots(ncols=2, figsize=(16, 5))
pft = PolynomialFeatures(degree=3).fit(X_train)
axes[0].plot(x_plot, pft.transform(X_plot))
axes[0].legend(axes[0].lines, [f"degree {n}" for n in range(4)])
axes[0].set_title("PolynomialFeatures")

splt = SplineTransformer(n_knots=4, degree=3).fit(X_train)
axes[1].plot(x_plot, splt.transform(X_plot))
axes[1].legend(axes[1].lines, [f"spline {n}" for n in range(6)])
axes[1].set_title("SplineTransformer")

# plot knots of spline
knots = splt.bsplines_[0].t
axes[1].vlines(knots[3:-3], ymin=0, ymax=0.8, linestyles='dashed')
plt.show()
# %%
# In the left plot, we recognize the lines corresponding to simple monomials
# from ``x**0`` to ``x**3``. In the right figure, we see the six B-spline
# basis functions of ``degree=3`` and also the four knot positions that were
# chosen during ``fit``. Note that there are ``degree`` additional knots both
# to the left and to the right of the fitted interval. These exist for
# technical reasons, so we refrain from showing them. Every basis function has
# local support and is continued as a constant beyond the fitted range. This
# extrapolating behaviour can be changed with the argument ``extrapolation``,
# for instance as sketched below.
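
# %%
# As a final sketch, we could compare the default constant extrapolation with
# a linear one. The value ``extrapolation="linear"`` is used here only as one
# plausible setting; see the :class:`~sklearn.preprocessing.SplineTransformer`
# documentation for the options it actually supports.

fig, ax = plt.subplots()
ax.scatter(x_train, y_train, label="training points")
for extrapolation in ["constant", "linear"]:
    model = make_pipeline(
        SplineTransformer(n_knots=4, degree=3, extrapolation=extrapolation),
        Ridge(alpha=1e-3))
    model.fit(X_train, y_train)
    ax.plot(x_plot, model.predict(X_plot),
            label=f"extrapolation={extrapolation!r}")
ax.legend(loc="lower center")
plt.show()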