Skip to content

Commit 26d9445

Browse files
committed
Pushing the docs to dev/ for branch: master, commit 075d4245681110386cb4312cbdbd0c82776290ac
1 parent dafd258 commit 26d9445

File tree

1,219 files changed

+5087
-4007
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,219 files changed

+5087
-4007
lines changed
Lines changed: 67 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,67 @@
1+
"""
2+
==========================
3+
Non-negative least squares
4+
==========================
5+
6+
In this example, we fit a linear model with positive constraints on the
7+
regression coefficients and compare the estimated coefficients to a classic
8+
linear regression.
9+
"""
10+
print(__doc__)
11+
import numpy as np
12+
import matplotlib.pyplot as plt
13+
from sklearn.metrics import r2_score
14+
15+
# %%
16+
# Generate some random data
17+
np.random.seed(42)
18+
19+
n_samples, n_features = 200, 50
20+
X = np.random.randn(n_samples, n_features)
21+
true_coef = 3 * np.random.randn(n_features)
22+
# Threshold coefficients to render them non-negative
23+
true_coef[true_coef < 0] = 0
24+
y = np.dot(X, true_coef)
25+
26+
# Add some noise
27+
y += 5 * np.random.normal(size=(n_samples, ))
28+
29+
# %%
30+
# Split the data in train set and test set
31+
from sklearn.model_selection import train_test_split
32+
33+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
34+
35+
# %%
36+
# Fit the Non-Negative least squares.
37+
from sklearn.linear_model import LinearRegression
38+
39+
reg_nnls = LinearRegression(positive=True)
40+
y_pred_nnls = reg_nnls.fit(X_train, y_train).predict(X_test)
41+
r2_score_nnls = r2_score(y_test, y_pred_nnls)
42+
print("NNLS R2 score", r2_score_nnls)
43+
44+
# %%
45+
# Fit an OLS.
46+
reg_ols = LinearRegression()
47+
y_pred_ols = reg_ols.fit(X_train, y_train).predict(X_test)
48+
r2_score_ols = r2_score(y_test, y_pred_ols)
49+
print("OLS R2 score", r2_score_ols)
50+
51+
52+
# %%
53+
# Comparing the regression coefficients between OLS and NNLS, we can observe
54+
# they are highly correlated (the dashed line is the identity relation),
55+
# but the non-negative constraint shrinks some to 0.
56+
# The Non-Negative Least squares inherently yield sparse results.
57+
58+
fig, ax = plt.subplots()
59+
ax.plot(reg_ols.coef_, reg_nnls.coef_, linewidth=0, marker=".")
60+
61+
low_x, high_x = ax.get_xlim()
62+
low_y, high_y = ax.get_ylim()
63+
low = max(low_x, low_y)
64+
high = min(high_x, high_y)
65+
ax.plot([low, high], [low, high], ls="--", c=".3", alpha=.5)
66+
ax.set_xlabel("OLS regression coefficients", fontweight="bold")
67+
ax.set_ylabel("NNLS regression coefficients", fontweight="bold")
Binary file not shown.
Lines changed: 144 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,144 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {
7+
"collapsed": false
8+
},
9+
"outputs": [],
10+
"source": [
11+
"%matplotlib inline"
12+
]
13+
},
14+
{
15+
"cell_type": "markdown",
16+
"metadata": {},
17+
"source": [
18+
"\n# Non-negative least squares\n\n\nIn this example, we fit a linear model with positive constraints on the\nregression coefficients and compare the estimated coefficients to a classic\nlinear regression.\n"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": null,
24+
"metadata": {
25+
"collapsed": false
26+
},
27+
"outputs": [],
28+
"source": [
29+
"print(__doc__)\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.metrics import r2_score"
30+
]
31+
},
32+
{
33+
"cell_type": "markdown",
34+
"metadata": {},
35+
"source": [
36+
"Generate some random data\n\n"
37+
]
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": null,
42+
"metadata": {
43+
"collapsed": false
44+
},
45+
"outputs": [],
46+
"source": [
47+
"np.random.seed(42)\n\nn_samples, n_features = 200, 50\nX = np.random.randn(n_samples, n_features)\ntrue_coef = 3 * np.random.randn(n_features)\n# Threshold coefficients to render them non-negative\ntrue_coef[true_coef < 0] = 0\ny = np.dot(X, true_coef)\n\n# Add some noise\ny += 5 * np.random.normal(size=(n_samples, ))"
48+
]
49+
},
50+
{
51+
"cell_type": "markdown",
52+
"metadata": {},
53+
"source": [
54+
"Split the data in train set and test set\n\n"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": null,
60+
"metadata": {
61+
"collapsed": false
62+
},
63+
"outputs": [],
64+
"source": [
65+
"from sklearn.model_selection import train_test_split\n\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)"
66+
]
67+
},
68+
{
69+
"cell_type": "markdown",
70+
"metadata": {},
71+
"source": [
72+
"Fit the Non-Negative least squares.\n\n"
73+
]
74+
},
75+
{
76+
"cell_type": "code",
77+
"execution_count": null,
78+
"metadata": {
79+
"collapsed": false
80+
},
81+
"outputs": [],
82+
"source": [
83+
"from sklearn.linear_model import LinearRegression\n\nreg_nnls = LinearRegression(positive=True)\ny_pred_nnls = reg_nnls.fit(X_train, y_train).predict(X_test)\nr2_score_nnls = r2_score(y_test, y_pred_nnls)\nprint(\"NNLS R2 score\", r2_score_nnls)"
84+
]
85+
},
86+
{
87+
"cell_type": "markdown",
88+
"metadata": {},
89+
"source": [
90+
"Fit an OLS.\n\n"
91+
]
92+
},
93+
{
94+
"cell_type": "code",
95+
"execution_count": null,
96+
"metadata": {
97+
"collapsed": false
98+
},
99+
"outputs": [],
100+
"source": [
101+
"reg_ols = LinearRegression()\ny_pred_ols = reg_ols.fit(X_train, y_train).predict(X_test)\nr2_score_ols = r2_score(y_test, y_pred_ols)\nprint(\"OLS R2 score\", r2_score_ols)"
102+
]
103+
},
104+
{
105+
"cell_type": "markdown",
106+
"metadata": {},
107+
"source": [
108+
"Comparing the regression coefficients between OLS and NNLS, we can observe\nthey are highly correlated (the dashed line is the identity relation),\nbut the non-negative constraint shrinks some to 0.\nNon-negative least squares inherently yields sparse results.\n\n"
109+
]
110+
},
111+
{
112+
"cell_type": "code",
113+
"execution_count": null,
114+
"metadata": {
115+
"collapsed": false
116+
},
117+
"outputs": [],
118+
"source": [
119+
"fig, ax = plt.subplots()\nax.plot(reg_ols.coef_, reg_nnls.coef_, linewidth=0, marker=\".\")\n\nlow_x, high_x = ax.get_xlim()\nlow_y, high_y = ax.get_ylim()\nlow = max(low_x, low_y)\nhigh = min(high_x, high_y)\nax.plot([low, high], [low, high], ls=\"--\", c=\".3\", alpha=.5)\nax.set_xlabel(\"OLS regression coefficients\", fontweight=\"bold\")\nax.set_ylabel(\"NNLS regression coefficients\", fontweight=\"bold\")"
120+
]
121+
}
122+
],
123+
"metadata": {
124+
"kernelspec": {
125+
"display_name": "Python 3",
126+
"language": "python",
127+
"name": "python3"
128+
},
129+
"language_info": {
130+
"codemirror_mode": {
131+
"name": "ipython",
132+
"version": 3
133+
},
134+
"file_extension": ".py",
135+
"mimetype": "text/x-python",
136+
"name": "python",
137+
"nbconvert_exporter": "python",
138+
"pygments_lexer": "ipython3",
139+
"version": "3.8.5"
140+
}
141+
},
142+
"nbformat": 4,
143+
"nbformat_minor": 0
144+
}
Binary file not shown.

dev/_downloads/scikit-learn-docs.pdf

31 KB
Binary file not shown.

dev/_images/iris.png

0 Bytes
484 Bytes
484 Bytes
696 Bytes
696 Bytes

0 commit comments

Comments
 (0)