Skip to content

Commit 25b074b

Browse files
committed
Pushing the docs to dev/ for branch: main, commit f853e78b0aa0ff79912765f39b31677b3e5a104e
1 parent 09887d1 commit 25b074b

File tree

1,232 files changed

+5195
-4867
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,232 files changed

+5195
-4867
lines changed
Binary file not shown.

dev/_downloads/23e3d7fa2388aef4e9a60c4a6caf166d/plot_face_recognition.ipynb

Lines changed: 153 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
"cell_type": "markdown",
1616
"metadata": {},
1717
"source": [
18-
"\n# Faces recognition example using eigenfaces and SVMs\n\nThe dataset used in this example is a preprocessed excerpt of the\n\"Labeled Faces in the Wild\", aka LFW_:\n\n http://vis-www.cs.umass.edu/lfw/lfw-funneled.tgz (233MB)\n\n\nExpected results for the top 5 most represented people in the dataset:\n\n================== ============ ======= ========== =======\n precision recall f1-score support\n================== ============ ======= ========== =======\n Ariel Sharon 0.67 0.92 0.77 13\n Colin Powell 0.75 0.78 0.76 60\n Donald Rumsfeld 0.78 0.67 0.72 27\n George W Bush 0.86 0.86 0.86 146\nGerhard Schroeder 0.76 0.76 0.76 25\n Hugo Chavez 0.67 0.67 0.67 15\n Tony Blair 0.81 0.69 0.75 36\n\n avg / total 0.80 0.80 0.80 322\n================== ============ ======= ========== =======\n"
18+
"\n# Faces recognition example using eigenfaces and SVMs\n\nThe dataset used in this example is a preprocessed excerpt of the\n\"Labeled Faces in the Wild\", aka LFW_:\n\n http://vis-www.cs.umass.edu/lfw/lfw-funneled.tgz (233MB)\n\n"
1919
]
2020
},
2121
{
@@ -26,7 +26,158 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"from time import time\nimport logging\nimport matplotlib.pyplot as plt\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.datasets import fetch_lfw_people\nfrom sklearn.metrics import classification_report\nfrom sklearn.metrics import confusion_matrix\nfrom sklearn.decomposition import PCA\nfrom sklearn.svm import SVC\n\n\n# Display progress logs on stdout\nlogging.basicConfig(level=logging.INFO, format=\"%(asctime)s %(message)s\")\n\n\n# #############################################################################\n# Download the data, if not already on disk and load it as numpy arrays\n\nlfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)\n\n# introspect the images arrays to find the shapes (for plotting)\nn_samples, h, w = lfw_people.images.shape\n\n# for machine learning we use the 2 data directly (as relative pixel\n# positions info is ignored by this model)\nX = lfw_people.data\nn_features = X.shape[1]\n\n# the label to predict is the id of the person\ny = lfw_people.target\ntarget_names = lfw_people.target_names\nn_classes = target_names.shape[0]\n\nprint(\"Total dataset size:\")\nprint(\"n_samples: %d\" % n_samples)\nprint(\"n_features: %d\" % n_features)\nprint(\"n_classes: %d\" % n_classes)\n\n\n# #############################################################################\n# Split into a training set and a test set using a stratified k fold\n\n# split into a training and testing set\nX_train, X_test, y_train, y_test = train_test_split(\n X, y, test_size=0.25, random_state=42\n)\n\n\n# #############################################################################\n# Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled\n# dataset): unsupervised feature extraction / dimensionality reduction\nn_components = 150\n\nprint(\n \"Extracting the top %d eigenfaces from %d faces\" % (n_components, X_train.shape[0])\n)\nt0 = time()\npca = 
PCA(n_components=n_components, svd_solver=\"randomized\", whiten=True).fit(X_train)\nprint(\"done in %0.3fs\" % (time() - t0))\n\neigenfaces = pca.components_.reshape((n_components, h, w))\n\nprint(\"Projecting the input data on the eigenfaces orthonormal basis\")\nt0 = time()\nX_train_pca = pca.transform(X_train)\nX_test_pca = pca.transform(X_test)\nprint(\"done in %0.3fs\" % (time() - t0))\n\n\n# #############################################################################\n# Train a SVM classification model\n\nprint(\"Fitting the classifier to the training set\")\nt0 = time()\nparam_grid = {\n \"C\": [1e3, 5e3, 1e4, 5e4, 1e5],\n \"gamma\": [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],\n}\nclf = GridSearchCV(SVC(kernel=\"rbf\", class_weight=\"balanced\"), param_grid)\nclf = clf.fit(X_train_pca, y_train)\nprint(\"done in %0.3fs\" % (time() - t0))\nprint(\"Best estimator found by grid search:\")\nprint(clf.best_estimator_)\n\n\n# #############################################################################\n# Quantitative evaluation of the model quality on the test set\n\nprint(\"Predicting people's names on the test set\")\nt0 = time()\ny_pred = clf.predict(X_test_pca)\nprint(\"done in %0.3fs\" % (time() - t0))\n\nprint(classification_report(y_test, y_pred, target_names=target_names))\nprint(confusion_matrix(y_test, y_pred, labels=range(n_classes)))\n\n\n# #############################################################################\n# Qualitative evaluation of the predictions using matplotlib\n\n\ndef plot_gallery(images, titles, h, w, n_row=3, n_col=4):\n \"\"\"Helper function to plot a gallery of portraits\"\"\"\n plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))\n plt.subplots_adjust(bottom=0, left=0.01, right=0.99, top=0.90, hspace=0.35)\n for i in range(n_row * n_col):\n plt.subplot(n_row, n_col, i + 1)\n plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)\n plt.title(titles[i], size=12)\n plt.xticks(())\n plt.yticks(())\n\n\n# plot the result of the 
prediction on a portion of the test set\n\n\ndef title(y_pred, y_test, target_names, i):\n pred_name = target_names[y_pred[i]].rsplit(\" \", 1)[-1]\n true_name = target_names[y_test[i]].rsplit(\" \", 1)[-1]\n return \"predicted: %s\\ntrue: %s\" % (pred_name, true_name)\n\n\nprediction_titles = [\n title(y_pred, y_test, target_names, i) for i in range(y_pred.shape[0])\n]\n\nplot_gallery(X_test, prediction_titles, h, w)\n\n# plot the gallery of the most significative eigenfaces\n\neigenface_titles = [\"eigenface %d\" % i for i in range(eigenfaces.shape[0])]\nplot_gallery(eigenfaces, eigenface_titles, h, w)\n\nplt.show()"
29+
"from time import time\nimport matplotlib.pyplot as plt\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.model_selection import RandomizedSearchCV\nfrom sklearn.datasets import fetch_lfw_people\nfrom sklearn.metrics import classification_report\nfrom sklearn.metrics import ConfusionMatrixDisplay\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.decomposition import PCA\nfrom sklearn.svm import SVC\nfrom sklearn.utils.fixes import loguniform"
30+
]
31+
},
32+
{
33+
"cell_type": "markdown",
34+
"metadata": {},
35+
"source": [
36+
"Download the data, if not already on disk and load it as numpy arrays\n\n"
37+
]
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": null,
42+
"metadata": {
43+
"collapsed": false
44+
},
45+
"outputs": [],
46+
"source": [
47+
"lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)\n\n# introspect the images arrays to find the shapes (for plotting)\nn_samples, h, w = lfw_people.images.shape\n\n# for machine learning we use the 2 data directly (as relative pixel\n# positions info is ignored by this model)\nX = lfw_people.data\nn_features = X.shape[1]\n\n# the label to predict is the id of the person\ny = lfw_people.target\ntarget_names = lfw_people.target_names\nn_classes = target_names.shape[0]\n\nprint(\"Total dataset size:\")\nprint(\"n_samples: %d\" % n_samples)\nprint(\"n_features: %d\" % n_features)\nprint(\"n_classes: %d\" % n_classes)"
48+
]
49+
},
50+
{
51+
"cell_type": "markdown",
52+
"metadata": {},
53+
"source": [
54+
"Split into a training set and a test set and keep 25% of the data for testing.\n\n"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": null,
60+
"metadata": {
61+
"collapsed": false
62+
},
63+
"outputs": [],
64+
"source": [
65+
"X_train, X_test, y_train, y_test = train_test_split(\n X, y, test_size=0.25, random_state=42\n)\n\nscaler = StandardScaler()\nX_train = scaler.fit_transform(X_train)\nX_test = scaler.transform(X_test)"
66+
]
67+
},
68+
{
69+
"cell_type": "markdown",
70+
"metadata": {},
71+
"source": [
72+
"Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled\ndataset): unsupervised feature extraction / dimensionality reduction\n\n"
73+
]
74+
},
75+
{
76+
"cell_type": "code",
77+
"execution_count": null,
78+
"metadata": {
79+
"collapsed": false
80+
},
81+
"outputs": [],
82+
"source": [
83+
"n_components = 150\n\nprint(\n \"Extracting the top %d eigenfaces from %d faces\" % (n_components, X_train.shape[0])\n)\nt0 = time()\npca = PCA(n_components=n_components, svd_solver=\"randomized\", whiten=True).fit(X_train)\nprint(\"done in %0.3fs\" % (time() - t0))\n\neigenfaces = pca.components_.reshape((n_components, h, w))\n\nprint(\"Projecting the input data on the eigenfaces orthonormal basis\")\nt0 = time()\nX_train_pca = pca.transform(X_train)\nX_test_pca = pca.transform(X_test)\nprint(\"done in %0.3fs\" % (time() - t0))"
84+
]
85+
},
86+
{
87+
"cell_type": "markdown",
88+
"metadata": {},
89+
"source": [
90+
"Train an SVM classification model\n\n"
91+
]
92+
},
93+
{
94+
"cell_type": "code",
95+
"execution_count": null,
96+
"metadata": {
97+
"collapsed": false
98+
},
99+
"outputs": [],
100+
"source": [
101+
"print(\"Fitting the classifier to the training set\")\nt0 = time()\nparam_grid = {\n \"C\": loguniform(1e3, 1e5),\n \"gamma\": loguniform(1e-4, 1e-1),\n}\nclf = RandomizedSearchCV(\n SVC(kernel=\"rbf\", class_weight=\"balanced\"), param_grid, n_iter=10\n)\nclf = clf.fit(X_train_pca, y_train)\nprint(\"done in %0.3fs\" % (time() - t0))\nprint(\"Best estimator found by grid search:\")\nprint(clf.best_estimator_)"
102+
]
103+
},
104+
{
105+
"cell_type": "markdown",
106+
"metadata": {},
107+
"source": [
108+
"Quantitative evaluation of the model quality on the test set\n\n"
109+
]
110+
},
111+
{
112+
"cell_type": "code",
113+
"execution_count": null,
114+
"metadata": {
115+
"collapsed": false
116+
},
117+
"outputs": [],
118+
"source": [
119+
"print(\"Predicting people's names on the test set\")\nt0 = time()\ny_pred = clf.predict(X_test_pca)\nprint(\"done in %0.3fs\" % (time() - t0))\n\nprint(classification_report(y_test, y_pred, target_names=target_names))\nConfusionMatrixDisplay.from_estimator(\n clf, X_test_pca, y_test, display_labels=target_names, xticks_rotation=\"vertical\"\n)\nplt.tight_layout()\nplt.show()"
120+
]
121+
},
122+
{
123+
"cell_type": "markdown",
124+
"metadata": {},
125+
"source": [
126+
"Qualitative evaluation of the predictions using matplotlib\n\n"
127+
]
128+
},
129+
{
130+
"cell_type": "code",
131+
"execution_count": null,
132+
"metadata": {
133+
"collapsed": false
134+
},
135+
"outputs": [],
136+
"source": [
137+
"def plot_gallery(images, titles, h, w, n_row=3, n_col=4):\n \"\"\"Helper function to plot a gallery of portraits\"\"\"\n plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))\n plt.subplots_adjust(bottom=0, left=0.01, right=0.99, top=0.90, hspace=0.35)\n for i in range(n_row * n_col):\n plt.subplot(n_row, n_col, i + 1)\n plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)\n plt.title(titles[i], size=12)\n plt.xticks(())\n plt.yticks(())"
138+
]
139+
},
140+
{
141+
"cell_type": "markdown",
142+
"metadata": {},
143+
"source": [
144+
"plot the result of the prediction on a portion of the test set\n\n"
145+
]
146+
},
147+
{
148+
"cell_type": "code",
149+
"execution_count": null,
150+
"metadata": {
151+
"collapsed": false
152+
},
153+
"outputs": [],
154+
"source": [
155+
"def title(y_pred, y_test, target_names, i):\n pred_name = target_names[y_pred[i]].rsplit(\" \", 1)[-1]\n true_name = target_names[y_test[i]].rsplit(\" \", 1)[-1]\n return \"predicted: %s\\ntrue: %s\" % (pred_name, true_name)\n\n\nprediction_titles = [\n title(y_pred, y_test, target_names, i) for i in range(y_pred.shape[0])\n]\n\nplot_gallery(X_test, prediction_titles, h, w)"
156+
]
157+
},
158+
{
159+
"cell_type": "markdown",
160+
"metadata": {},
161+
"source": [
162+
"Plot the gallery of the most significant eigenfaces\n\n"
163+
]
164+
},
165+
{
166+
"cell_type": "code",
167+
"execution_count": null,
168+
"metadata": {
169+
"collapsed": false
170+
},
171+
"outputs": [],
172+
"source": [
173+
"eigenface_titles = [\"eigenface %d\" % i for i in range(eigenfaces.shape[0])]\nplot_gallery(eigenfaces, eigenface_titles, h, w)\n\nplt.show()"
174+
]
175+
},
176+
{
177+
"cell_type": "markdown",
178+
"metadata": {},
179+
"source": [
180+
"The face recognition problem would be much more effectively solved by training\nconvolutional neural networks, but this family of models is outside the scope of\nthe scikit-learn library. Interested readers should instead try to use PyTorch or\nTensorFlow to implement such models.\n\n"
30181
]
31182
}
32183
],
Binary file not shown.

dev/_downloads/b3a994b2ad66fe78bcedaf151ab78b07/plot_face_recognition.py

Lines changed: 34 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -10,42 +10,23 @@
1010
1111
.. _LFW: http://vis-www.cs.umass.edu/lfw/
1212
13-
Expected results for the top 5 most represented people in the dataset:
14-
15-
================== ============ ======= ========== =======
16-
precision recall f1-score support
17-
================== ============ ======= ========== =======
18-
Ariel Sharon 0.67 0.92 0.77 13
19-
Colin Powell 0.75 0.78 0.76 60
20-
Donald Rumsfeld 0.78 0.67 0.72 27
21-
George W Bush 0.86 0.86 0.86 146
22-
Gerhard Schroeder 0.76 0.76 0.76 25
23-
Hugo Chavez 0.67 0.67 0.67 15
24-
Tony Blair 0.81 0.69 0.75 36
25-
26-
avg / total 0.80 0.80 0.80 322
27-
================== ============ ======= ========== =======
28-
2913
"""
30-
14+
# %%
3115
from time import time
32-
import logging
3316
import matplotlib.pyplot as plt
3417

3518
from sklearn.model_selection import train_test_split
36-
from sklearn.model_selection import GridSearchCV
19+
from sklearn.model_selection import RandomizedSearchCV
3720
from sklearn.datasets import fetch_lfw_people
3821
from sklearn.metrics import classification_report
39-
from sklearn.metrics import confusion_matrix
22+
from sklearn.metrics import ConfusionMatrixDisplay
23+
from sklearn.preprocessing import StandardScaler
4024
from sklearn.decomposition import PCA
4125
from sklearn.svm import SVC
26+
from sklearn.utils.fixes import loguniform
4227

4328

44-
# Display progress logs on stdout
45-
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
46-
47-
48-
# #############################################################################
29+
# %%
4930
# Download the data, if not already on disk and load it as numpy arrays
5031

5132
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
@@ -69,18 +50,21 @@
6950
print("n_classes: %d" % n_classes)
7051

7152

72-
# #############################################################################
73-
# Split into a training set and a test set using a stratified k fold
53+
# %%
54+
# Split into a training set and a test set and keep 25% of the data for testing.
7455

75-
# split into a training and testing set
7656
X_train, X_test, y_train, y_test = train_test_split(
7757
X, y, test_size=0.25, random_state=42
7858
)
7959

60+
scaler = StandardScaler()
61+
X_train = scaler.fit_transform(X_train)
62+
X_test = scaler.transform(X_test)
8063

81-
# #############################################################################
64+
# %%
8265
# Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
8366
# dataset): unsupervised feature extraction / dimensionality reduction
67+
8468
n_components = 150
8569

8670
print(
@@ -99,23 +83,25 @@
9983
print("done in %0.3fs" % (time() - t0))
10084

10185

102-
# #############################################################################
86+
# %%
10387
# Train an SVM classification model
10488

10589
print("Fitting the classifier to the training set")
10690
t0 = time()
10791
param_grid = {
108-
"C": [1e3, 5e3, 1e4, 5e4, 1e5],
109-
"gamma": [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
92+
"C": loguniform(1e3, 1e5),
93+
"gamma": loguniform(1e-4, 1e-1),
11094
}
111-
clf = GridSearchCV(SVC(kernel="rbf", class_weight="balanced"), param_grid)
95+
clf = RandomizedSearchCV(
96+
SVC(kernel="rbf", class_weight="balanced"), param_grid, n_iter=10
97+
)
11298
clf = clf.fit(X_train_pca, y_train)
11399
print("done in %0.3fs" % (time() - t0))
114100
print("Best estimator found by grid search:")
115101
print(clf.best_estimator_)
116102

117103

118-
# #############################################################################
104+
# %%
119105
# Quantitative evaluation of the model quality on the test set
120106

121107
print("Predicting people's names on the test set")
@@ -124,10 +110,14 @@
124110
print("done in %0.3fs" % (time() - t0))
125111

126112
print(classification_report(y_test, y_pred, target_names=target_names))
127-
print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))
113+
ConfusionMatrixDisplay.from_estimator(
114+
clf, X_test_pca, y_test, display_labels=target_names, xticks_rotation="vertical"
115+
)
116+
plt.tight_layout()
117+
plt.show()
128118

129119

130-
# #############################################################################
120+
# %%
131121
# Qualitative evaluation of the predictions using matplotlib
132122

133123

@@ -143,6 +133,7 @@ def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
143133
plt.yticks(())
144134

145135

136+
# %%
146137
# plot the result of the prediction on a portion of the test set
147138

148139

@@ -157,10 +148,16 @@ def title(y_pred, y_test, target_names, i):
157148
]
158149

159150
plot_gallery(X_test, prediction_titles, h, w)
160-
151+
# %%
161152
# plot the gallery of the most significant eigenfaces
162153

163154
eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces.shape[0])]
164155
plot_gallery(eigenfaces, eigenface_titles, h, w)
165156

166157
plt.show()
158+
159+
# %%
160+
# The face recognition problem would be much more effectively solved by training
161+
# convolutional neural networks, but this family of models is outside the scope of
162+
# the scikit-learn library. Interested readers should instead try to use PyTorch or
163+
# TensorFlow to implement such models.

dev/_downloads/scikit-learn-docs.zip

20.6 KB
Binary file not shown.
7 Bytes
8 Bytes
21 Bytes
-241 Bytes
-4 Bytes

0 commit comments

Comments
 (0)