Skip to content

Commit 321514e

Browse files
committed
Pushing the docs to dev/ for branch: main, commit e7c710424afe445516d14535d05231fabd0af798
1 parent 8ecb613 commit 321514e

File tree

1,222 files changed

+5323
-4572
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,222 files changed

+5323
-4572
lines changed
Binary file not shown.

dev/_downloads/22bab9f5303a6f1be42d14efb0f90b40/plot_label_propagation_digits.ipynb

Lines changed: 163 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,169 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"# Authors: Clay Woolam <[email protected]>\n# License: BSD\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom scipy import stats\n\nfrom sklearn import datasets\nfrom sklearn.semi_supervised import LabelSpreading\n\nfrom sklearn.metrics import confusion_matrix, classification_report\n\ndigits = datasets.load_digits()\nrng = np.random.RandomState(2)\nindices = np.arange(len(digits.data))\nrng.shuffle(indices)\n\nX = digits.data[indices[:340]]\ny = digits.target[indices[:340]]\nimages = digits.images[indices[:340]]\n\nn_total_samples = len(y)\nn_labeled_points = 40\n\nindices = np.arange(n_total_samples)\n\nunlabeled_set = indices[n_labeled_points:]\n\n# #############################################################################\n# Shuffle everything around\ny_train = np.copy(y)\ny_train[unlabeled_set] = -1\n\n# #############################################################################\n# Learn with LabelSpreading\nlp_model = LabelSpreading(gamma=0.25, max_iter=20)\nlp_model.fit(X, y_train)\npredicted_labels = lp_model.transduction_[unlabeled_set]\ntrue_labels = y[unlabeled_set]\n\ncm = confusion_matrix(true_labels, predicted_labels, labels=lp_model.classes_)\n\nprint(\n \"Label Spreading model: %d labeled & %d unlabeled points (%d total)\"\n % (n_labeled_points, n_total_samples - n_labeled_points, n_total_samples)\n)\n\nprint(classification_report(true_labels, predicted_labels))\n\nprint(\"Confusion matrix\")\nprint(cm)\n\n# #############################################################################\n# Calculate uncertainty values for each transduced distribution\npred_entropies = stats.distributions.entropy(lp_model.label_distributions_.T)\n\n# #############################################################################\n# Pick the top 10 most uncertain labels\nuncertainty_index = np.argsort(pred_entropies)[-10:]\n\n# #############################################################################\n# Plot\nf = plt.figure(figsize=(7, 5))\nfor 
index, image_index in enumerate(uncertainty_index):\n image = images[image_index]\n\n sub = f.add_subplot(2, 5, index + 1)\n sub.imshow(image, cmap=plt.cm.gray_r)\n plt.xticks([])\n plt.yticks([])\n sub.set_title(\n \"predict: %i\\ntrue: %i\" % (lp_model.transduction_[image_index], y[image_index])\n )\n\nf.suptitle(\"Learning with small amount of labeled data\")\nplt.show()"
29+
"# Authors: Clay Woolam <[email protected]>\n# License: BSD"
30+
]
31+
},
32+
{
33+
"cell_type": "markdown",
34+
"metadata": {},
35+
"source": [
36+
"## Data generation\n\nWe use the digits dataset. We only use a subset of randomly selected samples.\n\n"
37+
]
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": null,
42+
"metadata": {
43+
"collapsed": false
44+
},
45+
"outputs": [],
46+
"source": [
47+
"from sklearn import datasets\nimport numpy as np\n\ndigits = datasets.load_digits()\nrng = np.random.RandomState(2)\nindices = np.arange(len(digits.data))\nrng.shuffle(indices)"
48+
]
49+
},
50+
{
51+
"cell_type": "markdown",
52+
"metadata": {},
53+
"source": [
54+
"We select 340 samples, of which only 40 will be associated with a known label.\nTherefore, we store the indices of the 300 other samples, whose labels we are\nnot supposed to know.\n\n"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": null,
60+
"metadata": {
61+
"collapsed": false
62+
},
63+
"outputs": [],
64+
"source": [
65+
"X = digits.data[indices[:340]]\ny = digits.target[indices[:340]]\nimages = digits.images[indices[:340]]\n\nn_total_samples = len(y)\nn_labeled_points = 40\n\nindices = np.arange(n_total_samples)\n\nunlabeled_set = indices[n_labeled_points:]"
66+
]
67+
},
68+
{
69+
"cell_type": "markdown",
70+
"metadata": {},
71+
"source": [
72+
"Mask the labels of the unlabeled samples by setting them to -1\n\n"
73+
]
74+
},
75+
{
76+
"cell_type": "code",
77+
"execution_count": null,
78+
"metadata": {
79+
"collapsed": false
80+
},
81+
"outputs": [],
82+
"source": [
83+
"y_train = np.copy(y)\ny_train[unlabeled_set] = -1"
84+
]
85+
},
86+
{
87+
"cell_type": "markdown",
88+
"metadata": {},
89+
"source": [
90+
"## Semi-supervised learning\n\nWe fit a :class:`~sklearn.semi_supervised.LabelSpreading` and use it to predict\nthe unknown labels.\n\n"
91+
]
92+
},
93+
{
94+
"cell_type": "code",
95+
"execution_count": null,
96+
"metadata": {
97+
"collapsed": false
98+
},
99+
"outputs": [],
100+
"source": [
101+
"from sklearn.semi_supervised import LabelSpreading\nfrom sklearn.metrics import classification_report\n\nlp_model = LabelSpreading(gamma=0.25, max_iter=20)\nlp_model.fit(X, y_train)\npredicted_labels = lp_model.transduction_[unlabeled_set]\ntrue_labels = y[unlabeled_set]\n\nprint(\n \"Label Spreading model: %d labeled & %d unlabeled points (%d total)\"\n % (n_labeled_points, n_total_samples - n_labeled_points, n_total_samples)\n)"
102+
]
103+
},
104+
{
105+
"cell_type": "markdown",
106+
"metadata": {},
107+
"source": [
108+
"Classification report\n\n"
109+
]
110+
},
111+
{
112+
"cell_type": "code",
113+
"execution_count": null,
114+
"metadata": {
115+
"collapsed": false
116+
},
117+
"outputs": [],
118+
"source": [
119+
"print(classification_report(true_labels, predicted_labels))"
120+
]
121+
},
122+
{
123+
"cell_type": "markdown",
124+
"metadata": {},
125+
"source": [
126+
"Confusion matrix\n\n"
127+
]
128+
},
129+
{
130+
"cell_type": "code",
131+
"execution_count": null,
132+
"metadata": {
133+
"collapsed": false
134+
},
135+
"outputs": [],
136+
"source": [
137+
"from sklearn.metrics import ConfusionMatrixDisplay\n\nConfusionMatrixDisplay.from_predictions(\n true_labels, predicted_labels, labels=lp_model.classes_\n)"
138+
]
139+
},
140+
{
141+
"cell_type": "markdown",
142+
"metadata": {},
143+
"source": [
144+
"## Plot the most uncertain predictions\n\nHere, we will pick and show the 10 most uncertain predictions.\n\n"
145+
]
146+
},
147+
{
148+
"cell_type": "code",
149+
"execution_count": null,
150+
"metadata": {
151+
"collapsed": false
152+
},
153+
"outputs": [],
154+
"source": [
155+
"from scipy import stats\n\npred_entropies = stats.distributions.entropy(lp_model.label_distributions_.T)"
156+
]
157+
},
158+
{
159+
"cell_type": "markdown",
160+
"metadata": {},
161+
"source": [
162+
"Pick the top 10 most uncertain labels\n\n"
163+
]
164+
},
165+
{
166+
"cell_type": "code",
167+
"execution_count": null,
168+
"metadata": {
169+
"collapsed": false
170+
},
171+
"outputs": [],
172+
"source": [
173+
"uncertainty_index = np.argsort(pred_entropies)[-10:]"
174+
]
175+
},
176+
{
177+
"cell_type": "markdown",
178+
"metadata": {},
179+
"source": [
180+
"Plot\n\n"
181+
]
182+
},
183+
{
184+
"cell_type": "code",
185+
"execution_count": null,
186+
"metadata": {
187+
"collapsed": false
188+
},
189+
"outputs": [],
190+
"source": [
191+
"import matplotlib.pyplot as plt\n\nf = plt.figure(figsize=(7, 5))\nfor index, image_index in enumerate(uncertainty_index):\n image = images[image_index]\n\n sub = f.add_subplot(2, 5, index + 1)\n sub.imshow(image, cmap=plt.cm.gray_r)\n plt.xticks([])\n plt.yticks([])\n sub.set_title(\n \"predict: %i\\ntrue: %i\" % (lp_model.transduction_[image_index], y[image_index])\n )\n\nf.suptitle(\"Learning with small amount of labeled data\")\nplt.show()"
30192
]
31193
}
32194
],
Binary file not shown.

dev/_downloads/9c824e9beef1b72c9f1ad3f39de0bf57/plot_label_propagation_structure.ipynb

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,79 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"# Authors: Clay Woolam <[email protected]>\n# Andreas Mueller <[email protected]>\n# License: BSD\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.semi_supervised import LabelSpreading\nfrom sklearn.datasets import make_circles\n\n# generate ring with inner box\nn_samples = 200\nX, y = make_circles(n_samples=n_samples, shuffle=False)\nouter, inner = 0, 1\nlabels = np.full(n_samples, -1.0)\nlabels[0] = outer\nlabels[-1] = inner\n\n# #############################################################################\n# Learn with LabelSpreading\nlabel_spread = LabelSpreading(kernel=\"knn\", alpha=0.8)\nlabel_spread.fit(X, labels)\n\n# #############################################################################\n# Plot output labels\noutput_labels = label_spread.transduction_\nplt.figure(figsize=(8.5, 4))\nplt.subplot(1, 2, 1)\nplt.scatter(\n X[labels == outer, 0],\n X[labels == outer, 1],\n color=\"navy\",\n marker=\"s\",\n lw=0,\n label=\"outer labeled\",\n s=10,\n)\nplt.scatter(\n X[labels == inner, 0],\n X[labels == inner, 1],\n color=\"c\",\n marker=\"s\",\n lw=0,\n label=\"inner labeled\",\n s=10,\n)\nplt.scatter(\n X[labels == -1, 0],\n X[labels == -1, 1],\n color=\"darkorange\",\n marker=\".\",\n label=\"unlabeled\",\n)\nplt.legend(scatterpoints=1, shadow=False, loc=\"upper right\")\nplt.title(\"Raw data (2 classes=outer and inner)\")\n\nplt.subplot(1, 2, 2)\noutput_label_array = np.asarray(output_labels)\nouter_numbers = np.where(output_label_array == outer)[0]\ninner_numbers = np.where(output_label_array == inner)[0]\nplt.scatter(\n X[outer_numbers, 0],\n X[outer_numbers, 1],\n color=\"navy\",\n marker=\"s\",\n lw=0,\n s=10,\n label=\"outer learned\",\n)\nplt.scatter(\n X[inner_numbers, 0],\n X[inner_numbers, 1],\n color=\"c\",\n marker=\"s\",\n lw=0,\n s=10,\n label=\"inner learned\",\n)\nplt.legend(scatterpoints=1, shadow=False, loc=\"upper right\")\nplt.title(\"Labels learned with Label Spreading (KNN)\")\n\nplt.subplots_adjust(left=0.07, 
bottom=0.07, right=0.93, top=0.92)\nplt.show()"
29+
"# Authors: Clay Woolam <[email protected]>\n# Andreas Mueller <[email protected]>\n# License: BSD"
30+
]
31+
},
32+
{
33+
"cell_type": "markdown",
34+
"metadata": {},
35+
"source": [
36+
"We generate a dataset with two concentric circles. In addition, each sample\nof the dataset is associated with a label that is either 0 (belonging to\nthe outer circle), 1 (belonging to the inner circle), or -1 (unknown).\nHere, all labels but two are tagged as unknown.\n\n"
37+
]
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": null,
42+
"metadata": {
43+
"collapsed": false
44+
},
45+
"outputs": [],
46+
"source": [
47+
"import numpy as np\nfrom sklearn.datasets import make_circles\n\nn_samples = 200\nX, y = make_circles(n_samples=n_samples, shuffle=False)\nouter, inner = 0, 1\nlabels = np.full(n_samples, -1.0)\nlabels[0] = outer\nlabels[-1] = inner"
48+
]
49+
},
50+
{
51+
"cell_type": "markdown",
52+
"metadata": {},
53+
"source": [
54+
"Plot raw data\n\n"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": null,
60+
"metadata": {
61+
"collapsed": false
62+
},
63+
"outputs": [],
64+
"source": [
65+
"import matplotlib.pyplot as plt\n\nplt.figure(figsize=(4, 4))\nplt.scatter(\n X[labels == outer, 0],\n X[labels == outer, 1],\n color=\"navy\",\n marker=\"s\",\n lw=0,\n label=\"outer labeled\",\n s=10,\n)\nplt.scatter(\n X[labels == inner, 0],\n X[labels == inner, 1],\n color=\"c\",\n marker=\"s\",\n lw=0,\n label=\"inner labeled\",\n s=10,\n)\nplt.scatter(\n X[labels == -1, 0],\n X[labels == -1, 1],\n color=\"darkorange\",\n marker=\".\",\n label=\"unlabeled\",\n)\nplt.legend(scatterpoints=1, shadow=False, loc=\"upper right\")\nplt.title(\"Raw data (2 classes=outer and inner)\")"
66+
]
67+
},
68+
{
69+
"cell_type": "markdown",
70+
"metadata": {},
71+
"source": [
72+
"The aim of :class:`~sklearn.semi_supervised.LabelSpreading` is to associate\na label with each sample whose label is initially unknown.\n\n"
73+
]
74+
},
75+
{
76+
"cell_type": "code",
77+
"execution_count": null,
78+
"metadata": {
79+
"collapsed": false
80+
},
81+
"outputs": [],
82+
"source": [
83+
"from sklearn.semi_supervised import LabelSpreading\n\nlabel_spread = LabelSpreading(kernel=\"knn\", alpha=0.8)\nlabel_spread.fit(X, labels)"
84+
]
85+
},
86+
{
87+
"cell_type": "markdown",
88+
"metadata": {},
89+
"source": [
90+
"Now, we can check which labels have been associated with each sample\nwhen the label was unknown.\n\n"
91+
]
92+
},
93+
{
94+
"cell_type": "code",
95+
"execution_count": null,
96+
"metadata": {
97+
"collapsed": false
98+
},
99+
"outputs": [],
100+
"source": [
101+
"output_labels = label_spread.transduction_\noutput_label_array = np.asarray(output_labels)\nouter_numbers = np.where(output_label_array == outer)[0]\ninner_numbers = np.where(output_label_array == inner)[0]\n\nplt.figure(figsize=(4, 4))\nplt.scatter(\n X[outer_numbers, 0],\n X[outer_numbers, 1],\n color=\"navy\",\n marker=\"s\",\n lw=0,\n s=10,\n label=\"outer learned\",\n)\nplt.scatter(\n X[inner_numbers, 0],\n X[inner_numbers, 1],\n color=\"c\",\n marker=\"s\",\n lw=0,\n s=10,\n label=\"inner learned\",\n)\nplt.legend(scatterpoints=1, shadow=False, loc=\"upper right\")\nplt.title(\"Labels learned with Label Spreading (KNN)\")\nplt.show()"
30102
]
31103
}
32104
],

dev/_downloads/c6e2877780eeb2421a441896c8ec77b7/plot_label_propagation_structure.py

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,29 +15,27 @@
1515
# Andreas Mueller <[email protected]>
1616
# License: BSD
1717

18+
# %%
19+
# We generate a dataset with two concentric circles. In addition, each sample
20+
# of the dataset is associated with a label that is either 0 (belonging to
21+
# the outer circle), 1 (belonging to the inner circle), or -1 (unknown).
22+
# Here, all labels but two are tagged as unknown.
23+
1824
import numpy as np
19-
import matplotlib.pyplot as plt
20-
from sklearn.semi_supervised import LabelSpreading
2125
from sklearn.datasets import make_circles
2226

23-
# generate ring with inner box
2427
n_samples = 200
2528
X, y = make_circles(n_samples=n_samples, shuffle=False)
2629
outer, inner = 0, 1
2730
labels = np.full(n_samples, -1.0)
2831
labels[0] = outer
2932
labels[-1] = inner
3033

31-
# #############################################################################
32-
# Learn with LabelSpreading
33-
label_spread = LabelSpreading(kernel="knn", alpha=0.8)
34-
label_spread.fit(X, labels)
34+
# %%
35+
# Plot raw data
36+
import matplotlib.pyplot as plt
3537

36-
# #############################################################################
37-
# Plot output labels
38-
output_labels = label_spread.transduction_
39-
plt.figure(figsize=(8.5, 4))
40-
plt.subplot(1, 2, 1)
38+
plt.figure(figsize=(4, 4))
4139
plt.scatter(
4240
X[labels == outer, 0],
4341
X[labels == outer, 1],
@@ -66,10 +64,24 @@
6664
plt.legend(scatterpoints=1, shadow=False, loc="upper right")
6765
plt.title("Raw data (2 classes=outer and inner)")
6866

69-
plt.subplot(1, 2, 2)
67+
# %%
68+
#
69+
# The aim of :class:`~sklearn.semi_supervised.LabelSpreading` is to associate
70+
# a label with each sample whose label is initially unknown.
71+
from sklearn.semi_supervised import LabelSpreading
72+
73+
label_spread = LabelSpreading(kernel="knn", alpha=0.8)
74+
label_spread.fit(X, labels)
75+
76+
# %%
77+
# Now, we can check which labels have been associated with each sample
78+
# when the label was unknown.
79+
output_labels = label_spread.transduction_
7080
output_label_array = np.asarray(output_labels)
7181
outer_numbers = np.where(output_label_array == outer)[0]
7282
inner_numbers = np.where(output_label_array == inner)[0]
83+
84+
plt.figure(figsize=(4, 4))
7385
plt.scatter(
7486
X[outer_numbers, 0],
7587
X[outer_numbers, 1],
@@ -90,6 +102,4 @@
90102
)
91103
plt.legend(scatterpoints=1, shadow=False, loc="upper right")
92104
plt.title("Labels learned with Label Spreading (KNN)")
93-
94-
plt.subplots_adjust(left=0.07, bottom=0.07, right=0.93, top=0.92)
95105
plt.show()

0 commit comments

Comments
 (0)