
Commit 7995077

Pushing the docs to dev/ for branch: master, commit 1b0ec1bf4fd9aeec50828ddfd6700e69a41fb5a2
1 parent 9a80466 commit 7995077

897 files changed (+2628 / -2586 lines)


dev/_downloads/plot_label_propagation_digits_active_learning.ipynb

Lines changed: 2 additions & 2 deletions
@@ -15,7 +15,7 @@
 },
 {
 "source": [
-"\n# Label Propagation digits active learning\n\n\nDemonstrates an active learning technique to learn handwritten digits\nusing label propagation.\n\nWe start by training a label propagation model with only 10 labeled points,\nthen we select the top five most uncertain points to label. Next, we train\nwith 15 labeled points (original 10 + 5 new ones). We repeat this process\nfour times to have a model trained with 30 labeled examples.\n\nA plot will appear showing the top 5 most uncertain digits for each iteration\nof training. These may or may not contain mistakes, but we will train the next\nmodel with their true labels.\n\n"
+"\n# Label Propagation digits active learning\n\n\nDemonstrates an active learning technique to learn handwritten digits\nusing label propagation.\n\nWe start by training a label propagation model with only 10 labeled points,\nthen we select the top five most uncertain points to label. Next, we train\nwith 15 labeled points (original 10 + 5 new ones). We repeat this process\nfour times to have a model trained with 30 labeled examples. Note you can\nincrease this to label more than 30 by changing `max_iterations`. Labeling\nmore than 30 can be useful to get a sense for the speed of convergence of\nthis active learning technique.\n\nA plot will appear showing the top 5 most uncertain digits for each iteration\nof training. These may or may not contain mistakes, but we will train the next\nmodel with their true labels.\n\n"
 ],
 "cell_type": "markdown",
 "metadata": {}
@@ -24,7 +24,7 @@
 "execution_count": null,
 "cell_type": "code",
 "source": [
-"print(__doc__)\n\n# Authors: Clay Woolam <[email protected]>\n# License: BSD\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import stats\n\nfrom sklearn import datasets\nfrom sklearn.semi_supervised import label_propagation\nfrom sklearn.metrics import classification_report, confusion_matrix\n\ndigits = datasets.load_digits()\nrng = np.random.RandomState(0)\nindices = np.arange(len(digits.data))\nrng.shuffle(indices)\n\nX = digits.data[indices[:330]]\ny = digits.target[indices[:330]]\nimages = digits.images[indices[:330]]\n\nn_total_samples = len(y)\nn_labeled_points = 10\n\nunlabeled_indices = np.arange(n_total_samples)[n_labeled_points:]\nf = plt.figure()\n\nfor i in range(5):\n    y_train = np.copy(y)\n    y_train[unlabeled_indices] = -1\n\n    lp_model = label_propagation.LabelSpreading(gamma=0.25, max_iter=5)\n    lp_model.fit(X, y_train)\n\n    predicted_labels = lp_model.transduction_[unlabeled_indices]\n    true_labels = y[unlabeled_indices]\n\n    cm = confusion_matrix(true_labels, predicted_labels,\n                          labels=lp_model.classes_)\n\n    print('Iteration %i %s' % (i, 70 * '_'))\n    print(\"Label Spreading model: %d labeled & %d unlabeled (%d total)\"\n          % (n_labeled_points, n_total_samples - n_labeled_points, n_total_samples))\n\n    print(classification_report(true_labels, predicted_labels))\n\n    print(\"Confusion matrix\")\n    print(cm)\n\n    # compute the entropies of transduced label distributions\n    pred_entropies = stats.distributions.entropy(\n        lp_model.label_distributions_.T)\n\n    # select five digit examples that the classifier is most uncertain about\n    uncertainty_index = uncertainty_index = np.argsort(pred_entropies)[-5:]\n\n    # keep track of indices that we get labels for\n    delete_indices = np.array([])\n\n    f.text(.05, (1 - (i + 1) * .183),\n           \"model %d\\n\\nfit with\\n%d labels\" % ((i + 1), i * 5 + 10), size=10)\n    for index, image_index in enumerate(uncertainty_index):\n        image = images[image_index]\n\n        sub = f.add_subplot(5, 5, index + 1 + (5 * i))\n        sub.imshow(image, cmap=plt.cm.gray_r)\n        sub.set_title('predict: %i\\ntrue: %i' % (\n            lp_model.transduction_[image_index], y[image_index]), size=10)\n        sub.axis('off')\n\n        # labeling 5 points, remote from labeled set\n        delete_index, = np.where(unlabeled_indices == image_index)\n        delete_indices = np.concatenate((delete_indices, delete_index))\n\n    unlabeled_indices = np.delete(unlabeled_indices, delete_indices)\n    n_labeled_points += 5\n\nf.suptitle(\"Active learning with Label Propagation.\\nRows show 5 most \"\n           \"uncertain labels to learn with the next model.\")\nplt.subplots_adjust(0.12, 0.03, 0.9, 0.8, 0.2, 0.45)\nplt.show()"
+"print(__doc__)\n\n# Authors: Clay Woolam <[email protected]>\n# License: BSD\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import stats\n\nfrom sklearn import datasets\nfrom sklearn.semi_supervised import label_propagation\nfrom sklearn.metrics import classification_report, confusion_matrix\n\ndigits = datasets.load_digits()\nrng = np.random.RandomState(0)\nindices = np.arange(len(digits.data))\nrng.shuffle(indices)\n\nX = digits.data[indices[:330]]\ny = digits.target[indices[:330]]\nimages = digits.images[indices[:330]]\n\nn_total_samples = len(y)\nn_labeled_points = 10\nmax_iterations = 5\n\nunlabeled_indices = np.arange(n_total_samples)[n_labeled_points:]\nf = plt.figure()\n\nfor i in range(max_iterations):\n    if len(unlabeled_indices) == 0:\n        print(\"No unlabeled items left to label.\")\n        break\n    y_train = np.copy(y)\n    y_train[unlabeled_indices] = -1\n\n    lp_model = label_propagation.LabelSpreading(gamma=0.25, max_iter=5)\n    lp_model.fit(X, y_train)\n\n    predicted_labels = lp_model.transduction_[unlabeled_indices]\n    true_labels = y[unlabeled_indices]\n\n    cm = confusion_matrix(true_labels, predicted_labels,\n                          labels=lp_model.classes_)\n\n    print(\"Iteration %i %s\" % (i, 70 * \"_\"))\n    print(\"Label Spreading model: %d labeled & %d unlabeled (%d total)\"\n          % (n_labeled_points, n_total_samples - n_labeled_points, n_total_samples))\n\n    print(classification_report(true_labels, predicted_labels))\n\n    print(\"Confusion matrix\")\n    print(cm)\n\n    # compute the entropies of transduced label distributions\n    pred_entropies = stats.distributions.entropy(\n        lp_model.label_distributions_.T)\n\n    # select up to 5 digit examples that the classifier is most uncertain about\n    uncertainty_index = np.argsort(pred_entropies)[::-1]\n    uncertainty_index = uncertainty_index[\n        np.in1d(uncertainty_index, unlabeled_indices)][:5]\n\n    # keep track of indices that we get labels for\n    delete_indices = np.array([])\n\n    # for more than 5 iterations, visualize the gain only on the first 5\n    if i < 5:\n        f.text(.05, (1 - (i + 1) * .183),\n               \"model %d\\n\\nfit with\\n%d labels\" %\n               ((i + 1), i * 5 + 10), size=10)\n    for index, image_index in enumerate(uncertainty_index):\n        image = images[image_index]\n\n        # for more than 5 iterations, visualize the gain only on the first 5\n        if i < 5:\n            sub = f.add_subplot(5, 5, index + 1 + (5 * i))\n            sub.imshow(image, cmap=plt.cm.gray_r)\n            sub.set_title(\"predict: %i\\ntrue: %i\" % (\n                lp_model.transduction_[image_index], y[image_index]), size=10)\n            sub.axis('off')\n\n        # labeling 5 points, remote from labeled set\n        delete_index, = np.where(unlabeled_indices == image_index)\n        delete_indices = np.concatenate((delete_indices, delete_index))\n\n    unlabeled_indices = np.delete(unlabeled_indices, delete_indices)\n    n_labeled_points += len(uncertainty_index)\n\nf.suptitle(\"Active learning with Label Propagation.\\nRows show 5 most \"\n           \"uncertain labels to learn with the next model.\")\nplt.subplots_adjust(0.12, 0.03, 0.9, 0.8, 0.2, 0.45)\nplt.show()"
 ],
 "outputs": [],
 "metadata": {

dev/_downloads/plot_label_propagation_digits_active_learning.py

Lines changed: 27 additions & 13 deletions
@@ -9,7 +9,10 @@
 We start by training a label propagation model with only 10 labeled points,
 then we select the top five most uncertain points to label. Next, we train
 with 15 labeled points (original 10 + 5 new ones). We repeat this process
-four times to have a model trained with 30 labeled examples.
+four times to have a model trained with 30 labeled examples. Note you can
+increase this to label more than 30 by changing `max_iterations`. Labeling
+more than 30 can be useful to get a sense for the speed of convergence of
+this active learning technique.
 
 A plot will appear showing the top 5 most uncertain digits for each iteration
 of training. These may or may not contain mistakes, but we will train the next
@@ -39,11 +42,15 @@
 
 n_total_samples = len(y)
 n_labeled_points = 10
+max_iterations = 5
 
 unlabeled_indices = np.arange(n_total_samples)[n_labeled_points:]
 f = plt.figure()
 
-for i in range(5):
+for i in range(max_iterations):
+    if len(unlabeled_indices) == 0:
+        print("No unlabeled items left to label.")
+        break
     y_train = np.copy(y)
     y_train[unlabeled_indices] = -1
 
@@ -56,7 +63,7 @@
     cm = confusion_matrix(true_labels, predicted_labels,
                           labels=lp_model.classes_)
 
-    print('Iteration %i %s' % (i, 70 * '_'))
+    print("Iteration %i %s" % (i, 70 * "_"))
     print("Label Spreading model: %d labeled & %d unlabeled (%d total)"
           % (n_labeled_points, n_total_samples - n_labeled_points, n_total_samples))
 
@@ -69,29 +76,36 @@
     pred_entropies = stats.distributions.entropy(
         lp_model.label_distributions_.T)
 
-    # select five digit examples that the classifier is most uncertain about
-    uncertainty_index = uncertainty_index = np.argsort(pred_entropies)[-5:]
+    # select up to 5 digit examples that the classifier is most uncertain about
+    uncertainty_index = np.argsort(pred_entropies)[::-1]
+    uncertainty_index = uncertainty_index[
+        np.in1d(uncertainty_index, unlabeled_indices)][:5]
 
     # keep track of indices that we get labels for
     delete_indices = np.array([])
 
-    f.text(.05, (1 - (i + 1) * .183),
-           "model %d\n\nfit with\n%d labels" % ((i + 1), i * 5 + 10), size=10)
+    # for more than 5 iterations, visualize the gain only on the first 5
+    if i < 5:
+        f.text(.05, (1 - (i + 1) * .183),
+               "model %d\n\nfit with\n%d labels" %
+               ((i + 1), i * 5 + 10), size=10)
     for index, image_index in enumerate(uncertainty_index):
         image = images[image_index]
 
-        sub = f.add_subplot(5, 5, index + 1 + (5 * i))
-        sub.imshow(image, cmap=plt.cm.gray_r)
-        sub.set_title('predict: %i\ntrue: %i' % (
-            lp_model.transduction_[image_index], y[image_index]), size=10)
-        sub.axis('off')
+        # for more than 5 iterations, visualize the gain only on the first 5
+        if i < 5:
+            sub = f.add_subplot(5, 5, index + 1 + (5 * i))
+            sub.imshow(image, cmap=plt.cm.gray_r)
+            sub.set_title("predict: %i\ntrue: %i" % (
+                lp_model.transduction_[image_index], y[image_index]), size=10)
+            sub.axis('off')
 
         # labeling 5 points, remote from labeled set
         delete_index, = np.where(unlabeled_indices == image_index)
         delete_indices = np.concatenate((delete_indices, delete_index))
 
     unlabeled_indices = np.delete(unlabeled_indices, delete_indices)
-    n_labeled_points += 5
+    n_labeled_points += len(uncertainty_index)
 
 f.suptitle("Active learning with Label Propagation.\nRows show 5 most "
            "uncertain labels to learn with the next model.")

dev/_downloads/scikit-learn-docs.pdf

10.2 KB
Binary file not shown.
