
Commit 0c56ff6

Pushing the docs to 0.22/ for branch: 0.22.X, commit d39134bc77d9f9a5a0316e21ee32ac3f9683da3d
1 parent 9460f70 commit 0c56ff6

File tree

1,360 files changed, +7716 -5443 lines changed

0.22/.buildinfo

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 506805eb51c13bf5284500a7ba8b5be1
+config: b519b539e4a25d23c2dd2ff8c6365093
 tags: 645f666f9bcd5a90fca523b33c5a78b7
Binary file not shown.

0.22/_downloads/3a6b0f407d8829a616b0eff2bf71828a/plot_confusion_matrix.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import svm, datasets\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import confusion_matrix\nfrom sklearn.utils.multiclass import unique_labels\n\n# import some data to play with\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\nclass_names = iris.target_names\n\n# Split the data into a training set and a test set\nX_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\n\n# Run classifier, using a model that is too regularized (C too low) to see\n# the impact on the results\nclassifier = svm.SVC(kernel='linear', C=0.01)\ny_pred = classifier.fit(X_train, y_train).predict(X_test)\n\n\ndef plot_confusion_matrix(y_true, y_pred, classes,\n normalize=False,\n title=None,\n cmap=plt.cm.Blues):\n \"\"\"\n This function prints and plots the confusion matrix.\n Normalization can be applied by setting `normalize=True`.\n \"\"\"\n if not title:\n if normalize:\n title = 'Normalized confusion matrix'\n else:\n title = 'Confusion matrix, without normalization'\n\n # Compute confusion matrix\n cm = confusion_matrix(y_true, y_pred)\n # Only use the labels that appear in the data\n classes = classes[unique_labels(y_true, y_pred)]\n if normalize:\n cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n print(\"Normalized confusion matrix\")\n else:\n print('Confusion matrix, without normalization')\n\n print(cm)\n\n fig, ax = plt.subplots()\n im = ax.imshow(cm, interpolation='nearest', cmap=cmap)\n ax.figure.colorbar(im, ax=ax)\n # We want to show all ticks...\n ax.set(xticks=np.arange(cm.shape[1]),\n yticks=np.arange(cm.shape[0]),\n # ... and label them with the respective list entries\n xticklabels=classes, yticklabels=classes,\n title=title,\n ylabel='True label',\n xlabel='Predicted label')\n\n # Rotate the tick labels and set their alignment.\n plt.setp(ax.get_xticklabels(), rotation=45, ha=\"right\",\n rotation_mode=\"anchor\")\n\n # Loop over data dimensions and create text annotations.\n fmt = '.2f' if normalize else 'd'\n thresh = cm.max() / 2.\n for i in range(cm.shape[0]):\n for j in range(cm.shape[1]):\n ax.text(j, i, format(cm[i, j], fmt),\n ha=\"center\", va=\"center\",\n color=\"white\" if cm[i, j] > thresh else \"black\")\n fig.tight_layout()\n return ax\n\n\nnp.set_printoptions(precision=2)\n\n# Plot non-normalized confusion matrix\nplot_confusion_matrix(y_test, y_pred, classes=class_names,\n title='Confusion matrix, without normalization')\n\n# Plot normalized confusion matrix\nplot_confusion_matrix(y_test, y_pred, classes=class_names, normalize=True,\n title='Normalized confusion matrix')\n\nplt.show()"
+"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import svm, datasets\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import plot_confusion_matrix\n\n# import some data to play with\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\nclass_names = iris.target_names\n\n# Split the data into a training set and a test set\nX_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\n\n# Run classifier, using a model that is too regularized (C too low) to see\n# the impact on the results\nclassifier = svm.SVC(kernel='linear', C=0.01).fit(X_train, y_train)\n\nnp.set_printoptions(precision=2)\n\n# Plot non-normalized confusion matrix\ntitles_options = [(\"Confusion matrix, without normalization\", None),\n (\"Normalized confusion matrix\", 'true')]\nfor title, normalize in titles_options:\n disp = plot_confusion_matrix(classifier, X_test, y_test,\n display_labels=class_names,\n cmap=plt.cm.Blues,\n normalize=normalize)\n disp.ax_.set_title(title)\n\n print(title)\n print(disp.confusion_matrix)\n\nplt.show()"
 ]
 }
 ],
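
Editor's note: the diff above swaps the example's hand-rolled plot_confusion_matrix helper for the sklearn.metrics.plot_confusion_matrix function added in 0.22. If you already have a precomputed matrix rather than a fitted estimator, the same figure can be built with ConfusionMatrixDisplay, which also landed in 0.22. A minimal sketch (not part of the diff; the labels below are made up for illustration):

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Hypothetical labels standing in for real predictions.
y_true = [0, 1, 2, 2, 0, 1]
y_pred = [0, 2, 2, 2, 0, 1]

# plot_confusion_matrix computes the matrix from an estimator; the display
# class takes an already-computed matrix, so no estimator is needed here.
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1, 2])
disp.plot(cmap=plt.cm.Blues)
plt.show()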
Lines changed: 54 additions & 0 deletions

@@ -0,0 +1,54 @@
+{
+"cells": [
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"%matplotlib inline"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"\n# Gaussian processes on discrete data structures\n\n\nThis example illustrates the use of Gaussian processes for regression and\nclassification tasks on data that are not in fixed-length feature vector form.\nThis is achieved through the use of kernel functions that operates directly\non discrete structures such as variable-length sequences, trees, and graphs.\n\nSpecifically, here the input variables are some gene sequences stored as\nvariable-length strings consisting of letters 'A', 'T', 'C', and 'G',\nwhile the output variables are floating point numbers and True/False labels\nin the regression and classification tasks, respectively.\n\nA kernel between the gene sequences is defined using R-convolution [1]_ by\nintegrating a binary letter-wise kernel over all pairs of letters among a pair\nof strings.\n\nThis example will generate three figures.\n\nIn the first figure, we visualize the value of the kernel, i.e. the similarity\nof the sequences, using a colormap. Brighter color here indicates higher\nsimilarity.\n\nIn the second figure, we show some regression result on a dataset of 6\nsequences. Here we use the 1st, 2nd, 4th, and 5th sequences as the training set\nto make predictions on the 3rd and 6th sequences.\n\nIn the third figure, we demonstrate a classification model by training on 6\nsequences and make predictions on another 5 sequences. The ground truth here is\nsimply whether there is at least one 'A' in the sequence. Here the model makes\nfour correct classifications and fails on one.\n\n.. [1] Haussler, D. (1999). Convolution kernels on discrete structures\n(Vol. 646). Technical report, Department of Computer Science, University of\nCalifornia at Santa Cruz.\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.gaussian_process.kernels import Kernel, Hyperparameter\nfrom sklearn.gaussian_process.kernels import GenericKernelMixin\nfrom sklearn.gaussian_process import GaussianProcessRegressor\nfrom sklearn.gaussian_process import GaussianProcessClassifier\nfrom sklearn.base import clone\n\n\nclass SequenceKernel(GenericKernelMixin, Kernel):\n '''\n A minimal (but valid) convolutional kernel for sequences of variable\n lengths.'''\n def __init__(self,\n baseline_similarity=0.5,\n baseline_similarity_bounds=(1e-5, 1)):\n self.baseline_similarity = baseline_similarity\n self.baseline_similarity_bounds = baseline_similarity_bounds\n\n @property\n def hyperparameter_baseline_similarity(self):\n return Hyperparameter(\"baseline_similarity\",\n \"numeric\",\n self.baseline_similarity_bounds)\n\n def _f(self, s1, s2):\n '''\n kernel value between a pair of sequences\n '''\n return sum([1.0 if c1 == c2 else self.baseline_similarity\n for c1 in s1\n for c2 in s2])\n\n def _g(self, s1, s2):\n '''\n kernel derivative between a pair of sequences\n '''\n return sum([0.0 if c1 == c2 else 1.0\n for c1 in s1\n for c2 in s2])\n\n def __call__(self, X, Y=None, eval_gradient=False):\n if Y is None:\n Y = X\n\n if eval_gradient:\n return (np.array([[self._f(x, y) for y in Y] for x in X]),\n np.array([[[self._g(x, y)] for y in Y] for x in X]))\n else:\n return np.array([[self._f(x, y) for y in Y] for x in X])\n\n def diag(self, X):\n return np.array([self._f(x, x) for x in X])\n\n def is_stationary(self):\n return False\n\n def clone_with_theta(self, theta):\n cloned = clone(self)\n cloned.theta = theta\n return cloned\n\n\nkernel = SequenceKernel()\n\n'''\nSequence similarity matrix under the kernel\n===========================================\n'''\n\nX = np.array(['AGCT', 'AGC', 'AACT', 'TAA', 'AAA', 'GAACA'])\n\nK = kernel(X)\nD = kernel.diag(X)\n\nplt.figure(figsize=(8, 5))\nplt.imshow(np.diag(D**-0.5).dot(K).dot(np.diag(D**-0.5)))\nplt.xticks(np.arange(len(X)), X)\nplt.yticks(np.arange(len(X)), X)\nplt.title('Sequence similarity under the kernel')\n\n'''\nRegression\n==========\n'''\n\nX = np.array(['AGCT', 'AGC', 'AACT', 'TAA', 'AAA', 'GAACA'])\nY = np.array([1.0, 1.0, 2.0, 2.0, 3.0, 3.0])\n\ntraining_idx = [0, 1, 3, 4]\ngp = GaussianProcessRegressor(kernel=kernel)\ngp.fit(X[training_idx], Y[training_idx])\n\nplt.figure(figsize=(8, 5))\nplt.bar(np.arange(len(X)), gp.predict(X), color='b', label='prediction')\nplt.bar(training_idx, Y[training_idx], width=0.2, color='r',\n alpha=1, label='training')\nplt.xticks(np.arange(len(X)), X)\nplt.title('Regression on sequences')\nplt.legend()\n\n'''\nClassification\n==============\n'''\n\nX_train = np.array(['AGCT', 'CGA', 'TAAC', 'TCG', 'CTTT', 'TGCT'])\n# whether there are 'A's in the sequence\nY_train = np.array([True, True, True, False, False, False])\n\ngp = GaussianProcessClassifier(kernel)\ngp.fit(X_train, Y_train)\n\nX_test = ['AAA', 'ATAG', 'CTC', 'CT', 'C']\nY_test = [True, True, False, False, False]\n\nplt.figure(figsize=(8, 5))\nplt.scatter(np.arange(len(X_train)), [1.0 if c else -1.0 for c in Y_train],\n s=100, marker='o', edgecolor='none', facecolor=(1, 0.75, 0),\n label='training')\nplt.scatter(len(X_train) + np.arange(len(X_test)),\n [1.0 if c else -1.0 for c in Y_test],\n s=100, marker='o', edgecolor='none', facecolor='r', label='truth')\nplt.scatter(len(X_train) + np.arange(len(X_test)),\n [1.0 if c else -1.0 for c in gp.predict(X_test)],\n s=100, marker='x', edgecolor=(0, 1.0, 0.3), linewidth=2,\n label='prediction')\nplt.xticks(np.arange(len(X_train) + len(X_test)),\n np.concatenate((X_train, X_test)))\nplt.yticks([-1, 1], [False, True])\nplt.title('Classification on sequences')\nplt.legend()\n\nplt.show()"
+]
+}
+],
+"metadata": {
+"kernelspec": {
+"display_name": "Python 3",
+"language": "python",
+"name": "python3"
+},
+"language_info": {
+"codemirror_mode": {
+"name": "ipython",
+"version": 3
+},
+"file_extension": ".py",
+"mimetype": "text/x-python",
+"name": "python",
+"nbconvert_exporter": "python",
+"pygments_lexer": "ipython3",
+"version": "3.7.5"
+}
+},
+"nbformat": 4,
+"nbformat_minor": 0
+}
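
Editor's note: the notebook's markdown cell describes the kernel as an R-convolution: the similarity of two strings is a binary letter-wise kernel summed over every pair of letters. A standalone sketch of that computation, mirroring the example's _f method with its default baseline of 0.5 (not part of the diff):

# k(s1, s2) sums a letter-wise kernel over all len(s1) * len(s2) letter
# pairs: equal letters contribute 1.0, unequal letters the baseline value.
def sequence_kernel(s1, s2, baseline=0.5):
    return sum(1.0 if c1 == c2 else baseline for c1 in s1 for c2 in s2)

print(sequence_kernel('AGCT', 'AGC'))  # 3 matching pairs + 9 * 0.5 = 7.5
print(sequence_kernel('AAA', 'AAA'))   # all 9 pairs match -> 9.0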

0.22/_downloads/622fb50f5e367eda84eb7c32d306f659/plot_digits_classification.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"print(__doc__)\n\n# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>\n# License: BSD 3 clause\n\n# Standard scientific Python imports\nimport matplotlib.pyplot as plt\n\n# Import datasets, classifiers and performance metrics\nfrom sklearn import datasets, svm, metrics\nfrom sklearn.model_selection import train_test_split\n\n# The digits dataset\ndigits = datasets.load_digits()\n\n# The data that we are interested in is made of 8x8 images of digits, let's\n# have a look at the first 4 images, stored in the `images` attribute of the\n# dataset. If we were working from image files, we could load them using\n# matplotlib.pyplot.imread. Note that each image must have the same size. For these\n# images, we know which digit they represent: it is given in the 'target' of\n# the dataset.\nimages_and_labels = list(zip(digits.images, digits.target))\nfor index, (image, label) in enumerate(images_and_labels[:4]):\n plt.subplot(2, 4, index + 1)\n plt.axis('off')\n plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')\n plt.title('Training: %i' % label)\n\n# To apply a classifier on this data, we need to flatten the image, to\n# turn the data in a (samples, feature) matrix:\nn_samples = len(digits.images)\ndata = digits.images.reshape((n_samples, -1))\n\n# Create a classifier: a support vector classifier\nclassifier = svm.SVC(gamma=0.001)\n\n# Split data into train and test subsets\nX_train, X_test, y_train, y_test = train_test_split(\n data, digits.target, test_size=0.5, shuffle=False)\n\n# We learn the digits on the first half of the digits\nclassifier.fit(X_train, y_train)\n\n# Now predict the value of the digit on the second half:\npredicted = classifier.predict(X_test)\n\nprint(\"Classification report for classifier %s:\\n%s\\n\"\n % (classifier, metrics.classification_report(y_test, predicted)))\nprint(\"Confusion matrix:\\n%s\" % metrics.confusion_matrix(y_test, predicted))\n\nimages_and_predictions = list(zip(digits.images[n_samples // 2:], predicted))\nfor index, (image, prediction) in enumerate(images_and_predictions[:4]):\n plt.subplot(2, 4, index + 5)\n plt.axis('off')\n plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')\n plt.title('Prediction: %i' % prediction)\n\nplt.show()"
+"print(__doc__)\n\n# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>\n# License: BSD 3 clause\n\n# Standard scientific Python imports\nimport matplotlib.pyplot as plt\n\n# Import datasets, classifiers and performance metrics\nfrom sklearn import datasets, svm, metrics\nfrom sklearn.model_selection import train_test_split\n\n# The digits dataset\ndigits = datasets.load_digits()\n\n# The data that we are interested in is made of 8x8 images of digits, let's\n# have a look at the first 4 images, stored in the `images` attribute of the\n# dataset. If we were working from image files, we could load them using\n# matplotlib.pyplot.imread. Note that each image must have the same size. For these\n# images, we know which digit they represent: it is given in the 'target' of\n# the dataset.\n_, axes = plt.subplots(2, 4)\nimages_and_labels = list(zip(digits.images, digits.target))\nfor ax, (image, label) in zip(axes[0, :], images_and_labels[:4]):\n ax.set_axis_off()\n ax.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')\n ax.set_title('Training: %i' % label)\n\n# To apply a classifier on this data, we need to flatten the image, to\n# turn the data in a (samples, feature) matrix:\nn_samples = len(digits.images)\ndata = digits.images.reshape((n_samples, -1))\n\n# Create a classifier: a support vector classifier\nclassifier = svm.SVC(gamma=0.001)\n\n# Split data into train and test subsets\nX_train, X_test, y_train, y_test = train_test_split(\n data, digits.target, test_size=0.5, shuffle=False)\n\n# We learn the digits on the first half of the digits\nclassifier.fit(X_train, y_train)\n\n# Now predict the value of the digit on the second half:\npredicted = classifier.predict(X_test)\n\nimages_and_predictions = list(zip(digits.images[n_samples // 2:], predicted))\nfor ax, (image, prediction) in zip(axes[1, :], images_and_predictions[:4]):\n ax.set_axis_off()\n ax.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')\n ax.set_title('Prediction: %i' % prediction)\n\nprint(\"Classification report for classifier %s:\\n%s\\n\"\n % (classifier, metrics.classification_report(y_test, predicted)))\ndisp = metrics.plot_confusion_matrix(classifier, X_test, y_test)\ndisp.figure_.suptitle(\"Confusion Matrix\")\nprint(\"Confusion matrix:\\n%s\" % disp.confusion_matrix)\n\nplt.show()"
 ]
 }
 ],
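
Editor's note: a step worth pulling out of the source string above: before training, the example flattens the (n_samples, 8, 8) image stack into a (n_samples, 64) feature matrix, since the classifier expects one row per sample. A minimal sketch of that reshape (the zero array is a stand-in for digits.images):

import numpy as np

images = np.zeros((1797, 8, 8))         # stand-in for digits.images
n_samples = len(images)
data = images.reshape((n_samples, -1))  # -1 infers 8 * 8 = 64 features
print(data.shape)                       # (1797, 64)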

0.22/_downloads/7e0e90df87894c2fadaf3004c5316545/plot_confusion_matrix.py

Lines changed: 13 additions & 65 deletions
@@ -31,8 +31,7 @@
 
 from sklearn import svm, datasets
 from sklearn.model_selection import train_test_split
-from sklearn.metrics import confusion_matrix
-from sklearn.utils.multiclass import unique_labels
+from sklearn.metrics import plot_confusion_matrix
 
 # import some data to play with
 iris = datasets.load_iris()
@@ -45,72 +44,21 @@
 
 # Run classifier, using a model that is too regularized (C too low) to see
 # the impact on the results
-classifier = svm.SVC(kernel='linear', C=0.01)
-y_pred = classifier.fit(X_train, y_train).predict(X_test)
-
-
-def plot_confusion_matrix(y_true, y_pred, classes,
-                          normalize=False,
-                          title=None,
-                          cmap=plt.cm.Blues):
-    """
-    This function prints and plots the confusion matrix.
-    Normalization can be applied by setting `normalize=True`.
-    """
-    if not title:
-        if normalize:
-            title = 'Normalized confusion matrix'
-        else:
-            title = 'Confusion matrix, without normalization'
-
-    # Compute confusion matrix
-    cm = confusion_matrix(y_true, y_pred)
-    # Only use the labels that appear in the data
-    classes = classes[unique_labels(y_true, y_pred)]
-    if normalize:
-        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
-        print("Normalized confusion matrix")
-    else:
-        print('Confusion matrix, without normalization')
-
-    print(cm)
-
-    fig, ax = plt.subplots()
-    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
-    ax.figure.colorbar(im, ax=ax)
-    # We want to show all ticks...
-    ax.set(xticks=np.arange(cm.shape[1]),
-           yticks=np.arange(cm.shape[0]),
-           # ... and label them with the respective list entries
-           xticklabels=classes, yticklabels=classes,
-           title=title,
-           ylabel='True label',
-           xlabel='Predicted label')
-
-    # Rotate the tick labels and set their alignment.
-    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
-             rotation_mode="anchor")
-
-    # Loop over data dimensions and create text annotations.
-    fmt = '.2f' if normalize else 'd'
-    thresh = cm.max() / 2.
-    for i in range(cm.shape[0]):
-        for j in range(cm.shape[1]):
-            ax.text(j, i, format(cm[i, j], fmt),
-                    ha="center", va="center",
-                    color="white" if cm[i, j] > thresh else "black")
-    fig.tight_layout()
-    return ax
-
+classifier = svm.SVC(kernel='linear', C=0.01).fit(X_train, y_train)
 
 np.set_printoptions(precision=2)
 
 # Plot non-normalized confusion matrix
-plot_confusion_matrix(y_test, y_pred, classes=class_names,
-                      title='Confusion matrix, without normalization')
-
-# Plot normalized confusion matrix
-plot_confusion_matrix(y_test, y_pred, classes=class_names, normalize=True,
-                      title='Normalized confusion matrix')
+titles_options = [("Confusion matrix, without normalization", None),
+                  ("Normalized confusion matrix", 'true')]
+for title, normalize in titles_options:
+    disp = plot_confusion_matrix(classifier, X_test, y_test,
+                                 display_labels=class_names,
+                                 cmap=plt.cm.Blues,
+                                 normalize=normalize)
+    disp.ax_.set_title(title)
+
+    print(title)
+    print(disp.confusion_matrix)
 
 plt.show()
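
Editor's note: one behavioral difference in the rewritten loop above: the old helper's normalize was a boolean, while the 0.22 function takes None or a string naming what to normalize over. A self-contained sketch (assuming scikit-learn 0.22; the names here are local to the sketch):

import matplotlib.pyplot as plt
from sklearn import datasets, svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import plot_confusion_matrix

X, y = datasets.load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = svm.SVC(kernel='linear', C=0.01).fit(X_train, y_train)

# None -> raw counts; 'true' -> each row (true class) sums to 1;
# 'pred' -> each column sums to 1; 'all' -> the whole matrix sums to 1.
for normalize in (None, 'true', 'pred', 'all'):
    disp = plot_confusion_matrix(clf, X_test, y_test, normalize=normalize)
    print(normalize, disp.confusion_matrix.sum())
plt.show()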
