Skip to content

Commit 642d62e

Browse files
committed
Pushing the docs to dev/ for branch: main, commit 77fbdd1c46c2931fb06b373786efd03da90d3e78
1 parent 7e8ba7e commit 642d62e

File tree

1,242 files changed

+4630
-4501
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

1,242 files changed

+4630
-4501
lines changed

dev/_downloads/00ae629d652473137a3905a5e08ea815/plot_iris_dtc.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
2-
================================================================
3-
Plot the decision surface of a decision tree on the iris dataset
4-
================================================================
2+
=======================================================================
3+
Plot the decision surface of decision trees trained on the iris dataset
4+
=======================================================================
55
66
Plot the decision surface of a decision tree trained on pairs
77
of features of the iris dataset.
@@ -14,20 +14,24 @@
1414
1515
We also show the tree structure of a model built on all of the features.
1616
"""
17+
# %%
18+
# First load the copy of the Iris dataset shipped with scikit-learn:
19+
from sklearn.datasets import load_iris
20+
21+
iris = load_iris()
22+
1723

24+
# %%
25+
# Display the decision functions of trees trained on all pairs of features.
1826
import numpy as np
1927
import matplotlib.pyplot as plt
20-
21-
from sklearn.datasets import load_iris
22-
from sklearn.tree import DecisionTreeClassifier, plot_tree
28+
from sklearn.tree import DecisionTreeClassifier
2329

2430
# Parameters
2531
n_classes = 3
2632
plot_colors = "ryb"
2733
plot_step = 0.02
2834

29-
# Load data
30-
iris = load_iris()
3135

3236
for pairidx, pair in enumerate([[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]):
3337
# We only take the two corresponding features
@@ -67,11 +71,17 @@
6771
s=15,
6872
)
6973

70-
plt.suptitle("Decision surface of a decision tree using paired features")
74+
plt.suptitle("Decision surface of decision trees trained on pairs of features")
7175
plt.legend(loc="lower right", borderpad=0, handletextpad=0)
72-
plt.axis("tight")
76+
_ = plt.axis("tight")
77+
78+
# %%
79+
# Display the structure of a single decision tree trained on all the features
80+
# together.
81+
from sklearn.tree import plot_tree
7382

7483
plt.figure()
7584
clf = DecisionTreeClassifier().fit(iris.data, iris.target)
7685
plot_tree(clf, filled=True)
86+
plt.title("Decision tree trained on all the iris features")
7787
plt.show()
Binary file not shown.

dev/_downloads/40f4aad91af595a370d7582e3a23bed7/plot_roc.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"import numpy as np\nimport matplotlib.pyplot as plt\nfrom itertools import cycle\n\nfrom sklearn import svm, datasets\nfrom sklearn.metrics import roc_curve, auc\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import label_binarize\nfrom sklearn.multiclass import OneVsRestClassifier\nfrom scipy import interp\nfrom sklearn.metrics import roc_auc_score\n\n# Import some data to play with\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\n\n# Binarize the output\ny = label_binarize(y, classes=[0, 1, 2])\nn_classes = y.shape[1]\n\n# Add noisy features to make the problem harder\nrandom_state = np.random.RandomState(0)\nn_samples, n_features = X.shape\nX = np.c_[X, random_state.randn(n_samples, 200 * n_features)]\n\n# shuffle and split training and test sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)\n\n# Learn to predict each class against the other\nclassifier = OneVsRestClassifier(\n svm.SVC(kernel=\"linear\", probability=True, random_state=random_state)\n)\ny_score = classifier.fit(X_train, y_train).decision_function(X_test)\n\n# Compute ROC curve and ROC area for each class\nfpr = dict()\ntpr = dict()\nroc_auc = dict()\nfor i in range(n_classes):\n fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])\n roc_auc[i] = auc(fpr[i], tpr[i])\n\n# Compute micro-average ROC curve and ROC area\nfpr[\"micro\"], tpr[\"micro\"], _ = roc_curve(y_test.ravel(), y_score.ravel())\nroc_auc[\"micro\"] = auc(fpr[\"micro\"], tpr[\"micro\"])"
29+
"import numpy as np\nimport matplotlib.pyplot as plt\nfrom itertools import cycle\n\nfrom sklearn import svm, datasets\nfrom sklearn.metrics import roc_curve, auc\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import label_binarize\nfrom sklearn.multiclass import OneVsRestClassifier\nfrom sklearn.metrics import roc_auc_score\n\n# Import some data to play with\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\n\n# Binarize the output\ny = label_binarize(y, classes=[0, 1, 2])\nn_classes = y.shape[1]\n\n# Add noisy features to make the problem harder\nrandom_state = np.random.RandomState(0)\nn_samples, n_features = X.shape\nX = np.c_[X, random_state.randn(n_samples, 200 * n_features)]\n\n# shuffle and split training and test sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)\n\n# Learn to predict each class against the other\nclassifier = OneVsRestClassifier(\n svm.SVC(kernel=\"linear\", probability=True, random_state=random_state)\n)\ny_score = classifier.fit(X_train, y_train).decision_function(X_test)\n\n# Compute ROC curve and ROC area for each class\nfpr = dict()\ntpr = dict()\nroc_auc = dict()\nfor i in range(n_classes):\n fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])\n roc_auc[i] = auc(fpr[i], tpr[i])\n\n# Compute micro-average ROC curve and ROC area\nfpr[\"micro\"], tpr[\"micro\"], _ = roc_curve(y_test.ravel(), y_score.ravel())\nroc_auc[\"micro\"] = auc(fpr[\"micro\"], tpr[\"micro\"])"
3030
]
3131
},
3232
{
@@ -62,7 +62,7 @@
6262
},
6363
"outputs": [],
6464
"source": [
65-
"# First aggregate all false positive rates\nall_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))\n\n# Then interpolate all ROC curves at this points\nmean_tpr = np.zeros_like(all_fpr)\nfor i in range(n_classes):\n mean_tpr += interp(all_fpr, fpr[i], tpr[i])\n\n# Finally average it and compute AUC\nmean_tpr /= n_classes\n\nfpr[\"macro\"] = all_fpr\ntpr[\"macro\"] = mean_tpr\nroc_auc[\"macro\"] = auc(fpr[\"macro\"], tpr[\"macro\"])\n\n# Plot all ROC curves\nplt.figure()\nplt.plot(\n fpr[\"micro\"],\n tpr[\"micro\"],\n label=\"micro-average ROC curve (area = {0:0.2f})\".format(roc_auc[\"micro\"]),\n color=\"deeppink\",\n linestyle=\":\",\n linewidth=4,\n)\n\nplt.plot(\n fpr[\"macro\"],\n tpr[\"macro\"],\n label=\"macro-average ROC curve (area = {0:0.2f})\".format(roc_auc[\"macro\"]),\n color=\"navy\",\n linestyle=\":\",\n linewidth=4,\n)\n\ncolors = cycle([\"aqua\", \"darkorange\", \"cornflowerblue\"])\nfor i, color in zip(range(n_classes), colors):\n plt.plot(\n fpr[i],\n tpr[i],\n color=color,\n lw=lw,\n label=\"ROC curve of class {0} (area = {1:0.2f})\".format(i, roc_auc[i]),\n )\n\nplt.plot([0, 1], [0, 1], \"k--\", lw=lw)\nplt.xlim([0.0, 1.0])\nplt.ylim([0.0, 1.05])\nplt.xlabel(\"False Positive Rate\")\nplt.ylabel(\"True Positive Rate\")\nplt.title(\"Some extension of Receiver operating characteristic to multiclass\")\nplt.legend(loc=\"lower right\")\nplt.show()"
65+
"# First aggregate all false positive rates\nall_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))\n\n# Then interpolate all ROC curves at this points\nmean_tpr = np.zeros_like(all_fpr)\nfor i in range(n_classes):\n mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])\n\n# Finally average it and compute AUC\nmean_tpr /= n_classes\n\nfpr[\"macro\"] = all_fpr\ntpr[\"macro\"] = mean_tpr\nroc_auc[\"macro\"] = auc(fpr[\"macro\"], tpr[\"macro\"])\n\n# Plot all ROC curves\nplt.figure()\nplt.plot(\n fpr[\"micro\"],\n tpr[\"micro\"],\n label=\"micro-average ROC curve (area = {0:0.2f})\".format(roc_auc[\"micro\"]),\n color=\"deeppink\",\n linestyle=\":\",\n linewidth=4,\n)\n\nplt.plot(\n fpr[\"macro\"],\n tpr[\"macro\"],\n label=\"macro-average ROC curve (area = {0:0.2f})\".format(roc_auc[\"macro\"]),\n color=\"navy\",\n linestyle=\":\",\n linewidth=4,\n)\n\ncolors = cycle([\"aqua\", \"darkorange\", \"cornflowerblue\"])\nfor i, color in zip(range(n_classes), colors):\n plt.plot(\n fpr[i],\n tpr[i],\n color=color,\n lw=lw,\n label=\"ROC curve of class {0} (area = {1:0.2f})\".format(i, roc_auc[i]),\n )\n\nplt.plot([0, 1], [0, 1], \"k--\", lw=lw)\nplt.xlim([0.0, 1.0])\nplt.ylim([0.0, 1.05])\nplt.xlabel(\"False Positive Rate\")\nplt.ylabel(\"True Positive Rate\")\nplt.title(\"Some extension of Receiver operating characteristic to multiclass\")\nplt.legend(loc=\"lower right\")\nplt.show()"
6666
]
6767
},
6868
{
Binary file not shown.

dev/_downloads/80fef09514fd851560e999a5b7daa303/plot_roc.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242
from sklearn.model_selection import train_test_split
4343
from sklearn.preprocessing import label_binarize
4444
from sklearn.multiclass import OneVsRestClassifier
45-
from scipy import interp
4645
from sklearn.metrics import roc_auc_score
4746

4847
# Import some data to play with
@@ -113,7 +112,7 @@
113112
# Then interpolate all ROC curves at these points
114113
mean_tpr = np.zeros_like(all_fpr)
115114
for i in range(n_classes):
116-
mean_tpr += interp(all_fpr, fpr[i], tpr[i])
115+
mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
117116

118117
# Finally average it and compute AUC
119118
mean_tpr /= n_classes

0 commit comments

Comments
 (0)