scikit-learn
diff --git a/‎dev/_downloads/00ae629d652473137a3905a5e08ea815/plot_iris_dtc.py
Lines changed: 20 additions & 10 deletions b/‎dev/_downloads/00ae629d652473137a3905a5e08ea815/plot_iris_dtc.py
Lines changed: 20 additions & 10 deletions
diff --git a/‎dev/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip
333 Bytes b/‎dev/_downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip
333 Bytes
diff --git a/‎dev/_downloads/40f4aad91af595a370d7582e3a23bed7/plot_roc.ipynb
Lines changed: 2 additions & 2 deletions b/‎dev/_downloads/40f4aad91af595a370d7582e3a23bed7/plot_roc.ipynb
Lines changed: 2 additions & 2 deletions
diff --git a/‎dev/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip
981 Bytes b/‎dev/_downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip
981 Bytes
diff --git a/‎dev/_downloads/80fef09514fd851560e999a5b7daa303/plot_roc.py
Lines changed: 1 addition & 2 deletions b/‎dev/_downloads/80fef09514fd851560e999a5b7daa303/plot_roc.py
Lines changed: 1 addition & 2 deletions
@@ -1,7 +1,7 @@
 """
-================================================================
-Plot the decision surface of a decision tree on the iris dataset
-================================================================
+=======================================================================
+Plot the decision surface of decision trees trained on the iris dataset
+=======================================================================
 
 Plot the decision surface of a decision tree trained on pairs
 of features of the iris dataset.
@@ -14,20 +14,24 @@
 
 We also show the tree structure of a model built on all of the features.
 """
+# %%
+# First load the copy of the Iris dataset shipped with scikit-learn:
+from sklearn.datasets import load_iris
+
+iris = load_iris()
+
 
+# %%
+# Display the decision functions of trees trained on all pairs of features.
 import numpy as np
 import matplotlib.pyplot as plt
-
-from sklearn.datasets import load_iris
-from sklearn.tree import DecisionTreeClassifier, plot_tree
+from sklearn.tree import DecisionTreeClassifier
 
 # Parameters
 n_classes = 3
 plot_colors = "ryb"
 plot_step = 0.02
 
-# Load data
-iris = load_iris()
 
 for pairidx, pair in enumerate([[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]):
     # We only take the two corresponding features
@@ -67,11 +71,17 @@
             s=15,
         )
 
-plt.suptitle("Decision surface of a decision tree using paired features")
+plt.suptitle("Decision surface of decision trees trained on pairs of features")
 plt.legend(loc="lower right", borderpad=0, handletextpad=0)
-plt.axis("tight")
+_ = plt.axis("tight")
+
+# %%
+# Display the structure of a single decision tree trained on all the features
+# together.
+from sklearn.tree import plot_tree
 
 plt.figure()
 clf = DecisionTreeClassifier().fit(iris.data, iris.target)
 plot_tree(clf, filled=True)
+plt.title("Decision tree trained on all the iris features")
 plt.show()
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "import numpy as np\nimport matplotlib.pyplot as plt\nfrom itertools import cycle\n\nfrom sklearn import svm, datasets\nfrom sklearn.metrics import roc_curve, auc\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import label_binarize\nfrom sklearn.multiclass import OneVsRestClassifier\nfrom scipy import interp\nfrom sklearn.metrics import roc_auc_score\n\n# Import some data to play with\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\n\n# Binarize the output\ny = label_binarize(y, classes=[0, 1, 2])\nn_classes = y.shape[1]\n\n# Add noisy features to make the problem harder\nrandom_state = np.random.RandomState(0)\nn_samples, n_features = X.shape\nX = np.c_[X, random_state.randn(n_samples, 200 * n_features)]\n\n# shuffle and split training and test sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)\n\n# Learn to predict each class against the other\nclassifier = OneVsRestClassifier(\n    svm.SVC(kernel=\"linear\", probability=True, random_state=random_state)\n)\ny_score = classifier.fit(X_train, y_train).decision_function(X_test)\n\n# Compute ROC curve and ROC area for each class\nfpr = dict()\ntpr = dict()\nroc_auc = dict()\nfor i in range(n_classes):\n    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])\n    roc_auc[i] = auc(fpr[i], tpr[i])\n\n# Compute micro-average ROC curve and ROC area\nfpr[\"micro\"], tpr[\"micro\"], _ = roc_curve(y_test.ravel(), y_score.ravel())\nroc_auc[\"micro\"] = auc(fpr[\"micro\"], tpr[\"micro\"])"
+        "import numpy as np\nimport matplotlib.pyplot as plt\nfrom itertools import cycle\n\nfrom sklearn import svm, datasets\nfrom sklearn.metrics import roc_curve, auc\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import label_binarize\nfrom sklearn.multiclass import OneVsRestClassifier\nfrom sklearn.metrics import roc_auc_score\n\n# Import some data to play with\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\n\n# Binarize the output\ny = label_binarize(y, classes=[0, 1, 2])\nn_classes = y.shape[1]\n\n# Add noisy features to make the problem harder\nrandom_state = np.random.RandomState(0)\nn_samples, n_features = X.shape\nX = np.c_[X, random_state.randn(n_samples, 200 * n_features)]\n\n# shuffle and split training and test sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)\n\n# Learn to predict each class against the other\nclassifier = OneVsRestClassifier(\n    svm.SVC(kernel=\"linear\", probability=True, random_state=random_state)\n)\ny_score = classifier.fit(X_train, y_train).decision_function(X_test)\n\n# Compute ROC curve and ROC area for each class\nfpr = dict()\ntpr = dict()\nroc_auc = dict()\nfor i in range(n_classes):\n    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])\n    roc_auc[i] = auc(fpr[i], tpr[i])\n\n# Compute micro-average ROC curve and ROC area\nfpr[\"micro\"], tpr[\"micro\"], _ = roc_curve(y_test.ravel(), y_score.ravel())\nroc_auc[\"micro\"] = auc(fpr[\"micro\"], tpr[\"micro\"])"
       ]
     },
     {
@@ -62,7 +62,7 @@
       },
       "outputs": [],
       "source": [
-        "# First aggregate all false positive rates\nall_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))\n\n# Then interpolate all ROC curves at this points\nmean_tpr = np.zeros_like(all_fpr)\nfor i in range(n_classes):\n    mean_tpr += interp(all_fpr, fpr[i], tpr[i])\n\n# Finally average it and compute AUC\nmean_tpr /= n_classes\n\nfpr[\"macro\"] = all_fpr\ntpr[\"macro\"] = mean_tpr\nroc_auc[\"macro\"] = auc(fpr[\"macro\"], tpr[\"macro\"])\n\n# Plot all ROC curves\nplt.figure()\nplt.plot(\n    fpr[\"micro\"],\n    tpr[\"micro\"],\n    label=\"micro-average ROC curve (area = {0:0.2f})\".format(roc_auc[\"micro\"]),\n    color=\"deeppink\",\n    linestyle=\":\",\n    linewidth=4,\n)\n\nplt.plot(\n    fpr[\"macro\"],\n    tpr[\"macro\"],\n    label=\"macro-average ROC curve (area = {0:0.2f})\".format(roc_auc[\"macro\"]),\n    color=\"navy\",\n    linestyle=\":\",\n    linewidth=4,\n)\n\ncolors = cycle([\"aqua\", \"darkorange\", \"cornflowerblue\"])\nfor i, color in zip(range(n_classes), colors):\n    plt.plot(\n        fpr[i],\n        tpr[i],\n        color=color,\n        lw=lw,\n        label=\"ROC curve of class {0} (area = {1:0.2f})\".format(i, roc_auc[i]),\n    )\n\nplt.plot([0, 1], [0, 1], \"k--\", lw=lw)\nplt.xlim([0.0, 1.0])\nplt.ylim([0.0, 1.05])\nplt.xlabel(\"False Positive Rate\")\nplt.ylabel(\"True Positive Rate\")\nplt.title(\"Some extension of Receiver operating characteristic to multiclass\")\nplt.legend(loc=\"lower right\")\nplt.show()"
+        "# First aggregate all false positive rates\nall_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))\n\n# Then interpolate all ROC curves at this points\nmean_tpr = np.zeros_like(all_fpr)\nfor i in range(n_classes):\n    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])\n\n# Finally average it and compute AUC\nmean_tpr /= n_classes\n\nfpr[\"macro\"] = all_fpr\ntpr[\"macro\"] = mean_tpr\nroc_auc[\"macro\"] = auc(fpr[\"macro\"], tpr[\"macro\"])\n\n# Plot all ROC curves\nplt.figure()\nplt.plot(\n    fpr[\"micro\"],\n    tpr[\"micro\"],\n    label=\"micro-average ROC curve (area = {0:0.2f})\".format(roc_auc[\"micro\"]),\n    color=\"deeppink\",\n    linestyle=\":\",\n    linewidth=4,\n)\n\nplt.plot(\n    fpr[\"macro\"],\n    tpr[\"macro\"],\n    label=\"macro-average ROC curve (area = {0:0.2f})\".format(roc_auc[\"macro\"]),\n    color=\"navy\",\n    linestyle=\":\",\n    linewidth=4,\n)\n\ncolors = cycle([\"aqua\", \"darkorange\", \"cornflowerblue\"])\nfor i, color in zip(range(n_classes), colors):\n    plt.plot(\n        fpr[i],\n        tpr[i],\n        color=color,\n        lw=lw,\n        label=\"ROC curve of class {0} (area = {1:0.2f})\".format(i, roc_auc[i]),\n    )\n\nplt.plot([0, 1], [0, 1], \"k--\", lw=lw)\nplt.xlim([0.0, 1.0])\nplt.ylim([0.0, 1.05])\nplt.xlabel(\"False Positive Rate\")\nplt.ylabel(\"True Positive Rate\")\nplt.title(\"Some extension of Receiver operating characteristic to multiclass\")\nplt.legend(loc=\"lower right\")\nplt.show()"
       ]
     },
     {
 
@@ -42,7 +42,6 @@
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import label_binarize
 from sklearn.multiclass import OneVsRestClassifier
-from scipy import interp
 from sklearn.metrics import roc_auc_score
 
 # Import some data to play with
@@ -113,7 +112,7 @@
 # Then interpolate all ROC curves at this points
 mean_tpr = np.zeros_like(all_fpr)
 for i in range(n_classes):
-    mean_tpr += interp(all_fpr, fpr[i], tpr[i])
+    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
 
 # Finally average it and compute AUC
 mean_tpr /= n_classes
Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,7 @@`
`26`	`26`	`},`
`27`	`27`	`"outputs": [],`
`28`	`28`	`"source": [`
`29`		- "import numpy as np\nimport matplotlib.pyplot as plt\nfrom itertools import cycle\n\nfrom sklearn import svm, datasets\nfrom sklearn.metrics import roc_curve, auc\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import label_binarize\nfrom sklearn.multiclass import OneVsRestClassifier\nfrom scipy import interp\nfrom sklearn.metrics import roc_auc_score\n\n# Import some data to play with\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\n\n# Binarize the output\ny = label_binarize(y, classes=[0, 1, 2])\nn_classes = y.shape[1]\n\n# Add noisy features to make the problem harder\nrandom_state = np.random.RandomState(0)\nn_samples, n_features = X.shape\nX = np.c_[X, random_state.randn(n_samples, 200 * n_features)]\n\n# shuffle and split training and test sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)\n\n# Learn to predict each class against the other\nclassifier = OneVsRestClassifier(\n svm.SVC(kernel=\"linear\", probability=True, random_state=random_state)\n)\ny_score = classifier.fit(X_train, y_train).decision_function(X_test)\n\n# Compute ROC curve and ROC area for each class\nfpr = dict()\ntpr = dict()\nroc_auc = dict()\nfor i in range(n_classes):\n fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])\n roc_auc[i] = auc(fpr[i], tpr[i])\n\n# Compute micro-average ROC curve and ROC area\nfpr[\"micro\"], tpr[\"micro\"], _ = roc_curve(y_test.ravel(), y_score.ravel())\nroc_auc[\"micro\"] = auc(fpr[\"micro\"], tpr[\"micro\"])"
	`29`	+ "import numpy as np\nimport matplotlib.pyplot as plt\nfrom itertools import cycle\n\nfrom sklearn import svm, datasets\nfrom sklearn.metrics import roc_curve, auc\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import label_binarize\nfrom sklearn.multiclass import OneVsRestClassifier\nfrom sklearn.metrics import roc_auc_score\n\n# Import some data to play with\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\n\n# Binarize the output\ny = label_binarize(y, classes=[0, 1, 2])\nn_classes = y.shape[1]\n\n# Add noisy features to make the problem harder\nrandom_state = np.random.RandomState(0)\nn_samples, n_features = X.shape\nX = np.c_[X, random_state.randn(n_samples, 200 * n_features)]\n\n# shuffle and split training and test sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)\n\n# Learn to predict each class against the other\nclassifier = OneVsRestClassifier(\n svm.SVC(kernel=\"linear\", probability=True, random_state=random_state)\n)\ny_score = classifier.fit(X_train, y_train).decision_function(X_test)\n\n# Compute ROC curve and ROC area for each class\nfpr = dict()\ntpr = dict()\nroc_auc = dict()\nfor i in range(n_classes):\n fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])\n roc_auc[i] = auc(fpr[i], tpr[i])\n\n# Compute micro-average ROC curve and ROC area\nfpr[\"micro\"], tpr[\"micro\"], _ = roc_curve(y_test.ravel(), y_score.ravel())\nroc_auc[\"micro\"] = auc(fpr[\"micro\"], tpr[\"micro\"])"
`30`	`30`	`]`
`31`	`31`	`},`
`32`	`32`	`{`
`@@ -62,7 +62,7 @@`
`62`	`62`	`},`
`63`	`63`	`"outputs": [],`
`64`	`64`	`"source": [`
`65`		- "# First aggregate all false positive rates\nall_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))\n\n# Then interpolate all ROC curves at this points\nmean_tpr = np.zeros_like(all_fpr)\nfor i in range(n_classes):\n mean_tpr += interp(all_fpr, fpr[i], tpr[i])\n\n# Finally average it and compute AUC\nmean_tpr /= n_classes\n\nfpr[\"macro\"] = all_fpr\ntpr[\"macro\"] = mean_tpr\nroc_auc[\"macro\"] = auc(fpr[\"macro\"], tpr[\"macro\"])\n\n# Plot all ROC curves\nplt.figure()\nplt.plot(\n fpr[\"micro\"],\n tpr[\"micro\"],\n label=\"micro-average ROC curve (area = {0:0.2f})\".format(roc_auc[\"micro\"]),\n color=\"deeppink\",\n linestyle=\":\",\n linewidth=4,\n)\n\nplt.plot(\n fpr[\"macro\"],\n tpr[\"macro\"],\n label=\"macro-average ROC curve (area = {0:0.2f})\".format(roc_auc[\"macro\"]),\n color=\"navy\",\n linestyle=\":\",\n linewidth=4,\n)\n\ncolors = cycle([\"aqua\", \"darkorange\", \"cornflowerblue\"])\nfor i, color in zip(range(n_classes), colors):\n plt.plot(\n fpr[i],\n tpr[i],\n color=color,\n lw=lw,\n label=\"ROC curve of class {0} (area = {1:0.2f})\".format(i, roc_auc[i]),\n )\n\nplt.plot([0, 1], [0, 1], \"k--\", lw=lw)\nplt.xlim([0.0, 1.0])\nplt.ylim([0.0, 1.05])\nplt.xlabel(\"False Positive Rate\")\nplt.ylabel(\"True Positive Rate\")\nplt.title(\"Some extension of Receiver operating characteristic to multiclass\")\nplt.legend(loc=\"lower right\")\nplt.show()"
	`65`	+ "# First aggregate all false positive rates\nall_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))\n\n# Then interpolate all ROC curves at this points\nmean_tpr = np.zeros_like(all_fpr)\nfor i in range(n_classes):\n mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])\n\n# Finally average it and compute AUC\nmean_tpr /= n_classes\n\nfpr[\"macro\"] = all_fpr\ntpr[\"macro\"] = mean_tpr\nroc_auc[\"macro\"] = auc(fpr[\"macro\"], tpr[\"macro\"])\n\n# Plot all ROC curves\nplt.figure()\nplt.plot(\n fpr[\"micro\"],\n tpr[\"micro\"],\n label=\"micro-average ROC curve (area = {0:0.2f})\".format(roc_auc[\"micro\"]),\n color=\"deeppink\",\n linestyle=\":\",\n linewidth=4,\n)\n\nplt.plot(\n fpr[\"macro\"],\n tpr[\"macro\"],\n label=\"macro-average ROC curve (area = {0:0.2f})\".format(roc_auc[\"macro\"]),\n color=\"navy\",\n linestyle=\":\",\n linewidth=4,\n)\n\ncolors = cycle([\"aqua\", \"darkorange\", \"cornflowerblue\"])\nfor i, color in zip(range(n_classes), colors):\n plt.plot(\n fpr[i],\n tpr[i],\n color=color,\n lw=lw,\n label=\"ROC curve of class {0} (area = {1:0.2f})\".format(i, roc_auc[i]),\n )\n\nplt.plot([0, 1], [0, 1], \"k--\", lw=lw)\nplt.xlim([0.0, 1.0])\nplt.ylim([0.0, 1.05])\nplt.xlabel(\"False Positive Rate\")\nplt.ylabel(\"True Positive Rate\")\nplt.title(\"Some extension of Receiver operating characteristic to multiclass\")\nplt.legend(loc=\"lower right\")\nplt.show()"
`66`	`66`	`]`
`67`	`67`	`},`
`68`	`68`	`{`