
Commit 6c69ff5

Pushing the docs to 0.24/ for branch: 0.24.X, commit 255718b4ad9a3490bc99c992d467f85737bd1291
1 parent caf8613 commit 6c69ff5

File tree

3,785 files changed: +713,533 additions, -0 deletions


0.24/.buildinfo

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 133687f2af5eb2f75adf185bdaa6b0c9
tags: 645f666f9bcd5a90fca523b33c5a78b7
Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
"""
================================
SVM Exercise
================================

A tutorial exercise for using different SVM kernels.

This exercise is used in the :ref:`using_kernels_tut` part of the
:ref:`supervised_learning_tut` section of the :ref:`stat_learn_tut_index`.
"""
print(__doc__)


import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, svm

iris = datasets.load_iris()
X = iris.data
y = iris.target

X = X[y != 0, :2]
y = y[y != 0]

n_sample = len(X)

np.random.seed(0)
order = np.random.permutation(n_sample)
X = X[order]
y = y[order].astype(float)

X_train = X[:int(.9 * n_sample)]
y_train = y[:int(.9 * n_sample)]
X_test = X[int(.9 * n_sample):]
y_test = y[int(.9 * n_sample):]

# fit the model
for kernel in ('linear', 'rbf', 'poly'):
    clf = svm.SVC(kernel=kernel, gamma=10)
    clf.fit(X_train, y_train)

    plt.figure()
    plt.clf()
    plt.scatter(X[:, 0], X[:, 1], c=y, zorder=10, cmap=plt.cm.Paired,
                edgecolor='k', s=20)

    # Circle out the test data
    plt.scatter(X_test[:, 0], X_test[:, 1], s=80, facecolors='none',
                zorder=10, edgecolor='k')

    plt.axis('tight')
    x_min = X[:, 0].min()
    x_max = X[:, 0].max()
    y_min = X[:, 1].min()
    y_max = X[:, 1].max()

    XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
    Z = clf.decision_function(np.c_[XX.ravel(), YY.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(XX.shape)
    plt.pcolormesh(XX, YY, Z > 0, cmap=plt.cm.Paired)
    plt.contour(XX, YY, Z, colors=['k', 'k', 'k'],
                linestyles=['--', '-', '--'], levels=[-.5, 0, .5])

    plt.title(kernel)
plt.show()
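
The committed exercise plots the three decision surfaces but never scores the held-out 10% split it sets aside. A minimal follow-up sketch, not part of the committed file, assuming the X_train/X_test variables defined above:

# Hypothetical addition (not in the committed example): score each kernel
# on the held-out split that the exercise already creates.
for kernel in ('linear', 'rbf', 'poly'):
    clf = svm.SVC(kernel=kernel, gamma=10).fit(X_train, y_train)
    print('%-6s test accuracy: %.2f' % (kernel, clf.score(X_test, y_test)))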
Lines changed: 72 additions & 0 deletions
@@ -0,0 +1,72 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Segmenting the picture of greek coins in regions\n\nThis example uses `spectral_clustering` on a graph created from\nvoxel-to-voxel difference on an image to break this image into multiple\npartly-homogeneous regions.\n\nThis procedure (spectral clustering on an image) is an efficient\napproximate solution for finding normalized graph cuts.\n\nThere are two options to assign labels:\n\n* with 'kmeans' spectral clustering will cluster samples in the embedding space\n  using a kmeans algorithm\n* whereas 'discrete' will iteratively search for the closest partition\n  space to the embedding space.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "print(__doc__)\n\n# Author: Gael Varoquaux <[email protected]>, Brian Cheung\n# License: BSD 3 clause\n\nimport time\n\nimport numpy as np\nfrom scipy.ndimage.filters import gaussian_filter\nimport matplotlib.pyplot as plt\nimport skimage\nfrom skimage.data import coins\nfrom skimage.transform import rescale\n\nfrom sklearn.feature_extraction import image\nfrom sklearn.cluster import spectral_clustering\nfrom sklearn.utils.fixes import parse_version\n\n# these were introduced in skimage-0.14\nif parse_version(skimage.__version__) >= parse_version('0.14'):\n    rescale_params = {'anti_aliasing': False, 'multichannel': False}\nelse:\n    rescale_params = {}\n\n# load the coins as a numpy array\norig_coins = coins()\n\n# Resize it to 20% of the original size to speed up the processing\n# Applying a Gaussian filter for smoothing prior to down-scaling\n# reduces aliasing artifacts.\nsmoothened_coins = gaussian_filter(orig_coins, sigma=2)\nrescaled_coins = rescale(smoothened_coins, 0.2, mode=\"reflect\",\n                         **rescale_params)\n\n# Convert the image into a graph with the value of the gradient on the\n# edges.\ngraph = image.img_to_graph(rescaled_coins)\n\n# Take a decreasing function of the gradient: an exponential\n# The smaller beta is, the more independent the segmentation is of the\n# actual image. For beta=1, the segmentation is close to a voronoi\nbeta = 10\neps = 1e-6\ngraph.data = np.exp(-beta * graph.data / graph.data.std()) + eps\n\n# Apply spectral clustering (this step goes much faster if you have pyamg\n# installed)\nN_REGIONS = 25"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Visualize the resulting regions\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "for assign_labels in ('kmeans', 'discretize'):\n    t0 = time.time()\n    labels = spectral_clustering(graph, n_clusters=N_REGIONS,\n                                 assign_labels=assign_labels, random_state=42)\n    t1 = time.time()\n    labels = labels.reshape(rescaled_coins.shape)\n\n    plt.figure(figsize=(5, 5))\n    plt.imshow(rescaled_coins, cmap=plt.cm.gray)\n    for l in range(N_REGIONS):\n        plt.contour(labels == l,\n                    colors=[plt.cm.nipy_spectral(l / float(N_REGIONS))])\n    plt.xticks(())\n    plt.yticks(())\n    title = 'Spectral clustering: %s, %.2fs' % (assign_labels, (t1 - t0))\n    print(title)\n    plt.title(title)\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.8.5"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
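
The notebook's markdown cell describes turning voxel-to-voxel differences into graph affinities before clustering. A tiny standalone sketch, not part of the committed notebook, of what img_to_graph plus the exponential re-weighting do on a 2x2 toy image (same beta/eps values as the notebook):

# Hypothetical toy example: image gradients on graph edges -> affinities.
import numpy as np
from sklearn.feature_extraction import image

tiny = np.array([[0.0, 0.1],
                 [0.9, 1.0]])                  # a 2x2 "image"
graph = image.img_to_graph(tiny)               # sparse graph, gradient values on edges
beta, eps = 10, 1e-6
graph.data = np.exp(-beta * graph.data / graph.data.std()) + eps
print(graph.toarray())                         # small gradients map to affinities near 1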
Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
"""
================================================================
Plot the decision surface of a decision tree on the iris dataset
================================================================

Plot the decision surface of a decision tree trained on pairs
of features of the iris dataset.

See :ref:`decision tree <tree>` for more information on the estimator.

For each pair of iris features, the decision tree learns decision
boundaries made of combinations of simple thresholding rules inferred from
the training samples.

We also show the tree structure of a model built on all of the features.
"""
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, plot_tree

# Parameters
n_classes = 3
plot_colors = "ryb"
plot_step = 0.02

# Load data
iris = load_iris()

for pairidx, pair in enumerate([[0, 1], [0, 2], [0, 3],
                                [1, 2], [1, 3], [2, 3]]):
    # We only take the two corresponding features
    X = iris.data[:, pair]
    y = iris.target

    # Train
    clf = DecisionTreeClassifier().fit(X, y)

    # Plot the decision boundary
    plt.subplot(2, 3, pairidx + 1)

    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                         np.arange(y_min, y_max, plot_step))
    plt.tight_layout(h_pad=0.5, w_pad=0.5, pad=2.5)

    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    cs = plt.contourf(xx, yy, Z, cmap=plt.cm.RdYlBu)

    plt.xlabel(iris.feature_names[pair[0]])
    plt.ylabel(iris.feature_names[pair[1]])

    # Plot the training points
    for i, color in zip(range(n_classes), plot_colors):
        idx = np.where(y == i)
        plt.scatter(X[idx, 0], X[idx, 1], c=color, label=iris.target_names[i],
                    cmap=plt.cm.RdYlBu, edgecolor='black', s=15)

plt.suptitle("Decision surface of a decision tree using paired features")
plt.legend(loc='lower right', borderpad=0, handletextpad=0)
plt.axis("tight")

plt.figure()
clf = DecisionTreeClassifier().fit(iris.data, iris.target)
plot_tree(clf, filled=True)
plt.show()
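
The committed example renders the full tree graphically with plot_tree. A minimal sketch, not part of the committed file, of printing the same fitted model as text rules via export_text (also available in this release):

# Hypothetical addition: print the fitted tree as indented text rules.
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, export_text

iris = load_iris()
clf = DecisionTreeClassifier().fit(iris.data, iris.target)
print(export_text(clf, feature_names=iris.feature_names))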
Lines changed: 99 additions & 0 deletions
@@ -0,0 +1,99 @@
"""
=============================================================
Receiver Operating Characteristic (ROC) with cross validation
=============================================================

Example of Receiver Operating Characteristic (ROC) metric to evaluate
classifier output quality using cross-validation.

ROC curves typically feature true positive rate on the Y axis, and false
positive rate on the X axis. This means that the top left corner of the plot is
the "ideal" point - a false positive rate of zero, and a true positive rate of
one. This is not very realistic, but it does mean that a larger area under the
curve (AUC) is usually better.

The "steepness" of ROC curves is also important, since it is ideal to maximize
the true positive rate while minimizing the false positive rate.

This example shows the ROC response of different datasets, created from K-fold
cross-validation. Taking all of these curves, it is possible to calculate the
mean area under curve, and see the variance of the curve when the
training set is split into different subsets. This roughly shows how the
classifier output is affected by changes in the training data, and how
different the splits generated by K-fold cross-validation are from one another.

.. note::

    See also :func:`sklearn.metrics.roc_auc_score`,
    :func:`sklearn.model_selection.cross_val_score`,
    :ref:`sphx_glr_auto_examples_model_selection_plot_roc.py`,

"""
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt

from sklearn import svm, datasets
from sklearn.metrics import auc
from sklearn.metrics import plot_roc_curve
from sklearn.model_selection import StratifiedKFold

# #############################################################################
# Data IO and generation

# Import some data to play with
iris = datasets.load_iris()
X = iris.data
y = iris.target
X, y = X[y != 2], y[y != 2]
n_samples, n_features = X.shape

# Add noisy features
random_state = np.random.RandomState(0)
X = np.c_[X, random_state.randn(n_samples, 200 * n_features)]

# #############################################################################
# Classification and ROC analysis

# Run classifier with cross-validation and plot ROC curves
cv = StratifiedKFold(n_splits=6)
classifier = svm.SVC(kernel='linear', probability=True,
                     random_state=random_state)

tprs = []
aucs = []
mean_fpr = np.linspace(0, 1, 100)

fig, ax = plt.subplots()
for i, (train, test) in enumerate(cv.split(X, y)):
    classifier.fit(X[train], y[train])
    viz = plot_roc_curve(classifier, X[test], y[test],
                         name='ROC fold {}'.format(i),
                         alpha=0.3, lw=1, ax=ax)
    interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
    interp_tpr[0] = 0.0
    tprs.append(interp_tpr)
    aucs.append(viz.roc_auc)

ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
        label='Chance', alpha=.8)

mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)
ax.plot(mean_fpr, mean_tpr, color='b',
        label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
        lw=2, alpha=.8)

std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
ax.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
                label=r'$\pm$ 1 std. dev.')

ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05],
       title="Receiver operating characteristic example")
ax.legend(loc="lower right")
plt.show()
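
The note in the docstring points at cross_val_score as an alternative route. A minimal sketch, not part of the committed file, of getting per-fold ROC AUC without the plotting machinery, assuming the X, y, classifier and cv objects defined above:

# Hypothetical addition: per-fold ROC AUC via cross_val_score.
from sklearn.model_selection import cross_val_score

fold_aucs = cross_val_score(classifier, X, y, cv=cv, scoring='roc_auc')
print('Per-fold AUC:', np.round(fold_aucs, 3))
print('Mean ROC AUC: %0.2f +/- %0.2f' % (fold_aucs.mean(), fold_aucs.std()))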
Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
"""
=================================================
Concatenating multiple feature extraction methods
=================================================

In many real-world examples, there are many ways to extract features from a
dataset. Often it is beneficial to combine several methods to obtain good
performance. This example shows how to use ``FeatureUnion`` to combine
features obtained by PCA and univariate selection.

Combining features using this transformer has the benefit that it allows
cross validation and grid searches over the whole process.

The combination used in this example is not particularly helpful on this
dataset and is only used to illustrate the usage of FeatureUnion.
"""

# Author: Andreas Mueller <[email protected]>
#
# License: BSD 3 clause

from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

iris = load_iris()

X, y = iris.data, iris.target

# This dataset is way too high-dimensional. Better do PCA:
pca = PCA(n_components=2)

# Maybe some original features were good, too?
selection = SelectKBest(k=1)

# Build estimator from PCA and Univariate selection:

combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])

# Use combined features to transform dataset:
X_features = combined_features.fit(X, y).transform(X)
print("Combined space has", X_features.shape[1], "features")

svm = SVC(kernel="linear")

# Do grid search over k, n_components and C:

pipeline = Pipeline([("features", combined_features), ("svm", svm)])

param_grid = dict(features__pca__n_components=[1, 2, 3],
                  features__univ_select__k=[1, 2],
                  svm__C=[0.1, 1, 10])

grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=10)
grid_search.fit(X, y)
print(grid_search.best_estimator_)
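
With verbose=10 the search prints every individual fit. A short sketch, not part of the committed file, of what one would typically inspect afterwards on the fitted grid_search object:

# Hypothetical addition: report the winning parameters and CV score.
print(grid_search.best_params_)
print('Best CV accuracy: %0.3f' % grid_search.best_score_)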
