
Commit 8a2d4cf

Pushing the docs for revision for branch: master, commit 35bb1c6ce2aa15baa804e4e0ecce85feb499d003
1 parent ce826e8 commit 8a2d4cf

File tree

897 files changed: +2637 additions, -2637 deletions
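Every diff shown below applies the same upstream scikit-learn API change: GridSearchCV's search results moved from the results_ attribute, keyed by test_mean_score/test_std_score, to cv_results_, keyed by mean_test_score/std_test_score. A minimal sketch of the new access pattern (illustrative only, not part of this commit; the toy dataset and grid are assumptions):

from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
search = GridSearchCV(SVC(), {'C': [1, 10]}, cv=3)
search.fit(X, y)

# New spelling, as introduced by the rename:
print(search.cv_results_['mean_test_score'])
print(search.cv_results_['std_test_score'])
# Old spelling, replaced throughout this commit:
#     search.results_['test_mean_score'], search.results_['test_std_score']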


dev/_downloads/grid_search_digits.ipynb

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@
     "execution_count": null,
     "cell_type": "code",
     "source": [
-      "from __future__ import print_function\n\nfrom sklearn import datasets\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.metrics import classification_report\nfrom sklearn.svm import SVC\n\nprint(__doc__)\n\n# Loading the Digits dataset\ndigits = datasets.load_digits()\n\n# To apply an classifier on this data, we need to flatten the image, to\n# turn the data in a (samples, feature) matrix:\nn_samples = len(digits.images)\nX = digits.images.reshape((n_samples, -1))\ny = digits.target\n\n# Split the dataset in two equal parts\nX_train, X_test, y_train, y_test = train_test_split(\n    X, y, test_size=0.5, random_state=0)\n\n# Set the parameters by cross-validation\ntuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],\n                     'C': [1, 10, 100, 1000]},\n                    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]\n\nscores = ['precision', 'recall']\n\nfor score in scores:\n    print(\"# Tuning hyper-parameters for %s\" % score)\n    print()\n\n    clf = GridSearchCV(SVC(C=1), tuned_parameters, cv=5,\n                       scoring='%s_macro' % score)\n    clf.fit(X_train, y_train)\n\n    print(\"Best parameters set found on development set:\")\n    print()\n    print(clf.best_params_)\n    print()\n    print(\"Grid scores on development set:\")\n    print()\n    means = clf.results_['test_mean_score']\n    stds = clf.results_['test_std_score']\n    for i in range(len(clf.results_['params'])):\n        print(\"%0.3f (+/-%0.03f) for %r\"\n              % (means[i], stds[i] * 2, clf.results_['params'][i]))\n    print()\n\n    print(\"Detailed classification report:\")\n    print()\n    print(\"The model is trained on the full development set.\")\n    print(\"The scores are computed on the full evaluation set.\")\n    print()\n    y_true, y_pred = y_test, clf.predict(X_test)\n    print(classification_report(y_true, y_pred))\n    print()\n\n# Note the problem is too easy: the hyperparameter plateau is too flat and the\n# output model is the same for precision and recall with ties in quality."
+      "from __future__ import print_function\n\nfrom sklearn import datasets\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.metrics import classification_report\nfrom sklearn.svm import SVC\n\nprint(__doc__)\n\n# Loading the Digits dataset\ndigits = datasets.load_digits()\n\n# To apply an classifier on this data, we need to flatten the image, to\n# turn the data in a (samples, feature) matrix:\nn_samples = len(digits.images)\nX = digits.images.reshape((n_samples, -1))\ny = digits.target\n\n# Split the dataset in two equal parts\nX_train, X_test, y_train, y_test = train_test_split(\n    X, y, test_size=0.5, random_state=0)\n\n# Set the parameters by cross-validation\ntuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],\n                     'C': [1, 10, 100, 1000]},\n                    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]\n\nscores = ['precision', 'recall']\n\nfor score in scores:\n    print(\"# Tuning hyper-parameters for %s\" % score)\n    print()\n\n    clf = GridSearchCV(SVC(C=1), tuned_parameters, cv=5,\n                       scoring='%s_macro' % score)\n    clf.fit(X_train, y_train)\n\n    print(\"Best parameters set found on development set:\")\n    print()\n    print(clf.best_params_)\n    print()\n    print(\"Grid scores on development set:\")\n    print()\n    means = clf.cv_results_['mean_test_score']\n    stds = clf.cv_results_['std_test_score']\n    for mean, std, params in zip(means, stds, clf.cv_results_['params']):\n        print(\"%0.3f (+/-%0.03f) for %r\"\n              % (mean, std * 2, params))\n    print()\n\n    print(\"Detailed classification report:\")\n    print()\n    print(\"The model is trained on the full development set.\")\n    print(\"The scores are computed on the full evaluation set.\")\n    print()\n    y_true, y_pred = y_test, clf.predict(X_test)\n    print(classification_report(y_true, y_pred))\n    print()\n\n# Note the problem is too easy: the hyperparameter plateau is too flat and the\n# output model is the same for precision and recall with ties in quality."
     ],
     "outputs": [],
     "metadata": {

dev/_downloads/grid_search_digits.py

Lines changed: 4 additions & 4 deletions
@@ -60,11 +60,11 @@
     print()
     print("Grid scores on development set:")
     print()
-    means = clf.results_['test_mean_score']
-    stds = clf.results_['test_std_score']
-    for i in range(len(clf.results_['params'])):
+    means = clf.cv_results_['mean_test_score']
+    stds = clf.cv_results_['std_test_score']
+    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
         print("%0.3f (+/-%0.03f) for %r"
-              % (means[i], stds[i] * 2, clf.results_['params'][i]))
+              % (mean, std * 2, params))
     print()

     print("Detailed classification report:")

dev/_downloads/plot_compare_reduction.ipynb

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@
     "execution_count": null,
     "cell_type": "code",
     "source": [
-      "# Authors: Robert McGibbon, Joel Nothman\n\nfrom __future__ import print_function, division\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_digits\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.svm import LinearSVC\nfrom sklearn.decomposition import PCA, NMF\nfrom sklearn.feature_selection import SelectKBest, chi2\n\nprint(__doc__)\n\npipe = Pipeline([\n    ('reduce_dim', PCA()),\n    ('classify', LinearSVC())\n])\n\nN_FEATURES_OPTIONS = [2, 4, 8]\nC_OPTIONS = [1, 10, 100, 1000]\nparam_grid = [\n    {\n        'reduce_dim': [PCA(iterated_power=7), NMF()],\n        'reduce_dim__n_components': N_FEATURES_OPTIONS,\n        'classify__C': C_OPTIONS\n    },\n    {\n        'reduce_dim': [SelectKBest(chi2)],\n        'reduce_dim__k': N_FEATURES_OPTIONS,\n        'classify__C': C_OPTIONS\n    },\n]\nreducer_labels = ['PCA', 'NMF', 'KBest(chi2)']\n\ngrid = GridSearchCV(pipe, cv=3, n_jobs=2, param_grid=param_grid)\ndigits = load_digits()\ngrid.fit(digits.data, digits.target)\n\nmean_scores = np.array(grid.results_['test_mean_score'])\n# scores are in the order of param_grid iteration, which is alphabetical\nmean_scores = mean_scores.reshape(len(C_OPTIONS), -1, len(N_FEATURES_OPTIONS))\n# select score for best C\nmean_scores = mean_scores.max(axis=0)\nbar_offsets = (np.arange(len(N_FEATURES_OPTIONS)) *\n               (len(reducer_labels) + 1) + .5)\n\nplt.figure()\nCOLORS = 'bgrcmyk'\nfor i, (label, reducer_scores) in enumerate(zip(reducer_labels, mean_scores)):\n    plt.bar(bar_offsets + i, reducer_scores, label=label, color=COLORS[i])\n\nplt.title(\"Comparing feature reduction techniques\")\nplt.xlabel('Reduced number of features')\nplt.xticks(bar_offsets + len(reducer_labels) / 2, N_FEATURES_OPTIONS)\nplt.ylabel('Digit classification accuracy')\nplt.ylim((0, 1))\nplt.legend(loc='upper left')\nplt.show()"
+      "# Authors: Robert McGibbon, Joel Nothman\n\nfrom __future__ import print_function, division\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_digits\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.svm import LinearSVC\nfrom sklearn.decomposition import PCA, NMF\nfrom sklearn.feature_selection import SelectKBest, chi2\n\nprint(__doc__)\n\npipe = Pipeline([\n    ('reduce_dim', PCA()),\n    ('classify', LinearSVC())\n])\n\nN_FEATURES_OPTIONS = [2, 4, 8]\nC_OPTIONS = [1, 10, 100, 1000]\nparam_grid = [\n    {\n        'reduce_dim': [PCA(iterated_power=7), NMF()],\n        'reduce_dim__n_components': N_FEATURES_OPTIONS,\n        'classify__C': C_OPTIONS\n    },\n    {\n        'reduce_dim': [SelectKBest(chi2)],\n        'reduce_dim__k': N_FEATURES_OPTIONS,\n        'classify__C': C_OPTIONS\n    },\n]\nreducer_labels = ['PCA', 'NMF', 'KBest(chi2)']\n\ngrid = GridSearchCV(pipe, cv=3, n_jobs=2, param_grid=param_grid)\ndigits = load_digits()\ngrid.fit(digits.data, digits.target)\n\nmean_scores = np.array(grid.cv_results_['mean_test_score'])\n# scores are in the order of param_grid iteration, which is alphabetical\nmean_scores = mean_scores.reshape(len(C_OPTIONS), -1, len(N_FEATURES_OPTIONS))\n# select score for best C\nmean_scores = mean_scores.max(axis=0)\nbar_offsets = (np.arange(len(N_FEATURES_OPTIONS)) *\n               (len(reducer_labels) + 1) + .5)\n\nplt.figure()\nCOLORS = 'bgrcmyk'\nfor i, (label, reducer_scores) in enumerate(zip(reducer_labels, mean_scores)):\n    plt.bar(bar_offsets + i, reducer_scores, label=label, color=COLORS[i])\n\nplt.title(\"Comparing feature reduction techniques\")\nplt.xlabel('Reduced number of features')\nplt.xticks(bar_offsets + len(reducer_labels) / 2, N_FEATURES_OPTIONS)\nplt.ylabel('Digit classification accuracy')\nplt.ylim((0, 1))\nplt.legend(loc='upper left')\nplt.show()"
     ],
     "outputs": [],
     "metadata": {

dev/_downloads/plot_compare_reduction.py

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@
 digits = load_digits()
 grid.fit(digits.data, digits.target)

-mean_scores = np.array(grid.results_['test_mean_score'])
+mean_scores = np.array(grid.cv_results_['mean_test_score'])
 # scores are in the order of param_grid iteration, which is alphabetical
 mean_scores = mean_scores.reshape(len(C_OPTIONS), -1, len(N_FEATURES_OPTIONS))
 # select score for best C
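For readers of this diff: mean_test_score is a flat array with one entry per parameter combination, which is what lets the example reshape it into a (C, reducer, n_features) cube before taking the max over C. A hedged sketch of how one might verify that ordering, assuming the fitted grid from the example above:

# Each entry of cv_results_['params'] is the dict of one candidate's
# settings, aligned index-for-index with mean_test_score.
for params, score in zip(grid.cv_results_['params'],
                         grid.cv_results_['mean_test_score']):
    print("%.3f" % score, params)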

dev/_downloads/plot_rbf_parameters.ipynb

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@
     "execution_count": null,
     "cell_type": "code",
     "source": [
-      "plt.figure(figsize=(8, 6))\nxx, yy = np.meshgrid(np.linspace(-3, 3, 200), np.linspace(-3, 3, 200))\nfor (k, (C, gamma, clf)) in enumerate(classifiers):\n    # evaluate decision function in a grid\n    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n    Z = Z.reshape(xx.shape)\n\n    # visualize decision function for these parameters\n    plt.subplot(len(C_2d_range), len(gamma_2d_range), k + 1)\n    plt.title(\"gamma=10^%d, C=10^%d\" % (np.log10(gamma), np.log10(C)),\n              size='medium')\n\n    # visualize parameter's effect on decision function\n    plt.pcolormesh(xx, yy, -Z, cmap=plt.cm.RdBu)\n    plt.scatter(X_2d[:, 0], X_2d[:, 1], c=y_2d, cmap=plt.cm.RdBu_r)\n    plt.xticks(())\n    plt.yticks(())\n    plt.axis('tight')\n\nscores = grid.results_['test_mean_score'].reshape(len(C_range),\n                                                  len(gamma_range))\n\n# Draw heatmap of the validation accuracy as a function of gamma and C\n#\n# The score are encoded as colors with the hot colormap which varies from dark\n# red to bright yellow. As the most interesting scores are all located in the\n# 0.92 to 0.97 range we use a custom normalizer to set the mid-point to 0.92 so\n# as to make it easier to visualize the small variations of score values in the\n# interesting range while not brutally collapsing all the low score values to\n# the same color.\n\nplt.figure(figsize=(8, 6))\nplt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95)\nplt.imshow(scores, interpolation='nearest', cmap=plt.cm.hot,\n           norm=MidpointNormalize(vmin=0.2, midpoint=0.92))\nplt.xlabel('gamma')\nplt.ylabel('C')\nplt.colorbar()\nplt.xticks(np.arange(len(gamma_range)), gamma_range, rotation=45)\nplt.yticks(np.arange(len(C_range)), C_range)\nplt.title('Validation accuracy')\nplt.show()"
+      "plt.figure(figsize=(8, 6))\nxx, yy = np.meshgrid(np.linspace(-3, 3, 200), np.linspace(-3, 3, 200))\nfor (k, (C, gamma, clf)) in enumerate(classifiers):\n    # evaluate decision function in a grid\n    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n    Z = Z.reshape(xx.shape)\n\n    # visualize decision function for these parameters\n    plt.subplot(len(C_2d_range), len(gamma_2d_range), k + 1)\n    plt.title(\"gamma=10^%d, C=10^%d\" % (np.log10(gamma), np.log10(C)),\n              size='medium')\n\n    # visualize parameter's effect on decision function\n    plt.pcolormesh(xx, yy, -Z, cmap=plt.cm.RdBu)\n    plt.scatter(X_2d[:, 0], X_2d[:, 1], c=y_2d, cmap=plt.cm.RdBu_r)\n    plt.xticks(())\n    plt.yticks(())\n    plt.axis('tight')\n\nscores = grid.cv_results_['mean_test_score'].reshape(len(C_range),\n                                                     len(gamma_range))\n\n# Draw heatmap of the validation accuracy as a function of gamma and C\n#\n# The score are encoded as colors with the hot colormap which varies from dark\n# red to bright yellow. As the most interesting scores are all located in the\n# 0.92 to 0.97 range we use a custom normalizer to set the mid-point to 0.92 so\n# as to make it easier to visualize the small variations of score values in the\n# interesting range while not brutally collapsing all the low score values to\n# the same color.\n\nplt.figure(figsize=(8, 6))\nplt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95)\nplt.imshow(scores, interpolation='nearest', cmap=plt.cm.hot,\n           norm=MidpointNormalize(vmin=0.2, midpoint=0.92))\nplt.xlabel('gamma')\nplt.ylabel('C')\nplt.colorbar()\nplt.xticks(np.arange(len(gamma_range)), gamma_range, rotation=45)\nplt.yticks(np.arange(len(C_range)), C_range)\nplt.title('Validation accuracy')\nplt.show()"
     ],
     "outputs": [],
     "metadata": {

dev/_downloads/plot_rbf_parameters.py

Lines changed: 2 additions & 2 deletions
@@ -171,8 +171,8 @@ def __call__(self, value, clip=None):
     plt.yticks(())
     plt.axis('tight')

-scores = grid.results_['test_mean_score'].reshape(len(C_range),
-                                                  len(gamma_range))
+scores = grid.cv_results_['mean_test_score'].reshape(len(C_range),
+                                                     len(gamma_range))

 # Draw heatmap of the validation accuracy as a function of gamma and C
 #

dev/_downloads/plot_svm_scale_c.ipynb

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@
     "execution_count": null,
     "cell_type": "code",
     "source": [
-      "print(__doc__)\n\n\n# Author: Andreas Mueller <[email protected]>\n#         Jaques Grobler <[email protected]>\n# License: BSD 3 clause\n\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.svm import LinearSVC\nfrom sklearn.model_selection import ShuffleSplit\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.utils import check_random_state\nfrom sklearn import datasets\n\nrnd = check_random_state(1)\n\n# set up dataset\nn_samples = 100\nn_features = 300\n\n# l1 data (only 5 informative features)\nX_1, y_1 = datasets.make_classification(n_samples=n_samples,\n                                        n_features=n_features, n_informative=5,\n                                        random_state=1)\n\n# l2 data: non sparse, but less features\ny_2 = np.sign(.5 - rnd.rand(n_samples))\nX_2 = rnd.randn(n_samples, n_features / 5) + y_2[:, np.newaxis]\nX_2 += 5 * rnd.randn(n_samples, n_features / 5)\n\nclf_sets = [(LinearSVC(penalty='l1', loss='squared_hinge', dual=False,\n                       tol=1e-3),\n             np.logspace(-2.3, -1.3, 10), X_1, y_1),\n            (LinearSVC(penalty='l2', loss='squared_hinge', dual=True,\n                       tol=1e-4),\n             np.logspace(-4.5, -2, 10), X_2, y_2)]\n\ncolors = ['navy', 'cyan', 'darkorange']\nlw = 2\n\nfor fignum, (clf, cs, X, y) in enumerate(clf_sets):\n    # set up the plot for each regressor\n    plt.figure(fignum, figsize=(9, 10))\n\n    for k, train_size in enumerate(np.linspace(0.3, 0.7, 3)[::-1]):\n        param_grid = dict(C=cs)\n        # To get nice curve, we need a large number of iterations to\n        # reduce the variance\n        grid = GridSearchCV(clf, refit=False, param_grid=param_grid,\n                            cv=ShuffleSplit(train_size=train_size,\n                                            n_splits=250, random_state=1))\n        grid.fit(X, y)\n        scores = grid.results_['test_mean_score']\n\n        scales = [(1, 'No scaling'),\n                  ((n_samples * train_size), '1/n_samples'),\n                  ]\n\n        for subplotnum, (scaler, name) in enumerate(scales):\n            plt.subplot(2, 1, subplotnum + 1)\n            plt.xlabel('C')\n            plt.ylabel('CV Score')\n            grid_cs = cs * float(scaler)  # scale the C's\n            plt.semilogx(grid_cs, scores, label=\"fraction %.2f\" %\n                         train_size, color=colors[k], lw=lw)\n            plt.title('scaling=%s, penalty=%s, loss=%s' %\n                      (name, clf.penalty, clf.loss))\n\n    plt.legend(loc=\"best\")\nplt.show()"
+      "print(__doc__)\n\n\n# Author: Andreas Mueller <[email protected]>\n#         Jaques Grobler <[email protected]>\n# License: BSD 3 clause\n\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.svm import LinearSVC\nfrom sklearn.model_selection import ShuffleSplit\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.utils import check_random_state\nfrom sklearn import datasets\n\nrnd = check_random_state(1)\n\n# set up dataset\nn_samples = 100\nn_features = 300\n\n# l1 data (only 5 informative features)\nX_1, y_1 = datasets.make_classification(n_samples=n_samples,\n                                        n_features=n_features, n_informative=5,\n                                        random_state=1)\n\n# l2 data: non sparse, but less features\ny_2 = np.sign(.5 - rnd.rand(n_samples))\nX_2 = rnd.randn(n_samples, n_features / 5) + y_2[:, np.newaxis]\nX_2 += 5 * rnd.randn(n_samples, n_features / 5)\n\nclf_sets = [(LinearSVC(penalty='l1', loss='squared_hinge', dual=False,\n                       tol=1e-3),\n             np.logspace(-2.3, -1.3, 10), X_1, y_1),\n            (LinearSVC(penalty='l2', loss='squared_hinge', dual=True,\n                       tol=1e-4),\n             np.logspace(-4.5, -2, 10), X_2, y_2)]\n\ncolors = ['navy', 'cyan', 'darkorange']\nlw = 2\n\nfor fignum, (clf, cs, X, y) in enumerate(clf_sets):\n    # set up the plot for each regressor\n    plt.figure(fignum, figsize=(9, 10))\n\n    for k, train_size in enumerate(np.linspace(0.3, 0.7, 3)[::-1]):\n        param_grid = dict(C=cs)\n        # To get nice curve, we need a large number of iterations to\n        # reduce the variance\n        grid = GridSearchCV(clf, refit=False, param_grid=param_grid,\n                            cv=ShuffleSplit(train_size=train_size,\n                                            n_splits=250, random_state=1))\n        grid.fit(X, y)\n        scores = grid.cv_results_['mean_test_score']\n\n        scales = [(1, 'No scaling'),\n                  ((n_samples * train_size), '1/n_samples'),\n                  ]\n\n        for subplotnum, (scaler, name) in enumerate(scales):\n            plt.subplot(2, 1, subplotnum + 1)\n            plt.xlabel('C')\n            plt.ylabel('CV Score')\n            grid_cs = cs * float(scaler)  # scale the C's\n            plt.semilogx(grid_cs, scores, label=\"fraction %.2f\" %\n                         train_size, color=colors[k], lw=lw)\n            plt.title('scaling=%s, penalty=%s, loss=%s' %\n                      (name, clf.penalty, clf.loss))\n\n    plt.legend(loc=\"best\")\nplt.show()"
     ],
     "outputs": [],
     "metadata": {

dev/_downloads/plot_svm_scale_c.py

Lines changed: 1 addition & 1 deletion
@@ -131,7 +131,7 @@
                             cv=ShuffleSplit(train_size=train_size,
                                             n_splits=250, random_state=1))
         grid.fit(X, y)
-        scores = grid.results_['test_mean_score']
+        scores = grid.cv_results_['mean_test_score']

         scales = [(1, 'No scaling'),
                   ((n_samples * train_size), '1/n_samples'),
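One detail worth noting in this last example: the search is constructed with refit=False, so no final model is refit on the whole dataset and the cv_results_ table is the search's only output. A self-contained sketch of that mode (the toy data and grid below are assumptions, standing in for the example's):

import numpy as np
from sklearn.model_selection import GridSearchCV, ShuffleSplit
from sklearn.svm import LinearSVC

rng = np.random.RandomState(1)
X = rng.randn(60, 5)
y = rng.randint(0, 2, 60)  # two random classes, toy data only

grid = GridSearchCV(LinearSVC(dual=False), {'C': [0.1, 1.0]}, refit=False,
                    cv=ShuffleSplit(train_size=0.5, n_splits=10,
                                    random_state=1))
grid.fit(X, y)
# With refit=False there is no best_estimator_; the CV table is the result.
print(grid.cv_results_['mean_test_score'])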
