Skip to content

Commit f7002bb

Browse files
committed
Pushing the docs to dev/ for branch: master, commit 9e0e2d41fe6bf35bb98ede7ee037724a2d790f58
1 parent 01b36ce commit f7002bb

File tree

917 files changed

+2767
-2767
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

917 files changed

+2767
-2767
lines changed
0 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.

dev/_downloads/plot_nested_cross_validation_iris.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
"execution_count": null,
2525
"cell_type": "code",
2626
"source": [
27-
"from sklearn.datasets import load_iris\nfrom matplotlib import pyplot as plt\nfrom sklearn.svm import SVC\nfrom sklearn.model_selection import GridSearchCV, cross_val_score, KFold\nimport numpy as np\n\nprint(__doc__)\n\n# Number of random trials\nNUM_TRIALS = 30\n\n# Load the dataset\niris = load_iris()\nX_iris = iris.data\ny_iris = iris.target\n\n# Set up possible values of parameters to optimize over\np_grid = {\"C\": [1, 10, 100],\n \"gamma\": [.01, .1]}\n\n# We will use a Support Vector Classifier with \"rbf\" kernel\nsvr = SVC(kernel=\"rbf\")\n\n# Arrays to store scores\nnon_nested_scores = np.zeros(NUM_TRIALS)\nnested_scores = np.zeros(NUM_TRIALS)\n\n# Loop for each trial\nfor i in range(NUM_TRIALS):\n\n # Choose cross-validation techniques for the inner and outer loops,\n # independently of the dataset.\n # E.g \"LabelKFold\", \"LeaveOneOut\", \"LeaveOneLabelOut\", etc.\n inner_cv = KFold(n_splits=4, shuffle=True, random_state=i)\n outer_cv = KFold(n_splits=4, shuffle=True, random_state=i)\n\n # Non_nested parameter search and scoring\n clf = GridSearchCV(estimator=svr, param_grid=p_grid, cv=inner_cv)\n clf.fit(X_iris, y_iris)\n non_nested_scores[i] = clf.best_score_\n\n # Nested CV with parameter optimization\n nested_score = cross_val_score(clf, X=X_iris, y=y_iris, cv=outer_cv)\n nested_scores[i] = nested_score.mean()\n\nscore_difference = non_nested_scores - nested_scores\n\nprint(\"Average difference of {0:6f} with std. dev. of {1:6f}.\"\n .format(score_difference.mean(), score_difference.std()))\n\n# Plot scores on each trial for nested and non-nested CV\nplt.figure()\nplt.subplot(211)\nnon_nested_scores_line, = plt.plot(non_nested_scores, color='r')\nnested_line, = plt.plot(nested_scores, color='b')\nplt.ylabel(\"score\", fontsize=\"14\")\nplt.legend([non_nested_scores_line, nested_line],\n [\"Non-Nested CV\", \"Nested CV\"],\n bbox_to_anchor=(0, .4, .5, 0))\nplt.title(\"Non-Nested and Nested Cross Validation on Iris Dataset\",\n x=.5, y=1.1, fontsize=\"15\")\n\n# Plot bar chart of the difference.\nplt.subplot(212)\ndifference_plot = plt.bar(range(NUM_TRIALS), score_difference)\nplt.xlabel(\"Individual Trial #\")\nplt.legend([difference_plot],\n [\"Non-Nested CV - Nested CV Score\"],\n bbox_to_anchor=(0, 1, .8, 0))\nplt.ylabel(\"score difference\", fontsize=\"14\")\n\nplt.show()"
27+
"from sklearn.datasets import load_iris\nfrom matplotlib import pyplot as plt\nfrom sklearn.svm import SVC\nfrom sklearn.model_selection import GridSearchCV, cross_val_score, KFold\nimport numpy as np\n\nprint(__doc__)\n\n# Number of random trials\nNUM_TRIALS = 30\n\n# Load the dataset\niris = load_iris()\nX_iris = iris.data\ny_iris = iris.target\n\n# Set up possible values of parameters to optimize over\np_grid = {\"C\": [1, 10, 100],\n \"gamma\": [.01, .1]}\n\n# We will use a Support Vector Classifier with \"rbf\" kernel\nsvm = SVC(kernel=\"rbf\")\n\n# Arrays to store scores\nnon_nested_scores = np.zeros(NUM_TRIALS)\nnested_scores = np.zeros(NUM_TRIALS)\n\n# Loop for each trial\nfor i in range(NUM_TRIALS):\n\n # Choose cross-validation techniques for the inner and outer loops,\n # independently of the dataset.\n # E.g \"LabelKFold\", \"LeaveOneOut\", \"LeaveOneLabelOut\", etc.\n inner_cv = KFold(n_splits=4, shuffle=True, random_state=i)\n outer_cv = KFold(n_splits=4, shuffle=True, random_state=i)\n\n # Non_nested parameter search and scoring\n clf = GridSearchCV(estimator=svm, param_grid=p_grid, cv=inner_cv)\n clf.fit(X_iris, y_iris)\n non_nested_scores[i] = clf.best_score_\n\n # Nested CV with parameter optimization\n nested_score = cross_val_score(clf, X=X_iris, y=y_iris, cv=outer_cv)\n nested_scores[i] = nested_score.mean()\n\nscore_difference = non_nested_scores - nested_scores\n\nprint(\"Average difference of {0:6f} with std. dev. of {1:6f}.\"\n .format(score_difference.mean(), score_difference.std()))\n\n# Plot scores on each trial for nested and non-nested CV\nplt.figure()\nplt.subplot(211)\nnon_nested_scores_line, = plt.plot(non_nested_scores, color='r')\nnested_line, = plt.plot(nested_scores, color='b')\nplt.ylabel(\"score\", fontsize=\"14\")\nplt.legend([non_nested_scores_line, nested_line],\n [\"Non-Nested CV\", \"Nested CV\"],\n bbox_to_anchor=(0, .4, .5, 0))\nplt.title(\"Non-Nested and Nested Cross Validation on Iris Dataset\",\n x=.5, y=1.1, fontsize=\"15\")\n\n# Plot bar chart of the difference.\nplt.subplot(212)\ndifference_plot = plt.bar(range(NUM_TRIALS), score_difference)\nplt.xlabel(\"Individual Trial #\")\nplt.legend([difference_plot],\n [\"Non-Nested CV - Nested CV Score\"],\n bbox_to_anchor=(0, 1, .8, 0))\nplt.ylabel(\"score difference\", fontsize=\"14\")\n\nplt.show()"
2828
],
2929
"outputs": [],
3030
"metadata": {

dev/_downloads/plot_nested_cross_validation_iris.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@
6464
"gamma": [.01, .1]}
6565

6666
# We will use a Support Vector Classifier with "rbf" kernel
67-
svr = SVC(kernel="rbf")
67+
svm = SVC(kernel="rbf")
6868

6969
# Arrays to store scores
7070
non_nested_scores = np.zeros(NUM_TRIALS)
@@ -80,7 +80,7 @@
8080
outer_cv = KFold(n_splits=4, shuffle=True, random_state=i)
8181

8282
# Non_nested parameter search and scoring
83-
clf = GridSearchCV(estimator=svr, param_grid=p_grid, cv=inner_cv)
83+
clf = GridSearchCV(estimator=svm, param_grid=p_grid, cv=inner_cv)
8484
clf.fit(X_iris, y_iris)
8585
non_nested_scores[i] = clf.best_score_
8686

dev/_downloads/scikit-learn-docs.pdf

3.98 KB
Binary file not shown.

0 commit comments

Comments
 (0)