
Commit 9c7f9e4

Pushing the docs to dev/ for branch: master, commit eec7649236d5216380d05916bb7f6aa3b2fc5508
1 parent a4dc7cc commit 9c7f9e4

1,092 files changed: +3156 additions, -15699 deletions


dev/_downloads/plot_nested_cross_validation_iris.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
},
"outputs": [],
"source": [
- removed (old cell source):
"from sklearn.datasets import load_iris\nfrom matplotlib import pyplot as plt\nfrom sklearn.svm import SVC\nfrom sklearn.model_selection import GridSearchCV, cross_val_score, KFold\nimport numpy as np\n\nprint(__doc__)\n\n# Number of random trials\nNUM_TRIALS = 30\n\n# Load the dataset\niris = load_iris()\nX_iris = iris.data\ny_iris = iris.target\n\n# Set up possible values of parameters to optimize over\np_grid = {\"C\": [1, 10, 100],\n \"gamma\": [.01, .1]}\n\n# We will use a Support Vector Classifier with \"rbf\" kernel\nsvm = SVC(kernel=\"rbf\")\n\n# Arrays to store scores\nnon_nested_scores = np.zeros(NUM_TRIALS)\nnested_scores = np.zeros(NUM_TRIALS)\n\n# Loop for each trial\nfor i in range(NUM_TRIALS):\n\n # Choose cross-validation techniques for the inner and outer loops,\n # independently of the dataset.\n # E.g \"LabelKFold\", \"LeaveOneOut\", \"LeaveOneLabelOut\", etc.\n inner_cv = KFold(n_splits=4, shuffle=True, random_state=i)\n outer_cv = KFold(n_splits=4, shuffle=True, random_state=i)\n\n # Non_nested parameter search and scoring\n clf = GridSearchCV(estimator=svm, param_grid=p_grid, cv=inner_cv)\n clf.fit(X_iris, y_iris)\n non_nested_scores[i] = clf.best_score_\n\n # Nested CV with parameter optimization\n nested_score = cross_val_score(clf, X=X_iris, y=y_iris, cv=outer_cv)\n nested_scores[i] = nested_score.mean()\n\nscore_difference = non_nested_scores - nested_scores\n\nprint(\"Average difference of {0:6f} with std. dev. of {1:6f}.\"\n .format(score_difference.mean(), score_difference.std()))\n\n# Plot scores on each trial for nested and non-nested CV\nplt.figure()\nplt.subplot(211)\nnon_nested_scores_line, = plt.plot(non_nested_scores, color='r')\nnested_line, = plt.plot(nested_scores, color='b')\nplt.ylabel(\"score\", fontsize=\"14\")\nplt.legend([non_nested_scores_line, nested_line],\n [\"Non-Nested CV\", \"Nested CV\"],\n bbox_to_anchor=(0, .4, .5, 0))\nplt.title(\"Non-Nested and Nested Cross Validation on Iris Dataset\",\n x=.5, y=1.1, fontsize=\"15\")\n\n# Plot bar chart of the difference.\nplt.subplot(212)\ndifference_plot = plt.bar(range(NUM_TRIALS), score_difference)\nplt.xlabel(\"Individual Trial #\")\nplt.legend([difference_plot],\n [\"Non-Nested CV - Nested CV Score\"],\n bbox_to_anchor=(0, 1, .8, 0))\nplt.ylabel(\"score difference\", fontsize=\"14\")\n\nplt.show()"
+ added (new cell source):
"from sklearn.datasets import load_iris\nfrom matplotlib import pyplot as plt\nfrom sklearn.svm import SVC\nfrom sklearn.model_selection import GridSearchCV, cross_val_score, KFold\nimport numpy as np\n\nprint(__doc__)\n\n# Number of random trials\nNUM_TRIALS = 30\n\n# Load the dataset\niris = load_iris()\nX_iris = iris.data\ny_iris = iris.target\n\n# Set up possible values of parameters to optimize over\np_grid = {\"C\": [1, 10, 100],\n \"gamma\": [.01, .1]}\n\n# We will use a Support Vector Classifier with \"rbf\" kernel\nsvm = SVC(kernel=\"rbf\")\n\n# Arrays to store scores\nnon_nested_scores = np.zeros(NUM_TRIALS)\nnested_scores = np.zeros(NUM_TRIALS)\n\n# Loop for each trial\nfor i in range(NUM_TRIALS):\n\n # Choose cross-validation techniques for the inner and outer loops,\n # independently of the dataset.\n # E.g \"GroupKFold\", \"LeaveOneOut\", \"LeaveOneGroupOut\", etc.\n inner_cv = KFold(n_splits=4, shuffle=True, random_state=i)\n outer_cv = KFold(n_splits=4, shuffle=True, random_state=i)\n\n # Non_nested parameter search and scoring\n clf = GridSearchCV(estimator=svm, param_grid=p_grid, cv=inner_cv)\n clf.fit(X_iris, y_iris)\n non_nested_scores[i] = clf.best_score_\n\n # Nested CV with parameter optimization\n nested_score = cross_val_score(clf, X=X_iris, y=y_iris, cv=outer_cv)\n nested_scores[i] = nested_score.mean()\n\nscore_difference = non_nested_scores - nested_scores\n\nprint(\"Average difference of {0:6f} with std. dev. of {1:6f}.\"\n .format(score_difference.mean(), score_difference.std()))\n\n# Plot scores on each trial for nested and non-nested CV\nplt.figure()\nplt.subplot(211)\nnon_nested_scores_line, = plt.plot(non_nested_scores, color='r')\nnested_line, = plt.plot(nested_scores, color='b')\nplt.ylabel(\"score\", fontsize=\"14\")\nplt.legend([non_nested_scores_line, nested_line],\n [\"Non-Nested CV\", \"Nested CV\"],\n bbox_to_anchor=(0, .4, .5, 0))\nplt.title(\"Non-Nested and Nested Cross Validation on Iris Dataset\",\n x=.5, y=1.1, fontsize=\"15\")\n\n# Plot bar chart of the difference.\nplt.subplot(212)\ndifference_plot = plt.bar(range(NUM_TRIALS), score_difference)\nplt.xlabel(\"Individual Trial #\")\nplt.legend([difference_plot],\n [\"Non-Nested CV - Nested CV Score\"],\n bbox_to_anchor=(0, 1, .8, 0))\nplt.ylabel(\"score difference\", fontsize=\"14\")\n\nplt.show()"
]
}
],
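
For readability: the only difference between the removed and added cell source above is the comment naming example splitters ("LabelKFold"/"LeaveOneLabelOut" become "GroupKFold"/"LeaveOneGroupOut"). The nested cross-validation core of that cell, untouched by this commit, reduces to roughly the following excerpt (simplified to a single trial with random_state=0; not part of the diff itself):

from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score
from sklearn.svm import SVC

iris = load_iris()
p_grid = {"C": [1, 10, 100], "gamma": [.01, .1]}
svm = SVC(kernel="rbf")

# Inner loop: GridSearchCV tunes C and gamma on 4 folds; outer loop:
# cross_val_score evaluates the whole tuning procedure on folds it
# never saw during tuning.
inner_cv = KFold(n_splits=4, shuffle=True, random_state=0)
outer_cv = KFold(n_splits=4, shuffle=True, random_state=0)
clf = GridSearchCV(estimator=svm, param_grid=p_grid, cv=inner_cv)
nested_score = cross_val_score(clf, X=iris.data, y=iris.target, cv=outer_cv)
print(nested_score.mean())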

dev/_downloads/plot_nested_cross_validation_iris.py

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@

    # Choose cross-validation techniques for the inner and outer loops,
    # independently of the dataset.
-   # E.g "LabelKFold", "LeaveOneOut", "LeaveOneLabelOut", etc.
+   # E.g "GroupKFold", "LeaveOneOut", "LeaveOneGroupOut", etc.
    inner_cv = KFold(n_splits=4, shuffle=True, random_state=i)
    outer_cv = KFold(n_splits=4, shuffle=True, random_state=i)

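The renamed splitters in the updated comment (LabelKFold -> GroupKFold, LeaveOneLabelOut -> LeaveOneGroupOut) are group-aware alternatives to KFold. A minimal sketch of plugging one in as the cross-validation strategy, assuming a hypothetical groups array that tags each iris sample with a batch id (illustration only, not part of this commit):

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import GroupKFold, LeaveOneGroupOut, cross_val_score
from sklearn.svm import SVC

iris = load_iris()
X, y = iris.data, iris.target

# Hypothetical grouping: pretend every sample belongs to one of 5 batches.
# Group-aware splitters keep all samples of a batch on the same side of a split.
groups = np.arange(len(y)) % 5

svm = SVC(kernel="rbf", C=10, gamma=.1)

print(cross_val_score(svm, X, y, groups=groups, cv=GroupKFold(n_splits=5)).mean())
print(cross_val_score(svm, X, y, groups=groups, cv=LeaveOneGroupOut()).mean())

The two splitters differ only in how many groups each test fold holds out: GroupKFold distributes the groups over n_splits folds, while LeaveOneGroupOut holds out exactly one group per split.
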
dev/_downloads/scikit-learn-docs.pdf

-1.32 MB
Binary file not shown.

dev/_images/iris.png

0 Bytes

0 commit comments
