
Commit 9c7f9e4

Pushing the docs to dev/ for branch: master, commit eec7649236d5216380d05916bb7f6aa3b2fc5508
1 parent a4dc7cc commit 9c7f9e4

1,092 files changed: +3156 additions, -15699 deletions


dev/_downloads/plot_nested_cross_validation_iris.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
},
"outputs": [],
"source": [
- removed (old cell source):
"from sklearn.datasets import load_iris\nfrom matplotlib import pyplot as plt\nfrom sklearn.svm import SVC\nfrom sklearn.model_selection import GridSearchCV, cross_val_score, KFold\nimport numpy as np\n\nprint(__doc__)\n\n# Number of random trials\nNUM_TRIALS = 30\n\n# Load the dataset\niris = load_iris()\nX_iris = iris.data\ny_iris = iris.target\n\n# Set up possible values of parameters to optimize over\np_grid = {\"C\": [1, 10, 100],\n \"gamma\": [.01, .1]}\n\n# We will use a Support Vector Classifier with \"rbf\" kernel\nsvm = SVC(kernel=\"rbf\")\n\n# Arrays to store scores\nnon_nested_scores = np.zeros(NUM_TRIALS)\nnested_scores = np.zeros(NUM_TRIALS)\n\n# Loop for each trial\nfor i in range(NUM_TRIALS):\n\n # Choose cross-validation techniques for the inner and outer loops,\n # independently of the dataset.\n # E.g \"LabelKFold\", \"LeaveOneOut\", \"LeaveOneLabelOut\", etc.\n inner_cv = KFold(n_splits=4, shuffle=True, random_state=i)\n outer_cv = KFold(n_splits=4, shuffle=True, random_state=i)\n\n # Non_nested parameter search and scoring\n clf = GridSearchCV(estimator=svm, param_grid=p_grid, cv=inner_cv)\n clf.fit(X_iris, y_iris)\n non_nested_scores[i] = clf.best_score_\n\n # Nested CV with parameter optimization\n nested_score = cross_val_score(clf, X=X_iris, y=y_iris, cv=outer_cv)\n nested_scores[i] = nested_score.mean()\n\nscore_difference = non_nested_scores - nested_scores\n\nprint(\"Average difference of {0:6f} with std. dev. of {1:6f}.\"\n .format(score_difference.mean(), score_difference.std()))\n\n# Plot scores on each trial for nested and non-nested CV\nplt.figure()\nplt.subplot(211)\nnon_nested_scores_line, = plt.plot(non_nested_scores, color='r')\nnested_line, = plt.plot(nested_scores, color='b')\nplt.ylabel(\"score\", fontsize=\"14\")\nplt.legend([non_nested_scores_line, nested_line],\n [\"Non-Nested CV\", \"Nested CV\"],\n bbox_to_anchor=(0, .4, .5, 0))\nplt.title(\"Non-Nested and Nested Cross Validation on Iris Dataset\",\n x=.5, y=1.1, fontsize=\"15\")\n\n# Plot bar chart of the difference.\nplt.subplot(212)\ndifference_plot = plt.bar(range(NUM_TRIALS), score_difference)\nplt.xlabel(\"Individual Trial #\")\nplt.legend([difference_plot],\n [\"Non-Nested CV - Nested CV Score\"],\n bbox_to_anchor=(0, 1, .8, 0))\nplt.ylabel(\"score difference\", fontsize=\"14\")\n\nplt.show()"
+ added (new cell source):
"from sklearn.datasets import load_iris\nfrom matplotlib import pyplot as plt\nfrom sklearn.svm import SVC\nfrom sklearn.model_selection import GridSearchCV, cross_val_score, KFold\nimport numpy as np\n\nprint(__doc__)\n\n# Number of random trials\nNUM_TRIALS = 30\n\n# Load the dataset\niris = load_iris()\nX_iris = iris.data\ny_iris = iris.target\n\n# Set up possible values of parameters to optimize over\np_grid = {\"C\": [1, 10, 100],\n \"gamma\": [.01, .1]}\n\n# We will use a Support Vector Classifier with \"rbf\" kernel\nsvm = SVC(kernel=\"rbf\")\n\n# Arrays to store scores\nnon_nested_scores = np.zeros(NUM_TRIALS)\nnested_scores = np.zeros(NUM_TRIALS)\n\n# Loop for each trial\nfor i in range(NUM_TRIALS):\n\n # Choose cross-validation techniques for the inner and outer loops,\n # independently of the dataset.\n # E.g \"GroupKFold\", \"LeaveOneOut\", \"LeaveOneGroupOut\", etc.\n inner_cv = KFold(n_splits=4, shuffle=True, random_state=i)\n outer_cv = KFold(n_splits=4, shuffle=True, random_state=i)\n\n # Non_nested parameter search and scoring\n clf = GridSearchCV(estimator=svm, param_grid=p_grid, cv=inner_cv)\n clf.fit(X_iris, y_iris)\n non_nested_scores[i] = clf.best_score_\n\n # Nested CV with parameter optimization\n nested_score = cross_val_score(clf, X=X_iris, y=y_iris, cv=outer_cv)\n nested_scores[i] = nested_score.mean()\n\nscore_difference = non_nested_scores - nested_scores\n\nprint(\"Average difference of {0:6f} with std. dev. of {1:6f}.\"\n .format(score_difference.mean(), score_difference.std()))\n\n# Plot scores on each trial for nested and non-nested CV\nplt.figure()\nplt.subplot(211)\nnon_nested_scores_line, = plt.plot(non_nested_scores, color='r')\nnested_line, = plt.plot(nested_scores, color='b')\nplt.ylabel(\"score\", fontsize=\"14\")\nplt.legend([non_nested_scores_line, nested_line],\n [\"Non-Nested CV\", \"Nested CV\"],\n bbox_to_anchor=(0, .4, .5, 0))\nplt.title(\"Non-Nested and Nested Cross Validation on Iris Dataset\",\n x=.5, y=1.1, fontsize=\"15\")\n\n# Plot bar chart of the difference.\nplt.subplot(212)\ndifference_plot = plt.bar(range(NUM_TRIALS), score_difference)\nplt.xlabel(\"Individual Trial #\")\nplt.legend([difference_plot],\n [\"Non-Nested CV - Nested CV Score\"],\n bbox_to_anchor=(0, 1, .8, 0))\nplt.ylabel(\"score difference\", fontsize=\"14\")\n\nplt.show()"
]
}
],
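
For readability: the only difference between the removed and added cell source above is the comment naming example splitters ("LabelKFold"/"LeaveOneLabelOut" become "GroupKFold"/"LeaveOneGroupOut"). The nested cross-validation core of that cell, untouched by this commit, reduces to roughly the following excerpt (simplified to a single trial with random_state=0; not part of the diff itself):

from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score
from sklearn.svm import SVC

iris = load_iris()
p_grid = {"C": [1, 10, 100], "gamma": [.01, .1]}
svm = SVC(kernel="rbf")

# Inner loop: GridSearchCV tunes C and gamma on 4 folds; outer loop:
# cross_val_score evaluates the whole tuning procedure on folds it
# never saw during tuning.
inner_cv = KFold(n_splits=4, shuffle=True, random_state=0)
outer_cv = KFold(n_splits=4, shuffle=True, random_state=0)
clf = GridSearchCV(estimator=svm, param_grid=p_grid, cv=inner_cv)
nested_score = cross_val_score(clf, X=iris.data, y=iris.target, cv=outer_cv)
print(nested_score.mean())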

dev/_downloads/plot_nested_cross_validation_iris.py

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@

    # Choose cross-validation techniques for the inner and outer loops,
    # independently of the dataset.
-   # E.g "LabelKFold", "LeaveOneOut", "LeaveOneLabelOut", etc.
+   # E.g "GroupKFold", "LeaveOneOut", "LeaveOneGroupOut", etc.
    inner_cv = KFold(n_splits=4, shuffle=True, random_state=i)
    outer_cv = KFold(n_splits=4, shuffle=True, random_state=i)

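The renamed splitters in the updated comment (LabelKFold -> GroupKFold, LeaveOneLabelOut -> LeaveOneGroupOut) are group-aware alternatives to KFold. A minimal sketch of plugging one in as the cross-validation strategy, assuming a hypothetical groups array that tags each iris sample with a batch id (illustration only, not part of this commit):

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import GroupKFold, LeaveOneGroupOut, cross_val_score
from sklearn.svm import SVC

iris = load_iris()
X, y = iris.data, iris.target

# Hypothetical grouping: pretend every sample belongs to one of 5 batches.
# Group-aware splitters keep all samples of a batch on the same side of a split.
groups = np.arange(len(y)) % 5

svm = SVC(kernel="rbf", C=10, gamma=.1)

print(cross_val_score(svm, X, y, groups=groups, cv=GroupKFold(n_splits=5)).mean())
print(cross_val_score(svm, X, y, groups=groups, cv=LeaveOneGroupOut()).mean())

The two splitters differ only in how many groups each test fold holds out: GroupKFold distributes the groups over n_splits folds, while LeaveOneGroupOut holds out exactly one group per split.
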
dev/_downloads/scikit-learn-docs.pdf

-1.32 MB
Binary file not shown.

dev/_images/iris.png

0 Bytes

0 commit comments
