Skip to content

Commit 8b33a6b

Browse files
committed
Pushing the docs to dev/ for branch: master, commit 95628bfefbc8acf8688957476802e9696dcdbf03
1 parent a7b5aca commit 8b33a6b

File tree

1,047 files changed

+3168
-3162
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,047 files changed

+3168
-3162
lines changed
20 Bytes
Binary file not shown.
16 Bytes
Binary file not shown.

dev/_downloads/plot_svm_anova.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import svm, datasets, feature_selection\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.pipeline import Pipeline\n\n# #############################################################################\n# Import some data to play with\ndigits = datasets.load_digits()\ny = digits.target\n# Throw away data, to be in the curse of dimension settings\ny = y[:200]\nX = digits.data[:200]\nn_samples = len(y)\nX = X.reshape((n_samples, -1))\n# add 200 non-informative features\nX = np.hstack((X, 2 * np.random.random((n_samples, 200))))\n\n# #############################################################################\n# Create a feature-selection transform and an instance of SVM that we\n# combine together to have an full-blown estimator\n\ntransform = feature_selection.SelectPercentile(feature_selection.f_classif)\n\nclf = Pipeline([('anova', transform), ('svc', svm.SVC(C=1.0))])\n\n# #############################################################################\n# Plot the cross-validation score as a function of percentile of features\nscore_means = list()\nscore_stds = list()\npercentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100)\n\nfor percentile in percentiles:\n clf.set_params(anova__percentile=percentile)\n # Compute cross-validation score using 1 CPU\n this_scores = cross_val_score(clf, X, y, n_jobs=1)\n score_means.append(this_scores.mean())\n score_stds.append(this_scores.std())\n\nplt.errorbar(percentiles, score_means, np.array(score_stds))\n\nplt.title(\n 'Performance of the SVM-Anova varying the percentile of features selected')\nplt.xlabel('Percentile')\nplt.ylabel('Prediction rate')\n\nplt.axis('tight')\nplt.show()"
29+
"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_digits\nfrom sklearn.feature_selection import SelectPercentile, chi2\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.svm import SVC\n\n\n# #############################################################################\n# Import some data to play with\nX, y = load_digits(return_X_y=True)\n# Throw away data, to be in the curse of dimension settings\nX = X[:200]\ny = y[:200]\nn_samples = len(y)\nX = X.reshape((n_samples, -1))\n# add 200 non-informative features\nX = np.hstack((X, 2 * np.random.random((n_samples, 200))))\n\n# #############################################################################\n# Create a feature-selection transform and an instance of SVM that we\n# combine together to have an full-blown estimator\n\ntransform = SelectPercentile(chi2)\n\nclf = Pipeline([('anova', transform), ('svc', SVC(gamma=\"auto\"))])\n\n# #############################################################################\n# Plot the cross-validation score as a function of percentile of features\nscore_means = list()\nscore_stds = list()\npercentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100)\n\nfor percentile in percentiles:\n clf.set_params(anova__percentile=percentile)\n # Compute cross-validation score using 1 CPU\n this_scores = cross_val_score(clf, X, y, n_jobs=1)\n score_means.append(this_scores.mean())\n score_stds.append(this_scores.std())\n\nplt.errorbar(percentiles, score_means, np.array(score_stds))\n\nplt.title(\n 'Performance of the SVM-Anova varying the percentile of features selected')\nplt.xlabel('Percentile')\nplt.ylabel('Prediction rate')\n\nplt.axis('tight')\nplt.show()"
3030
]
3131
}
3232
],

dev/_downloads/plot_svm_anova.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,19 @@
1010

1111
import numpy as np
1212
import matplotlib.pyplot as plt
13-
from sklearn import svm, datasets, feature_selection
13+
from sklearn.datasets import load_digits
14+
from sklearn.feature_selection import SelectPercentile, chi2
1415
from sklearn.model_selection import cross_val_score
1516
from sklearn.pipeline import Pipeline
17+
from sklearn.svm import SVC
18+
1619

1720
# #############################################################################
1821
# Import some data to play with
19-
digits = datasets.load_digits()
20-
y = digits.target
22+
X, y = load_digits(return_X_y=True)
2123
# Throw away data, to be in the curse of dimension settings
24+
X = X[:200]
2225
y = y[:200]
23-
X = digits.data[:200]
2426
n_samples = len(y)
2527
X = X.reshape((n_samples, -1))
2628
# add 200 non-informative features
@@ -30,9 +32,9 @@
3032
# Create a feature-selection transform and an instance of SVM that we
3133
# combine together to have a full-blown estimator
3234

33-
transform = feature_selection.SelectPercentile(feature_selection.f_classif)
35+
transform = SelectPercentile(chi2)
3436

35-
clf = Pipeline([('anova', transform), ('svc', svm.SVC(C=1.0))])
37+
clf = Pipeline([('anova', transform), ('svc', SVC(gamma="auto"))])
3638

3739
# #############################################################################
3840
# Plot the cross-validation score as a function of percentile of features

dev/_downloads/scikit-learn-docs.pdf

18.7 KB
Binary file not shown.

dev/_images/iris.png

0 Bytes
167 Bytes
167 Bytes
30 Bytes
30 Bytes

0 commit comments

Comments (0)