Commit 3ae5f81

Pushing the docs to dev/ for branch: master, commit 40b2a6c7e3d85735ce0be86a48370fd2de1834de
1 parent: 750549b

1,110 files changed (+3032, -3032 lines); only a subset of the changed files is shown below.

Binary file changed: -154 Bytes (not shown)
Binary file changed: -154 Bytes (not shown)
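
Every text diff shown below makes the same one-line edit: an explicit cv=5 argument is removed from a GridSearchCV or cross_val_score call in an example script and in its notebook copy. This presumably tracks the 5-fold default that scikit-learn introduced in 0.22, which makes the explicit argument redundant. A minimal sketch of that equivalence for GridSearchCV, assuming scikit-learn >= 0.22 and using a placeholder dataset and estimator that are not part of this commit:

# Hedged sketch, not taken from the commit: under scikit-learn >= 0.22 the
# default cv for GridSearchCV is 5-fold, so dropping an explicit cv=5 leaves
# the search unchanged. Dataset and estimator are illustrative placeholders.
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
param_grid = {"C": [0.1, 1, 10]}

explicit = GridSearchCV(SVC(), param_grid, cv=5).fit(X, y)  # old spelling
implicit = GridSearchCV(SVC(), param_grid).fit(X, y)        # new spelling, same 5 folds
assert explicit.best_params_ == implicit.best_params_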

dev/_downloads/plot_compare_gpr_krr.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
    },
    "outputs": [],
    "source": [
Removed (old cell source, one long JSON string):
"print(__doc__)\n\n# Authors: Jan Hendrik Metzen <[email protected]>\n# License: BSD 3 clause\n\n\nimport time\n\nimport numpy as np\n\nimport matplotlib.pyplot as plt\n\nfrom sklearn.kernel_ridge import KernelRidge\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.gaussian_process import GaussianProcessRegressor\nfrom sklearn.gaussian_process.kernels import WhiteKernel, ExpSineSquared\n\nrng = np.random.RandomState(0)\n\n# Generate sample data\nX = 15 * rng.rand(100, 1)\ny = np.sin(X).ravel()\ny += 3 * (0.5 - rng.rand(X.shape[0])) # add noise\n\n# Fit KernelRidge with parameter selection based on 5-fold cross validation\nparam_grid = {\"alpha\": [1e0, 1e-1, 1e-2, 1e-3],\n \"kernel\": [ExpSineSquared(l, p)\n for l in np.logspace(-2, 2, 10)\n for p in np.logspace(0, 2, 10)]}\nkr = GridSearchCV(KernelRidge(), cv=5, param_grid=param_grid)\nstime = time.time()\nkr.fit(X, y)\nprint(\"Time for KRR fitting: %.3f\" % (time.time() - stime))\n\ngp_kernel = ExpSineSquared(1.0, 5.0, periodicity_bounds=(1e-2, 1e1)) \\\n + WhiteKernel(1e-1)\ngpr = GaussianProcessRegressor(kernel=gp_kernel)\nstime = time.time()\ngpr.fit(X, y)\nprint(\"Time for GPR fitting: %.3f\" % (time.time() - stime))\n\n# Predict using kernel ridge\nX_plot = np.linspace(0, 20, 10000)[:, None]\nstime = time.time()\ny_kr = kr.predict(X_plot)\nprint(\"Time for KRR prediction: %.3f\" % (time.time() - stime))\n\n# Predict using gaussian process regressor\nstime = time.time()\ny_gpr = gpr.predict(X_plot, return_std=False)\nprint(\"Time for GPR prediction: %.3f\" % (time.time() - stime))\n\nstime = time.time()\ny_gpr, y_std = gpr.predict(X_plot, return_std=True)\nprint(\"Time for GPR prediction with standard-deviation: %.3f\"\n % (time.time() - stime))\n\n# Plot results\nplt.figure(figsize=(10, 5))\nlw = 2\nplt.scatter(X, y, c='k', label='data')\nplt.plot(X_plot, np.sin(X_plot), color='navy', lw=lw, label='True')\nplt.plot(X_plot, y_kr, color='turquoise', lw=lw,\n label='KRR (%s)' % kr.best_params_)\nplt.plot(X_plot, y_gpr, color='darkorange', lw=lw,\n label='GPR (%s)' % gpr.kernel_)\nplt.fill_between(X_plot[:, 0], y_gpr - y_std, y_gpr + y_std, color='darkorange',\n alpha=0.2)\nplt.xlabel('data')\nplt.ylabel('target')\nplt.xlim(0, 20)\nplt.ylim(-4, 4)\nplt.title('GPR versus Kernel Ridge')\nplt.legend(loc=\"best\", scatterpoints=1, prop={'size': 8})\nplt.show()"
Added (new cell source; identical except that cv=5 is dropped from the GridSearchCV call):
"print(__doc__)\n\n# Authors: Jan Hendrik Metzen <[email protected]>\n# License: BSD 3 clause\n\n\nimport time\n\nimport numpy as np\n\nimport matplotlib.pyplot as plt\n\nfrom sklearn.kernel_ridge import KernelRidge\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.gaussian_process import GaussianProcessRegressor\nfrom sklearn.gaussian_process.kernels import WhiteKernel, ExpSineSquared\n\nrng = np.random.RandomState(0)\n\n# Generate sample data\nX = 15 * rng.rand(100, 1)\ny = np.sin(X).ravel()\ny += 3 * (0.5 - rng.rand(X.shape[0])) # add noise\n\n# Fit KernelRidge with parameter selection based on 5-fold cross validation\nparam_grid = {\"alpha\": [1e0, 1e-1, 1e-2, 1e-3],\n \"kernel\": [ExpSineSquared(l, p)\n for l in np.logspace(-2, 2, 10)\n for p in np.logspace(0, 2, 10)]}\nkr = GridSearchCV(KernelRidge(), param_grid=param_grid)\nstime = time.time()\nkr.fit(X, y)\nprint(\"Time for KRR fitting: %.3f\" % (time.time() - stime))\n\ngp_kernel = ExpSineSquared(1.0, 5.0, periodicity_bounds=(1e-2, 1e1)) \\\n + WhiteKernel(1e-1)\ngpr = GaussianProcessRegressor(kernel=gp_kernel)\nstime = time.time()\ngpr.fit(X, y)\nprint(\"Time for GPR fitting: %.3f\" % (time.time() - stime))\n\n# Predict using kernel ridge\nX_plot = np.linspace(0, 20, 10000)[:, None]\nstime = time.time()\ny_kr = kr.predict(X_plot)\nprint(\"Time for KRR prediction: %.3f\" % (time.time() - stime))\n\n# Predict using gaussian process regressor\nstime = time.time()\ny_gpr = gpr.predict(X_plot, return_std=False)\nprint(\"Time for GPR prediction: %.3f\" % (time.time() - stime))\n\nstime = time.time()\ny_gpr, y_std = gpr.predict(X_plot, return_std=True)\nprint(\"Time for GPR prediction with standard-deviation: %.3f\"\n % (time.time() - stime))\n\n# Plot results\nplt.figure(figsize=(10, 5))\nlw = 2\nplt.scatter(X, y, c='k', label='data')\nplt.plot(X_plot, np.sin(X_plot), color='navy', lw=lw, label='True')\nplt.plot(X_plot, y_kr, color='turquoise', lw=lw,\n label='KRR (%s)' % kr.best_params_)\nplt.plot(X_plot, y_gpr, color='darkorange', lw=lw,\n label='GPR (%s)' % gpr.kernel_)\nplt.fill_between(X_plot[:, 0], y_gpr - y_std, y_gpr + y_std, color='darkorange',\n alpha=0.2)\nplt.xlabel('data')\nplt.ylabel('target')\nplt.xlim(0, 20)\nplt.ylim(-4, 4)\nplt.title('GPR versus Kernel Ridge')\nplt.legend(loc=\"best\", scatterpoints=1, prop={'size': 8})\nplt.show()"
    ]
   }
  ],

dev/_downloads/plot_compare_gpr_krr.py

Lines changed: 1 addition & 1 deletion
@@ -73,7 +73,7 @@
               "kernel": [ExpSineSquared(l, p)
                          for l in np.logspace(-2, 2, 10)
                          for p in np.logspace(0, 2, 10)]}
-kr = GridSearchCV(KernelRidge(), cv=5, param_grid=param_grid)
+kr = GridSearchCV(KernelRidge(), param_grid=param_grid)
 stime = time.time()
 kr.fit(X, y)
 print("Time for KRR fitting: %.3f" % (time.time() - stime))

dev/_downloads/plot_compare_reduction.ipynb

Lines changed: 2 additions & 2 deletions
@@ -33,7 +33,7 @@
    },
    "outputs": [],
    "source": [
Removed (old cell source, one long JSON string):
"# Authors: Robert McGibbon, Joel Nothman, Guillaume Lemaitre\n\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_digits\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.svm import LinearSVC\nfrom sklearn.decomposition import PCA, NMF\nfrom sklearn.feature_selection import SelectKBest, chi2\n\nprint(__doc__)\n\npipe = Pipeline([\n # the reduce_dim stage is populated by the param_grid\n ('reduce_dim', 'passthrough'),\n ('classify', LinearSVC(dual=False, max_iter=10000))\n])\n\nN_FEATURES_OPTIONS = [2, 4, 8]\nC_OPTIONS = [1, 10, 100, 1000]\nparam_grid = [\n {\n 'reduce_dim': [PCA(iterated_power=7), NMF()],\n 'reduce_dim__n_components': N_FEATURES_OPTIONS,\n 'classify__C': C_OPTIONS\n },\n {\n 'reduce_dim': [SelectKBest(chi2)],\n 'reduce_dim__k': N_FEATURES_OPTIONS,\n 'classify__C': C_OPTIONS\n },\n]\nreducer_labels = ['PCA', 'NMF', 'KBest(chi2)']\n\ngrid = GridSearchCV(pipe, cv=5, n_jobs=1, param_grid=param_grid)\ndigits = load_digits()\ngrid.fit(digits.data, digits.target)\n\nmean_scores = np.array(grid.cv_results_['mean_test_score'])\n# scores are in the order of param_grid iteration, which is alphabetical\nmean_scores = mean_scores.reshape(len(C_OPTIONS), -1, len(N_FEATURES_OPTIONS))\n# select score for best C\nmean_scores = mean_scores.max(axis=0)\nbar_offsets = (np.arange(len(N_FEATURES_OPTIONS)) *\n (len(reducer_labels) + 1) + .5)\n\nplt.figure()\nCOLORS = 'bgrcmyk'\nfor i, (label, reducer_scores) in enumerate(zip(reducer_labels, mean_scores)):\n plt.bar(bar_offsets + i, reducer_scores, label=label, color=COLORS[i])\n\nplt.title(\"Comparing feature reduction techniques\")\nplt.xlabel('Reduced number of features')\nplt.xticks(bar_offsets + len(reducer_labels) / 2, N_FEATURES_OPTIONS)\nplt.ylabel('Digit classification accuracy')\nplt.ylim((0, 1))\nplt.legend(loc='upper left')\n\nplt.show()"
Added (new cell source; identical except that cv=5 is dropped from the GridSearchCV call):
"# Authors: Robert McGibbon, Joel Nothman, Guillaume Lemaitre\n\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_digits\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.svm import LinearSVC\nfrom sklearn.decomposition import PCA, NMF\nfrom sklearn.feature_selection import SelectKBest, chi2\n\nprint(__doc__)\n\npipe = Pipeline([\n # the reduce_dim stage is populated by the param_grid\n ('reduce_dim', 'passthrough'),\n ('classify', LinearSVC(dual=False, max_iter=10000))\n])\n\nN_FEATURES_OPTIONS = [2, 4, 8]\nC_OPTIONS = [1, 10, 100, 1000]\nparam_grid = [\n {\n 'reduce_dim': [PCA(iterated_power=7), NMF()],\n 'reduce_dim__n_components': N_FEATURES_OPTIONS,\n 'classify__C': C_OPTIONS\n },\n {\n 'reduce_dim': [SelectKBest(chi2)],\n 'reduce_dim__k': N_FEATURES_OPTIONS,\n 'classify__C': C_OPTIONS\n },\n]\nreducer_labels = ['PCA', 'NMF', 'KBest(chi2)']\n\ngrid = GridSearchCV(pipe, n_jobs=1, param_grid=param_grid)\ndigits = load_digits()\ngrid.fit(digits.data, digits.target)\n\nmean_scores = np.array(grid.cv_results_['mean_test_score'])\n# scores are in the order of param_grid iteration, which is alphabetical\nmean_scores = mean_scores.reshape(len(C_OPTIONS), -1, len(N_FEATURES_OPTIONS))\n# select score for best C\nmean_scores = mean_scores.max(axis=0)\nbar_offsets = (np.arange(len(N_FEATURES_OPTIONS)) *\n (len(reducer_labels) + 1) + .5)\n\nplt.figure()\nCOLORS = 'bgrcmyk'\nfor i, (label, reducer_scores) in enumerate(zip(reducer_labels, mean_scores)):\n plt.bar(bar_offsets + i, reducer_scores, label=label, color=COLORS[i])\n\nplt.title(\"Comparing feature reduction techniques\")\nplt.xlabel('Reduced number of features')\nplt.xticks(bar_offsets + len(reducer_labels) / 2, N_FEATURES_OPTIONS)\nplt.ylabel('Digit classification accuracy')\nplt.ylim((0, 1))\nplt.legend(loc='upper left')\n\nplt.show()"
    ]
   },
   {
@@ -51,7 +51,7 @@
    },
    "outputs": [],
    "source": [
Removed (old cell source, one long JSON string):
"from joblib import Memory\nfrom shutil import rmtree\n\n# Create a temporary folder to store the transformers of the pipeline\n___location = 'cachedir'\nmemory = Memory(___location=___location, verbose=10)\ncached_pipe = Pipeline([('reduce_dim', PCA()),\n ('classify', LinearSVC(dual=False, max_iter=10000))],\n memory=memory)\n\n# This time, a cached pipeline will be used within the grid search\ngrid = GridSearchCV(cached_pipe, cv=5, n_jobs=1, param_grid=param_grid)\ndigits = load_digits()\ngrid.fit(digits.data, digits.target)\n\n# Delete the temporary cache before exiting\nmemory.clear(warn=False)\nrmtree(___location)"
Added (new cell source; identical except that cv=5 is dropped from the GridSearchCV call):
"from joblib import Memory\nfrom shutil import rmtree\n\n# Create a temporary folder to store the transformers of the pipeline\n___location = 'cachedir'\nmemory = Memory(___location=___location, verbose=10)\ncached_pipe = Pipeline([('reduce_dim', PCA()),\n ('classify', LinearSVC(dual=False, max_iter=10000))],\n memory=memory)\n\n# This time, a cached pipeline will be used within the grid search\ngrid = GridSearchCV(cached_pipe, n_jobs=1, param_grid=param_grid)\ndigits = load_digits()\ngrid.fit(digits.data, digits.target)\n\n# Delete the temporary cache before exiting\nmemory.clear(warn=False)\nrmtree(___location)"
    ]
   },
   {

dev/_downloads/plot_compare_reduction.py

Lines changed: 2 additions & 2 deletions
@@ -63,7 +63,7 @@
 ]
 reducer_labels = ['PCA', 'NMF', 'KBest(chi2)']
 
-grid = GridSearchCV(pipe, cv=5, n_jobs=1, param_grid=param_grid)
+grid = GridSearchCV(pipe, n_jobs=1, param_grid=param_grid)
 digits = load_digits()
 grid.fit(digits.data, digits.target)
 
@@ -113,7 +113,7 @@
                         memory=memory)
 
 # This time, a cached pipeline will be used within the grid search
-grid = GridSearchCV(cached_pipe, cv=5, n_jobs=1, param_grid=param_grid)
+grid = GridSearchCV(cached_pipe, n_jobs=1, param_grid=param_grid)
 digits = load_digits()
 grid.fit(digits.data, digits.target)
 
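
As a hedged aside (not part of the commit), check_cv from scikit-learn's public API can confirm the default that makes the explicit cv=5 redundant:

import sklearn
from sklearn.model_selection import check_cv

print(sklearn.__version__)               # expect 0.22 or later
cv = check_cv(None, classifier=False)    # cv=None resolves to the default splitter
print(cv.get_n_splits())                 # 5 under the new default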

dev/_downloads/plot_covariance_estimation.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
    },
    "outputs": [],
    "source": [
Removed (old cell source, one long JSON string):
"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import linalg\n\nfrom sklearn.covariance import LedoitWolf, OAS, ShrunkCovariance, \\\n log_likelihood, empirical_covariance\nfrom sklearn.model_selection import GridSearchCV\n\n\n# #############################################################################\n# Generate sample data\nn_features, n_samples = 40, 20\nnp.random.seed(42)\nbase_X_train = np.random.normal(size=(n_samples, n_features))\nbase_X_test = np.random.normal(size=(n_samples, n_features))\n\n# Color samples\ncoloring_matrix = np.random.normal(size=(n_features, n_features))\nX_train = np.dot(base_X_train, coloring_matrix)\nX_test = np.dot(base_X_test, coloring_matrix)\n\n# #############################################################################\n# Compute the likelihood on test data\n\n# spanning a range of possible shrinkage coefficient values\nshrinkages = np.logspace(-2, 0, 30)\nnegative_logliks = [-ShrunkCovariance(shrinkage=s).fit(X_train).score(X_test)\n for s in shrinkages]\n\n# under the ground-truth model, which we would not have access to in real\n# settings\nreal_cov = np.dot(coloring_matrix.T, coloring_matrix)\nemp_cov = empirical_covariance(X_train)\nloglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov))\n\n# #############################################################################\n# Compare different approaches to setting the parameter\n\n# GridSearch for an optimal shrinkage coefficient\ntuned_parameters = [{'shrinkage': shrinkages}]\ncv = GridSearchCV(ShrunkCovariance(), tuned_parameters, cv=5)\ncv.fit(X_train)\n\n# Ledoit-Wolf optimal shrinkage coefficient estimate\nlw = LedoitWolf()\nloglik_lw = lw.fit(X_train).score(X_test)\n\n# OAS coefficient estimate\noa = OAS()\nloglik_oa = oa.fit(X_train).score(X_test)\n\n# #############################################################################\n# Plot results\nfig = plt.figure()\nplt.title(\"Regularized covariance: likelihood and shrinkage coefficient\")\nplt.xlabel('Regularization parameter: shrinkage coefficient')\nplt.ylabel('Error: negative log-likelihood on test data')\n# range shrinkage curve\nplt.loglog(shrinkages, negative_logliks, label=\"Negative log-likelihood\")\n\nplt.plot(plt.xlim(), 2 * [loglik_real], '--r',\n label=\"Real covariance likelihood\")\n\n# adjust view\nlik_max = np.amax(negative_logliks)\nlik_min = np.amin(negative_logliks)\nymin = lik_min - 6. * np.log((plt.ylim()[1] - plt.ylim()[0]))\nymax = lik_max + 10. * np.log(lik_max - lik_min)\nxmin = shrinkages[0]\nxmax = shrinkages[-1]\n# LW likelihood\nplt.vlines(lw.shrinkage_, ymin, -loglik_lw, color='magenta',\n linewidth=3, label='Ledoit-Wolf estimate')\n# OAS likelihood\nplt.vlines(oa.shrinkage_, ymin, -loglik_oa, color='purple',\n linewidth=3, label='OAS estimate')\n# best CV estimator likelihood\nplt.vlines(cv.best_estimator_.shrinkage, ymin,\n -cv.best_estimator_.score(X_test), color='cyan',\n linewidth=3, label='Cross-validation best estimate')\n\nplt.ylim(ymin, ymax)\nplt.xlim(xmin, xmax)\nplt.legend()\n\nplt.show()"
Added (new cell source; identical except that cv=5 is dropped from the GridSearchCV call):
"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import linalg\n\nfrom sklearn.covariance import LedoitWolf, OAS, ShrunkCovariance, \\\n log_likelihood, empirical_covariance\nfrom sklearn.model_selection import GridSearchCV\n\n\n# #############################################################################\n# Generate sample data\nn_features, n_samples = 40, 20\nnp.random.seed(42)\nbase_X_train = np.random.normal(size=(n_samples, n_features))\nbase_X_test = np.random.normal(size=(n_samples, n_features))\n\n# Color samples\ncoloring_matrix = np.random.normal(size=(n_features, n_features))\nX_train = np.dot(base_X_train, coloring_matrix)\nX_test = np.dot(base_X_test, coloring_matrix)\n\n# #############################################################################\n# Compute the likelihood on test data\n\n# spanning a range of possible shrinkage coefficient values\nshrinkages = np.logspace(-2, 0, 30)\nnegative_logliks = [-ShrunkCovariance(shrinkage=s).fit(X_train).score(X_test)\n for s in shrinkages]\n\n# under the ground-truth model, which we would not have access to in real\n# settings\nreal_cov = np.dot(coloring_matrix.T, coloring_matrix)\nemp_cov = empirical_covariance(X_train)\nloglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov))\n\n# #############################################################################\n# Compare different approaches to setting the parameter\n\n# GridSearch for an optimal shrinkage coefficient\ntuned_parameters = [{'shrinkage': shrinkages}]\ncv = GridSearchCV(ShrunkCovariance(), tuned_parameters)\ncv.fit(X_train)\n\n# Ledoit-Wolf optimal shrinkage coefficient estimate\nlw = LedoitWolf()\nloglik_lw = lw.fit(X_train).score(X_test)\n\n# OAS coefficient estimate\noa = OAS()\nloglik_oa = oa.fit(X_train).score(X_test)\n\n# #############################################################################\n# Plot results\nfig = plt.figure()\nplt.title(\"Regularized covariance: likelihood and shrinkage coefficient\")\nplt.xlabel('Regularization parameter: shrinkage coefficient')\nplt.ylabel('Error: negative log-likelihood on test data')\n# range shrinkage curve\nplt.loglog(shrinkages, negative_logliks, label=\"Negative log-likelihood\")\n\nplt.plot(plt.xlim(), 2 * [loglik_real], '--r',\n label=\"Real covariance likelihood\")\n\n# adjust view\nlik_max = np.amax(negative_logliks)\nlik_min = np.amin(negative_logliks)\nymin = lik_min - 6. * np.log((plt.ylim()[1] - plt.ylim()[0]))\nymax = lik_max + 10. * np.log(lik_max - lik_min)\nxmin = shrinkages[0]\nxmax = shrinkages[-1]\n# LW likelihood\nplt.vlines(lw.shrinkage_, ymin, -loglik_lw, color='magenta',\n linewidth=3, label='Ledoit-Wolf estimate')\n# OAS likelihood\nplt.vlines(oa.shrinkage_, ymin, -loglik_oa, color='purple',\n linewidth=3, label='OAS estimate')\n# best CV estimator likelihood\nplt.vlines(cv.best_estimator_.shrinkage, ymin,\n -cv.best_estimator_.score(X_test), color='cyan',\n linewidth=3, label='Cross-validation best estimate')\n\nplt.ylim(ymin, ymax)\nplt.xlim(xmin, xmax)\nplt.legend()\n\nplt.show()"
    ]
   }
  ],

dev/_downloads/plot_covariance_estimation.py

Lines changed: 1 addition & 1 deletion
@@ -83,7 +83,7 @@
 
 # GridSearch for an optimal shrinkage coefficient
 tuned_parameters = [{'shrinkage': shrinkages}]
-cv = GridSearchCV(ShrunkCovariance(), tuned_parameters, cv=5)
+cv = GridSearchCV(ShrunkCovariance(), tuned_parameters)
 cv.fit(X_train)
 
 # Ledoit-Wolf optimal shrinkage coefficient estimate

dev/_downloads/plot_cv_digits.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
    },
    "outputs": [],
    "source": [
Removed (old cell source, one long JSON string):
"print(__doc__)\n\n\nimport numpy as np\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn import datasets, svm\n\ndigits = datasets.load_digits()\nX = digits.data\ny = digits.target\n\nsvc = svm.SVC(kernel='linear')\nC_s = np.logspace(-10, 0, 10)\n\nscores = list()\nscores_std = list()\nfor C in C_s:\n svc.C = C\n this_scores = cross_val_score(svc, X, y, cv=5, n_jobs=1)\n scores.append(np.mean(this_scores))\n scores_std.append(np.std(this_scores))\n\n# Do the plotting\nimport matplotlib.pyplot as plt\nplt.figure()\nplt.semilogx(C_s, scores)\nplt.semilogx(C_s, np.array(scores) + np.array(scores_std), 'b--')\nplt.semilogx(C_s, np.array(scores) - np.array(scores_std), 'b--')\nlocs, labels = plt.yticks()\nplt.yticks(locs, list(map(lambda x: \"%g\" % x, locs)))\nplt.ylabel('CV score')\nplt.xlabel('Parameter C')\nplt.ylim(0, 1.1)\nplt.show()"
Added (new cell source; identical except that cv=5 is dropped from the cross_val_score call):
"print(__doc__)\n\n\nimport numpy as np\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn import datasets, svm\n\ndigits = datasets.load_digits()\nX = digits.data\ny = digits.target\n\nsvc = svm.SVC(kernel='linear')\nC_s = np.logspace(-10, 0, 10)\n\nscores = list()\nscores_std = list()\nfor C in C_s:\n svc.C = C\n this_scores = cross_val_score(svc, X, y, n_jobs=1)\n scores.append(np.mean(this_scores))\n scores_std.append(np.std(this_scores))\n\n# Do the plotting\nimport matplotlib.pyplot as plt\nplt.figure()\nplt.semilogx(C_s, scores)\nplt.semilogx(C_s, np.array(scores) + np.array(scores_std), 'b--')\nplt.semilogx(C_s, np.array(scores) - np.array(scores_std), 'b--')\nlocs, labels = plt.yticks()\nplt.yticks(locs, list(map(lambda x: \"%g\" % x, locs)))\nplt.ylabel('CV score')\nplt.xlabel('Parameter C')\nplt.ylim(0, 1.1)\nplt.show()"
    ]
   }
  ],

dev/_downloads/plot_cv_digits.py

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 scores_std = list()
 for C in C_s:
     svc.C = C
-    this_scores = cross_val_score(svc, X, y, cv=5, n_jobs=1)
+    this_scores = cross_val_score(svc, X, y, n_jobs=1)
     scores.append(np.mean(this_scores))
     scores_std.append(np.std(this_scores))
 
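
The same reasoning applies to cross_val_score, whose cv default is also 5-fold from scikit-learn 0.22 on. A minimal sketch under that assumption, reusing the digits and linear-SVC setup from the example above:

# Hedged sketch, not taken from the commit: cv=None resolves to 5-fold CV, so
# the explicit cv=5 removed above was redundant.
import numpy as np
from sklearn import datasets, svm
from sklearn.model_selection import cross_val_score

X, y = datasets.load_digits(return_X_y=True)
svc = svm.SVC(kernel='linear', C=1.0)

old_scores = cross_val_score(svc, X, y, cv=5, n_jobs=1)  # before this commit
new_scores = cross_val_score(svc, X, y, n_jobs=1)        # after this commit
assert len(new_scores) == 5
assert np.allclose(old_scores, new_scores)               # identical splits, identical scores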
