Commit 3ae5f81

Pushing the docs to dev/ for branch: master, commit 40b2a6c7e3d85735ce0be86a48370fd2de1834de
1 parent: 750549b

1,110 files changed (+3032, -3032 lines); only a subset of the changed files is shown below.

Binary file changed: -154 Bytes (not shown)
Binary file changed: -154 Bytes (not shown)
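
Every text diff shown below makes the same one-line edit: an explicit cv=5 argument is removed from a GridSearchCV or cross_val_score call in an example script and in its notebook copy. This presumably tracks the 5-fold default that scikit-learn introduced in 0.22, which makes the explicit argument redundant. A minimal sketch of that equivalence for GridSearchCV, assuming scikit-learn >= 0.22 and using a placeholder dataset and estimator that are not part of this commit:

# Hedged sketch, not taken from the commit: under scikit-learn >= 0.22 the
# default cv for GridSearchCV is 5-fold, so dropping an explicit cv=5 leaves
# the search unchanged. Dataset and estimator are illustrative placeholders.
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
param_grid = {"C": [0.1, 1, 10]}

explicit = GridSearchCV(SVC(), param_grid, cv=5).fit(X, y)  # old spelling
implicit = GridSearchCV(SVC(), param_grid).fit(X, y)        # new spelling, same 5 folds
assert explicit.best_params_ == implicit.best_params_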

dev/_downloads/plot_compare_gpr_krr.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
    },
    "outputs": [],
    "source": [
Removed (old cell source, one long JSON string):
"print(__doc__)\n\n# Authors: Jan Hendrik Metzen <[email protected]>\n# License: BSD 3 clause\n\n\nimport time\n\nimport numpy as np\n\nimport matplotlib.pyplot as plt\n\nfrom sklearn.kernel_ridge import KernelRidge\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.gaussian_process import GaussianProcessRegressor\nfrom sklearn.gaussian_process.kernels import WhiteKernel, ExpSineSquared\n\nrng = np.random.RandomState(0)\n\n# Generate sample data\nX = 15 * rng.rand(100, 1)\ny = np.sin(X).ravel()\ny += 3 * (0.5 - rng.rand(X.shape[0])) # add noise\n\n# Fit KernelRidge with parameter selection based on 5-fold cross validation\nparam_grid = {\"alpha\": [1e0, 1e-1, 1e-2, 1e-3],\n \"kernel\": [ExpSineSquared(l, p)\n for l in np.logspace(-2, 2, 10)\n for p in np.logspace(0, 2, 10)]}\nkr = GridSearchCV(KernelRidge(), cv=5, param_grid=param_grid)\nstime = time.time()\nkr.fit(X, y)\nprint(\"Time for KRR fitting: %.3f\" % (time.time() - stime))\n\ngp_kernel = ExpSineSquared(1.0, 5.0, periodicity_bounds=(1e-2, 1e1)) \\\n + WhiteKernel(1e-1)\ngpr = GaussianProcessRegressor(kernel=gp_kernel)\nstime = time.time()\ngpr.fit(X, y)\nprint(\"Time for GPR fitting: %.3f\" % (time.time() - stime))\n\n# Predict using kernel ridge\nX_plot = np.linspace(0, 20, 10000)[:, None]\nstime = time.time()\ny_kr = kr.predict(X_plot)\nprint(\"Time for KRR prediction: %.3f\" % (time.time() - stime))\n\n# Predict using gaussian process regressor\nstime = time.time()\ny_gpr = gpr.predict(X_plot, return_std=False)\nprint(\"Time for GPR prediction: %.3f\" % (time.time() - stime))\n\nstime = time.time()\ny_gpr, y_std = gpr.predict(X_plot, return_std=True)\nprint(\"Time for GPR prediction with standard-deviation: %.3f\"\n % (time.time() - stime))\n\n# Plot results\nplt.figure(figsize=(10, 5))\nlw = 2\nplt.scatter(X, y, c='k', label='data')\nplt.plot(X_plot, np.sin(X_plot), color='navy', lw=lw, label='True')\nplt.plot(X_plot, y_kr, color='turquoise', lw=lw,\n label='KRR (%s)' % kr.best_params_)\nplt.plot(X_plot, y_gpr, color='darkorange', lw=lw,\n label='GPR (%s)' % gpr.kernel_)\nplt.fill_between(X_plot[:, 0], y_gpr - y_std, y_gpr + y_std, color='darkorange',\n alpha=0.2)\nplt.xlabel('data')\nplt.ylabel('target')\nplt.xlim(0, 20)\nplt.ylim(-4, 4)\nplt.title('GPR versus Kernel Ridge')\nplt.legend(loc=\"best\", scatterpoints=1, prop={'size': 8})\nplt.show()"
Added (new cell source; identical except that cv=5 is dropped from the GridSearchCV call):
"print(__doc__)\n\n# Authors: Jan Hendrik Metzen <[email protected]>\n# License: BSD 3 clause\n\n\nimport time\n\nimport numpy as np\n\nimport matplotlib.pyplot as plt\n\nfrom sklearn.kernel_ridge import KernelRidge\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.gaussian_process import GaussianProcessRegressor\nfrom sklearn.gaussian_process.kernels import WhiteKernel, ExpSineSquared\n\nrng = np.random.RandomState(0)\n\n# Generate sample data\nX = 15 * rng.rand(100, 1)\ny = np.sin(X).ravel()\ny += 3 * (0.5 - rng.rand(X.shape[0])) # add noise\n\n# Fit KernelRidge with parameter selection based on 5-fold cross validation\nparam_grid = {\"alpha\": [1e0, 1e-1, 1e-2, 1e-3],\n \"kernel\": [ExpSineSquared(l, p)\n for l in np.logspace(-2, 2, 10)\n for p in np.logspace(0, 2, 10)]}\nkr = GridSearchCV(KernelRidge(), param_grid=param_grid)\nstime = time.time()\nkr.fit(X, y)\nprint(\"Time for KRR fitting: %.3f\" % (time.time() - stime))\n\ngp_kernel = ExpSineSquared(1.0, 5.0, periodicity_bounds=(1e-2, 1e1)) \\\n + WhiteKernel(1e-1)\ngpr = GaussianProcessRegressor(kernel=gp_kernel)\nstime = time.time()\ngpr.fit(X, y)\nprint(\"Time for GPR fitting: %.3f\" % (time.time() - stime))\n\n# Predict using kernel ridge\nX_plot = np.linspace(0, 20, 10000)[:, None]\nstime = time.time()\ny_kr = kr.predict(X_plot)\nprint(\"Time for KRR prediction: %.3f\" % (time.time() - stime))\n\n# Predict using gaussian process regressor\nstime = time.time()\ny_gpr = gpr.predict(X_plot, return_std=False)\nprint(\"Time for GPR prediction: %.3f\" % (time.time() - stime))\n\nstime = time.time()\ny_gpr, y_std = gpr.predict(X_plot, return_std=True)\nprint(\"Time for GPR prediction with standard-deviation: %.3f\"\n % (time.time() - stime))\n\n# Plot results\nplt.figure(figsize=(10, 5))\nlw = 2\nplt.scatter(X, y, c='k', label='data')\nplt.plot(X_plot, np.sin(X_plot), color='navy', lw=lw, label='True')\nplt.plot(X_plot, y_kr, color='turquoise', lw=lw,\n label='KRR (%s)' % kr.best_params_)\nplt.plot(X_plot, y_gpr, color='darkorange', lw=lw,\n label='GPR (%s)' % gpr.kernel_)\nplt.fill_between(X_plot[:, 0], y_gpr - y_std, y_gpr + y_std, color='darkorange',\n alpha=0.2)\nplt.xlabel('data')\nplt.ylabel('target')\nplt.xlim(0, 20)\nplt.ylim(-4, 4)\nplt.title('GPR versus Kernel Ridge')\nplt.legend(loc=\"best\", scatterpoints=1, prop={'size': 8})\nplt.show()"
    ]
   }
  ],

dev/_downloads/plot_compare_gpr_krr.py

Lines changed: 1 addition & 1 deletion
@@ -73,7 +73,7 @@
               "kernel": [ExpSineSquared(l, p)
                          for l in np.logspace(-2, 2, 10)
                          for p in np.logspace(0, 2, 10)]}
-kr = GridSearchCV(KernelRidge(), cv=5, param_grid=param_grid)
+kr = GridSearchCV(KernelRidge(), param_grid=param_grid)
 stime = time.time()
 kr.fit(X, y)
 print("Time for KRR fitting: %.3f" % (time.time() - stime))

dev/_downloads/plot_compare_reduction.ipynb

Lines changed: 2 additions & 2 deletions
@@ -33,7 +33,7 @@
    },
    "outputs": [],
    "source": [
Removed (old cell source, one long JSON string):
"# Authors: Robert McGibbon, Joel Nothman, Guillaume Lemaitre\n\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_digits\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.svm import LinearSVC\nfrom sklearn.decomposition import PCA, NMF\nfrom sklearn.feature_selection import SelectKBest, chi2\n\nprint(__doc__)\n\npipe = Pipeline([\n # the reduce_dim stage is populated by the param_grid\n ('reduce_dim', 'passthrough'),\n ('classify', LinearSVC(dual=False, max_iter=10000))\n])\n\nN_FEATURES_OPTIONS = [2, 4, 8]\nC_OPTIONS = [1, 10, 100, 1000]\nparam_grid = [\n {\n 'reduce_dim': [PCA(iterated_power=7), NMF()],\n 'reduce_dim__n_components': N_FEATURES_OPTIONS,\n 'classify__C': C_OPTIONS\n },\n {\n 'reduce_dim': [SelectKBest(chi2)],\n 'reduce_dim__k': N_FEATURES_OPTIONS,\n 'classify__C': C_OPTIONS\n },\n]\nreducer_labels = ['PCA', 'NMF', 'KBest(chi2)']\n\ngrid = GridSearchCV(pipe, cv=5, n_jobs=1, param_grid=param_grid)\ndigits = load_digits()\ngrid.fit(digits.data, digits.target)\n\nmean_scores = np.array(grid.cv_results_['mean_test_score'])\n# scores are in the order of param_grid iteration, which is alphabetical\nmean_scores = mean_scores.reshape(len(C_OPTIONS), -1, len(N_FEATURES_OPTIONS))\n# select score for best C\nmean_scores = mean_scores.max(axis=0)\nbar_offsets = (np.arange(len(N_FEATURES_OPTIONS)) *\n (len(reducer_labels) + 1) + .5)\n\nplt.figure()\nCOLORS = 'bgrcmyk'\nfor i, (label, reducer_scores) in enumerate(zip(reducer_labels, mean_scores)):\n plt.bar(bar_offsets + i, reducer_scores, label=label, color=COLORS[i])\n\nplt.title(\"Comparing feature reduction techniques\")\nplt.xlabel('Reduced number of features')\nplt.xticks(bar_offsets + len(reducer_labels) / 2, N_FEATURES_OPTIONS)\nplt.ylabel('Digit classification accuracy')\nplt.ylim((0, 1))\nplt.legend(loc='upper left')\n\nplt.show()"
Added (new cell source; identical except that cv=5 is dropped from the GridSearchCV call):
"# Authors: Robert McGibbon, Joel Nothman, Guillaume Lemaitre\n\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_digits\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.svm import LinearSVC\nfrom sklearn.decomposition import PCA, NMF\nfrom sklearn.feature_selection import SelectKBest, chi2\n\nprint(__doc__)\n\npipe = Pipeline([\n # the reduce_dim stage is populated by the param_grid\n ('reduce_dim', 'passthrough'),\n ('classify', LinearSVC(dual=False, max_iter=10000))\n])\n\nN_FEATURES_OPTIONS = [2, 4, 8]\nC_OPTIONS = [1, 10, 100, 1000]\nparam_grid = [\n {\n 'reduce_dim': [PCA(iterated_power=7), NMF()],\n 'reduce_dim__n_components': N_FEATURES_OPTIONS,\n 'classify__C': C_OPTIONS\n },\n {\n 'reduce_dim': [SelectKBest(chi2)],\n 'reduce_dim__k': N_FEATURES_OPTIONS,\n 'classify__C': C_OPTIONS\n },\n]\nreducer_labels = ['PCA', 'NMF', 'KBest(chi2)']\n\ngrid = GridSearchCV(pipe, n_jobs=1, param_grid=param_grid)\ndigits = load_digits()\ngrid.fit(digits.data, digits.target)\n\nmean_scores = np.array(grid.cv_results_['mean_test_score'])\n# scores are in the order of param_grid iteration, which is alphabetical\nmean_scores = mean_scores.reshape(len(C_OPTIONS), -1, len(N_FEATURES_OPTIONS))\n# select score for best C\nmean_scores = mean_scores.max(axis=0)\nbar_offsets = (np.arange(len(N_FEATURES_OPTIONS)) *\n (len(reducer_labels) + 1) + .5)\n\nplt.figure()\nCOLORS = 'bgrcmyk'\nfor i, (label, reducer_scores) in enumerate(zip(reducer_labels, mean_scores)):\n plt.bar(bar_offsets + i, reducer_scores, label=label, color=COLORS[i])\n\nplt.title(\"Comparing feature reduction techniques\")\nplt.xlabel('Reduced number of features')\nplt.xticks(bar_offsets + len(reducer_labels) / 2, N_FEATURES_OPTIONS)\nplt.ylabel('Digit classification accuracy')\nplt.ylim((0, 1))\nplt.legend(loc='upper left')\n\nplt.show()"
    ]
   },
   {
@@ -51,7 +51,7 @@
    },
    "outputs": [],
    "source": [
Removed (old cell source, one long JSON string):
"from joblib import Memory\nfrom shutil import rmtree\n\n# Create a temporary folder to store the transformers of the pipeline\n___location = 'cachedir'\nmemory = Memory(___location=___location, verbose=10)\ncached_pipe = Pipeline([('reduce_dim', PCA()),\n ('classify', LinearSVC(dual=False, max_iter=10000))],\n memory=memory)\n\n# This time, a cached pipeline will be used within the grid search\ngrid = GridSearchCV(cached_pipe, cv=5, n_jobs=1, param_grid=param_grid)\ndigits = load_digits()\ngrid.fit(digits.data, digits.target)\n\n# Delete the temporary cache before exiting\nmemory.clear(warn=False)\nrmtree(___location)"
Added (new cell source; identical except that cv=5 is dropped from the GridSearchCV call):
"from joblib import Memory\nfrom shutil import rmtree\n\n# Create a temporary folder to store the transformers of the pipeline\n___location = 'cachedir'\nmemory = Memory(___location=___location, verbose=10)\ncached_pipe = Pipeline([('reduce_dim', PCA()),\n ('classify', LinearSVC(dual=False, max_iter=10000))],\n memory=memory)\n\n# This time, a cached pipeline will be used within the grid search\ngrid = GridSearchCV(cached_pipe, n_jobs=1, param_grid=param_grid)\ndigits = load_digits()\ngrid.fit(digits.data, digits.target)\n\n# Delete the temporary cache before exiting\nmemory.clear(warn=False)\nrmtree(___location)"
    ]
   },
   {

dev/_downloads/plot_compare_reduction.py

Lines changed: 2 additions & 2 deletions
@@ -63,7 +63,7 @@
 ]
 reducer_labels = ['PCA', 'NMF', 'KBest(chi2)']
 
-grid = GridSearchCV(pipe, cv=5, n_jobs=1, param_grid=param_grid)
+grid = GridSearchCV(pipe, n_jobs=1, param_grid=param_grid)
 digits = load_digits()
 grid.fit(digits.data, digits.target)
 
@@ -113,7 +113,7 @@
                         memory=memory)
 
 # This time, a cached pipeline will be used within the grid search
-grid = GridSearchCV(cached_pipe, cv=5, n_jobs=1, param_grid=param_grid)
+grid = GridSearchCV(cached_pipe, n_jobs=1, param_grid=param_grid)
 digits = load_digits()
 grid.fit(digits.data, digits.target)
 
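
As a hedged aside (not part of the commit), check_cv from scikit-learn's public API can confirm the default that makes the explicit cv=5 redundant:

import sklearn
from sklearn.model_selection import check_cv

print(sklearn.__version__)               # expect 0.22 or later
cv = check_cv(None, classifier=False)    # cv=None resolves to the default splitter
print(cv.get_n_splits())                 # 5 under the new default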

dev/_downloads/plot_covariance_estimation.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
    },
    "outputs": [],
    "source": [
Removed (old cell source, one long JSON string):
"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import linalg\n\nfrom sklearn.covariance import LedoitWolf, OAS, ShrunkCovariance, \\\n log_likelihood, empirical_covariance\nfrom sklearn.model_selection import GridSearchCV\n\n\n# #############################################################################\n# Generate sample data\nn_features, n_samples = 40, 20\nnp.random.seed(42)\nbase_X_train = np.random.normal(size=(n_samples, n_features))\nbase_X_test = np.random.normal(size=(n_samples, n_features))\n\n# Color samples\ncoloring_matrix = np.random.normal(size=(n_features, n_features))\nX_train = np.dot(base_X_train, coloring_matrix)\nX_test = np.dot(base_X_test, coloring_matrix)\n\n# #############################################################################\n# Compute the likelihood on test data\n\n# spanning a range of possible shrinkage coefficient values\nshrinkages = np.logspace(-2, 0, 30)\nnegative_logliks = [-ShrunkCovariance(shrinkage=s).fit(X_train).score(X_test)\n for s in shrinkages]\n\n# under the ground-truth model, which we would not have access to in real\n# settings\nreal_cov = np.dot(coloring_matrix.T, coloring_matrix)\nemp_cov = empirical_covariance(X_train)\nloglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov))\n\n# #############################################################################\n# Compare different approaches to setting the parameter\n\n# GridSearch for an optimal shrinkage coefficient\ntuned_parameters = [{'shrinkage': shrinkages}]\ncv = GridSearchCV(ShrunkCovariance(), tuned_parameters, cv=5)\ncv.fit(X_train)\n\n# Ledoit-Wolf optimal shrinkage coefficient estimate\nlw = LedoitWolf()\nloglik_lw = lw.fit(X_train).score(X_test)\n\n# OAS coefficient estimate\noa = OAS()\nloglik_oa = oa.fit(X_train).score(X_test)\n\n# #############################################################################\n# Plot results\nfig = plt.figure()\nplt.title(\"Regularized covariance: likelihood and shrinkage coefficient\")\nplt.xlabel('Regularization parameter: shrinkage coefficient')\nplt.ylabel('Error: negative log-likelihood on test data')\n# range shrinkage curve\nplt.loglog(shrinkages, negative_logliks, label=\"Negative log-likelihood\")\n\nplt.plot(plt.xlim(), 2 * [loglik_real], '--r',\n label=\"Real covariance likelihood\")\n\n# adjust view\nlik_max = np.amax(negative_logliks)\nlik_min = np.amin(negative_logliks)\nymin = lik_min - 6. * np.log((plt.ylim()[1] - plt.ylim()[0]))\nymax = lik_max + 10. * np.log(lik_max - lik_min)\nxmin = shrinkages[0]\nxmax = shrinkages[-1]\n# LW likelihood\nplt.vlines(lw.shrinkage_, ymin, -loglik_lw, color='magenta',\n linewidth=3, label='Ledoit-Wolf estimate')\n# OAS likelihood\nplt.vlines(oa.shrinkage_, ymin, -loglik_oa, color='purple',\n linewidth=3, label='OAS estimate')\n# best CV estimator likelihood\nplt.vlines(cv.best_estimator_.shrinkage, ymin,\n -cv.best_estimator_.score(X_test), color='cyan',\n linewidth=3, label='Cross-validation best estimate')\n\nplt.ylim(ymin, ymax)\nplt.xlim(xmin, xmax)\nplt.legend()\n\nplt.show()"
Added (new cell source; identical except that cv=5 is dropped from the GridSearchCV call):
"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import linalg\n\nfrom sklearn.covariance import LedoitWolf, OAS, ShrunkCovariance, \\\n log_likelihood, empirical_covariance\nfrom sklearn.model_selection import GridSearchCV\n\n\n# #############################################################################\n# Generate sample data\nn_features, n_samples = 40, 20\nnp.random.seed(42)\nbase_X_train = np.random.normal(size=(n_samples, n_features))\nbase_X_test = np.random.normal(size=(n_samples, n_features))\n\n# Color samples\ncoloring_matrix = np.random.normal(size=(n_features, n_features))\nX_train = np.dot(base_X_train, coloring_matrix)\nX_test = np.dot(base_X_test, coloring_matrix)\n\n# #############################################################################\n# Compute the likelihood on test data\n\n# spanning a range of possible shrinkage coefficient values\nshrinkages = np.logspace(-2, 0, 30)\nnegative_logliks = [-ShrunkCovariance(shrinkage=s).fit(X_train).score(X_test)\n for s in shrinkages]\n\n# under the ground-truth model, which we would not have access to in real\n# settings\nreal_cov = np.dot(coloring_matrix.T, coloring_matrix)\nemp_cov = empirical_covariance(X_train)\nloglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov))\n\n# #############################################################################\n# Compare different approaches to setting the parameter\n\n# GridSearch for an optimal shrinkage coefficient\ntuned_parameters = [{'shrinkage': shrinkages}]\ncv = GridSearchCV(ShrunkCovariance(), tuned_parameters)\ncv.fit(X_train)\n\n# Ledoit-Wolf optimal shrinkage coefficient estimate\nlw = LedoitWolf()\nloglik_lw = lw.fit(X_train).score(X_test)\n\n# OAS coefficient estimate\noa = OAS()\nloglik_oa = oa.fit(X_train).score(X_test)\n\n# #############################################################################\n# Plot results\nfig = plt.figure()\nplt.title(\"Regularized covariance: likelihood and shrinkage coefficient\")\nplt.xlabel('Regularization parameter: shrinkage coefficient')\nplt.ylabel('Error: negative log-likelihood on test data')\n# range shrinkage curve\nplt.loglog(shrinkages, negative_logliks, label=\"Negative log-likelihood\")\n\nplt.plot(plt.xlim(), 2 * [loglik_real], '--r',\n label=\"Real covariance likelihood\")\n\n# adjust view\nlik_max = np.amax(negative_logliks)\nlik_min = np.amin(negative_logliks)\nymin = lik_min - 6. * np.log((plt.ylim()[1] - plt.ylim()[0]))\nymax = lik_max + 10. * np.log(lik_max - lik_min)\nxmin = shrinkages[0]\nxmax = shrinkages[-1]\n# LW likelihood\nplt.vlines(lw.shrinkage_, ymin, -loglik_lw, color='magenta',\n linewidth=3, label='Ledoit-Wolf estimate')\n# OAS likelihood\nplt.vlines(oa.shrinkage_, ymin, -loglik_oa, color='purple',\n linewidth=3, label='OAS estimate')\n# best CV estimator likelihood\nplt.vlines(cv.best_estimator_.shrinkage, ymin,\n -cv.best_estimator_.score(X_test), color='cyan',\n linewidth=3, label='Cross-validation best estimate')\n\nplt.ylim(ymin, ymax)\nplt.xlim(xmin, xmax)\nplt.legend()\n\nplt.show()"
    ]
   }
  ],

dev/_downloads/plot_covariance_estimation.py

Lines changed: 1 addition & 1 deletion
@@ -83,7 +83,7 @@
 
 # GridSearch for an optimal shrinkage coefficient
 tuned_parameters = [{'shrinkage': shrinkages}]
-cv = GridSearchCV(ShrunkCovariance(), tuned_parameters, cv=5)
+cv = GridSearchCV(ShrunkCovariance(), tuned_parameters)
 cv.fit(X_train)
 
 # Ledoit-Wolf optimal shrinkage coefficient estimate

dev/_downloads/plot_cv_digits.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
    },
    "outputs": [],
    "source": [
Removed (old cell source, one long JSON string):
"print(__doc__)\n\n\nimport numpy as np\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn import datasets, svm\n\ndigits = datasets.load_digits()\nX = digits.data\ny = digits.target\n\nsvc = svm.SVC(kernel='linear')\nC_s = np.logspace(-10, 0, 10)\n\nscores = list()\nscores_std = list()\nfor C in C_s:\n svc.C = C\n this_scores = cross_val_score(svc, X, y, cv=5, n_jobs=1)\n scores.append(np.mean(this_scores))\n scores_std.append(np.std(this_scores))\n\n# Do the plotting\nimport matplotlib.pyplot as plt\nplt.figure()\nplt.semilogx(C_s, scores)\nplt.semilogx(C_s, np.array(scores) + np.array(scores_std), 'b--')\nplt.semilogx(C_s, np.array(scores) - np.array(scores_std), 'b--')\nlocs, labels = plt.yticks()\nplt.yticks(locs, list(map(lambda x: \"%g\" % x, locs)))\nplt.ylabel('CV score')\nplt.xlabel('Parameter C')\nplt.ylim(0, 1.1)\nplt.show()"
Added (new cell source; identical except that cv=5 is dropped from the cross_val_score call):
"print(__doc__)\n\n\nimport numpy as np\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn import datasets, svm\n\ndigits = datasets.load_digits()\nX = digits.data\ny = digits.target\n\nsvc = svm.SVC(kernel='linear')\nC_s = np.logspace(-10, 0, 10)\n\nscores = list()\nscores_std = list()\nfor C in C_s:\n svc.C = C\n this_scores = cross_val_score(svc, X, y, n_jobs=1)\n scores.append(np.mean(this_scores))\n scores_std.append(np.std(this_scores))\n\n# Do the plotting\nimport matplotlib.pyplot as plt\nplt.figure()\nplt.semilogx(C_s, scores)\nplt.semilogx(C_s, np.array(scores) + np.array(scores_std), 'b--')\nplt.semilogx(C_s, np.array(scores) - np.array(scores_std), 'b--')\nlocs, labels = plt.yticks()\nplt.yticks(locs, list(map(lambda x: \"%g\" % x, locs)))\nplt.ylabel('CV score')\nplt.xlabel('Parameter C')\nplt.ylim(0, 1.1)\nplt.show()"
    ]
   }
  ],

dev/_downloads/plot_cv_digits.py

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 scores_std = list()
 for C in C_s:
     svc.C = C
-    this_scores = cross_val_score(svc, X, y, cv=5, n_jobs=1)
+    this_scores = cross_val_score(svc, X, y, n_jobs=1)
     scores.append(np.mean(this_scores))
     scores_std.append(np.std(this_scores))
 
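
The same reasoning applies to cross_val_score, whose cv default is also 5-fold from scikit-learn 0.22 on. A minimal sketch under that assumption, reusing the digits and linear-SVC setup from the example above:

# Hedged sketch, not taken from the commit: cv=None resolves to 5-fold CV, so
# the explicit cv=5 removed above was redundant.
import numpy as np
from sklearn import datasets, svm
from sklearn.model_selection import cross_val_score

X, y = datasets.load_digits(return_X_y=True)
svc = svm.SVC(kernel='linear', C=1.0)

old_scores = cross_val_score(svc, X, y, cv=5, n_jobs=1)  # before this commit
new_scores = cross_val_score(svc, X, y, n_jobs=1)        # after this commit
assert len(new_scores) == 5
assert np.allclose(old_scores, new_scores)               # identical splits, identical scores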
