"print(__doc__)\n\n# Author: Eustache Diemert <
[email protected]>\n# License: BSD 3 clause\n\nimport time\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom mpl_toolkits.axes_grid1.parasite_axes import host_subplot\nfrom mpl_toolkits.axisartist.axislines import Axes\nfrom scipy.sparse.csr import csr_matrix\n\nfrom sklearn import datasets\nfrom sklearn.utils import shuffle\nfrom sklearn.metrics import mean_squared_error\nfrom sklearn.svm.classes import NuSVR\nfrom sklearn.ensemble.gradient_boosting import GradientBoostingRegressor\nfrom sklearn.linear_model.stochastic_gradient import SGDClassifier\nfrom sklearn.metrics import hamming_loss\n\n# #############################################################################\n# Routines\n\n\n# Initialize random generator\nnp.random.seed(0)\n\n\ndef generate_data(case, sparse=False):\n \"\"\"Generate regression/classification data.\"\"\"\n if case == 'regression':\n X, y = datasets.load_boston(return_X_y=True)\n elif case == 'classification':\n X, y = datasets.fetch_20newsgroups_vectorized(subset='all',\n return_X_y=True)\n X, y = shuffle(X, y)\n offset = int(X.shape[0] * 0.8)\n X_train, y_train = X[:offset], y[:offset]\n X_test, y_test = X[offset:], y[offset:]\n if sparse:\n X_train = csr_matrix(X_train)\n X_test = csr_matrix(X_test)\n else:\n X_train = np.array(X_train)\n X_test = np.array(X_test)\n y_test = np.array(y_test)\n y_train = np.array(y_train)\n data = {'X_train': X_train, 'X_test': X_test, 'y_train': y_train,\n 'y_test': y_test}\n return data\n\n\ndef benchmark_influence(conf):\n \"\"\"\n Benchmark influence of :changing_param: on both MSE and latency.\n \"\"\"\n prediction_times = []\n prediction_powers = []\n complexities = []\n for param_value in conf['changing_param_values']:\n conf['tuned_params'][conf['changing_param']] = param_value\n estimator = conf['estimator'](**conf['tuned_params'])\n print(\"Benchmarking %s\" % estimator)\n estimator.fit(conf['data']['X_train'], conf['data']['y_train'])\n conf['postfit_hook'](estimator)\n complexity = conf['complexity_computer'](estimator)\n complexities.append(complexity)\n start_time = time.time()\n for _ in range(conf['n_samples']):\n y_pred = estimator.predict(conf['data']['X_test'])\n elapsed_time = (time.time() - start_time) / float(conf['n_samples'])\n prediction_times.append(elapsed_time)\n pred_score = conf['prediction_performance_computer'](\n conf['data']['y_test'], y_pred)\n prediction_powers.append(pred_score)\n print(\"Complexity: %d | %s: %.4f | Pred. 
            complexity, conf['prediction_performance_label'], pred_score,
            elapsed_time))
    return prediction_powers, prediction_times, complexities


def plot_influence(conf, mse_values, prediction_times, complexities):
    """
    Plot influence of model complexity on both accuracy and latency.
    """
    plt.figure(figsize=(12, 6))
    host = host_subplot(111, axes_class=Axes)
    plt.subplots_adjust(right=0.75)
    # Second y-axis sharing the x-axis: prediction error on the left,
    # latency on the right.
    par1 = host.twinx()
    host.set_xlabel('Model Complexity (%s)' % conf['complexity_label'])
    y1_label = conf['prediction_performance_label']
    y2_label = "Time (s)"
    host.set_ylabel(y1_label)
    par1.set_ylabel(y2_label)
    p1, = host.plot(complexities, mse_values, 'b-', label="prediction error")
    p2, = par1.plot(complexities, prediction_times, 'r-',
                    label="latency")
    host.legend(loc='upper right')
    host.axis["left"].label.set_color(p1.get_color())
    par1.axis["right"].label.set_color(p2.get_color())
    plt.title('Influence of Model Complexity - %s' %
              conf['estimator'].__name__)
    plt.show()


def _count_nonzero_coefficients(estimator):
    """Count the non-zero coefficients of a fitted (sparsified) linear model."""
    a = estimator.coef_.toarray()
    return np.count_nonzero(a)


# #############################################################################
# Main code
regression_data = generate_data('regression')
classification_data = generate_data('classification', sparse=True)
configurations = [
    {'estimator': SGDClassifier,
     'tuned_params': {'penalty': 'elasticnet', 'alpha': 0.001, 'loss':
                      'modified_huber', 'fit_intercept': True, 'tol': 1e-3},
     'changing_param': 'l1_ratio',
     'changing_param_values': [0.25, 0.5, 0.75, 0.9],
     'complexity_label': 'non_zero coefficients',
     'complexity_computer': _count_nonzero_coefficients,
     'prediction_performance_computer': hamming_loss,
     'prediction_performance_label': 'Hamming Loss (Misclassification Ratio)',
     'postfit_hook': lambda x: x.sparsify(),
     'data': classification_data,
     'n_samples': 30},
    {'estimator': NuSVR,
     'tuned_params': {'C': 1e3, 'gamma': 2 ** -15},
     'changing_param': 'nu',
     'changing_param_values': [0.1, 0.25, 0.5, 0.75, 0.9],
     'complexity_label': 'n_support_vectors',
     'complexity_computer': lambda x: len(x.support_vectors_),
     'data': regression_data,
     'postfit_hook': lambda x: x,
     'prediction_performance_computer': mean_squared_error,
     'prediction_performance_label': 'MSE',
     'n_samples': 30},
    {'estimator': GradientBoostingRegressor,
     'tuned_params': {'loss': 'ls'},
     'changing_param': 'n_estimators',
     'changing_param_values': [10, 50, 100, 200, 500],
     'complexity_label': 'n_trees',
     'complexity_computer': lambda x: x.n_estimators,
     'data': regression_data,
     'postfit_hook': lambda x: x,
     'prediction_performance_computer': mean_squared_error,
     'prediction_performance_label': 'MSE',
     'n_samples': 30},
]
for conf in configurations:
    prediction_performances, prediction_times, complexities = \
        benchmark_influence(conf)
    plot_influence(conf, prediction_performances, prediction_times,
                   complexities)
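
# #############################################################################
# Standalone latency helper (a minimal sketch, not part of the original
# benchmark). `measure_latency` is a hypothetical helper illustrating the
# same timing pattern used in benchmark_influence() above, but built on
# time.perf_counter(), which is better suited than time.time() for timing
# short intervals.


def measure_latency(estimator, X, n_runs=30):
    """Return the mean prediction time in seconds over ``n_runs`` calls."""
    start = time.perf_counter()
    for _ in range(n_runs):
        estimator.predict(X)
    return (time.perf_counter() - start) / n_runs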