
Commit 9726464

Pushing the docs to dev/ for branch: master, commit 2df594564dbde085b51aca8775e60e137abaabc9
1 parent 50f9041 commit 9726464

1,076 files changed: +3325 / -3325 lines changed

Two binary files changed (5 Bytes and 9 Bytes); binary content not shown.

dev/_downloads/plot_adaboost_multiclass.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-"""
+r"""
 =====================================
 Multi-class AdaBoosted Decision Trees
 =====================================

dev/_downloads/plot_bias_variance.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-
"print(__doc__)\n\n# Author: Gilles Louppe <[email protected]>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.ensemble import BaggingRegressor\nfrom sklearn.tree import DecisionTreeRegressor\n\n# Settings\nn_repeat = 50 # Number of iterations for computing expectations\nn_train = 50 # Size of the training set\nn_test = 1000 # Size of the test set\nnoise = 0.1 # Standard deviation of the noise\nnp.random.seed(0)\n\n# Change this for exploring the bias-variance decomposition of other\n# estimators. This should work well for estimators with high variance (e.g.,\n# decision trees or KNN), but poorly for estimators with low variance (e.g.,\n# linear models).\nestimators = [(\"Tree\", DecisionTreeRegressor()),\n (\"Bagging(Tree)\", BaggingRegressor(DecisionTreeRegressor()))]\n\nn_estimators = len(estimators)\n\n\n# Generate data\ndef f(x):\n x = x.ravel()\n\n return np.exp(-x ** 2) + 1.5 * np.exp(-(x - 2) ** 2)\n\n\ndef generate(n_samples, noise, n_repeat=1):\n X = np.random.rand(n_samples) * 10 - 5\n X = np.sort(X)\n\n if n_repeat == 1:\n y = f(X) + np.random.normal(0.0, noise, n_samples)\n else:\n y = np.zeros((n_samples, n_repeat))\n\n for i in range(n_repeat):\n y[:, i] = f(X) + np.random.normal(0.0, noise, n_samples)\n\n X = X.reshape((n_samples, 1))\n\n return X, y\n\n\nX_train = []\ny_train = []\n\nfor i in range(n_repeat):\n X, y = generate(n_samples=n_train, noise=noise)\n X_train.append(X)\n y_train.append(y)\n\nX_test, y_test = generate(n_samples=n_test, noise=noise, n_repeat=n_repeat)\n\nplt.figure(figsize=(10, 8))\n\n# Loop over estimators to compare\nfor n, (name, estimator) in enumerate(estimators):\n # Compute predictions\n y_predict = np.zeros((n_test, n_repeat))\n\n for i in range(n_repeat):\n estimator.fit(X_train[i], y_train[i])\n y_predict[:, i] = estimator.predict(X_test)\n\n # Bias^2 + Variance + Noise decomposition of the mean squared error\n y_error = np.zeros(n_test)\n\n for i in range(n_repeat):\n for j in range(n_repeat):\n y_error += (y_test[:, j] - y_predict[:, i]) ** 2\n\n y_error /= (n_repeat * n_repeat)\n\n y_noise = np.var(y_test, axis=1)\n y_bias = (f(X_test) - np.mean(y_predict, axis=1)) ** 2\n y_var = np.var(y_predict, axis=1)\n\n print(\"{0}: {1:.4f} (error) = {2:.4f} (bias^2) \"\n \" + {3:.4f} (var) + {4:.4f} (noise)\".format(name,\n np.mean(y_error),\n np.mean(y_bias),\n np.mean(y_var),\n np.mean(y_noise)))\n\n # Plot figures\n plt.subplot(2, n_estimators, n + 1)\n plt.plot(X_test, f(X_test), \"b\", label=\"$f(x)$\")\n plt.plot(X_train[0], y_train[0], \".b\", label=\"LS ~ $y = f(x)+noise$\")\n\n for i in range(n_repeat):\n if i == 0:\n plt.plot(X_test, y_predict[:, i], \"r\", label=\"$\\^y(x)$\")\n else:\n plt.plot(X_test, y_predict[:, i], \"r\", alpha=0.05)\n\n plt.plot(X_test, np.mean(y_predict, axis=1), \"c\",\n label=\"$\\mathbb{E}_{LS} \\^y(x)$\")\n\n plt.xlim([-5, 5])\n plt.title(name)\n\n if n == n_estimators - 1:\n plt.legend(loc=(1.1, .5))\n\n plt.subplot(2, n_estimators, n_estimators + n + 1)\n plt.plot(X_test, y_error, \"r\", label=\"$error(x)$\")\n plt.plot(X_test, y_bias, \"b\", label=\"$bias^2(x)$\"),\n plt.plot(X_test, y_var, \"g\", label=\"$variance(x)$\"),\n plt.plot(X_test, y_noise, \"c\", label=\"$noise(x)$\")\n\n plt.xlim([-5, 5])\n plt.ylim([0, 0.1])\n\n if n == n_estimators - 1:\n\n plt.legend(loc=(1.1, .5))\n\nplt.subplots_adjust(right=.75)\nplt.show()"
+
"print(__doc__)\n\n# Author: Gilles Louppe <[email protected]>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.ensemble import BaggingRegressor\nfrom sklearn.tree import DecisionTreeRegressor\n\n# Settings\nn_repeat = 50 # Number of iterations for computing expectations\nn_train = 50 # Size of the training set\nn_test = 1000 # Size of the test set\nnoise = 0.1 # Standard deviation of the noise\nnp.random.seed(0)\n\n# Change this for exploring the bias-variance decomposition of other\n# estimators. This should work well for estimators with high variance (e.g.,\n# decision trees or KNN), but poorly for estimators with low variance (e.g.,\n# linear models).\nestimators = [(\"Tree\", DecisionTreeRegressor()),\n (\"Bagging(Tree)\", BaggingRegressor(DecisionTreeRegressor()))]\n\nn_estimators = len(estimators)\n\n\n# Generate data\ndef f(x):\n x = x.ravel()\n\n return np.exp(-x ** 2) + 1.5 * np.exp(-(x - 2) ** 2)\n\n\ndef generate(n_samples, noise, n_repeat=1):\n X = np.random.rand(n_samples) * 10 - 5\n X = np.sort(X)\n\n if n_repeat == 1:\n y = f(X) + np.random.normal(0.0, noise, n_samples)\n else:\n y = np.zeros((n_samples, n_repeat))\n\n for i in range(n_repeat):\n y[:, i] = f(X) + np.random.normal(0.0, noise, n_samples)\n\n X = X.reshape((n_samples, 1))\n\n return X, y\n\n\nX_train = []\ny_train = []\n\nfor i in range(n_repeat):\n X, y = generate(n_samples=n_train, noise=noise)\n X_train.append(X)\n y_train.append(y)\n\nX_test, y_test = generate(n_samples=n_test, noise=noise, n_repeat=n_repeat)\n\nplt.figure(figsize=(10, 8))\n\n# Loop over estimators to compare\nfor n, (name, estimator) in enumerate(estimators):\n # Compute predictions\n y_predict = np.zeros((n_test, n_repeat))\n\n for i in range(n_repeat):\n estimator.fit(X_train[i], y_train[i])\n y_predict[:, i] = estimator.predict(X_test)\n\n # Bias^2 + Variance + Noise decomposition of the mean squared error\n y_error = np.zeros(n_test)\n\n for i in range(n_repeat):\n for j in range(n_repeat):\n y_error += (y_test[:, j] - y_predict[:, i]) ** 2\n\n y_error /= (n_repeat * n_repeat)\n\n y_noise = np.var(y_test, axis=1)\n y_bias = (f(X_test) - np.mean(y_predict, axis=1)) ** 2\n y_var = np.var(y_predict, axis=1)\n\n print(\"{0}: {1:.4f} (error) = {2:.4f} (bias^2) \"\n \" + {3:.4f} (var) + {4:.4f} (noise)\".format(name,\n np.mean(y_error),\n np.mean(y_bias),\n np.mean(y_var),\n np.mean(y_noise)))\n\n # Plot figures\n plt.subplot(2, n_estimators, n + 1)\n plt.plot(X_test, f(X_test), \"b\", label=\"$f(x)$\")\n plt.plot(X_train[0], y_train[0], \".b\", label=\"LS ~ $y = f(x)+noise$\")\n\n for i in range(n_repeat):\n if i == 0:\n plt.plot(X_test, y_predict[:, i], \"r\", label=r\"$\\^y(x)$\")\n else:\n plt.plot(X_test, y_predict[:, i], \"r\", alpha=0.05)\n\n plt.plot(X_test, np.mean(y_predict, axis=1), \"c\",\n label=r\"$\\mathbb{E}_{LS} \\^y(x)$\")\n\n plt.xlim([-5, 5])\n plt.title(name)\n\n if n == n_estimators - 1:\n plt.legend(loc=(1.1, .5))\n\n plt.subplot(2, n_estimators, n_estimators + n + 1)\n plt.plot(X_test, y_error, \"r\", label=\"$error(x)$\")\n plt.plot(X_test, y_bias, \"b\", label=\"$bias^2(x)$\"),\n plt.plot(X_test, y_var, \"g\", label=\"$variance(x)$\"),\n plt.plot(X_test, y_noise, \"c\", label=\"$noise(x)$\")\n\n plt.xlim([-5, 5])\n plt.ylim([0, 0.1])\n\n if n == n_estimators - 1:\n\n plt.legend(loc=(1.1, .5))\n\nplt.subplots_adjust(right=.75)\nplt.show()"
 ]
 }
 ],

dev/_downloads/plot_bias_variance.py

Lines changed: 2 additions & 2 deletions
@@ -161,12 +161,12 @@ def generate(n_samples, noise, n_repeat=1):

     for i in range(n_repeat):
         if i == 0:
-            plt.plot(X_test, y_predict[:, i], "r", label="$\^y(x)$")
+            plt.plot(X_test, y_predict[:, i], "r", label=r"$\^y(x)$")
         else:
             plt.plot(X_test, y_predict[:, i], "r", alpha=0.05)

     plt.plot(X_test, np.mean(y_predict, axis=1), "c",
-             label="$\mathbb{E}_{LS} \^y(x)$")
+             label=r"$\mathbb{E}_{LS} \^y(x)$")

     plt.xlim([-5, 5])
     plt.title(name)
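
The two hunks above (and the matching notebook hunk) only switch the matplotlib label strings to raw strings. As a minimal sketch, not part of the commit: sequences such as \^ and \m are not recognized Python string escapes, so a non-raw literal containing them triggers an "invalid escape sequence" warning when the module is compiled, while the raw-string spelling yields exactly the same characters with no warning.

    # Sketch only: old vs. new spelling of one label changed in this diff.
    plain = "$\mathbb{E}_{LS} \^y(x)$"   # \m and \^ are invalid escapes: Python warns but keeps the backslashes
    raw = r"$\mathbb{E}_{LS} \^y(x)$"    # raw string: every backslash is literal, no warning

    # Both spell the same characters, so the rendered figure is unchanged.
    assert plain == raw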

dev/_downloads/plot_gpc_isoprobability.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-
"print(__doc__)\n\n# Author: Vincent Dubourg <[email protected]>\n# Adapted to GaussianProcessClassifier:\n# Jan Hendrik Metzen <[email protected]>\n# License: BSD 3 clause\n\nimport numpy as np\n\nfrom matplotlib import pyplot as plt\nfrom matplotlib import cm\n\nfrom sklearn.gaussian_process import GaussianProcessClassifier\nfrom sklearn.gaussian_process.kernels import DotProduct, ConstantKernel as C\n\n# A few constants\nlim = 8\n\n\ndef g(x):\n \"\"\"The function to predict (classification will then consist in predicting\n whether g(x) <= 0 or not)\"\"\"\n return 5. - x[:, 1] - .5 * x[:, 0] ** 2.\n\n# Design of experiments\nX = np.array([[-4.61611719, -6.00099547],\n [4.10469096, 5.32782448],\n [0.00000000, -0.50000000],\n [-6.17289014, -4.6984743],\n [1.3109306, -6.93271427],\n [-5.03823144, 3.10584743],\n [-2.87600388, 6.74310541],\n [5.21301203, 4.26386883]])\n\n# Observations\ny = np.array(g(X) > 0, dtype=int)\n\n# Instantiate and fit Gaussian Process Model\nkernel = C(0.1, (1e-5, np.inf)) * DotProduct(sigma_0=0.1) ** 2\ngp = GaussianProcessClassifier(kernel=kernel)\ngp.fit(X, y)\nprint(\"Learned kernel: %s \" % gp.kernel_)\n\n# Evaluate real function and the predicted probability\nres = 50\nx1, x2 = np.meshgrid(np.linspace(- lim, lim, res),\n np.linspace(- lim, lim, res))\nxx = np.vstack([x1.reshape(x1.size), x2.reshape(x2.size)]).T\n\ny_true = g(xx)\ny_prob = gp.predict_proba(xx)[:, 1]\ny_true = y_true.reshape((res, res))\ny_prob = y_prob.reshape((res, res))\n\n# Plot the probabilistic classification iso-values\nfig = plt.figure(1)\nax = fig.gca()\nax.axes.set_aspect('equal')\nplt.xticks([])\nplt.yticks([])\nax.set_xticklabels([])\nax.set_yticklabels([])\nplt.xlabel('$x_1$')\nplt.ylabel('$x_2$')\n\ncax = plt.imshow(y_prob, cmap=cm.gray_r, alpha=0.8,\n extent=(-lim, lim, -lim, lim))\nnorm = plt.matplotlib.colors.Normalize(vmin=0., vmax=0.9)\ncb = plt.colorbar(cax, ticks=[0., 0.2, 0.4, 0.6, 0.8, 1.], norm=norm)\ncb.set_label('${\\\\rm \\mathbb{P}}\\left[\\widehat{G}(\\mathbf{x}) \\leq 0\\\\right]$')\nplt.clim(0, 1)\n\nplt.plot(X[y <= 0, 0], X[y <= 0, 1], 'r.', markersize=12)\n\nplt.plot(X[y > 0, 0], X[y > 0, 1], 'b.', markersize=12)\n\nplt.contour(x1, x2, y_true, [0.], colors='k', linestyles='dashdot')\n\ncs = plt.contour(x1, x2, y_prob, [0.666], colors='b',\n linestyles='solid')\nplt.clabel(cs, fontsize=11)\n\ncs = plt.contour(x1, x2, y_prob, [0.5], colors='k',\n linestyles='dashed')\nplt.clabel(cs, fontsize=11)\n\ncs = plt.contour(x1, x2, y_prob, [0.334], colors='r',\n linestyles='solid')\nplt.clabel(cs, fontsize=11)\n\nplt.show()"
+
"print(__doc__)\n\n# Author: Vincent Dubourg <[email protected]>\n# Adapted to GaussianProcessClassifier:\n# Jan Hendrik Metzen <[email protected]>\n# License: BSD 3 clause\n\nimport numpy as np\n\nfrom matplotlib import pyplot as plt\nfrom matplotlib import cm\n\nfrom sklearn.gaussian_process import GaussianProcessClassifier\nfrom sklearn.gaussian_process.kernels import DotProduct, ConstantKernel as C\n\n# A few constants\nlim = 8\n\n\ndef g(x):\n \"\"\"The function to predict (classification will then consist in predicting\n whether g(x) <= 0 or not)\"\"\"\n return 5. - x[:, 1] - .5 * x[:, 0] ** 2.\n\n# Design of experiments\nX = np.array([[-4.61611719, -6.00099547],\n [4.10469096, 5.32782448],\n [0.00000000, -0.50000000],\n [-6.17289014, -4.6984743],\n [1.3109306, -6.93271427],\n [-5.03823144, 3.10584743],\n [-2.87600388, 6.74310541],\n [5.21301203, 4.26386883]])\n\n# Observations\ny = np.array(g(X) > 0, dtype=int)\n\n# Instantiate and fit Gaussian Process Model\nkernel = C(0.1, (1e-5, np.inf)) * DotProduct(sigma_0=0.1) ** 2\ngp = GaussianProcessClassifier(kernel=kernel)\ngp.fit(X, y)\nprint(\"Learned kernel: %s \" % gp.kernel_)\n\n# Evaluate real function and the predicted probability\nres = 50\nx1, x2 = np.meshgrid(np.linspace(- lim, lim, res),\n np.linspace(- lim, lim, res))\nxx = np.vstack([x1.reshape(x1.size), x2.reshape(x2.size)]).T\n\ny_true = g(xx)\ny_prob = gp.predict_proba(xx)[:, 1]\ny_true = y_true.reshape((res, res))\ny_prob = y_prob.reshape((res, res))\n\n# Plot the probabilistic classification iso-values\nfig = plt.figure(1)\nax = fig.gca()\nax.axes.set_aspect('equal')\nplt.xticks([])\nplt.yticks([])\nax.set_xticklabels([])\nax.set_yticklabels([])\nplt.xlabel('$x_1$')\nplt.ylabel('$x_2$')\n\ncax = plt.imshow(y_prob, cmap=cm.gray_r, alpha=0.8,\n extent=(-lim, lim, -lim, lim))\nnorm = plt.matplotlib.colors.Normalize(vmin=0., vmax=0.9)\ncb = plt.colorbar(cax, ticks=[0., 0.2, 0.4, 0.6, 0.8, 1.], norm=norm)\ncb.set_label(r'${\\rm \\mathbb{P}}\\left[\\widehat{G}(\\mathbf{x}) \\leq 0\\right]$')\nplt.clim(0, 1)\n\nplt.plot(X[y <= 0, 0], X[y <= 0, 1], 'r.', markersize=12)\n\nplt.plot(X[y > 0, 0], X[y > 0, 1], 'b.', markersize=12)\n\nplt.contour(x1, x2, y_true, [0.], colors='k', linestyles='dashdot')\n\ncs = plt.contour(x1, x2, y_prob, [0.666], colors='b',\n linestyles='solid')\nplt.clabel(cs, fontsize=11)\n\ncs = plt.contour(x1, x2, y_prob, [0.5], colors='k',\n linestyles='dashed')\nplt.clabel(cs, fontsize=11)\n\ncs = plt.contour(x1, x2, y_prob, [0.334], colors='r',\n linestyles='solid')\nplt.clabel(cs, fontsize=11)\n\nplt.show()"
 ]
 }
 ],

dev/_downloads/plot_gpc_isoprobability.py

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@ def g(x):
                  extent=(-lim, lim, -lim, lim))
 norm = plt.matplotlib.colors.Normalize(vmin=0., vmax=0.9)
 cb = plt.colorbar(cax, ticks=[0., 0.2, 0.4, 0.6, 0.8, 1.], norm=norm)
-cb.set_label('${\\rm \mathbb{P}}\left[\widehat{G}(\mathbf{x}) \leq 0\\right]$')
+cb.set_label(r'${\rm \mathbb{P}}\left[\widehat{G}(\mathbf{x}) \leq 0\right]$')
 plt.clim(0, 1)

 plt.plot(X[y <= 0, 0], X[y <= 0, 1], 'r.', markersize=12)
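
Here the old, non-raw label also had to double some backslashes ('\\rm', '\\right') because '\r' would be a carriage return, while '\l' and '\w' merely produced invalid-escape warnings. A small sketch, again not from the commit, checking that the raw-string rewrite is character-for-character identical:

    old = '${\\rm \mathbb{P}}\left[\widehat{G}(\mathbf{x}) \leq 0\\right]$'   # \\r doubled to avoid the \r escape
    new = r'${\rm \mathbb{P}}\left[\widehat{G}(\mathbf{x}) \leq 0\right]$'    # raw string: single backslashes throughout

    # The same characters reach matplotlib's mathtext, so the colorbar label renders identically.
    assert old == new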
