"# Authors: Jan Hendrik Metzen <
[email protected]>\n# License: BSD 3 clause\n\n\nfrom __future__ import division\nimport time\n\nimport numpy as np\n\nfrom sklearn.svm import SVR\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.model_selection import learning_curve\nfrom sklearn.kernel_ridge import KernelRidge\nimport matplotlib.pyplot as plt\n\nrng = np.random.RandomState(0)\n\n# #############################################################################\n# Generate sample data\nX = 5 * rng.rand(10000, 1)\ny = np.sin(X).ravel()\n\n# Add noise to targets\ny[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))\n\nX_plot = np.linspace(0, 5, 100000)[:, None]\n\n# #############################################################################\n# Fit regression model\ntrain_size = 100\nsvr = GridSearchCV(SVR(kernel='rbf', gamma=0.1), cv=5,\n param_grid={\"C\": [1e0, 1e1, 1e2, 1e3],\n \"gamma\": np.logspace(-2, 2, 5)})\n\nkr = GridSearchCV(KernelRidge(kernel='rbf', gamma=0.1), cv=5,\n param_grid={\"alpha\": [1e0, 0.1, 1e-2, 1e-3],\n \"gamma\": np.logspace(-2, 2, 5)})\n\nt0 = time.time()\nsvr.fit(X[:train_size], y[:train_size])\nsvr_fit = time.time() - t0\nprint(\"SVR complexity and bandwidth selected and model fitted in %.3f s\"\n % svr_fit)\n\nt0 = time.time()\nkr.fit(X[:train_size], y[:train_size])\nkr_fit = time.time() - t0\nprint(\"KRR complexity and bandwidth selected and model fitted in %.3f s\"\n % kr_fit)\n\nsv_ratio = svr.best_estimator_.support_.shape[0] / train_size\nprint(\"Support vector ratio: %.3f\" % sv_ratio)\n\nt0 = time.time()\ny_svr = svr.predict(X_plot)\nsvr_predict = time.time() - t0\nprint(\"SVR prediction for %d inputs in %.3f s\"\n % (X_plot.shape[0], svr_predict))\n\nt0 = time.time()\ny_kr = kr.predict(X_plot)\nkr_predict = time.time() - t0\nprint(\"KRR prediction for %d inputs in %.3f s\"\n % (X_plot.shape[0], kr_predict))\n\n\n# #############################################################################\n# Look at the results\nsv_ind = svr.best_estimator_.support_\nplt.scatter(X[sv_ind], y[sv_ind], c='r', s=50, label='SVR support vectors',\n zorder=2, edgecolors=(0, 0, 0))\nplt.scatter(X[:100], y[:100], c='k', label='data', zorder=1,\n edgecolors=(0, 0, 0))\nplt.hold('on')\nplt.plot(X_plot, y_svr, c='r',\n label='SVR (fit: %.3fs, predict: %.3fs)' % (svr_fit, svr_predict))\nplt.plot(X_plot, y_kr, c='g',\n label='KRR (fit: %.3fs, predict: %.3fs)' % (kr_fit, kr_predict))\nplt.xlabel('data')\nplt.ylabel('target')\nplt.title('SVR versus Kernel Ridge')\nplt.legend()\n\n# Visualize training and prediction time\nplt.figure()\n\n# Generate sample data\nX = 5 * rng.rand(10000, 1)\ny = np.sin(X).ravel()\ny[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))\nsizes = np.logspace(1, 4, 7, dtype=np.int)\nfor name, estimator in {\"KRR\": KernelRidge(kernel='rbf', alpha=0.1,\n gamma=10),\n \"SVR\": SVR(kernel='rbf', C=1e1, gamma=10)}.items():\n train_time = []\n test_time = []\n for train_test_size in sizes:\n t0 = time.time()\n estimator.fit(X[:train_test_size], y[:train_test_size])\n train_time.append(time.time() - t0)\n\n t0 = time.time()\n estimator.predict(X_plot[:1000])\n test_time.append(time.time() - t0)\n\n plt.plot(sizes, train_time, 'o-', color=\"r\" if name == \"SVR\" else \"g\",\n label=\"%s (train)\" % name)\n plt.plot(sizes, test_time, 'o--', color=\"r\" if name == \"SVR\" else \"g\",\n label=\"%s (test)\" % name)\n\nplt.xscale(\"log\")\nplt.yscale(\"log\")\nplt.xlabel(\"Train size\")\nplt.ylabel(\"Time (seconds)\")\nplt.title('Execution Time')\nplt.legend(loc=\"best\")\n\n# Visualize learning curves\nplt.figure()\n\nsvr = SVR(kernel='rbf', C=1e1, gamma=0.1)\nkr = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)\ntrain_sizes, train_scores_svr, test_scores_svr = \\\n learning_curve(svr, X[:100], y[:100], train_sizes=np.linspace(0.1, 1, 10),\n scoring=\"neg_mean_squared_error\", cv=10)\ntrain_sizes_abs, train_scores_kr, test_scores_kr = \\\n learning_curve(kr, X[:100], y[:100], train_sizes=np.linspace(0.1, 1, 10),\n scoring=\"neg_mean_squared_error\", cv=10)\n\nplt.plot(train_sizes, -test_scores_svr.mean(1), 'o-', color=\"r\",\n label=\"SVR\")\nplt.plot(train_sizes, -test_scores_kr.mean(1), 'o-', color=\"g\",\n label=\"KRR\")\nplt.xlabel(\"Train size\")\nplt.ylabel(\"Mean Squared Error\")\nplt.title('Learning curves')\nplt.legend(loc=\"best\")\n\nplt.show()"
0 commit comments