|
40 | 40 | },
|
41 | 41 | "outputs": [],
|
42 | 42 | "source": [
|
43 |
| - "n_samples = len(digits.data)\ndata = digits.data / 16.0\ndata -= data.mean(axis=0)\n\n# We learn the digits on the first half of the digits\ndata_train, targets_train = (data[: n_samples // 2], digits.target[: n_samples // 2])\n\n\n# Now predict the value of the digit on the second half:\ndata_test, targets_test = (data[n_samples // 2 :], digits.target[n_samples // 2 :])\n# data_test = scaler.transform(data_test)\n\n# Create a classifier: a support vector classifier\nkernel_svm = svm.SVC(gamma=0.2)\nlinear_svm = svm.LinearSVC(dual=\"auto\")\n\n# create pipeline from kernel approximation\n# and linear svm\nfeature_map_fourier = RBFSampler(gamma=0.2, random_state=1)\nfeature_map_nystroem = Nystroem(gamma=0.2, random_state=1)\nfourier_approx_svm = pipeline.Pipeline(\n [(\"feature_map\", feature_map_fourier), (\"svm\", svm.LinearSVC(dual=\"auto\"))]\n)\n\nnystroem_approx_svm = pipeline.Pipeline(\n [(\"feature_map\", feature_map_nystroem), (\"svm\", svm.LinearSVC(dual=\"auto\"))]\n)\n\n# fit and predict using linear and kernel svm:\n\nkernel_svm_time = time()\nkernel_svm.fit(data_train, targets_train)\nkernel_svm_score = kernel_svm.score(data_test, targets_test)\nkernel_svm_time = time() - kernel_svm_time\n\nlinear_svm_time = time()\nlinear_svm.fit(data_train, targets_train)\nlinear_svm_score = linear_svm.score(data_test, targets_test)\nlinear_svm_time = time() - linear_svm_time\n\nsample_sizes = 30 * np.arange(1, 10)\nfourier_scores = []\nnystroem_scores = []\nfourier_times = []\nnystroem_times = []\n\nfor D in sample_sizes:\n fourier_approx_svm.set_params(feature_map__n_components=D)\n nystroem_approx_svm.set_params(feature_map__n_components=D)\n start = time()\n nystroem_approx_svm.fit(data_train, targets_train)\n nystroem_times.append(time() - start)\n\n start = time()\n fourier_approx_svm.fit(data_train, targets_train)\n fourier_times.append(time() - start)\n\n fourier_score = fourier_approx_svm.score(data_test, targets_test)\n nystroem_score = nystroem_approx_svm.score(data_test, targets_test)\n nystroem_scores.append(nystroem_score)\n fourier_scores.append(fourier_score)\n\n# plot the results:\nplt.figure(figsize=(16, 4))\naccuracy = plt.subplot(121)\n# second y axis for timings\ntimescale = plt.subplot(122)\n\naccuracy.plot(sample_sizes, nystroem_scores, label=\"Nystroem approx. kernel\")\ntimescale.plot(sample_sizes, nystroem_times, \"--\", label=\"Nystroem approx. kernel\")\n\naccuracy.plot(sample_sizes, fourier_scores, label=\"Fourier approx. kernel\")\ntimescale.plot(sample_sizes, fourier_times, \"--\", label=\"Fourier approx. kernel\")\n\n# horizontal lines for exact rbf and linear kernels:\naccuracy.plot(\n [sample_sizes[0], sample_sizes[-1]],\n [linear_svm_score, linear_svm_score],\n label=\"linear svm\",\n)\ntimescale.plot(\n [sample_sizes[0], sample_sizes[-1]],\n [linear_svm_time, linear_svm_time],\n \"--\",\n label=\"linear svm\",\n)\n\naccuracy.plot(\n [sample_sizes[0], sample_sizes[-1]],\n [kernel_svm_score, kernel_svm_score],\n label=\"rbf svm\",\n)\ntimescale.plot(\n [sample_sizes[0], sample_sizes[-1]],\n [kernel_svm_time, kernel_svm_time],\n \"--\",\n label=\"rbf svm\",\n)\n\n# vertical line for dataset dimensionality = 64\naccuracy.plot([64, 64], [0.7, 1], label=\"n_features\")\n\n# legends and labels\naccuracy.set_title(\"Classification accuracy\")\ntimescale.set_title(\"Training times\")\naccuracy.set_xlim(sample_sizes[0], sample_sizes[-1])\naccuracy.set_xticks(())\naccuracy.set_ylim(np.min(fourier_scores), 1)\ntimescale.set_xlabel(\"Sampling steps = transformed feature dimension\")\naccuracy.set_ylabel(\"Classification accuracy\")\ntimescale.set_ylabel(\"Training time in seconds\")\naccuracy.legend(loc=\"best\")\ntimescale.legend(loc=\"best\")\nplt.tight_layout()\nplt.show()" |
| 43 | + "n_samples = len(digits.data)\ndata = digits.data / 16.0\ndata -= data.mean(axis=0)\n\n# We learn the digits on the first half of the digits\ndata_train, targets_train = (data[: n_samples // 2], digits.target[: n_samples // 2])\n\n\n# Now predict the value of the digit on the second half:\ndata_test, targets_test = (data[n_samples // 2 :], digits.target[n_samples // 2 :])\n# data_test = scaler.transform(data_test)\n\n# Create a classifier: a support vector classifier\nkernel_svm = svm.SVC(gamma=0.2)\nlinear_svm = svm.LinearSVC(dual=\"auto\", random_state=42)\n\n# create pipeline from kernel approximation\n# and linear svm\nfeature_map_fourier = RBFSampler(gamma=0.2, random_state=1)\nfeature_map_nystroem = Nystroem(gamma=0.2, random_state=1)\nfourier_approx_svm = pipeline.Pipeline(\n [\n (\"feature_map\", feature_map_fourier),\n (\"svm\", svm.LinearSVC(dual=\"auto\", random_state=42)),\n ]\n)\n\nnystroem_approx_svm = pipeline.Pipeline(\n [\n (\"feature_map\", feature_map_nystroem),\n (\"svm\", svm.LinearSVC(dual=\"auto\", random_state=42)),\n ]\n)\n\n# fit and predict using linear and kernel svm:\n\nkernel_svm_time = time()\nkernel_svm.fit(data_train, targets_train)\nkernel_svm_score = kernel_svm.score(data_test, targets_test)\nkernel_svm_time = time() - kernel_svm_time\n\nlinear_svm_time = time()\nlinear_svm.fit(data_train, targets_train)\nlinear_svm_score = linear_svm.score(data_test, targets_test)\nlinear_svm_time = time() - linear_svm_time\n\nsample_sizes = 30 * np.arange(1, 10)\nfourier_scores = []\nnystroem_scores = []\nfourier_times = []\nnystroem_times = []\n\nfor D in sample_sizes:\n fourier_approx_svm.set_params(feature_map__n_components=D)\n nystroem_approx_svm.set_params(feature_map__n_components=D)\n start = time()\n nystroem_approx_svm.fit(data_train, targets_train)\n nystroem_times.append(time() - start)\n\n start = time()\n fourier_approx_svm.fit(data_train, targets_train)\n fourier_times.append(time() - start)\n\n fourier_score = fourier_approx_svm.score(data_test, targets_test)\n nystroem_score = nystroem_approx_svm.score(data_test, targets_test)\n nystroem_scores.append(nystroem_score)\n fourier_scores.append(fourier_score)\n\n# plot the results:\nplt.figure(figsize=(16, 4))\naccuracy = plt.subplot(121)\n# second y axis for timings\ntimescale = plt.subplot(122)\n\naccuracy.plot(sample_sizes, nystroem_scores, label=\"Nystroem approx. kernel\")\ntimescale.plot(sample_sizes, nystroem_times, \"--\", label=\"Nystroem approx. kernel\")\n\naccuracy.plot(sample_sizes, fourier_scores, label=\"Fourier approx. kernel\")\ntimescale.plot(sample_sizes, fourier_times, \"--\", label=\"Fourier approx. kernel\")\n\n# horizontal lines for exact rbf and linear kernels:\naccuracy.plot(\n [sample_sizes[0], sample_sizes[-1]],\n [linear_svm_score, linear_svm_score],\n label=\"linear svm\",\n)\ntimescale.plot(\n [sample_sizes[0], sample_sizes[-1]],\n [linear_svm_time, linear_svm_time],\n \"--\",\n label=\"linear svm\",\n)\n\naccuracy.plot(\n [sample_sizes[0], sample_sizes[-1]],\n [kernel_svm_score, kernel_svm_score],\n label=\"rbf svm\",\n)\ntimescale.plot(\n [sample_sizes[0], sample_sizes[-1]],\n [kernel_svm_time, kernel_svm_time],\n \"--\",\n label=\"rbf svm\",\n)\n\n# vertical line for dataset dimensionality = 64\naccuracy.plot([64, 64], [0.7, 1], label=\"n_features\")\n\n# legends and labels\naccuracy.set_title(\"Classification accuracy\")\ntimescale.set_title(\"Training times\")\naccuracy.set_xlim(sample_sizes[0], sample_sizes[-1])\naccuracy.set_xticks(())\naccuracy.set_ylim(np.min(fourier_scores), 1)\ntimescale.set_xlabel(\"Sampling steps = transformed feature dimension\")\naccuracy.set_ylabel(\"Classification accuracy\")\ntimescale.set_ylabel(\"Training time in seconds\")\naccuracy.legend(loc=\"best\")\ntimescale.legend(loc=\"best\")\nplt.tight_layout()\nplt.show()" |
44 | 44 | ]
|
45 | 45 | },
|
46 | 46 | {
|
|
58 | 58 | },
|
59 | 59 | "outputs": [],
|
60 | 60 | "source": [
|
61 |
| - "# visualize the decision surface, projected down to the first\n# two principal components of the dataset\npca = PCA(n_components=8).fit(data_train)\n\nX = pca.transform(data_train)\n\n# Generate grid along first two principal components\nmultiples = np.arange(-2, 2, 0.1)\n# steps along first component\nfirst = multiples[:, np.newaxis] * pca.components_[0, :]\n# steps along second component\nsecond = multiples[:, np.newaxis] * pca.components_[1, :]\n# combine\ngrid = first[np.newaxis, :, :] + second[:, np.newaxis, :]\nflat_grid = grid.reshape(-1, data.shape[1])\n\n# title for the plots\ntitles = [\n \"SVC with rbf kernel\",\n \"SVC (linear kernel)\\n with Fourier rbf feature map\\nn_components=100\",\n \"SVC (linear kernel)\\n with Nystroem rbf feature map\\nn_components=100\",\n]\n\nplt.figure(figsize=(18, 7.5))\nplt.rcParams.update({\"font.size\": 14})\n# predict and plot\nfor i, clf in enumerate((kernel_svm, nystroem_approx_svm, fourier_approx_svm)):\n # Plot the decision boundary. For that, we will assign a color to each\n # point in the mesh [x_min, x_max]x[y_min, y_max].\n plt.subplot(1, 3, i + 1)\n Z = clf.predict(flat_grid)\n\n # Put the result into a color plot\n Z = Z.reshape(grid.shape[:-1])\n levels = np.arange(10)\n lv_eps = 0.01 # Adjust a mapping from calculated contour levels to color.\n plt.contourf(\n multiples,\n multiples,\n Z,\n levels=levels - lv_eps,\n cmap=plt.cm.tab10,\n vmin=0,\n vmax=10,\n alpha=0.7,\n )\n plt.axis(\"off\")\n\n # Plot also the training points\n plt.scatter(\n X[:, 0],\n X[:, 1],\n c=targets_train,\n cmap=plt.cm.tab10,\n edgecolors=(0, 0, 0),\n vmin=0,\n vmax=10,\n )\n\n plt.title(titles[i])\nplt.tight_layout()\nplt.show()" |
| 61 | + "# visualize the decision surface, projected down to the first\n# two principal components of the dataset\npca = PCA(n_components=8, random_state=42).fit(data_train)\n\nX = pca.transform(data_train)\n\n# Generate grid along first two principal components\nmultiples = np.arange(-2, 2, 0.1)\n# steps along first component\nfirst = multiples[:, np.newaxis] * pca.components_[0, :]\n# steps along second component\nsecond = multiples[:, np.newaxis] * pca.components_[1, :]\n# combine\ngrid = first[np.newaxis, :, :] + second[:, np.newaxis, :]\nflat_grid = grid.reshape(-1, data.shape[1])\n\n# title for the plots\ntitles = [\n \"SVC with rbf kernel\",\n \"SVC (linear kernel)\\n with Fourier rbf feature map\\nn_components=100\",\n \"SVC (linear kernel)\\n with Nystroem rbf feature map\\nn_components=100\",\n]\n\nplt.figure(figsize=(18, 7.5))\nplt.rcParams.update({\"font.size\": 14})\n# predict and plot\nfor i, clf in enumerate((kernel_svm, nystroem_approx_svm, fourier_approx_svm)):\n # Plot the decision boundary. For that, we will assign a color to each\n # point in the mesh [x_min, x_max]x[y_min, y_max].\n plt.subplot(1, 3, i + 1)\n Z = clf.predict(flat_grid)\n\n # Put the result into a color plot\n Z = Z.reshape(grid.shape[:-1])\n levels = np.arange(10)\n lv_eps = 0.01 # Adjust a mapping from calculated contour levels to color.\n plt.contourf(\n multiples,\n multiples,\n Z,\n levels=levels - lv_eps,\n cmap=plt.cm.tab10,\n vmin=0,\n vmax=10,\n alpha=0.7,\n )\n plt.axis(\"off\")\n\n # Plot also the training points\n plt.scatter(\n X[:, 0],\n X[:, 1],\n c=targets_train,\n cmap=plt.cm.tab10,\n edgecolors=(0, 0, 0),\n vmin=0,\n vmax=10,\n )\n\n plt.title(titles[i])\nplt.tight_layout()\nplt.show()" |
62 | 62 | ]
|
63 | 63 | }
|
64 | 64 | ],
|
|
0 commit comments