Commit d99cb78

Pushing the docs to dev/ for branch: master, commit 62d205980446a1abc1065f4332fd74eee57fcf73
1 parent f41593e commit d99cb78

1,106 files changed: +3241 additions, -3425 deletions

-1.41 KB: Binary file not shown.
-1.18 KB: Binary file not shown.

dev/_downloads/grid_search_text_feature_extraction.ipynb

Lines changed: 1 addition & 1 deletion

@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"# Author: Olivier Grisel <[email protected]>\n# Peter Prettenhofer <[email protected]>\n# Mathieu Blondel <[email protected]>\n# License: BSD 3 clause\n\nfrom __future__ import print_function\n\nfrom pprint import pprint\nfrom time import time\nimport logging\n\nfrom sklearn.datasets import fetch_20newsgroups\nfrom sklearn.feature_extraction.text import CountVectorizer\nfrom sklearn.feature_extraction.text import TfidfTransformer\nfrom sklearn.linear_model import SGDClassifier\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.pipeline import Pipeline\n\nprint(__doc__)\n\n# Display progress logs on stdout\nlogging.basicConfig(level=logging.INFO,\n format='%(asctime)s %(levelname)s %(message)s')\n\n\n# #############################################################################\n# Load some categories from the training set\ncategories = [\n 'alt.atheism',\n 'talk.religion.misc',\n]\n# Uncomment the following to do the analysis on all the categories\n#categories = None\n\nprint(\"Loading 20 newsgroups dataset for categories:\")\nprint(categories)\n\ndata = fetch_20newsgroups(subset='train', categories=categories)\nprint(\"%d documents\" % len(data.filenames))\nprint(\"%d categories\" % len(data.target_names))\nprint()\n\n# #############################################################################\n# Define a pipeline combining a text feature extractor with a simple\n# classifier\npipeline = Pipeline([\n ('vect', CountVectorizer()),\n ('tfidf', TfidfTransformer()),\n ('clf', SGDClassifier(tol=1e-3)),\n])\n\n# uncommenting more parameters will give better exploring power but will\n# increase processing time in a combinatorial way\nparameters = {\n 'vect__max_df': (0.5, 0.75, 1.0),\n # 'vect__max_features': (None, 5000, 10000, 50000),\n 'vect__ngram_range': ((1, 1), (1, 2)), # unigrams or bigrams\n # 'tfidf__use_idf': (True, False),\n # 'tfidf__norm': ('l1', 'l2'),\n 'clf__max_iter': (20,),\n 'clf__alpha': (0.00001, 0.000001),\n 'clf__penalty': ('l2', 'elasticnet'),\n # 'clf__max_iter': (10, 50, 80),\n}\n\nif __name__ == \"__main__\":\n # multiprocessing requires the fork to happen in a __main__ protected\n # block\n\n # find the best parameters for both the feature extraction and the\n # classifier\n grid_search = GridSearchCV(pipeline, parameters, cv=5,\n n_jobs=-1, verbose=1)\n\n print(\"Performing grid search...\")\n print(\"pipeline:\", [name for name, _ in pipeline.steps])\n print(\"parameters:\")\n pprint(parameters)\n t0 = time()\n grid_search.fit(data.data, data.target)\n print(\"done in %0.3fs\" % (time() - t0))\n print()\n\n print(\"Best score: %0.3f\" % grid_search.best_score_)\n print(\"Best parameters set:\")\n best_parameters = grid_search.best_estimator_.get_params()\n for param_name in sorted(parameters.keys()):\n print(\"\\t%s: %r\" % (param_name, best_parameters[param_name]))"
+"# Author: Olivier Grisel <[email protected]>\n# Peter Prettenhofer <[email protected]>\n# Mathieu Blondel <[email protected]>\n# License: BSD 3 clause\nfrom pprint import pprint\nfrom time import time\nimport logging\n\nfrom sklearn.datasets import fetch_20newsgroups\nfrom sklearn.feature_extraction.text import CountVectorizer\nfrom sklearn.feature_extraction.text import TfidfTransformer\nfrom sklearn.linear_model import SGDClassifier\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.pipeline import Pipeline\n\nprint(__doc__)\n\n# Display progress logs on stdout\nlogging.basicConfig(level=logging.INFO,\n format='%(asctime)s %(levelname)s %(message)s')\n\n\n# #############################################################################\n# Load some categories from the training set\ncategories = [\n 'alt.atheism',\n 'talk.religion.misc',\n]\n# Uncomment the following to do the analysis on all the categories\n#categories = None\n\nprint(\"Loading 20 newsgroups dataset for categories:\")\nprint(categories)\n\ndata = fetch_20newsgroups(subset='train', categories=categories)\nprint(\"%d documents\" % len(data.filenames))\nprint(\"%d categories\" % len(data.target_names))\nprint()\n\n# #############################################################################\n# Define a pipeline combining a text feature extractor with a simple\n# classifier\npipeline = Pipeline([\n ('vect', CountVectorizer()),\n ('tfidf', TfidfTransformer()),\n ('clf', SGDClassifier(tol=1e-3)),\n])\n\n# uncommenting more parameters will give better exploring power but will\n# increase processing time in a combinatorial way\nparameters = {\n 'vect__max_df': (0.5, 0.75, 1.0),\n # 'vect__max_features': (None, 5000, 10000, 50000),\n 'vect__ngram_range': ((1, 1), (1, 2)), # unigrams or bigrams\n # 'tfidf__use_idf': (True, False),\n # 'tfidf__norm': ('l1', 'l2'),\n 'clf__max_iter': (20,),\n 'clf__alpha': (0.00001, 0.000001),\n 'clf__penalty': ('l2', 'elasticnet'),\n # 'clf__max_iter': (10, 50, 80),\n}\n\nif __name__ == \"__main__\":\n # multiprocessing requires the fork to happen in a __main__ protected\n # block\n\n # find the best parameters for both the feature extraction and the\n # classifier\n grid_search = GridSearchCV(pipeline, parameters, cv=5,\n n_jobs=-1, verbose=1)\n\n print(\"Performing grid search...\")\n print(\"pipeline:\", [name for name, _ in pipeline.steps])\n print(\"parameters:\")\n pprint(parameters)\n t0 = time()\n grid_search.fit(data.data, data.target)\n print(\"done in %0.3fs\" % (time() - t0))\n print()\n\n print(\"Best score: %0.3f\" % grid_search.best_score_)\n print(\"Best parameters set:\")\n best_parameters = grid_search.best_estimator_.get_params()\n for param_name in sorted(parameters.keys()):\n print(\"\\t%s: %r\" % (param_name, best_parameters[param_name]))"
 ]
 }
 ],

dev/_downloads/grid_search_text_feature_extraction.py

Lines changed: 0 additions & 3 deletions

@@ -46,9 +46,6 @@
 # Peter Prettenhofer <[email protected]>
 # Mathieu Blondel <[email protected]>
 # License: BSD 3 clause
-
-from __future__ import print_function
-
 from pprint import pprint
 from time import time
 import logging
dev/_downloads/plot_all_scaling.ipynb

Lines changed: 1 addition & 1 deletion

@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"# Author: Raghav RV <[email protected]>\n# Guillaume Lemaitre <[email protected]>\n# Thomas Unterthiner\n# License: BSD 3 clause\n\nfrom __future__ import print_function\n\nimport numpy as np\n\nimport matplotlib as mpl\nfrom matplotlib import pyplot as plt\nfrom matplotlib import cm\n\nfrom sklearn.preprocessing import MinMaxScaler\nfrom sklearn.preprocessing import minmax_scale\nfrom sklearn.preprocessing import MaxAbsScaler\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.preprocessing import RobustScaler\nfrom sklearn.preprocessing import Normalizer\nfrom sklearn.preprocessing import QuantileTransformer\nfrom sklearn.preprocessing import PowerTransformer\n\nfrom sklearn.datasets import fetch_california_housing\n\nprint(__doc__)\n\ndataset = fetch_california_housing()\nX_full, y_full = dataset.data, dataset.target\n\n# Take only 2 features to make visualization easier\n# Feature of 0 has a long tail distribution.\n# Feature 5 has a few but very large outliers.\n\nX = X_full[:, [0, 5]]\n\ndistributions = [\n ('Unscaled data', X),\n ('Data after standard scaling',\n StandardScaler().fit_transform(X)),\n ('Data after min-max scaling',\n MinMaxScaler().fit_transform(X)),\n ('Data after max-abs scaling',\n MaxAbsScaler().fit_transform(X)),\n ('Data after robust scaling',\n RobustScaler(quantile_range=(25, 75)).fit_transform(X)),\n ('Data after power transformation (Yeo-Johnson)',\n PowerTransformer(method='yeo-johnson').fit_transform(X)),\n ('Data after power transformation (Box-Cox)',\n PowerTransformer(method='box-cox').fit_transform(X)),\n ('Data after quantile transformation (gaussian pdf)',\n QuantileTransformer(output_distribution='normal')\n .fit_transform(X)),\n ('Data after quantile transformation (uniform pdf)',\n QuantileTransformer(output_distribution='uniform')\n .fit_transform(X)),\n ('Data after sample-wise L2 normalizing',\n Normalizer().fit_transform(X)),\n]\n\n# scale the output between 0 and 1 for the colorbar\ny = minmax_scale(y_full)\n\n# plasma does not exist in matplotlib < 1.5\ncmap = getattr(cm, 'plasma_r', cm.hot_r)\n\ndef create_axes(title, figsize=(16, 6)):\n fig = plt.figure(figsize=figsize)\n fig.suptitle(title)\n\n # define the axis for the first plot\n left, width = 0.1, 0.22\n bottom, height = 0.1, 0.7\n bottom_h = height + 0.15\n left_h = left + width + 0.02\n\n rect_scatter = [left, bottom, width, height]\n rect_histx = [left, bottom_h, width, 0.1]\n rect_histy = [left_h, bottom, 0.05, height]\n\n ax_scatter = plt.axes(rect_scatter)\n ax_histx = plt.axes(rect_histx)\n ax_histy = plt.axes(rect_histy)\n\n # define the axis for the zoomed-in plot\n left = width + left + 0.2\n left_h = left + width + 0.02\n\n rect_scatter = [left, bottom, width, height]\n rect_histx = [left, bottom_h, width, 0.1]\n rect_histy = [left_h, bottom, 0.05, height]\n\n ax_scatter_zoom = plt.axes(rect_scatter)\n ax_histx_zoom = plt.axes(rect_histx)\n ax_histy_zoom = plt.axes(rect_histy)\n\n # define the axis for the colorbar\n left, width = width + left + 0.13, 0.01\n\n rect_colorbar = [left, bottom, width, height]\n ax_colorbar = plt.axes(rect_colorbar)\n\n return ((ax_scatter, ax_histy, ax_histx),\n (ax_scatter_zoom, ax_histy_zoom, ax_histx_zoom),\n ax_colorbar)\n\n\ndef plot_distribution(axes, X, y, hist_nbins=50, title=\"\",\n x0_label=\"\", x1_label=\"\"):\n ax, hist_X1, hist_X0 = axes\n\n ax.set_title(title)\n ax.set_xlabel(x0_label)\n ax.set_ylabel(x1_label)\n\n # The scatter plot\n colors = cmap(y)\n ax.scatter(X[:, 0], X[:, 1], alpha=0.5, marker='o', s=5, lw=0, c=colors)\n\n # Removing the top and the right spine for aesthetics\n # make nice axis layout\n ax.spines['top'].set_visible(False)\n ax.spines['right'].set_visible(False)\n ax.get_xaxis().tick_bottom()\n ax.get_yaxis().tick_left()\n ax.spines['left'].set_position(('outward', 10))\n ax.spines['bottom'].set_position(('outward', 10))\n\n # Histogram for axis X1 (feature 5)\n hist_X1.set_ylim(ax.get_ylim())\n hist_X1.hist(X[:, 1], bins=hist_nbins, orientation='horizontal',\n color='grey', ec='grey')\n hist_X1.axis('off')\n\n # Histogram for axis X0 (feature 0)\n hist_X0.set_xlim(ax.get_xlim())\n hist_X0.hist(X[:, 0], bins=hist_nbins, orientation='vertical',\n color='grey', ec='grey')\n hist_X0.axis('off')"
+"# Author: Raghav RV <[email protected]>\n# Guillaume Lemaitre <[email protected]>\n# Thomas Unterthiner\n# License: BSD 3 clause\n\nimport numpy as np\n\nimport matplotlib as mpl\nfrom matplotlib import pyplot as plt\nfrom matplotlib import cm\n\nfrom sklearn.preprocessing import MinMaxScaler\nfrom sklearn.preprocessing import minmax_scale\nfrom sklearn.preprocessing import MaxAbsScaler\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.preprocessing import RobustScaler\nfrom sklearn.preprocessing import Normalizer\nfrom sklearn.preprocessing import QuantileTransformer\nfrom sklearn.preprocessing import PowerTransformer\n\nfrom sklearn.datasets import fetch_california_housing\n\nprint(__doc__)\n\ndataset = fetch_california_housing()\nX_full, y_full = dataset.data, dataset.target\n\n# Take only 2 features to make visualization easier\n# Feature of 0 has a long tail distribution.\n# Feature 5 has a few but very large outliers.\n\nX = X_full[:, [0, 5]]\n\ndistributions = [\n ('Unscaled data', X),\n ('Data after standard scaling',\n StandardScaler().fit_transform(X)),\n ('Data after min-max scaling',\n MinMaxScaler().fit_transform(X)),\n ('Data after max-abs scaling',\n MaxAbsScaler().fit_transform(X)),\n ('Data after robust scaling',\n RobustScaler(quantile_range=(25, 75)).fit_transform(X)),\n ('Data after power transformation (Yeo-Johnson)',\n PowerTransformer(method='yeo-johnson').fit_transform(X)),\n ('Data after power transformation (Box-Cox)',\n PowerTransformer(method='box-cox').fit_transform(X)),\n ('Data after quantile transformation (gaussian pdf)',\n QuantileTransformer(output_distribution='normal')\n .fit_transform(X)),\n ('Data after quantile transformation (uniform pdf)',\n QuantileTransformer(output_distribution='uniform')\n .fit_transform(X)),\n ('Data after sample-wise L2 normalizing',\n Normalizer().fit_transform(X)),\n]\n\n# scale the output between 0 and 1 for the colorbar\ny = minmax_scale(y_full)\n\n# plasma does not exist in matplotlib < 1.5\ncmap = getattr(cm, 'plasma_r', cm.hot_r)\n\ndef create_axes(title, figsize=(16, 6)):\n fig = plt.figure(figsize=figsize)\n fig.suptitle(title)\n\n # define the axis for the first plot\n left, width = 0.1, 0.22\n bottom, height = 0.1, 0.7\n bottom_h = height + 0.15\n left_h = left + width + 0.02\n\n rect_scatter = [left, bottom, width, height]\n rect_histx = [left, bottom_h, width, 0.1]\n rect_histy = [left_h, bottom, 0.05, height]\n\n ax_scatter = plt.axes(rect_scatter)\n ax_histx = plt.axes(rect_histx)\n ax_histy = plt.axes(rect_histy)\n\n # define the axis for the zoomed-in plot\n left = width + left + 0.2\n left_h = left + width + 0.02\n\n rect_scatter = [left, bottom, width, height]\n rect_histx = [left, bottom_h, width, 0.1]\n rect_histy = [left_h, bottom, 0.05, height]\n\n ax_scatter_zoom = plt.axes(rect_scatter)\n ax_histx_zoom = plt.axes(rect_histx)\n ax_histy_zoom = plt.axes(rect_histy)\n\n # define the axis for the colorbar\n left, width = width + left + 0.13, 0.01\n\n rect_colorbar = [left, bottom, width, height]\n ax_colorbar = plt.axes(rect_colorbar)\n\n return ((ax_scatter, ax_histy, ax_histx),\n (ax_scatter_zoom, ax_histy_zoom, ax_histx_zoom),\n ax_colorbar)\n\n\ndef plot_distribution(axes, X, y, hist_nbins=50, title=\"\",\n x0_label=\"\", x1_label=\"\"):\n ax, hist_X1, hist_X0 = axes\n\n ax.set_title(title)\n ax.set_xlabel(x0_label)\n ax.set_ylabel(x1_label)\n\n # The scatter plot\n colors = cmap(y)\n ax.scatter(X[:, 0], X[:, 1], alpha=0.5, marker='o', s=5, lw=0, c=colors)\n\n # Removing the top and the right spine for aesthetics\n # make nice axis layout\n ax.spines['top'].set_visible(False)\n ax.spines['right'].set_visible(False)\n ax.get_xaxis().tick_bottom()\n ax.get_yaxis().tick_left()\n ax.spines['left'].set_position(('outward', 10))\n ax.spines['bottom'].set_position(('outward', 10))\n\n # Histogram for axis X1 (feature 5)\n hist_X1.set_ylim(ax.get_ylim())\n hist_X1.hist(X[:, 1], bins=hist_nbins, orientation='horizontal',\n color='grey', ec='grey')\n hist_X1.axis('off')\n\n # Histogram for axis X0 (feature 0)\n hist_X0.set_xlim(ax.get_xlim())\n hist_X0.hist(X[:, 0], bins=hist_nbins, orientation='vertical',\n color='grey', ec='grey')\n hist_X0.axis('off')"
 ]
 },
 {
dev/_downloads/plot_all_scaling.py

Lines changed: 0 additions & 2 deletions

@@ -47,8 +47,6 @@
 # Thomas Unterthiner
 # License: BSD 3 clause
 
-from __future__ import print_function
-
 import numpy as np
 
 import matplotlib as mpl
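
The same two-line deletion recurs across the 1,106 changed files. As a hedged sketch (the helper name and the dev/_downloads scan below are hypothetical, not part of this commit), any leftover occurrences could be found with the standard-library ast module:

# Hypothetical checker, not from this commit: list example scripts that
# still carry the now-redundant print_function import.
import ast
import pathlib


def imports_print_function(source):
    # True if the module contains `from __future__ import print_function`.
    tree = ast.parse(source)
    return any(
        isinstance(node, ast.ImportFrom)
        and node.module == "__future__"
        and any(alias.name == "print_function" for alias in node.names)
        for node in ast.walk(tree)
    )


for py_file in pathlib.Path("dev/_downloads").rglob("*.py"):
    if imports_print_function(py_file.read_text(encoding="utf-8")):
        print(py_file)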

0 commit comments