linuxdevhub
diff --git a/‎dev/_downloads/3409d9766d352cc9f9b169d4a799a87a/auto_examples_python.zip
496 Bytes b/‎dev/_downloads/3409d9766d352cc9f9b169d4a799a87a/auto_examples_python.zip
496 Bytes
diff --git a/‎dev/_downloads/388641d133587cc11aa26f2dbef4b950/plot_document_classification_20newsgroups.py
Lines changed: 15 additions & 9 deletions b/‎dev/_downloads/388641d133587cc11aa26f2dbef4b950/plot_document_classification_20newsgroups.py
Lines changed: 15 additions & 9 deletions
diff --git a/‎dev/_downloads/3b31bf37034a6ece04667cd422e5ff79/plot_document_classification_20newsgroups.ipynb
Lines changed: 56 additions & 2 deletions b/‎dev/_downloads/3b31bf37034a6ece04667cd422e5ff79/plot_document_classification_20newsgroups.ipynb
Lines changed: 56 additions & 2 deletions
diff --git a/‎dev/_downloads/d34667f097c619f8afda4bc936e7af21/auto_examples_jupyter.zip
1.06 KB b/‎dev/_downloads/d34667f097c619f8afda4bc936e7af21/auto_examples_jupyter.zip
1.06 KB
diff --git a/‎dev/_downloads/scikit-learn-docs.pdf
21.6 KB b/‎dev/_downloads/scikit-learn-docs.pdf
21.6 KB
diff --git a/‎dev/_images/iris.png
0 Bytes b/‎dev/_images/iris.png
0 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
82 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
82 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0011.png
82 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0011.png
82 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
114 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
114 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0031.png
114 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0031.png
114 Bytes
@@ -11,9 +11,6 @@
 The dataset used in this example is the 20 newsgroups dataset. It will be
 automatically downloaded, then cached.
 
-The bar plot indicates the accuracy, training time (normalized) and test time
-(normalized) of each classifier.
-
 """
 
 # Author: Peter Prettenhofer <peter.prettenhofer@gmail.com>
@@ -51,8 +48,6 @@
 logging.basicConfig(level=logging.INFO,
                     format='%(asctime)s %(levelname)s %(message)s')
 
-
-# parse commandline arguments
 op = OptionParser()
 op.add_option("--report",
               action="store_true", dest="print_report",
@@ -98,8 +93,12 @@ def is_interactive():
 print()
 
 
-# #############################################################################
-# Load some categories from the training set
+##############################################################################
+# Load data from the training set
+# ------------------------------------
+# Let's load data from the newsgroups dataset, which comprises around 18000
+# newsgroups posts on 20 topics split into two subsets: one for training (or
+# development) and the other one for testing (or for performance evaluation).
 if opts.all_categories:
     categories = None
 else:
@@ -200,8 +199,11 @@ def trim(s):
     return s if len(s) <= 80 else s[:77] + "..."
 
 
-# #############################################################################
+##############################################################################
 # Benchmark classifiers
+# ------------------------------------
+# We train and test 15 different classification models on the dataset
+# and get performance results for each model.
 def benchmark(clf):
     print('_' * 80)
     print("Training: ")
@@ -294,8 +296,12 @@ def benchmark(clf):
                                                   tol=1e-3))),
   ('classification', LinearSVC(penalty="l2"))])))
 
-# make some plots
 
+##############################################################################
+# Add plots
+# ------------------------------------
+# The bar plot indicates the accuracy, training time (normalized) and test time
+# (normalized) of each classifier.
 indices = np.arange(len(results))
 
 results = [[x[i] for x in results] for i in range(4)]