"# Author: Peter Prettenhofer <
[email protected]>\n# Olivier Grisel <
[email protected]>\n# Mathieu Blondel <
[email protected]>\n# Lars Buitinck\n# License: BSD 3 clause\n\nfrom __future__ import print_function\n\nimport logging\nimport numpy as np\nfrom optparse import OptionParser\nimport sys\nfrom time import time\nimport matplotlib.pyplot as plt\n\nfrom sklearn.datasets import fetch_20newsgroups\nfrom sklearn.feature_extraction.text import TfidfVectorizer\nfrom sklearn.feature_extraction.text import HashingVectorizer\nfrom sklearn.feature_selection import SelectKBest, chi2\nfrom sklearn.linear_model import RidgeClassifier\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.svm import LinearSVC\nfrom sklearn.linear_model import SGDClassifier\nfrom sklearn.linear_model import Perceptron\nfrom sklearn.linear_model import PassiveAggressiveClassifier\nfrom sklearn.naive_bayes import BernoulliNB, MultinomialNB\nfrom sklearn.neighbors import KNeighborsClassifier\nfrom sklearn.neighbors import NearestCentroid\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.utils.extmath import density\nfrom sklearn import metrics\n\n\n# Display progress logs on stdout\nlogging.basicConfig(level=logging.INFO,\n format='%(asctime)s %(levelname)s %(message)s')\n\n\n# parse commandline arguments\nop = OptionParser()\nop.add_option(\"--report\",\n action=\"store_true\", dest=\"print_report\",\n help=\"Print a detailed classification report.\")\nop.add_option(\"--chi2_select\",\n action=\"store\", type=\"int\", dest=\"select_chi2\",\n help=\"Select some number of features using a chi-squared test\")\nop.add_option(\"--confusion_matrix\",\n action=\"store_true\", dest=\"print_cm\",\n help=\"Print the confusion matrix.\")\nop.add_option(\"--top10\",\n action=\"store_true\", dest=\"print_top10\",\n help=\"Print ten most discriminative terms per class\"\n \" for every classifier.\")\nop.add_option(\"--all_categories\",\n action=\"store_true\", dest=\"all_categories\",\n help=\"Whether to use all categories or not.\")\nop.add_option(\"--use_hashing\",\n action=\"store_true\",\n 
help=\"Use a hashing vectorizer.\")\nop.add_option(\"--n_features\",\n action=\"store\", type=int, default=2 ** 16,\n help=\"n_features when using the hashing vectorizer.\")\nop.add_option(\"--filtered\",\n action=\"store_true\",\n help=\"Remove newsgroup information that is easily overfit: \"\n \"headers, signatures, and quoting.\")\n\n\ndef is_interactive():\n return not hasattr(sys.modules['__main__ '], '__file__')\n\n# work-around for Jupyter notebook and IPython console\nargv = [] if is_interactive() else sys.argv[1:]\n(opts, args) = op.parse_args(argv)\nif len(args) > 0:\n op.error(\"this script takes no arguments.\")\n sys.exit(1)\n\nprint(__doc__)\nop.print_help()\nprint()"
# 0 commit comments