- "class ReutersParser(html_parser.HTMLParser):\n \"\"\"Utility class to parse a SGML file and yield documents one at a time.\"\"\"\n\n def __init__(self, encoding='latin-1'):\n html_parser.HTMLParser.__init__(self)\n self._reset()\n self.encoding = encoding\n\n def handle_starttag(self, tag, attrs):\n method = 'start_' + tag\n getattr(self, method, lambda x: None)(attrs)\n\n def handle_endtag(self, tag):\n method = 'end_' + tag\n getattr(self, method, lambda: None)()\n\n def _reset(self):\n self.in_title = 0\n self.in_body = 0\n self.in_topics = 0\n self.in_topic_d = 0\n self.title = \"\"\n self.body = \"\"\n self.topics = []\n self.topic_d = \"\"\n\n def parse(self, fd):\n self.docs = []\n for chunk in fd:\n self.feed(chunk.decode(self.encoding))\n for doc in self.docs:\n yield doc\n self.docs = []\n self.close()\n\n def handle_data(self, data):\n if self.in_body:\n self.body += data\n elif self.in_title:\n self.title += data\n elif self.in_topic_d:\n self.topic_d += data\n\n def start_reuters(self, attributes):\n pass\n\n def end_reuters(self):\n self.body = re.sub(r'\\s+', r' ', self.body)\n self.docs.append({'title': self.title,\n 'body': self.body,\n 'topics': self.topics})\n self._reset()\n\n def start_title(self, attributes):\n self.in_title = 1\n\n def end_title(self):\n self.in_title = 0\n\n def start_body(self, attributes):\n self.in_body = 1\n\n def end_body(self):\n self.in_body = 0\n\n def start_topics(self, attributes):\n self.in_topics = 1\n\n def end_topics(self):\n self.in_topics = 0\n\n def start_d(self, attributes):\n self.in_topic_d = 1\n\n def end_d(self):\n self.in_topic_d = 0\n self.topics.append(self.topic_d)\n self.topic_d = \"\"\n\n\ndef stream_reuters_documents(data_path=None):\n \"\"\"Iterate over documents of the Reuters dataset.\n\n The Reuters archive will automatically be downloaded and uncompressed if\n the `data_path` directory does not exist.\n\n Documents are represented as dictionaries with 'body' (str),\n 'title' (str), 'topics' (list(str)) keys.\n\n \"\"\"\n\n DOWNLOAD_URL = ('http://archive.ics.uci.edu/ml/machine-learning-databases/'\n 'reuters21578-mld/reuters21578.tar.gz')\n ARCHIVE_FILENAME = 'reuters21578.tar.gz'\n\n if data_path is None:\n data_path = os.path.join(get_data_home(), \"reuters\")\n if not os.path.exists(data_path):\n \"\"\"Download the dataset.\"\"\"\n print(\"downloading dataset (once and for all) into %s\" %\n data_path)\n os.mkdir(data_path)\n\n def progress(blocknum, bs, size):\n total_sz_mb = '%.2f MB' % (size / 1e6)\n current_sz_mb = '%.2f MB' % ((blocknum * bs) / 1e6)\n if _not_in_sphinx():\n print('\\rdownloaded %s / %s' % (current_sz_mb, total_sz_mb),\n end='')\n\n archive_path = os.path.join(data_path, ARCHIVE_FILENAME)\n urllib.request.urlretrieve(DOWNLOAD_URL, filename=archive_path,\n reporthook=progress)\n if _not_in_sphinx():\n print('\\r', end='')\n print(\"untarring Reuters dataset...\")\n tarfile.open(archive_path, 'r:gz').extractall(data_path)\n print(\"done.\")\n\n parser = ReutersParser()\n for filename in glob(os.path.join(data_path, \"*.sgm\")):\n for doc in parser.parse(open(filename, 'rb')):\n yield doc"